diff --git a/CMakeLists.txt b/CMakeLists.txt index 336b8bd06d3e4..1435859851a27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -241,7 +241,7 @@ include(CPack) # sure that we don't have any stray generated files lying around in the tree # (which would end up getting picked up by header search, instead of the correct # versions). -if( CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE ) +if( CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_CURRENT_BINARY_DIR AND NOT MSVC_IDE ) message(FATAL_ERROR "In-source builds are not allowed. CMake would overwrite the makefiles distributed with LLVM. Please create a directory and run cmake from there, passing the path @@ -435,6 +435,8 @@ endif( LLVM_USE_OPROFILE ) set(LLVM_USE_SANITIZER "" CACHE STRING "Define the sanitizer used to build binaries and tests.") +set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH + "Path to fuzzing library for linking with fuzz targets") option(LLVM_USE_SPLIT_DWARF "Use -gsplit-dwarf when compiling llvm." OFF) @@ -795,14 +797,14 @@ if(LLVM_USE_HOST_TOOLS) include(CrossCompile) endif(LLVM_USE_HOST_TOOLS) if(LLVM_TARGET_IS_CROSSCOMPILE_HOST) -# Dummy use to avoid CMake Wraning: Manually-specified variables were not used +# Dummy use to avoid CMake Warning: Manually-specified variables were not used # (this is a variable that CrossCompile sets on recursive invocations) endif() if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)") # On FreeBSD, /usr/local/* is not used by default. In order to build LLVM # with libxml2, iconv.h, etc., we must add /usr/local paths. - include_directories("/usr/local/include") + include_directories(SYSTEM "/usr/local/include") link_directories("/usr/local/lib") endif(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)") @@ -1010,3 +1012,4 @@ endif() if (MSVC) include(InstallRequiredSystemLibraries) endif() + diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT index f2b8477a27c61..8bc1c5d69f80a 100644 --- a/CODE_OWNERS.TXT +++ b/CODE_OWNERS.TXT @@ -65,7 +65,7 @@ E: qcolombet@apple.com D: Loop Strength Reduction, Register allocators N: Simon Dardis -E: simon.dardis@imgtec.com +E: simon.dardis@mips.com D: MIPS Backend (lib/Target/Mips/*) N: Duncan P. N. 
Exon Smith diff --git a/RELEASE_TESTERS.TXT b/RELEASE_TESTERS.TXT index 9a01c725fb511..0505a4aecb9d7 100644 --- a/RELEASE_TESTERS.TXT +++ b/RELEASE_TESTERS.TXT @@ -47,6 +47,6 @@ T: ARM, AArch64 O: Linux N: Simon Dardis -E: simon.dardis@imgtec.com +E: simon.dardis@mips.com T: MIPS O: Linux diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index a1a16b99eb1a4..a1b4846f19ab1 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -157,10 +157,10 @@ if( NOT PURE_WINDOWS AND NOT LLVM_USE_SANITIZER MATCHES "Memory.*") set(HAVE_TERMINFO 0) endif() - find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2 c) + find_library(ICONV_LIBRARY_PATH NAMES iconv libiconv libiconv-2) set(LLVM_LIBXML2_ENABLED 0) set(LIBXML2_FOUND 0) - if((LLVM_ENABLE_LIBXML2) AND (CMAKE_SYSTEM_NAME MATCHES "Linux") AND (ICONV_LIBRARY_PATH)) + if((LLVM_ENABLE_LIBXML2) AND ((CMAKE_SYSTEM_NAME MATCHES "Linux") AND (ICONV_LIBRARY_PATH) OR APPLE)) find_package(LibXml2) if (LIBXML2_FOUND) set(LLVM_LIBXML2_ENABLED 1) @@ -267,8 +267,11 @@ endif() check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC) if( LLVM_USING_GLIBC ) add_definitions( -D_GNU_SOURCE ) + list(APPEND CMAKE_REQUIRED_DEFINITIONS "-D_GNU_SOURCE") endif() # This check requires _GNU_SOURCE +check_symbol_exists(sched_getaffinity sched.h HAVE_SCHED_GETAFFINITY) +check_symbol_exists(CPU_COUNT sched.h HAVE_CPU_COUNT) if(HAVE_LIBPTHREAD) check_library_exists(pthread pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP) check_library_exists(pthread pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 81e7211ef9749..3952d041344b4 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -263,14 +263,14 @@ endfunction() # function(add_windows_version_resource_file OUT_VAR) set(sources ${ARGN}) - if (MSVC) + if (MSVC AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") set(resource_file ${LLVM_SOURCE_DIR}/resources/windows_version_resource.rc) if(EXISTS ${resource_file}) set(sources ${sources} ${resource_file}) source_group("Resource Files" ${resource_file}) set(windows_resource_file ${resource_file} PARENT_SCOPE) endif() - endif(MSVC) + endif(MSVC AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") set(${OUT_VAR} ${sources} PARENT_SCOPE) endfunction(add_windows_version_resource_file) @@ -894,7 +894,12 @@ endmacro(add_llvm_utility name) macro(add_llvm_fuzzer name) cmake_parse_arguments(ARG "" "DUMMY_MAIN" "" ${ARGN}) - if( LLVM_USE_SANITIZE_COVERAGE ) + if( LLVM_LIB_FUZZING_ENGINE ) + set(LLVM_OPTIONAL_SOURCES ${ARG_DUMMY_MAIN}) + add_llvm_executable(${name} ${ARG_UNPARSED_ARGUMENTS}) + target_link_libraries(${name} ${LLVM_LIB_FUZZING_ENGINE}) + set_target_properties(${name} PROPERTIES FOLDER "Fuzzers") + elseif( LLVM_USE_SANITIZE_COVERAGE ) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer") set(LLVM_OPTIONAL_SOURCES ${ARG_DUMMY_MAIN}) add_llvm_executable(${name} ${ARG_UNPARSED_ARGUMENTS}) @@ -902,7 +907,7 @@ macro(add_llvm_fuzzer name) elseif( ARG_DUMMY_MAIN ) add_llvm_executable(${name} ${ARG_DUMMY_MAIN} ${ARG_UNPARSED_ARGUMENTS}) set_target_properties(${name} PROPERTIES FOLDER "Fuzzers") -endif() + endif() endmacro() macro(add_llvm_target target_name) diff --git a/docs/AMDGPUUsage.rst b/docs/AMDGPUUsage.rst index 42131f8d1a8c3..c135aec73fc27 100644 --- a/docs/AMDGPUUsage.rst +++ b/docs/AMDGPUUsage.rst @@ -84,38 +84,38 @@ names from both the *Processor* and *Alternative Processor* can be used. 
Processor Triple APU Support Products Architecture ========== =========== ============ ===== ======= ================== - **R600** [AMD-R6xx]_ + **Radeon HD 2000/3000 Series (R600)** [AMD-RADEON-HD-2000-3000]_ -------------------------------------------------------------------- r600 r600 dGPU r630 r600 dGPU rs880 r600 dGPU rv670 r600 dGPU - **R700** [AMD-R7xx]_ + **Radeon HD 4000 Series (R700)** [AMD-RADEON-HD-4000]_ -------------------------------------------------------------------- rv710 r600 dGPU rv730 r600 dGPU rv770 r600 dGPU - **Evergreen** [AMD-Evergreen]_ + **Radeon HD 5000 Series (Evergreen)** [AMD-RADEON-HD-5000]_ -------------------------------------------------------------------- cedar r600 dGPU redwood r600 dGPU sumo r600 dGPU juniper r600 dGPU cypress r600 dGPU - **Northern Islands** [AMD-Cayman-Trinity]_ + **Radeon HD 6000 Series (Northern Islands)** [AMD-RADEON-HD-6000]_ -------------------------------------------------------------------- barts r600 dGPU turks r600 dGPU caicos r600 dGPU cayman r600 dGPU - **GCN GFX6 (Southern Islands (SI))** [AMD-Souther-Islands]_ + **GCN GFX6 (Southern Islands (SI))** [AMD-GCN-GFX6]_ -------------------------------------------------------------------- gfx600 - tahiti amdgcn dGPU gfx601 - pitcairn amdgcn dGPU - verde - oland - hainan - **GCN GFX7 (Sea Islands (CI))** [AMD-Sea-Islands]_ + **GCN GFX7 (Sea Islands (CI))** [AMD-GCN-GFX7]_ -------------------------------------------------------------------- gfx700 - bonaire amdgcn dGPU - Radeon HD 7790 - Radeon HD 8770 @@ -148,7 +148,7 @@ names from both the *Processor* and *Alternative Processor* can be used. - A4-5100 - A6-5200 - A4 Pro-3340B - **GCN GFX8 (Volcanic Islands (VI))** [AMD-Volcanic-Islands]_ + **GCN GFX8 (Volcanic Islands (VI))** [AMD-GCN-GFX8]_ -------------------------------------------------------------------- gfx800 - iceland amdgcn dGPU - FirePro S7150 - FirePro S7100 @@ -189,7 +189,7 @@ names from both the *Processor* and *Alternative Processor* can be used. \ - polaris11 amdgcn dGPU ROCm - Radeon RX 460 gfx804 amdgcn dGPU Same as gfx803 gfx810 - stoney amdgcn APU - **GCN GFX9** [AMD-Vega]_ + **GCN GFX9** [AMD-GCN-GFX9]_ -------------------------------------------------------------------- gfx900 amdgcn dGPU - Radeon Vega Frontier Edition @@ -359,47 +359,71 @@ The AMDGPU backend uses the following ELF header: .. table:: AMDGPU ELF Header :name: amdgpu-elf-header-table - ========================== ========================= + ========================== =============================== Field Value - ========================== ========================= + ========================== =============================== ``e_ident[EI_CLASS]`` ``ELFCLASS64`` ``e_ident[EI_DATA]`` ``ELFDATA2LSB`` - ``e_ident[EI_OSABI]`` ``ELFOSABI_AMDGPU_HSA`` - ``e_ident[EI_ABIVERSION]`` ``ELFABIVERSION_AMDGPU_HSA`` + ``e_ident[EI_OSABI]`` ``ELFOSABI_AMDGPU_HSA``, + ``ELFOSABI_AMDGPU_PAL`` or + ``ELFOSABI_AMDGPU_MESA3D`` + ``e_ident[EI_ABIVERSION]`` ``ELFABIVERSION_AMDGPU_HSA``, + ``ELFABIVERSION_AMDGPU_PAL`` or + ``ELFABIVERSION_AMDGPU_MESA3D`` ``e_type`` ``ET_REL`` or ``ET_DYN`` ``e_machine`` ``EM_AMDGPU`` ``e_entry`` 0 ``e_flags`` 0 - ========================== ========================= + ========================== =============================== .. .. 
table:: AMDGPU ELF Header Enumeration Values :name: amdgpu-elf-header-enumeration-values-table - ============================ ===== - Name Value - ============================ ===== - ``EM_AMDGPU`` 224 - ``ELFOSABI_AMDGPU_HSA`` 64 - ``ELFABIVERSION_AMDGPU_HSA`` 1 - ============================ ===== + =============================== ===== + Name Value + =============================== ===== + ``EM_AMDGPU`` 224 + ``ELFOSABI_AMDGPU_HSA`` 64 + ``ELFOSABI_AMDGPU_PAL`` 65 + ``ELFOSABI_AMDGPU_MESA3D`` 66 + ``ELFABIVERSION_AMDGPU_HSA`` 1 + ``ELFABIVERSION_AMDGPU_PAL`` 0 + ``ELFABIVERSION_AMDGPU_MESA3D`` 0 + =============================== ===== ``e_ident[EI_CLASS]`` - The ELF class is always ``ELFCLASS64``. The AMDGPU backend only supports 64 bit - applications. + The ELF class is always ``ELFCLASS64``. The AMDGPU backend only supports 64 + bit applications. ``e_ident[EI_DATA]`` All AMDGPU targets use ELFDATA2LSB for little-endian byte ordering. ``e_ident[EI_OSABI]`` - The AMD GPU architecture specific OS ABI of ``ELFOSABI_AMDGPU_HSA`` is used to - specify that the code object conforms to the AMD HSA runtime ABI [HSA]_. + One of the following AMD GPU architecture specific OS ABIs: + + * ``ELFOSABI_AMDGPU_HSA`` is used to specify that the code object conforms to + the AMD HSA runtime ABI [HSA]_. + + * ``ELFOSABI_AMDGPU_PAL`` is used to specify that the code object conforms to + the AMD PAL runtime ABI. + + * ``ELFOSABI_AMDGPU_MESA3D`` is used to specify that the code object conforms + to the AMD MESA runtime ABI. ``e_ident[EI_ABIVERSION]`` - The AMD GPU architecture specific OS ABI version of - ``ELFABIVERSION_AMDGPU_HSA`` is used to specify the version of AMD HSA runtime - ABI to which the code object conforms. + The ABI version of the AMD GPU architecture specific OS ABI to which the code + object conforms: + + * ``ELFABIVERSION_AMDGPU_HSA`` is used to specify the version of AMD HSA + runtime ABI. + + * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL + runtime ABI. + + * ``ELFABIVERSION_AMDGPU_MESA3D`` is used to specify the version of AMD MESA + runtime ABI. ``e_type`` Can be one of the following values: @@ -499,7 +523,7 @@ be at least 4 to indicate at least 8 byte alignment. The AMDGPU backend code object uses the following ELF note records in the ``.note`` section. The *Description* column specifies the layout of the note -record’s ``desc`` field. All fields are consecutive bytes. Note records with +record's ``desc`` field. All fields are consecutive bytes. Note records with variable size strings have a corresponding ``*_size`` field that specifies the number of bytes, including the terminating null character, in the string. The string(s) come immediately after the preceding fields. @@ -649,7 +673,7 @@ Following notations are used for specifying relocation calculations: **G** Represents the offset into the global offset table at which the relocation - entry’s symbol will reside during execution. + entry's symbol will reside during execution. **GOT** Represents the address of the global offset table. @@ -660,7 +684,12 @@ Following notations are used for specifying relocation calculations: **S** Represents the value of the symbol whose index resides in the relocation - entry. + entry. Relocations not using this must specify a symbol index of ``STN_UNDEF``. + +**B** + Represents the base address of a loaded executable or shared object which is + the difference between the ELF address and the actual load address. 
Relocations + using this are only valid in executable or shared objects. The following relocation types are supported: @@ -682,6 +711,8 @@ The following relocation types are supported: ``R_AMDGPU_GOTPCREL32_HI`` 9 ``word32`` (G + GOT + A - P) >> 32 ``R_AMDGPU_REL32_LO`` 10 ``word32`` (S + A - P) & 0xFFFFFFFF ``R_AMDGPU_REL32_HI`` 11 ``word32`` (S + A - P) >> 32 + *reserved* 12 + ``R_AMDGPU_RELATIVE64`` 13 ``word64`` B + A ========================== ===== ========== ============================== .. _amdgpu-dwarf: @@ -865,7 +896,7 @@ non-AMD key names should be prefixed by "*vendor-name*.". See :ref:`amdgpu-amdhsa-code-object-kernel-attribute-metadata-mapping-table` for the mapping definition. - "Arguments" sequence of Sequence of mappings of the + "Args" sequence of Sequence of mappings of the mapping kernel arguments. See :ref:`amdgpu-amdhsa-code-object-kernel-argument-metadata-mapping-table` for the definition of the mapping. @@ -906,6 +937,16 @@ non-AMD key names should be prefixed by "*vendor-name*.". Corresponds to the OpenCL ``vec_type_hint`` attribute. + + "RuntimeHandle" string The external symbol name + associated with a kernel. + OpenCL runtime allocates a + global buffer for the symbol + and saves the kernel's address + to it, which is used for + device side enqueueing. Only + available for device side + enqueued kernels. =================== ============== ========= ============================== .. @@ -1065,7 +1106,7 @@ non-AMD key names should be prefixed by "*vendor-name*.". .. TODO Does this apply to GlobalBuffer? - "ActualAcc" string The actual memory accesses + "ActualAccQual" string The actual memory accesses performed by the kernel on the kernel argument. Only present if "ValueKind" is "GlobalBuffer", @@ -1167,7 +1208,7 @@ non-AMD key names should be prefixed by "*vendor-name*.". registers used by each work-item for GFX6-GFX9 - "MaxFlatWorkgroupSize" integer Maximum flat + "MaxFlatWorkGroupSize" integer Maximum flat work-group size supported by the kernel in work-items. @@ -1190,7 +1231,8 @@ non-AMD key names should be prefixed by "*vendor-name*.". =================================== ============== ========= ============== String Key Value Type Required? Description =================================== ============== ========= ============== - "DebuggerABIVersion" string + "DebuggerABIVersion" sequence of + 2 integers "ReservedNumVGPRs" integer "ReservedFirstVGPR" integer "PrivateSegmentBufferSGPR" integer @@ -1198,7 +1240,7 @@ non-AMD key names should be prefixed by "*vendor-name*.". =================================== ============== ========= ============== .. TODO - Plan to remove the debug properties metadata. + Plan to remove the debug properties metadata. Kernel Dispatch ~~~~~~~~~~~~~~~ @@ -1233,7 +1275,7 @@ CPU host program, or from an HSA kernel executing on a GPU. for a memory region with the kernarg property for the kernel agent that will execute the kernel. It must be at least 16 byte aligned. 4. Kernel argument values are assigned to the kernel argument memory - allocation. The layout is defined in the *HSA Programmer’s Language Reference* + allocation. The layout is defined in the *HSA Programmer's Language Reference* [HSA]_. For AMDGPU the kernel execution directly accesses the kernel argument memory in the same way constant memory is accessed. (Note that the HSA specification allows an implementation to copy the kernel argument contents to @@ -1389,10 +1431,10 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. .. 
table:: Kernel Descriptor for GFX6-GFX9 :name: amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table - ======= ======= =============================== =========================== + ======= ======= =============================== ============================ Bits Size Field Name Description - ======= ======= =============================== =========================== - 31:0 4 bytes group_segment_fixed_size The amount of fixed local + ======= ======= =============================== ============================ + 31:0 4 bytes GroupSegmentFixedSize The amount of fixed local address space memory required for a work-group in bytes. This does not @@ -1401,7 +1443,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. space memory that may be added when the kernel is dispatched. - 63:32 4 bytes private_segment_fixed_size The amount of fixed + 63:32 4 bytes PrivateSegmentFixedSize The amount of fixed private address space memory required for a work-item in bytes. If @@ -1409,42 +1451,42 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. then additional space must be added to this value for the call stack. - 95:64 4 bytes max_flat_workgroup_size Maximum flat work-group + 95:64 4 bytes MaxFlatWorkGroupSize Maximum flat work-group size supported by the kernel in work-items. - 96 1 bit is_dynamic_call_stack Indicates if the generated + 96 1 bit IsDynamicCallStack Indicates if the generated machine code is using a dynamically sized call stack. - 97 1 bit is_xnack_enabled Indicates if the generated + 97 1 bit IsXNACKEnabled Indicates if the generated machine code is capable of suppoting XNACK. - 127:98 30 bits Reserved. Must be 0. - 191:128 8 bytes kernel_code_entry_byte_offset Byte offset (possibly + 127:98 30 bits Reserved, must be 0. + 191:128 8 bytes KernelCodeEntryByteOffset Byte offset (possibly negative) from base address of kernel descriptor to kernel's entry point instruction which must be 256 byte aligned. - 383:192 24 Reserved. Must be 0. + 383:192 24 Reserved, must be 0. bytes - 415:384 4 bytes compute_pgm_rsrc1 Compute Shader (CS) + 415:384 4 bytes ComputePgmRsrc1 Compute Shader (CS) program settings used by CP to set up ``COMPUTE_PGM_RSRC1`` configuration register. See - :ref:`amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table`. - 447:416 4 bytes compute_pgm_rsrc2 Compute Shader (CS) + :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx9-table`. + 447:416 4 bytes ComputePgmRsrc2 Compute Shader (CS) program settings used by CP to set up ``COMPUTE_PGM_RSRC2`` configuration register. See :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx9-table`. - 448 1 bit enable_sgpr_private_segment Enable the setup of the - _buffer SGPR user data registers + 448 1 bit EnableSGPRPrivateSegmentBuffer Enable the setup of the + SGPR user data registers (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). @@ -1455,55 +1497,57 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. ``compute_pgm_rsrc2.user_sgpr.user_sgpr_count``. Any requests beyond 16 will be ignored. - 449 1 bit enable_sgpr_dispatch_ptr *see above* - 450 1 bit enable_sgpr_queue_ptr *see above* - 451 1 bit enable_sgpr_kernarg_segment_ptr *see above* - 452 1 bit enable_sgpr_dispatch_id *see above* - 453 1 bit enable_sgpr_flat_scratch_init *see above* - 454 1 bit enable_sgpr_private_segment *see above* - _size - 455 1 bit enable_sgpr_grid_workgroup Not implemented in CP and - _count_X should always be 0. 
- 456 1 bit enable_sgpr_grid_workgroup Not implemented in CP and - _count_Y should always be 0. - 457 1 bit enable_sgpr_grid_workgroup Not implemented in CP and - _count_Z should always be 0. - 463:458 6 bits Reserved. Must be 0. - 511:464 4 Reserved. Must be 0. + 449 1 bit EnableSGPRDispatchPtr *see above* + 450 1 bit EnableSGPRQueuePtr *see above* + 451 1 bit EnableSGPRKernargSegmentPtr *see above* + 452 1 bit EnableSGPRDispatchID *see above* + 453 1 bit EnableSGPRFlatScratchInit *see above* + 454 1 bit EnableSGPRPrivateSegmentSize *see above* + 455 1 bit EnableSGPRGridWorkgroupCountX Not implemented in CP and + should always be 0. + 456 1 bit EnableSGPRGridWorkgroupCountY Not implemented in CP and + should always be 0. + 457 1 bit EnableSGPRGridWorkgroupCountZ Not implemented in CP and + should always be 0. + 463:458 6 bits Reserved, must be 0. + 511:464 6 Reserved, must be 0. bytes 512 **Total size 64 bytes.** - ======= =================================================================== + ======= ==================================================================== .. .. table:: compute_pgm_rsrc1 for GFX6-GFX9 - :name: amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table + :name: amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx9-table ======= ======= =============================== =========================================================================== Bits Size Field Name Description ======= ======= =============================== =========================================================================== - 5:0 6 bits granulated_workitem_vgpr_count Number of vector registers + 5:0 6 bits GRANULATED_WORKITEM_VGPR_COUNT Number of vector registers used by each work-item, granularity is device specific: GFX6-9 - roundup((max-vgpg + 1) - / 4) - 1 + - max_vgpr 1..256 + - roundup((max_vgpr + 1) + / 4) - 1 Used by CP to set up ``COMPUTE_PGM_RSRC1.VGPRS``. - 9:6 4 bits granulated_wavefront_sgpr_count Number of scalar registers + 9:6 4 bits GRANULATED_WAVEFRONT_SGPR_COUNT Number of scalar registers used by a wavefront, granularity is device specific: GFX6-8 - roundup((max-sgpg + 1) - / 8) - 1 + - max_sgpr 1..112 + - roundup((max_sgpr + 1) + / 8) - 1 GFX9 - roundup((max-sgpg + 1) - / 16) - 1 + - max_sgpr 1..112 + - roundup((max_sgpr + 1) + / 16) - 1 Includes the special SGPRs for VCC, Flat Scratch (for @@ -1515,7 +1559,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC1.SGPRS``. - 11:10 2 bits priority Must be 0. + 11:10 2 bits PRIORITY Must be 0. Start executing wavefront at the specified priority. @@ -1523,7 +1567,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. CP is responsible for filling in ``COMPUTE_PGM_RSRC1.PRIORITY``. - 13:12 2 bits float_mode_round_32 Wavefront starts execution + 13:12 2 bits FLOAT_ROUND_MODE_32 Wavefront starts execution with specified rounding mode for single (32 bit) floating point @@ -1536,7 +1580,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. - 15:14 2 bits float_mode_round_16_64 Wavefront starts execution + 15:14 2 bits FLOAT_ROUND_MODE_16_64 Wavefront starts execution with specified rounding denorm mode for half/double (16 and 64 bit) floating point @@ -1549,7 +1593,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
- 17:16 2 bits float_mode_denorm_32 Wavefront starts execution + 17:16 2 bits FLOAT_DENORM_MODE_32 Wavefront starts execution with specified denorm mode for single (32 bit) floating point @@ -1562,7 +1606,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. - 19:18 2 bits float_mode_denorm_16_64 Wavefront starts execution + 19:18 2 bits FLOAT_DENORM_MODE_16_64 Wavefront starts execution with specified denorm mode for half/double (16 and 64 bit) floating point @@ -1575,7 +1619,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC1.FLOAT_MODE``. - 20 1 bit priv Must be 0. + 20 1 bit PRIV Must be 0. Start executing wavefront in privilege trap handler @@ -1584,10 +1628,10 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. CP is responsible for filling in ``COMPUTE_PGM_RSRC1.PRIV``. - 21 1 bit enable_dx10_clamp Wavefront starts execution + 21 1 bit ENABLE_DX10_CLAMP Wavefront starts execution with DX10 clamp mode enabled. Used by the vector - ALU to force DX-10 style + ALU to force DX10 style treatment of NaN's (when set, clamp NaN to zero, otherwise pass NaN @@ -1595,7 +1639,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC1.DX10_CLAMP``. - 22 1 bit debug_mode Must be 0. + 22 1 bit DEBUG_MODE Must be 0. Start executing wavefront in single step mode. @@ -1603,7 +1647,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. CP is responsible for filling in ``COMPUTE_PGM_RSRC1.DEBUG_MODE``. - 23 1 bit enable_ieee_mode Wavefront starts execution + 23 1 bit ENABLE_IEEE_MODE Wavefront starts execution with IEEE mode enabled. Floating point opcodes that support @@ -1618,7 +1662,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC1.IEEE_MODE``. - 24 1 bit bulky Must be 0. + 24 1 bit BULKY Must be 0. Only one work-group allowed to execute on a compute @@ -1627,7 +1671,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. CP is responsible for filling in ``COMPUTE_PGM_RSRC1.BULKY``. - 25 1 bit cdbg_user Must be 0. + 25 1 bit CDBG_USER Must be 0. Flag that can be used to control debugging code. @@ -1635,7 +1679,25 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. CP is responsible for filling in ``COMPUTE_PGM_RSRC1.CDBG_USER``. - 31:26 6 bits Reserved. Must be 0. + 26 1 bit FP16_OVFL GFX6-8 + Reserved, must be 0. + GFX9 + Wavefront starts execution + with specified fp16 overflow + mode. + + - If 0, fp16 overflow generates + +/-INF values. + - If 1, fp16 overflow that is the + result of an +/-INF input value + or divide by 0 produces a +/-INF, + otherwise clamps computed + overflow to +/-MAX_FP16 as + appropriate. + + Used by CP to set up + ``COMPUTE_PGM_RSRC1.FP16_OVFL``. + 31:27 5 bits Reserved, must be 0. 32 **Total size 4 bytes** ======= =================================================================================================================== @@ -1647,14 +1709,14 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. 
======= ======= =============================== =========================================================================== Bits Size Field Name Description ======= ======= =============================== =========================================================================== - 0 1 bit enable_sgpr_private_segment Enable the setup of the - _wave_offset SGPR wave scratch offset + 0 1 bit ENABLE_SGPR_PRIVATE_SEGMENT Enable the setup of the + _WAVE_OFFSET SGPR wave scratch offset system register (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). Used by CP to set up ``COMPUTE_PGM_RSRC2.SCRATCH_EN``. - 5:1 5 bits user_sgpr_count The total number of SGPR + 5:1 5 bits USER_SGPR_COUNT The total number of SGPR user data registers requested. This number must match the number of user @@ -1662,7 +1724,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC2.USER_SGPR``. - 6 1 bit enable_trap_handler Set to 1 if code contains a + 6 1 bit ENABLE_TRAP_HANDLER Set to 1 if code contains a TRAP instruction which requires a trap handler to be enabled. @@ -1673,7 +1735,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. installed a trap handler regardless of the setting of this field. - 7 1 bit enable_sgpr_workgroup_id_x Enable the setup of the + 7 1 bit ENABLE_SGPR_WORKGROUP_ID_X Enable the setup of the system SGPR register for the work-group id in the X dimension (see @@ -1681,7 +1743,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC2.TGID_X_EN``. - 8 1 bit enable_sgpr_workgroup_id_y Enable the setup of the + 8 1 bit ENABLE_SGPR_WORKGROUP_ID_Y Enable the setup of the system SGPR register for the work-group id in the Y dimension (see @@ -1689,7 +1751,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC2.TGID_Y_EN``. - 9 1 bit enable_sgpr_workgroup_id_z Enable the setup of the + 9 1 bit ENABLE_SGPR_WORKGROUP_ID_Z Enable the setup of the system SGPR register for the work-group id in the Z dimension (see @@ -1697,14 +1759,14 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC2.TGID_Z_EN``. - 10 1 bit enable_sgpr_workgroup_info Enable the setup of the + 10 1 bit ENABLE_SGPR_WORKGROUP_INFO Enable the setup of the system SGPR register for work-group information (see :ref:`amdgpu-amdhsa-initial-kernel-execution-state`). Used by CP to set up ``COMPUTE_PGM_RSRC2.TGID_SIZE_EN``. - 12:11 2 bits enable_vgpr_workitem_id Enable the setup of the + 12:11 2 bits ENABLE_VGPR_WORKITEM_ID Enable the setup of the VGPR system registers used for the work-item ID. :ref:`amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table` @@ -1712,7 +1774,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. Used by CP to set up ``COMPUTE_PGM_RSRC2.TIDIG_CMP_CNT``. - 13 1 bit enable_exception_address_watch Must be 0. + 13 1 bit ENABLE_EXCEPTION_ADDRESS_WATCH Must be 0. Wavefront starts execution with address watch @@ -1728,7 +1790,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB`` according to what the runtime requests. - 14 1 bit enable_exception_memory Must be 0. + 14 1 bit ENABLE_EXCEPTION_MEMORY Must be 0. 
Wavefront starts execution with memory violation @@ -1747,7 +1809,7 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB`` according to what the runtime requests. - 23:15 9 bits granulated_lds_size Must be 0. + 23:15 9 bits GRANULATED_LDS_SIZE Must be 0. CP uses the rounded value from the dispatch packet, @@ -1768,8 +1830,8 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. GFX7-GFX9: roundup(lds-size / (128 * 4)) - 24 1 bit enable_exception_ieee_754_fp Wavefront starts execution - _invalid_operation with specified exceptions + 24 1 bit ENABLE_EXCEPTION_IEEE_754_FP Wavefront starts execution + _INVALID_OPERATION with specified exceptions enabled. Used by CP to set up @@ -1778,21 +1840,21 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. IEEE 754 FP Invalid Operation - 25 1 bit enable_exception_fp_denormal FP Denormal one or more - _source input operands is a + 25 1 bit ENABLE_EXCEPTION_FP_DENORMAL FP Denormal one or more + _SOURCE input operands is a denormal number - 26 1 bit enable_exception_ieee_754_fp IEEE 754 FP Division by - _division_by_zero Zero - 27 1 bit enable_exception_ieee_754_fp IEEE 754 FP FP Overflow - _overflow - 28 1 bit enable_exception_ieee_754_fp IEEE 754 FP Underflow - _underflow - 29 1 bit enable_exception_ieee_754_fp IEEE 754 FP Inexact - _inexact - 30 1 bit enable_exception_int_divide_by Integer Division by Zero - _zero (rcp_iflag_f32 instruction + 26 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP Division by + _DIVISION_BY_ZERO Zero + 27 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP FP Overflow + _OVERFLOW + 28 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP Underflow + _UNDERFLOW + 29 1 bit ENABLE_EXCEPTION_IEEE_754_FP IEEE 754 FP Inexact + _INEXACT + 30 1 bit ENABLE_EXCEPTION_INT_DIVIDE_BY Integer Division by Zero + _ZERO (rcp_iflag_f32 instruction only) - 31 1 bit Reserved. Must be 0. + 31 1 bit Reserved, must be 0. 32 **Total size 4 bytes.** ======= =================================================================================================================== @@ -1801,45 +1863,46 @@ CP microcode requires the Kernel descritor to be allocated on 64 byte alignment. .. table:: Floating Point Rounding Mode Enumeration Values :name: amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table - ===================================== ===== =============================== - Enumeration Name Value Description - ===================================== ===== =============================== - AMD_FLOAT_ROUND_MODE_NEAR_EVEN 0 Round Ties To Even - AMD_FLOAT_ROUND_MODE_PLUS_INFINITY 1 Round Toward +infinity - AMD_FLOAT_ROUND_MODE_MINUS_INFINITY 2 Round Toward -infinity - AMD_FLOAT_ROUND_MODE_ZERO 3 Round Toward 0 - ===================================== ===== =============================== + ====================================== ===== ============================== + Enumeration Name Value Description + ====================================== ===== ============================== + AMDGPU_FLOAT_ROUND_MODE_NEAR_EVEN 0 Round Ties To Even + AMDGPU_FLOAT_ROUND_MODE_PLUS_INFINITY 1 Round Toward +infinity + AMDGPU_FLOAT_ROUND_MODE_MINUS_INFINITY 2 Round Toward -infinity + AMDGPU_FLOAT_ROUND_MODE_ZERO 3 Round Toward 0 + ====================================== ===== ============================== .. .. 
table:: Floating Point Denorm Mode Enumeration Values :name: amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table - ===================================== ===== =============================== - Enumeration Name Value Description - ===================================== ===== =============================== - AMD_FLOAT_DENORM_MODE_FLUSH_SRC_DST 0 Flush Source and Destination - Denorms - AMD_FLOAT_DENORM_MODE_FLUSH_DST 1 Flush Output Denorms - AMD_FLOAT_DENORM_MODE_FLUSH_SRC 2 Flush Source Denorms - AMD_FLOAT_DENORM_MODE_FLUSH_NONE 3 No Flush - ===================================== ===== =============================== + ====================================== ===== ============================== + Enumeration Name Value Description + ====================================== ===== ============================== + AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC_DST 0 Flush Source and Destination + Denorms + AMDGPU_FLOAT_DENORM_MODE_FLUSH_DST 1 Flush Output Denorms + AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC 2 Flush Source Denorms + AMDGPU_FLOAT_DENORM_MODE_FLUSH_NONE 3 No Flush + ====================================== ===== ============================== .. .. table:: System VGPR Work-Item ID Enumeration Values :name: amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table - ===================================== ===== =============================== - Enumeration Name Value Description - ===================================== ===== =============================== - AMD_SYSTEM_VGPR_WORKITEM_ID_X 0 Set work-item X dimension ID. - AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y 1 Set work-item X and Y - dimensions ID. - AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z 2 Set work-item X, Y and Z - dimensions ID. - AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED 3 Undefined. - ===================================== ===== =============================== + ======================================== ===== ============================ + Enumeration Name Value Description + ======================================== ===== ============================ + AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X 0 Set work-item X dimension + ID. + AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y 1 Set work-item X and Y + dimensions ID. + AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z 2 Set work-item X, Y and Z + dimensions ID. + AMDGPU_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED 3 Undefined. + ======================================== ===== ============================ .. _amdgpu-amdhsa-initial-kernel-execution-state: @@ -1943,7 +2006,7 @@ SGPR register initial state is defined in The second SGPR is 32 bit byte size of a single - work-item’s scratch memory + work-item's scratch memory usage. CP obtains this from the runtime, and it is always a multiple of DWORD. @@ -2043,7 +2106,7 @@ SGPR register initial state is defined in then Work-Group Id Z 1 32 bit work-group id in Z (enable_sgpr_workgroup_id dimension of grid for _Z) wavefront. - then Work-Group Info 1 {first_wave, 14’b0000, + then Work-Group Info 1 {first_wave, 14'b0000, (enable_sgpr_workgroup ordered_append_term[10:0], _info) threadgroup_size_in_waves[5:0]} then Scratch Wave Offset 1 32 bit byte offset from base @@ -2181,9 +2244,6 @@ This section describes the mapping of LLVM memory model onto AMDGPU machine code .. TODO Update when implementation complete. - Support more relaxed OpenCL memory model to be controlled by environment - component of target triple. - The AMDGPU backend supports the memory synchronization scopes specified in :ref:`amdgpu-memory-scopes`. 
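To make the FLOAT_ROUND_MODE and FLOAT_DENORM_MODE encodings above concrete, the following minimal C++ sketch packs the enumeration values from the two tables into the four 2-bit float mode fields of compute_pgm_rsrc1 (FLOAT_ROUND_MODE_32 at bits 13:12, FLOAT_ROUND_MODE_16_64 at bits 15:14, FLOAT_DENORM_MODE_32 at bits 17:16 and FLOAT_DENORM_MODE_16_64 at bits 19:18). The helper name and enum spellings are illustrative only, not taken from any AMDGPU header::

   #include <cstdint>

   // Enumeration values from the rounding/denorm mode tables above.
   enum FloatRoundMode : uint32_t {
     AMDGPU_FLOAT_ROUND_MODE_NEAR_EVEN      = 0,
     AMDGPU_FLOAT_ROUND_MODE_PLUS_INFINITY  = 1,
     AMDGPU_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
     AMDGPU_FLOAT_ROUND_MODE_ZERO           = 3
   };

   enum FloatDenormMode : uint32_t {
     AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
     AMDGPU_FLOAT_DENORM_MODE_FLUSH_DST     = 1,
     AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC     = 2,
     AMDGPU_FLOAT_DENORM_MODE_FLUSH_NONE    = 3
   };

   // Pack the four 2-bit FLOAT_MODE fields of compute_pgm_rsrc1:
   // FLOAT_ROUND_MODE_32 (13:12), FLOAT_ROUND_MODE_16_64 (15:14),
   // FLOAT_DENORM_MODE_32 (17:16), FLOAT_DENORM_MODE_16_64 (19:18).
   constexpr uint32_t packFloatMode(FloatRoundMode Round32,
                                    FloatRoundMode Round1664,
                                    FloatDenormMode Denorm32,
                                    FloatDenormMode Denorm1664) {
     return (uint32_t(Round32)  << 12) | (uint32_t(Round1664)  << 14) |
            (uint32_t(Denorm32) << 16) | (uint32_t(Denorm1664) << 18);
   }

   // Example: round-to-nearest-even everywhere, flush single precision
   // denorms on source and destination, keep half/double denorms.
   constexpr uint32_t FloatMode =
       packFloatMode(AMDGPU_FLOAT_ROUND_MODE_NEAR_EVEN,
                     AMDGPU_FLOAT_ROUND_MODE_NEAR_EVEN,
                     AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC_DST,
                     AMDGPU_FLOAT_DENORM_MODE_FLUSH_NONE);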
@@ -2200,19 +2260,23 @@ additional ``s_waitcnt`` instructions are required to ensure registers are defined before being used. These may be able to be combined with the memory model ``s_waitcnt`` instructions as described above. -The AMDGPU memory model supports both the HSA [HSA]_ memory model, and the -OpenCL [OpenCL]_ memory model. The HSA memory model uses a single happens-before -relation for all address spaces (see :ref:`amdgpu-address-spaces`). The OpenCL -memory model which has separate happens-before relations for the global and -local address spaces, and only a fence specifying both global and local address -space joins the relationships. Since the LLVM ``memfence`` instruction does not -allow an address space to be specified the OpenCL fence has to convervatively -assume both local and global address space was specified. However, optimizations -can often be done to eliminate the additional ``s_waitcnt``instructions when -there are no intervening corresponding ``ds/flat_load/store/atomic`` memory -instructions. The code sequences in the table indicate what can be omitted for -the OpenCL memory. The target triple environment is used to determine if the -source language is OpenCL (see :ref:`amdgpu-opencl`). +The AMDGPU backend supports the following memory models: + + HSA Memory Model [HSA]_ + The HSA memory model uses a single happens-before relation for all address + spaces (see :ref:`amdgpu-address-spaces`). + OpenCL Memory Model [OpenCL]_ + The OpenCL memory model which has separate happens-before relations for the + global and local address spaces. Only a fence specifying both global and + local address spaces, and seq_cst instructions join the relationships. Since + the LLVM ``memfence`` instruction does not allow an address space to be + specified, the OpenCL fence has to conservatively assume both local and + global address spaces were specified. However, optimizations can often be + done to eliminate the additional ``s_waitcnt`` instructions when there are + no intervening memory instructions which access the corresponding address + space. The code sequences in the table indicate what can be omitted for the + OpenCL memory model. The target triple environment is used to determine if the + source language is OpenCL (see :ref:`amdgpu-opencl`). ``ds/flat_load/store/atomic`` instructions to local memory are termed LDS operations. @@ -2244,11 +2308,11 @@ For GFX6-GFX9: that for GFX7-9 ``flat_load/store/atomic`` instructions can report out of vector memory order if they access LDS memory, and out of LDS operation order if they access global memory. -* The vector memory operations access a vector L1 cache shared by all wavefronts - on a CU. Therefore, no special action is required for coherence between - wavefronts in the same work-group. A ``buffer_wbinvl1_vol`` is required for - coherence between waves executing in different work-groups as they may be - executing on different CUs. +* The vector memory operations access a single vector L1 cache shared by all + SIMDs on a CU. Therefore, no special action is required for coherence between the + lanes of a single wavefront, or for coherence between wavefronts in the same + work-group. A ``buffer_wbinvl1_vol`` is required for coherence between waves + executing in different work-groups as they may be executing on different CUs. * The scalar memory operations access a scalar L1 cache shared by all wavefronts on a group of CUs. The scalar and vector L1 caches are not coherent.
However, scalar operations are used in a restricted way so do not impact the memory @@ -2312,45 +2376,62 @@ future wave that uses the same scratch area, or a function call that creates a frame at the same address, respectively. There is no need for a ``s_dcache_inv`` as all scalar writes are write-before-read in the same thread. -Scratch backing memory (which is used for the private address space) is accessed -with MTYPE NC_NV (non-coherenent non-volatile). Since the private address space -is only accessed by a single thread, and is always write-before-read, -there is never a need to invalidate these entries from the L1 cache. Hence all -cache invalidates are done as ``*_vol`` to only invalidate the volatile cache -lines. +Scratch backing memory (which is used for the private address space) +is accessed with MTYPE NC_NV (non-coherent non-volatile). Since the private +address space is only accessed by a single thread, and is always +write-before-read, there is never a need to invalidate these entries from the L1 +cache. Hence all cache invalidates are done as ``*_vol`` to only invalidate the +volatile cache lines. On dGPU the kernarg backing memory is accessed as UC (uncached) to avoid needing -to invalidate the L2 cache. This also causes it to be treated as non-volatile -and so is not invalidated by ``*_vol``. On APU it is accessed as CC (cache -coherent) and so the L2 cache will coherent with the CPU and other agents. +to invalidate the L2 cache. This also causes it to be treated as +non-volatile and so is not invalidated by ``*_vol``. On APU it is accessed as CC +(cache coherent) and so the L2 cache will be coherent with the CPU and other +agents. .. table:: AMDHSA Memory Model Code Sequences GFX6-GFX9 :name: amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table - ============ ============ ============== ========== ======================= LLVM Instr LLVM Memory LLVM Memory AMDGPU AMDGPU Machine Code Ordering Sync Scope Address Space - ============ ============ ============== ========== ======================= + ============ ============ ============== ========== =============================== **Non-Atomic** - --------------------------------------------------------------------------- - load *none* *none* - global non-volatile - - generic 1. buffer/global/flat_load - volatile + ----------------------------------------------------------------------------------- + load *none* *none* - global - !volatile & !nontemporal + - generic + - private 1. buffer/global/flat_load + - constant + - volatile & !nontemporal + 1. buffer/global/flat_load glc=1 + + - nontemporal + + 1. buffer/global/flat_load + glc=1 slc=1 + load *none* *none* - local 1. ds_load - store *none* *none* - global 1. buffer/global/flat_store + store *none* *none* - global - !nontemporal - generic + - private 1. buffer/global/flat_store + - constant + - nontemporal + + 1. buffer/global/flat_store + glc=1 slc=1 + store *none* *none* - local 1. ds_store **Unordered Atomic** - --------------------------------------------------------------------------- + ----------------------------------------------------------------------------------- load atomic unordered *any* *any* *Same as non-atomic*. store atomic unordered *any* *any* *Same as monotonic atomic*. atomicrmw unordered *any* *any* *Same as monotonic atomic*.
**Monotonic Atomic** - --------------------------------------------------------------------------- + ----------------------------------------------------------------------------------- load atomic monotonic - singlethread - global 1. buffer/global/flat_load - wavefront - generic - workgroup @@ -2376,16 +2457,15 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. - wavefront - workgroup **Acquire Atomic** - --------------------------------------------------------------------------- + ----------------------------------------------------------------------------------- load atomic acquire - singlethread - global 1. buffer/global/ds/flat_load - wavefront - local - generic - load atomic acquire - workgroup - global 1. buffer/global_load - load atomic acquire - workgroup - local 1. ds/flat_load - - generic 2. s_waitcnt lgkmcnt(0) + load atomic acquire - workgroup - global 1. buffer/global/flat_load + load atomic acquire - workgroup - local 1. ds_load + 2. s_waitcnt lgkmcnt(0) - - If OpenCL, omit - waitcnt. + - If OpenCL, omit. - Must happen before any following global/generic @@ -2398,8 +2478,23 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. older than the load atomic value being acquired. + load atomic acquire - workgroup - generic 1. flat_load + 2. s_waitcnt lgkmcnt(0) - load atomic acquire - agent - global 1. buffer/global_load + - If OpenCL, omit. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the load + atomic value being + acquired. + load atomic acquire - agent - global 1. buffer/global/flat_load - system glc=1 2. s_waitcnt vmcnt(0) @@ -2452,12 +2547,28 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. atomicrmw acquire - singlethread - global 1. buffer/global/ds/flat_atomic - wavefront - local - generic - atomicrmw acquire - workgroup - global 1. buffer/global_atomic - atomicrmw acquire - workgroup - local 1. ds/flat_atomic - - generic 2. waitcnt lgkmcnt(0) + atomicrmw acquire - workgroup - global 1. buffer/global/flat_atomic + atomicrmw acquire - workgroup - local 1. ds_atomic + 2. waitcnt lgkmcnt(0) - - If OpenCL, omit - waitcnt. + - If OpenCL, omit. + - Must happen before + any following + global/generic + load/load + atomic/store/store + atomic/atomicrmw. + - Ensures any + following global + data read is no + older than the + atomicrmw value + being acquired. + + atomicrmw acquire - workgroup - generic 1. flat_atomic + 2. waitcnt lgkmcnt(0) + + - If OpenCL, omit. - Must happen before any following global/generic @@ -2471,7 +2582,7 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. atomicrmw value being acquired. - atomicrmw acquire - agent - global 1. buffer/global_atomic + atomicrmw acquire - agent - global 1. buffer/global/flat_atomic - system 2. s_waitcnt vmcnt(0) - Must happen before @@ -2528,9 +2639,8 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. - If OpenCL and address space is - not generic, omit - waitcnt. However, - since LLVM + not generic, omit. + - However, since LLVM currently has no address space on the fence need to @@ -2569,14 +2679,14 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. value read by the fence-paired-atomic. - fence acquire - agent *none* 1. s_waitcnt vmcnt(0) & - - system lgkmcnt(0) + fence acquire - agent *none* 1. 
s_waitcnt lgkmcnt(0) & + - system vmcnt(0) - If OpenCL and address space is not generic, omit lgkmcnt(0). - However, since LLVM + - However, since LLVM currently has no address space on the fence need to @@ -2608,7 +2718,7 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. - s_waitcnt lgkmcnt(0) must happen after any preceding - group/generic load + local/generic load atomic/atomicrmw with an equal or wider sync scope @@ -2635,8 +2745,8 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. 2. buffer_wbinvl1_vol - - Must happen before - any following global/generic + - Must happen before any + following global/generic load/load atomic/store/store atomic/atomicrmw. @@ -2646,14 +2756,13 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. global data. **Release Atomic** - --------------------------------------------------------------------------- + ----------------------------------------------------------------------------------- store atomic release - singlethread - global 1. buffer/global/ds/flat_store - wavefront - local - generic store atomic release - workgroup - global 1. s_waitcnt lgkmcnt(0) - - generic - - If OpenCL, omit - waitcnt. + + - If OpenCL, omit. - Must happen after any preceding local/generic @@ -2673,8 +2782,29 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. 2. buffer/global/flat_store store atomic release - workgroup - local 1. ds_store - store atomic release - agent - global 1. s_waitcnt vmcnt(0) & - - system - generic lgkmcnt(0) + store atomic release - workgroup - generic 1. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + store. + - Ensures that all + memory operations + to local have + completed before + performing the + store that is being + released. + + 2. flat_store + store atomic release - agent - global 1. s_waitcnt lgkmcnt(0) & + - system - generic vmcnt(0) - If OpenCL, omit lgkmcnt(0). @@ -2706,7 +2836,7 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. store. - Ensures that all memory operations - to global have + to memory have completed before performing the store that is being @@ -2717,9 +2847,8 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. - wavefront - local - generic atomicrmw release - workgroup - global 1. s_waitcnt lgkmcnt(0) - - generic - - If OpenCL, omit - waitcnt. + + - If OpenCL, omit. - Must happen after any preceding local/generic @@ -2739,8 +2868,29 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. 2. buffer/global/flat_atomic atomicrmw release - workgroup - local 1. ds_atomic - atomicrmw release - agent - global 1. s_waitcnt vmcnt(0) & - - system - generic lgkmcnt(0) + atomicrmw release - workgroup - generic 1. s_waitcnt lgkmcnt(0) + + - If OpenCL, omit. + - Must happen after + any preceding + local/generic + load/store/load + atomic/store + atomic/atomicrmw. + - Must happen before + the following + atomicrmw. + - Ensures that all + memory operations + to local have + completed before + performing the + atomicrmw that is + being released. + + 2. flat_atomic + atomicrmw release - agent - global 1. s_waitcnt lgkmcnt(0) & + - system - generic vmcnt(0) - If OpenCL, omit lgkmcnt(0). @@ -2778,23 +2928,29 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. 
the atomicrmw that is being released. - 2. buffer/global/ds/flat_atomic* + 2. buffer/global/ds/flat_atomic fence release - singlethread *none* *none* - wavefront fence release - workgroup *none* 1. s_waitcnt lgkmcnt(0) - If OpenCL and address space is - not generic, omit - waitcnt. However, - since LLVM + not generic, omit. + - However, since LLVM currently has no address space on the fence need to conservatively - always generate - (see comment for - previous fence). + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. - Must happen after any preceding local/generic @@ -2819,21 +2975,32 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. following fence-paired-atomic. - fence release - agent *none* 1. s_waitcnt vmcnt(0) & - - system lgkmcnt(0) + fence release - agent *none* 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) - If OpenCL and address space is not generic, omit lgkmcnt(0). - However, since LLVM + - If OpenCL and + address space is + local, omit + vmcnt(0). + - However, since LLVM currently has no address space on the fence need to conservatively - always generate - (see comment for - previous fence). + always generate. If + fence had an + address space then + set to address + space of OpenCL + fence flag, or to + generic if both + local and global + flags are + specified. - Could be split into separate s_waitcnt vmcnt(0) and @@ -2869,21 +3036,20 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. fence-paired-atomic). - Ensures that all memory operations - to global have + have completed before performing the following fence-paired-atomic. **Acquire-Release Atomic** - --------------------------------------------------------------------------- + ----------------------------------------------------------------------------------- atomicrmw acq_rel - singlethread - global 1. buffer/global/ds/flat_atomic - wavefront - local - generic atomicrmw acq_rel - workgroup - global 1. s_waitcnt lgkmcnt(0) - - If OpenCL, omit - waitcnt. + - If OpenCL, omit. - Must happen after any preceding local/generic @@ -2901,12 +3067,11 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. atomicrmw that is being released. - 2. buffer/global_atomic + 2. buffer/global/flat_atomic atomicrmw acq_rel - workgroup - local 1. ds_atomic 2. s_waitcnt lgkmcnt(0) - - If OpenCL, omit - waitcnt. + - If OpenCL, omit. - Must happen before any following global/generic @@ -2922,8 +3087,7 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. atomicrmw acq_rel - workgroup - generic 1. s_waitcnt lgkmcnt(0) - - If OpenCL, omit - waitcnt. + - If OpenCL, omit. - Must happen after any preceding local/generic @@ -2944,8 +3108,7 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. 2. flat_atomic 3. s_waitcnt lgkmcnt(0) - - If OpenCL, omit - waitcnt. + - If OpenCL, omit. - Must happen before any following global/generic @@ -2958,8 +3121,9 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. older than the load atomic value being acquired. - atomicrmw acq_rel - agent - global 1. s_waitcnt vmcnt(0) & - - system lgkmcnt(0) + + atomicrmw acq_rel - agent - global 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) - If OpenCL, omit lgkmcnt(0). @@ -2997,7 +3161,7 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. atomicrmw that is being released. - 2. 
buffer/global_atomic + 2. buffer/global/flat_atomic 3. s_waitcnt vmcnt(0) - Must happen before @@ -3021,8 +3185,8 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. will not see stale global data. - atomicrmw acq_rel - agent - generic 1. s_waitcnt vmcnt(0) & - - system lgkmcnt(0) + atomicrmw acq_rel - agent - generic 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) - If OpenCL, omit lgkmcnt(0). @@ -3093,8 +3257,8 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. - If OpenCL and address space is - not generic, omit - waitcnt. However, + not generic, omit. + - However, since LLVM currently has no address space on @@ -3132,8 +3296,8 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. stronger than unordered (this is termed the - fence-paired-atomic) - has completed + acquire-fence-paired-atomic + ) has completed before following global memory operations. This @@ -3153,19 +3317,19 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. stronger than unordered (this is termed the - fence-paired-atomic). - This satisfies the + release-fence-paired-atomic + ). This satisfies the requirements of release. - fence acq_rel - agent *none* 1. s_waitcnt vmcnt(0) & - - system lgkmcnt(0) + fence acq_rel - agent *none* 1. s_waitcnt lgkmcnt(0) & + - system vmcnt(0) - If OpenCL and address space is not generic, omit lgkmcnt(0). - However, since LLVM + - However, since LLVM currently has no address space on the fence need to @@ -3210,8 +3374,8 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. stronger than unordered (this is termed the - fence-paired-atomic) - has completed + acquire-fence-paired-atomic + ) has completed before invalidating the cache. This satisfies the @@ -3231,8 +3395,8 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. stronger than unordered (this is termed the - fence-paired-atomic). - This satisfies the + release-fence-paired-atomic + ). This satisfies the requirements of release. @@ -3253,13 +3417,103 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. acquire. **Sequential Consistent Atomic** - --------------------------------------------------------------------------- + ----------------------------------------------------------------------------------- load atomic seq_cst - singlethread - global *Same as corresponding - - wavefront - local load atomic acquire*. - - workgroup - generic - load atomic seq_cst - agent - global 1. s_waitcnt vmcnt(0) - - system - local - - generic - Must happen after + - wavefront - local load atomic acquire, + - generic except must generate + all instructions even + for OpenCL.* + load atomic seq_cst - workgroup - global 1. s_waitcnt lgkmcnt(0) + - generic + - Must + happen after + preceding + global/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + lgkmcnt(0) and so do + not need to be + considered.) + - Ensures any + preceding + sequential + consistent local + memory instructions + have completed + before executing + this sequentially + consistent + instruction. This + prevents reordering + a seq_cst store + followed by a + seq_cst load.
(Note + that seq_cst is + stronger than + acquire/release as + the reordering of + load acquire + followed by a store + release is + prevented by the + waitcnt of + the release, but + there is nothing + preventing a store + release followed by + load acquire from + competing out of + order.) + + 2. *Following + instructions same as + corresponding load + atomic acquire, + except must generated + all instructions even + for OpenCL.* + load atomic seq_cst - workgroup - local *Same as corresponding + load atomic acquire, + except must generated + all instructions even + for OpenCL.* + load atomic seq_cst - agent - global 1. s_waitcnt lgkmcnt(0) & + - system - generic vmcnt(0) + + - Could be split into + separate s_waitcnt + vmcnt(0) + and s_waitcnt + lgkmcnt(0) to allow + them to be + independently moved + according to the + following rules. + - waitcnt lgkmcnt(0) + must happen after + preceding + global/generic load + atomic/store + atomic/atomicrmw + with memory + ordering of seq_cst + and with equal or + wider sync scope. + (Note that seq_cst + fences have their + own s_waitcnt + lgkmcnt(0) and so do + not need to be + considered.) + - waitcnt vmcnt(0) + must happen after preceding global/generic load atomic/store @@ -3287,7 +3541,7 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. prevents reordering a seq_cst store followed by a - seq_cst load (Note + seq_cst load. (Note that seq_cst is stronger than acquire/release as @@ -3296,7 +3550,7 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. followed by a store release is prevented by the - waitcnt vmcnt(0) of + waitcnt of the release, but there is nothing preventing a store @@ -3308,24 +3562,36 @@ coherent) and so the L2 cache will coherent with the CPU and other agents. 2. *Following instructions same as corresponding load - atomic acquire*. - + atomic acquire, + except must generated + all instructions even + for OpenCL.* store atomic seq_cst - singlethread - global *Same as corresponding - - wavefront - local store atomic release*. - - workgroup - generic + - wavefront - local store atomic release, + - workgroup - generic except must generated + all instructions even + for OpenCL.* store atomic seq_cst - agent - global *Same as corresponding - - system - generic store atomic release*. + - system - generic store atomic release, + except must generated + all instructions even + for OpenCL.* atomicrmw seq_cst - singlethread - global *Same as corresponding - - wavefront - local atomicrmw acq_rel*. - - workgroup - generic + - wavefront - local atomicrmw acq_rel, + - workgroup - generic except must generated + all instructions even + for OpenCL.* atomicrmw seq_cst - agent - global *Same as corresponding - - system - generic atomicrmw acq_rel*. + - system - generic atomicrmw acq_rel, + except must generated + all instructions even + for OpenCL.* fence seq_cst - singlethread *none* *Same as corresponding - - wavefront fence acq_rel*. - - workgroup - - agent - - system - ============ ============ ============== ========== ======================= + - wavefront fence acq_rel, + - workgroup except must generated + - agent all instructions even + - system for OpenCL.* + ============ ============ ============== ========== =============================== The memory order also adds the single thread optimization constrains defined in table @@ -3466,8 +3732,7 @@ It supports AMDGCN GFX6-GFX8. This section describes general syntax for instructions and operands. 
For more information about instructions, their semantics and supported combinations of operands, refer to one of instruction set architecture manuals -[AMD-Souther-Islands]_, [AMD-Sea-Islands]_, [AMD-Volcanic-Islands]_ and -[AMD-Vega]_. +[AMD-GCN-GFX6]_, [AMD-GCN-GFX7]_, [AMD-GCN-GFX8]_ and [AMD-GCN-GFX9]_. An instruction has the following syntax (register operands are normally comma-separated while extra operands are space-separated): @@ -3736,7 +4001,7 @@ used. The default value for all keys is 0, with the following exceptions: - *kernel_code_entry_byte_offset* defaults to 256. - *wavefront_size* defaults to 6. - *kernarg_segment_alignment*, *group_segment_alignment*, and - *private_segment_alignment* default to 4. Note that alignments are specified + *private_segment_alignment* default to 4. Note that alignments are specified as a power of two, so a value of **n** means an alignment of 2^ **n**. The *.amd_kernel_code_t* directive must be placed immediately after the @@ -3783,14 +4048,14 @@ Here is an example of a minimal amd_kernel_code_t specification: Additional Documentation ======================== -.. [AMD-R6xx] `AMD R6xx shader ISA `__ -.. [AMD-R7xx] `AMD R7xx shader ISA `__ -.. [AMD-Evergreen] `AMD Evergreen shader ISA `__ -.. [AMD-Cayman-Trinity] `AMD Cayman/Trinity shader ISA `__ -.. [AMD-Souther-Islands] `AMD Southern Islands Series ISA `__ -.. [AMD-Sea-Islands] `AMD Sea Islands Series ISA `_ -.. [AMD-Volcanic-Islands] `AMD GCN3 Instruction Set Architecture `__ -.. [AMD-Vega] `AMD "Vega" Instruction Set Architecture `__ +.. [AMD-RADEON-HD-2000-3000] `AMD R6xx shader ISA `__ +.. [AMD-RADEON-HD-4000] `AMD R7xx shader ISA `__ +.. [AMD-RADEON-HD-5000] `AMD Evergreen shader ISA `__ +.. [AMD-RADEON-HD-6000] `AMD Cayman/Trinity shader ISA `__ +.. [AMD-GCN-GFX6] `AMD Southern Islands Series ISA `__ +.. [AMD-GCN-GFX7] `AMD Sea Islands Series ISA `_ +.. [AMD-GCN-GFX8] `AMD GCN3 Instruction Set Architecture `__ +.. [AMD-GCN-GFX9] `AMD "Vega" Instruction Set Architecture `__ .. [AMD-OpenCL_Programming-Guide] `AMD Accelerated Parallel Processing OpenCL Programming Guide `_ .. [AMD-APP-SDK] `AMD Accelerated Parallel Processing APP SDK Documentation `__ .. [AMD-ROCm] `ROCm: Open Platform for Development, Discovery and Education Around GPU Computing `__ @@ -3798,7 +4063,7 @@ Additional Documentation .. [HSA] `Heterogeneous System Architecture (HSA) Foundation `__ .. [ELF] `Executable and Linkable Format (ELF) `__ .. [DWARF] `DWARF Debugging Information Format `__ -.. [YAML] `YAML Ain’t Markup Language (YAML™) Version 1.2 `__ +.. [YAML] `YAML Ain't Markup Language (YAML™) Version 1.2 `__ .. [OpenCL] `The OpenCL Specification Version 2.0 `__ .. [HRF] `Heterogeneous-race-free Memory Models `__ .. [AMD-AMDGPU-Compute-Application-Binary-Interface] `AMDGPU Compute Application Binary Interface `__ diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 4437610146c45..f1f93c7a228b0 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -112,6 +112,7 @@ if (LLVM_ENABLE_SPHINX) if (${SPHINX_OUTPUT_MAN}) add_sphinx_target(man llvm) + add_sphinx_target(man llvm-dwarfdump) endif() endif() diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst index 8830c394b212f..44cc57cebafe6 100644 --- a/docs/CommandGuide/FileCheck.rst +++ b/docs/CommandGuide/FileCheck.rst @@ -397,10 +397,11 @@ All FileCheck directives take a pattern to match. For most uses of FileCheck, fixed string matching is perfectly sufficient. For some things, a more flexible form of matching is desired. 
 To support this, FileCheck allows you to specify regular expressions in matching strings,
-surrounded by double braces: ``{{yourregex}}``. Because we want to use fixed
-string matching for a majority of what we do, FileCheck has been designed to
-support mixing and matching fixed string matching with regular expressions.
-This allows you to write things like this:
+surrounded by double braces: ``{{yourregex}}``. FileCheck implements a POSIX
+regular expression matcher; it supports Extended POSIX regular expressions
+(ERE). Because we want to use fixed string matching for a majority of what we
+do, FileCheck has been designed to support mixing and matching fixed string
+matching with regular expressions. This allows you to write things like this:
 
 .. code-block:: llvm
 
@@ -434,7 +435,7 @@ The first check line matches a regex ``%[a-z]+`` and captures it into the
 variable ``REGISTER``. The second line verifies that whatever is in
 ``REGISTER`` occurs later in the file after an "``andw``". :program:`FileCheck`
 variable references are always contained in ``[[ ]]`` pairs, and their names can
-be formed with the regex ``[a-zA-Z][a-zA-Z0-9]*``. If a colon follows the name,
+be formed with the regex ``[a-zA-Z_][a-zA-Z0-9_]*``. If a colon follows the name,
 then it is a definition of the variable; otherwise, it is a use.
 
 :program:`FileCheck` variables can be defined multiple times, and uses always
diff --git a/docs/CommandGuide/llvm-dwarfdump.rst b/docs/CommandGuide/llvm-dwarfdump.rst
index 30c18adb77134..a3b62664cbe54 100644
--- a/docs/CommandGuide/llvm-dwarfdump.rst
+++ b/docs/CommandGuide/llvm-dwarfdump.rst
@@ -1,30 +1,142 @@
-llvm-dwarfdump - print contents of DWARF sections
-=================================================
+llvm-dwarfdump - dump and verify DWARF debug information
+========================================================
 
 SYNOPSIS
 --------
 
-:program:`llvm-dwarfdump` [*options*] [*filenames...*]
+:program:`llvm-dwarfdump` [*options*] [*filename ...*]
 
 DESCRIPTION
 -----------
 
-:program:`llvm-dwarfdump` parses DWARF sections in the object files
-and prints their contents in human-readable form.
+:program:`llvm-dwarfdump` parses DWARF sections in object files,
+archives, and `.dSYM` bundles and prints their contents in
+human-readable form. Only the .debug_info section is printed unless one of
+the section-specific options or :option:`--all` is specified.
 
 OPTIONS
 -------
 
-.. option:: -debug-dump=section
+.. option:: -a, --all
 
-  Specify the DWARF section to dump.
-  For example, use ``abbrev`` to dump the contents of ``.debug_abbrev`` section,
-  ``loc.dwo`` to dump the contents of ``.debug_loc.dwo`` etc.
-  See ``llvm-dwarfdump --help`` for the complete list of supported sections.
-  Use ``all`` to dump all DWARF sections. It is the default.
+  Disassemble all supported DWARF sections.
+
+.. option:: --arch=<arch>
+
+  Dump DWARF debug information for the specified CPU architecture.
+  Architectures may be specified by name or by number. This
+  option can be specified multiple times, once for each desired
+  architecture. All CPU architectures will be printed by
+  default.
+
+.. option:: -c, --show-children
+
+  Show a debug info entry's children when using
+  the :option:`--debug-info`, :option:`--find`,
+  and :option:`--name` options.
+
+.. option:: -f <name>, --find=<name>
+
+  Search for the exact text <name> in the accelerator tables
+  and print the matching debug information entries. When there
+  are no accelerator tables, or the name of the DIE you are
+  looking for is not found in the accelerator tables, try using
+  the slower but more complete :option:`--name` option.
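+  For example, one might print the entries for a function named
+  ``main`` in a hypothetical binary ``a.out``:
+
+  .. code-block:: console
+
+    $ llvm-dwarfdump --find=main a.out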
+.. option:: -F, --show-form
+
+  Show DWARF form types after the DWARF attribute types.
+
+.. option:: -h, --help
+
+  Show help and usage for this command.
+
+.. option:: -i, --ignore-case
+
+  Ignore case distinctions when searching entries by name
+  or by regular expression.
+
+.. option:: -n <pattern>, --name=<pattern>
+
+  Find and print all debug info entries whose name
+  (`DW_AT_name` attribute) matches the exact text in
+  <pattern>. Use the :option:`--regex` option to have
+  <pattern> become a regular expression for more flexible
+  pattern matching.
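+  For example, assuming a binary ``a.out``, the following would
+  print every entry whose name matches the regular expression
+  ``ma.*``:
+
+  .. code-block:: console
+
+    $ llvm-dwarfdump --regex --name='ma.*' a.out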
+.. option:: --lookup=<address>
+
+  Look up <address> in the debug information and print out the file,
+  function, block, and line table details.
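+  For example, with an illustrative address in a hypothetical
+  binary ``a.out``:
+
+  .. code-block:: console
+
+    $ llvm-dwarfdump --lookup=0x100000fb0 a.out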
+.. option:: -o <path>, --out-file=<path>
+
+  Redirect output to a file specified by <path>.
+
+.. option:: -p, --show-parents
+
+  Show a debug info entry's parent objects when using the
+  :option:`--debug-info`, :option:`--find`, and
+  :option:`--name` options.
+
+.. option:: -r <depth>, --recurse-depth=<depth>
+
+  Only recurse to a maximum depth of <depth> when dumping debug info
+  entries.
+
+.. option:: --statistics
+
+  Collect debug info quality metrics and print the results
+  as machine-readable single-line JSON output.
+
+.. option:: -x, --regex
+
+  Treat any <pattern> strings as regular expressions when searching
+  instead of just as an exact string match.
+
+.. option:: -u, --uuid
+
+  Show the UUID for each architecture.
+
+.. option:: --diff
+
+  Dump the output in a format that is more friendly for comparing
+  DWARF output from two different files.
+
+.. option:: -v, --verbose
+
+  Display verbose information when dumping. This can help to debug
+  DWARF issues.
+
+.. option:: --verify
+
+  Verify the structure of the DWARF information by checking the
+  compile unit chains, the DIE relationship graph, address
+  ranges, and more.
+
+.. option:: --version
+
+  Display the version of the tool.
+
+.. option:: --debug-abbrev, --debug-aranges, --debug-cu-index, --debug-frame [=<offset>], --debug-gnu-pubnames, --debug-gnu-pubtypes, --debug-info [=<offset>], --debug-line [=<offset>], --debug-loc [=<offset>], --debug-macro, --debug-pubnames, --debug-pubtypes, --debug-ranges, --debug-str, --debug-str-offsets, --debug-tu-index, --debug-types, --eh-frame, --gdb-index, --apple-names, --apple-types, --apple-namespaces, --apple-objc
+
+  Dump the specified DWARF section by name. Only the
+  `.debug_info` section is shown by default. Some entries
+  support adding an `=<offset>` as a way to provide an
+  optional offset of the exact entry to dump within the
+  respective section. When an offset is provided, only the
+  entry at that offset will be dumped, else the entire
+  section will be dumped. Children of items at a specific
+  offset can be dumped by also using the
+  :option:`--show-children` option where applicable.
 
 EXIT STATUS
 -----------
 
 :program:`llvm-dwarfdump` returns 0 if the input files were parsed and dumped
 successfully. Otherwise, it returns 1.
+
+SEE ALSO
+--------
+
+:manpage:`dsymutil(1)`
diff --git a/docs/FuzzingLLVM.rst b/docs/FuzzingLLVM.rst
new file mode 100644
index 0000000000000..e6ebeaf80cb47
--- /dev/null
+++ b/docs/FuzzingLLVM.rst
@@ -0,0 +1,252 @@
+================================
+Fuzzing LLVM libraries and tools
+================================
+
+.. contents::
+   :local:
+   :depth: 2
+
+Introduction
+============
+
+The LLVM tree includes a number of fuzzers for various components. These are
+built on top of :doc:`LibFuzzer <LibFuzzer>`.
+
+
+Available Fuzzers
+=================
+
+clang-fuzzer
+------------
+
+A |generic fuzzer| that tries to compile textual input as C++ code. Some of the
+bugs this fuzzer has reported are `on bugzilla`__ and `on OSS Fuzz's
+tracker`__.
+
+__ https://llvm.org/pr23057
+__ https://bugs.chromium.org/p/oss-fuzz/issues/list?q=proj-llvm+clang-fuzzer
+
+clang-proto-fuzzer
+------------------
+
+A |protobuf fuzzer| that compiles valid C++ programs generated from a protobuf
+class that describes a subset of the C++ language.
+
+This fuzzer accepts clang command line options after `ignore_remaining_args=1`.
+For example, the following command will fuzz clang with a higher optimization
+level:
+
+..
code-block:: shell + + % bin/clang-proto-fuzzer -ignore_remaining_args=1 -O3 + +clang-format-fuzzer +------------------- + +A |generic fuzzer| that runs clang-format_ on C++ text fragments. Some of the +bugs this fuzzer has reported are `on bugzilla`__ +and `on OSS Fuzz's tracker`__. + +.. _clang-format: https://clang.llvm.org/docs/ClangFormat.html +__ https://llvm.org/pr23052 +__ https://bugs.chromium.org/p/oss-fuzz/issues/list?q=proj-llvm+clang-format-fuzzer + +llvm-as-fuzzer +-------------- + +A |generic fuzzer| that tries to parse text as :doc:`LLVM assembly `. +Some of the bugs this fuzzer has reported are `on bugzilla`__. + +__ https://llvm.org/pr24639 + +llvm-dwarfdump-fuzzer +--------------------- + +A |generic fuzzer| that interprets inputs as object files and runs +:doc:`llvm-dwarfdump ` on them. Some of the bugs +this fuzzer has reported are `on OSS Fuzz's tracker`__ + +__ https://bugs.chromium.org/p/oss-fuzz/issues/list?q=proj-llvm+llvm-dwarfdump-fuzzer + +llvm-demangle-fuzzer +--------------------- + +A |generic fuzzer| for the Itanium demangler used in various LLVM tools. We've +fuzzed __cxa_demangle to death, why not fuzz LLVM's implementation of the same +function! + +llvm-isel-fuzzer +---------------- + +A |LLVM IR fuzzer| aimed at finding bugs in instruction selection. + +This fuzzer accepts flags after `ignore_remaining_args=1`. The flags match +those of :doc:`llc ` and the triple is required. For example, +the following command would fuzz AArch64 with :doc:`GlobalISel`: + +.. code-block:: shell + + % bin/llvm-isel-fuzzer -ignore_remaining_args=1 -mtriple aarch64 -global-isel -O0 + +Some flags can also be specified in the binary name itself in order to support +OSS Fuzz, which has trouble with required arguments. To do this, you can copy +or move ``llvm-isel-fuzzer`` to ``llvm-isel-fuzzer--x-y-z``, separating options +from the binary name using "--". The valid options are architecture names +(``aarch64``, ``x86_64``), optimization levels (``O0``, ``O2``), or specific +keywords, like ``gisel`` for enabling global instruction selection. In this +mode, the same example could be run like so: + +.. code-block:: shell + + % bin/llvm-isel-fuzzer--aarch64-O0-gisel + +llvm-mc-assemble-fuzzer +----------------------- + +A |generic fuzzer| that fuzzes the MC layer's assemblers by treating inputs as +target specific assembly. + +Note that this fuzzer has an unusual command line interface which is not fully +compatible with all of libFuzzer's features. Fuzzer arguments must be passed +after ``--fuzzer-args``, and any ``llc`` flags must use two dashes. For +example, to fuzz the AArch64 assembler you might use the following command: + +.. code-block:: console + + llvm-mc-fuzzer --triple=aarch64-linux-gnu --fuzzer-args -max_len=4 + +This scheme will likely change in the future. + +llvm-mc-disassemble-fuzzer +-------------------------- + +A |generic fuzzer| that fuzzes the MC layer's disassemblers by treating inputs +as assembled binary data. + +Note that this fuzzer has an unusual command line interface which is not fully +compatible with all of libFuzzer's features. See the notes above about +``llvm-mc-assemble-fuzzer`` for details. + + +.. |generic fuzzer| replace:: :ref:`generic fuzzer ` +.. |protobuf fuzzer| + replace:: :ref:`libprotobuf-mutator based fuzzer ` +.. |LLVM IR fuzzer| + replace:: :ref:`structured LLVM IR fuzzer ` + + +Mutators and Input Generators +============================= + +The inputs for a fuzz target are generated via random mutations of a +:ref:`corpus `. 
There are a few options for the kinds of +mutations that a fuzzer in LLVM might want. + +.. _fuzzing-llvm-generic: + +Generic Random Fuzzing +---------------------- + +The most basic form of input mutation is to use the built in mutators of +LibFuzzer. These simply treat the input corpus as a bag of bits and make random +mutations. This type of fuzzer is good for stressing the surface layers of a +program, and is good at testing things like lexers, parsers, or binary +protocols. + +Some of the in-tree fuzzers that use this type of mutator are `clang-fuzzer`_, +`clang-format-fuzzer`_, `llvm-as-fuzzer`_, `llvm-dwarfdump-fuzzer`_, +`llvm-mc-assemble-fuzzer`_, and `llvm-mc-disassemble-fuzzer`_. + +.. _fuzzing-llvm-protobuf: + +Structured Fuzzing using ``libprotobuf-mutator`` +------------------------------------------------ + +We can use libprotobuf-mutator_ in order to perform structured fuzzing and +stress deeper layers of programs. This works by defining a protobuf class that +translates arbitrary data into structurally interesting input. Specifically, we +use this to work with a subset of the C++ language and perform mutations that +produce valid C++ programs in order to exercise parts of clang that are more +interesting than parser error handling. + +To build this kind of fuzzer you need `protobuf`_ and its dependencies +installed, and you need to specify some extra flags when configuring the build +with :doc:`CMake `. For example, `clang-proto-fuzzer`_ can be enabled by +adding ``-DCLANG_ENABLE_PROTO_FUZZER=ON`` to the flags described in +:ref:`building-fuzzers`. + +The only in-tree fuzzer that uses ``libprotobuf-mutator`` today is +`clang-proto-fuzzer`_. + +.. _libprotobuf-mutator: https://github.com/google/libprotobuf-mutator +.. _protobuf: https://github.com/google/protobuf + +.. _fuzzing-llvm-ir: + +Structured Fuzzing of LLVM IR +----------------------------- + +We also use a more direct form of structured fuzzing for fuzzers that take +:doc:`LLVM IR ` as input. This is achieved through the ``FuzzMutate`` +library, which was `discussed at EuroLLVM 2017`_. + +The ``FuzzMutate`` library is used to structurally fuzz backends in +`llvm-isel-fuzzer`_. + +.. _discussed at EuroLLVM 2017: https://www.youtube.com/watch?v=UBbQ_s6hNgg + + +Building and Running +==================== + +.. _building-fuzzers: + +Configuring LLVM to Build Fuzzers +--------------------------------- + +Fuzzers will be built and linked to libFuzzer by default as long as you build +LLVM with sanitizer coverage enabled. You would typically also enable at least +one sanitizer to find bugs faster. The most common way to build the fuzzers is +by adding the following two flags to your CMake invocation: +``-DLLVM_USE_SANITIZER=Address -DLLVM_USE_SANITIZE_COVERAGE=On``. + +.. note:: If you have ``compiler-rt`` checked out in an LLVM tree when building + with sanitizers, you'll want to specify ``-DLLVM_BUILD_RUNTIME=Off`` + to avoid building the sanitizers themselves with sanitizers enabled. + +Continuously Running and Finding Bugs +------------------------------------- + +There used to be a public buildbot running LLVM fuzzers continuously, and while +this did find issues, it didn't have a very good way to report problems in an +actionable way. Because of this, we're moving towards using `OSS Fuzz`_ more +instead. + +You can browse the `LLVM project issue list`_ for the bugs found by +`LLVM on OSS Fuzz`_. These are also mailed to the `llvm-bugs mailing +list`_. + +.. _OSS Fuzz: https://github.com/google/oss-fuzz +.. 
_LLVM project issue list: + https://bugs.chromium.org/p/oss-fuzz/issues/list?q=Proj-llvm +.. _LLVM on OSS Fuzz: + https://github.com/google/oss-fuzz/blob/master/projects/llvm +.. _llvm-bugs mailing list: + http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs + + +Utilities for Writing Fuzzers +============================= + +There are some utilities available for writing fuzzers in LLVM. + +Some helpers for handling the command line interface are available in +``include/llvm/FuzzMutate/FuzzerCLI.h``, including functions to parse command +line options in a consistent way and to implement standalone main functions so +your fuzzer can be built and tested when not built against libFuzzer. + +There is also some handling of the CMake config for fuzzers, where you should +use the ``add_llvm_fuzzer`` to set up fuzzer targets. This function works +similarly to functions such as ``add_llvm_tool``, but they take care of linking +to LibFuzzer when appropriate and can be passed the ``DUMMY_MAIN`` argument to +enable standalone testing. diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst index 0cb415ad764e5..a90a4b05dd114 100644 --- a/docs/GettingStarted.rst +++ b/docs/GettingStarted.rst @@ -52,6 +52,12 @@ Here's the short story for getting up and running quickly with LLVM: * ``cd llvm/tools`` * ``svn co http://llvm.org/svn/llvm-project/cfe/trunk clang`` +#. Checkout Extra Clang Tools **[Optional]**: + + * ``cd where-you-want-llvm-to-live`` + * ``cd llvm/tools/clang/tools`` + * ``svn co http://llvm.org/svn/llvm-project/clang-tools-extra/trunk extra`` + #. Checkout LLD linker **[Optional]**: * ``cd where-you-want-llvm-to-live`` @@ -91,9 +97,9 @@ Here's the short story for getting up and running quickly with LLVM: #. Configure and build LLVM and Clang: - *Warning:* Make sure you've checked out *all of* the source code + *Warning:* Make sure you've checked out *all of* the source code before trying to configure with cmake. cmake does not pickup newly - added source directories in incremental builds. + added source directories in incremental builds. The build uses `CMake `_. LLVM requires CMake 3.4.3 to build. It is generally recommended to use a recent CMake, especially if you're @@ -137,8 +143,8 @@ Here's the short story for getting up and running quickly with LLVM: * CMake will generate build targets for each tool and library, and most LLVM sub-projects generate their own ``check-`` target. - * Running a serial build will be *slow*. Make sure you run a - parallel build; for ``make``, use ``make -j``. + * Running a serial build will be *slow*. Make sure you run a + parallel build; for ``make``, use ``make -j``. * For more information see `CMake `_ @@ -146,7 +152,7 @@ Here's the short story for getting up and running quickly with LLVM: `below`_. Consult the `Getting Started with LLVM`_ section for detailed information on -configuring and compiling LLVM. Go to `Directory Layout`_ to learn about the +configuring and compiling LLVM. Go to `Directory Layout`_ to learn about the layout of the source code tree. Requirements @@ -191,10 +197,10 @@ Windows x64 x86-64 Visual Studio Note that Debug builds require a lot of time and disk space. An LLVM-only build will need about 1-3 GB of space. A full build of LLVM and Clang will need around 15-20 GB of disk space. The exact space requirements will vary by system. (It -is so large because of all the debugging information and the fact that the -libraries are statically linked into multiple tools). 
+is so large because of all the debugging information and the fact that the +libraries are statically linked into multiple tools). -If you you are space-constrained, you can build only selected tools or only +If you you are space-constrained, you can build only selected tools or only selected targets. The Release build requires considerably less space. The LLVM suite *may* compile on other platforms, but it is not guaranteed to do @@ -512,43 +518,43 @@ clone of LLVM via: .. code-block:: console - % git clone http://llvm.org/git/llvm.git + % git clone https://git.llvm.org/git/llvm.git/ If you want to check out clang too, run: .. code-block:: console % cd llvm/tools - % git clone http://llvm.org/git/clang.git + % git clone https://git.llvm.org/git/clang.git/ If you want to check out compiler-rt (required to build the sanitizers), run: .. code-block:: console % cd llvm/projects - % git clone http://llvm.org/git/compiler-rt.git + % git clone https://git.llvm.org/git/compiler-rt.git/ If you want to check out libomp (required for OpenMP support), run: .. code-block:: console % cd llvm/projects - % git clone http://llvm.org/git/openmp.git + % git clone https://git.llvm.org/git/openmp.git/ If you want to check out libcxx and libcxxabi (optional), run: .. code-block:: console % cd llvm/projects - % git clone http://llvm.org/git/libcxx.git - % git clone http://llvm.org/git/libcxxabi.git + % git clone https://git.llvm.org/git/libcxx.git/ + % git clone https://git.llvm.org/git/libcxxabi.git/ If you want to check out the Test Suite Source Code (optional), run: .. code-block:: console % cd llvm/projects - % git clone http://llvm.org/git/test-suite.git + % git clone https://git.llvm.org/git/test-suite.git/ Since the upstream repository is in Subversion, you should use ``git pull --rebase`` instead of ``git pull`` to avoid generating a non-linear history @@ -622,7 +628,7 @@ To set up clone from which you can submit code using ``git-svn``, run: .. code-block:: console - % git clone http://llvm.org/git/llvm.git + % git clone https://git.llvm.org/git/llvm.git/ % cd llvm % git svn init https://llvm.org/svn/llvm-project/llvm/trunk --username= % git config svn-remote.svn.fetch :refs/remotes/origin/master @@ -630,7 +636,7 @@ To set up clone from which you can submit code using ``git-svn``, run: # If you have clang too: % cd tools - % git clone http://llvm.org/git/clang.git + % git clone https://git.llvm.org/git/clang.git/ % cd clang % git svn init https://llvm.org/svn/llvm-project/cfe/trunk --username= % git config svn-remote.svn.fetch :refs/remotes/origin/master @@ -1010,7 +1016,7 @@ Directory Layout ================ One useful source of information about the LLVM source base is the LLVM `doxygen -`_ documentation available at +`_ documentation available at ``_. The following is a brief introduction to code layout: @@ -1026,13 +1032,13 @@ Public header files exported from the LLVM library. The three main subdirectorie ``llvm/include/llvm`` - All LLVM-specific header files, and subdirectories for different portions of + All LLVM-specific header files, and subdirectories for different portions of LLVM: ``Analysis``, ``CodeGen``, ``Target``, ``Transforms``, etc... ``llvm/include/llvm/Support`` - Generic support libraries provided with LLVM but not necessarily specific to - LLVM. For example, some C++ STL utilities and a Command Line option processing + Generic support libraries provided with LLVM but not necessarily specific to + LLVM. 
For example, some C++ STL utilities and a Command Line option processing library store header files here. ``llvm/include/llvm/Config`` @@ -1045,12 +1051,12 @@ Public header files exported from the LLVM library. The three main subdirectorie ``llvm/lib`` ------------ -Most source files are here. By putting code in libraries, LLVM makes it easy to +Most source files are here. By putting code in libraries, LLVM makes it easy to share code among the `tools`_. ``llvm/lib/IR/`` - Core LLVM source files that implement core classes like Instruction and + Core LLVM source files that implement core classes like Instruction and BasicBlock. ``llvm/lib/AsmParser/`` @@ -1063,23 +1069,23 @@ share code among the `tools`_. ``llvm/lib/Analysis/`` - A variety of program analyses, such as Call Graphs, Induction Variables, + A variety of program analyses, such as Call Graphs, Induction Variables, Natural Loop Identification, etc. ``llvm/lib/Transforms/`` - IR-to-IR program transformations, such as Aggressive Dead Code Elimination, - Sparse Conditional Constant Propagation, Inlining, Loop Invariant Code Motion, + IR-to-IR program transformations, such as Aggressive Dead Code Elimination, + Sparse Conditional Constant Propagation, Inlining, Loop Invariant Code Motion, Dead Global Elimination, and many others. ``llvm/lib/Target/`` - Files describing target architectures for code generation. For example, + Files describing target architectures for code generation. For example, ``llvm/lib/Target/X86`` holds the X86 machine description. ``llvm/lib/CodeGen/`` - The major parts of the code generator: Instruction Selector, Instruction + The major parts of the code generator: Instruction Selector, Instruction Scheduling, and Register Allocation. ``llvm/lib/MC/`` @@ -1088,7 +1094,7 @@ share code among the `tools`_. ``llvm/lib/ExecutionEngine/`` - Libraries for directly executing bitcode at runtime in interpreted and + Libraries for directly executing bitcode at runtime in interpreted and JIT-compiled scenarios. ``llvm/lib/Support/`` @@ -1099,7 +1105,7 @@ share code among the `tools`_. ``llvm/projects`` ----------------- -Projects not strictly part of LLVM but shipped with LLVM. This is also the +Projects not strictly part of LLVM but shipped with LLVM. This is also the directory for creating your own LLVM-based projects which leverage the LLVM build system. @@ -1112,8 +1118,8 @@ are intended to run quickly and cover a lot of territory without being exhaustiv ``test-suite`` -------------- -A comprehensive correctness, performance, and benchmarking test suite for LLVM. -Comes in a separate Subversion module because not every LLVM user is interested +A comprehensive correctness, performance, and benchmarking test suite for LLVM. +Comes in a separate Subversion module because not every LLVM user is interested in such a comprehensive suite. For details see the :doc:`Testing Guide ` document. @@ -1194,7 +1200,7 @@ because they are code generators for parts of the infrastructure. ``emacs/`` - Emacs and XEmacs syntax highlighting for LLVM assembly files and TableGen + Emacs and XEmacs syntax highlighting for LLVM assembly files and TableGen description files. See the ``README`` for information on using them. 
``getsrcs.sh`` diff --git a/docs/GlobalISel.rst b/docs/GlobalISel.rst index c124911978c5e..8746685491c7c 100644 --- a/docs/GlobalISel.rst +++ b/docs/GlobalISel.rst @@ -503,16 +503,69 @@ The simple API consists of: This target-provided method is responsible for mutating (or replacing) a possibly-generic MI into a fully target-specific equivalent. It is also responsible for doing the necessary constraining of gvregs into the -appropriate register classes. +appropriate register classes as well as passing through COPY instructions to +the register allocator. The ``InstructionSelector`` can fold other instructions into the selected MI, by walking the use-def chain of the vreg operands. As GlobalISel is Global, this folding can occur across basic blocks. -``TODO``: -Currently, the Select pass is implemented with hand-written c++, similar to -FastISel, rather than backed by tblgen'erated pattern-matching. -We intend to eventually reuse SelectionDAG patterns. +SelectionDAG Rule Imports +^^^^^^^^^^^^^^^^^^^^^^^^^ + +TableGen will import SelectionDAG rules and provide the following function to +execute them: + + .. code-block:: c++ + + bool selectImpl(MachineInstr &MI) + +The ``--stats`` option can be used to determine what proportion of rules were +successfully imported. The easiest way to use this is to copy the +``-gen-globalisel`` tablegen command from ``ninja -v`` and modify it. + +Similarly, the ``--warn-on-skipped-patterns`` option can be used to obtain the +reasons that rules weren't imported. This can be used to focus on the most +important rejection reasons. + +PatLeaf Predicates +^^^^^^^^^^^^^^^^^^ + +PatLeafs cannot be imported because their C++ is implemented in terms of +``SDNode`` objects. PatLeafs that handle immediate predicates should be +replaced by ``ImmLeaf``, ``IntImmLeaf``, or ``FPImmLeaf`` as appropriate. + +There's no standard answer for other PatLeafs. Some standard predicates have +been baked into TableGen but this should not generally be done. + +Custom SDNodes +^^^^^^^^^^^^^^ + +Custom SDNodes should be mapped to Target Pseudos using ``GINodeEquiv``. This +will cause the instruction selector to import them but you will also need to +ensure the target pseudo is introduced to the MIR before the instruction +selector. Any preceeding pass is suitable but the legalizer will be a +particularly common choice. + +ComplexPatterns +^^^^^^^^^^^^^^^ + +ComplexPatterns cannot be imported because their C++ is implemented in terms of +``SDNode`` objects. GlobalISel versions should be defined with +``GIComplexOperandMatcher`` and mapped to ComplexPattern with +``GIComplexPatternEquiv``. + +The following predicates are useful for porting ComplexPattern: + +* isBaseWithConstantOffset() - Check for base+offset structures +* isOperandImmEqual() - Check for a particular constant +* isObviouslySafeToFold() - Check for reasons an instruction can't be sunk and folded into another. + +There are some important points for the C++ implementation: + +* Don't modify MIR in the predicate +* Renderer lambdas should capture by value to avoid use-after-free. They will be used after the predicate returns. +* Only create instructions in a renderer lambda. GlobalISel won't clean up things you create but don't use. .. _maintainability: @@ -636,3 +689,14 @@ Additionally: * ``TargetPassConfig`` --- create the passes constituting the pipeline, including additional passes not included in the :ref:`pipeline`. + +.. 
_other_resources: + +Resources +========= + +* `Global Instruction Selection - A Proposal by Quentin Colombet @LLVMDevMeeting 2015 `_ +* `Global Instruction Selection - Status by Quentin Colombet, Ahmed Bougacha, and Tim Northover @LLVMDevMeeting 2016 `_ +* `GlobalISel - LLVM's Latest Instruction Selection Framework by Diana Picus @FOSDEM17 `_ +* GlobalISel: Past, Present, and Future by Quentin Colombet and Ahmed Bougacha @LLVMDevMeeting 2017 +* Head First into GlobalISel by Daniel Sanders, Aditya Nandakumar, and Justin Bogner @LLVMDevMeeting 2017 diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 9fd7965cb51eb..99a2ffa40d60a 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -3162,14 +3162,11 @@ that does not have side effects (e.g. load and call are not supported). The following is the syntax for constant expressions: ``trunc (CST to TYPE)`` - Truncate a constant to another type. The bit size of CST must be - larger than the bit size of TYPE. Both types must be integers. + Perform the :ref:`trunc operation ` on constants. ``zext (CST to TYPE)`` - Zero extend a constant to another type. The bit size of CST must be - smaller than the bit size of TYPE. Both types must be integers. + Perform the :ref:`zext operation ` on constants. ``sext (CST to TYPE)`` - Sign extend a constant to another type. The bit size of CST must be - smaller than the bit size of TYPE. Both types must be integers. + Perform the :ref:`sext operation ` on constants. ``fptrunc (CST to TYPE)`` Truncate a floating point constant to another floating point type. The size of CST must be larger than the size of TYPE. Both types @@ -3203,19 +3200,14 @@ The following is the syntax for constant expressions: be scalars, or vectors of the same number of elements. If the value won't fit in the floating point type, the results are undefined. ``ptrtoint (CST to TYPE)`` - Convert a pointer typed constant to the corresponding integer - constant. ``TYPE`` must be an integer type. ``CST`` must be of - pointer type. The ``CST`` value is zero extended, truncated, or - unchanged to make it fit in ``TYPE``. + Perform the :ref:`ptrtoint operation ` on constants. ``inttoptr (CST to TYPE)`` - Convert an integer constant to a pointer constant. TYPE must be a - pointer type. CST must be of integer type. The CST value is zero - extended, truncated, or unchanged to make it fit in a pointer size. + Perform the :ref:`inttoptr operation ` on constants. This one is *really* dangerous! ``bitcast (CST to TYPE)`` - Convert a constant, CST, to another TYPE. The constraints of the - operands are the same as those for the :ref:`bitcast - instruction `. + Convert a constant, CST, to another TYPE. + The constraints of the operands are the same as those for the + :ref:`bitcast instruction `. ``addrspacecast (CST to TYPE)`` Convert a constant pointer or constant vector of pointer, CST, to another TYPE in a different address space. The constraints of the operands are the @@ -3228,9 +3220,9 @@ The following is the syntax for constant expressions: ``select (COND, VAL1, VAL2)`` Perform the :ref:`select operation ` on constants. ``icmp COND (VAL1, VAL2)`` - Performs the :ref:`icmp operation ` on constants. + Perform the :ref:`icmp operation ` on constants. ``fcmp COND (VAL1, VAL2)`` - Performs the :ref:`fcmp operation ` on constants. + Perform the :ref:`fcmp operation ` on constants. ``extractelement (VAL, IDX)`` Perform the :ref:`extractelement operation ` on constants. 
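For example, a minimal sketch of such constant expressions in use (the globals
``@g``, ``@p``, and ``@t`` are hypothetical):

.. code-block:: llvm

    @g = global i32 42
    ; @p is initialized with a ptrtoint constant expression over @g;
    ; no instruction is executed, the value is computed at link/compile time.
    @p = global i64 ptrtoint (i32* @g to i64)
    ; trunc on an integer constant folds directly: 300 truncated to i8 is 44.
    @t = global i8 trunc (i32 300 to i8)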
@@ -4878,6 +4870,23 @@ Example (assuming 64-bit pointers): !0 = !{ i64 0, i64 256 } !1 = !{ i64 -1, i64 -1 } +'``callees``' Metadata +^^^^^^^^^^^^^^^^^^^^^^ + +``callees`` metadata may be attached to indirect call sites. If ``callees`` +metadata is attached to a call site, and any callee is not among the set of +functions provided by the metadata, the behavior is undefined. The intent of +this metadata is to facilitate optimizations such as indirect-call promotion. +For example, in the code below, the call instruction may only target the +``add`` or ``sub`` functions: + +.. code-block:: llvm + + %result = call i64 %binop(i64 %x, i64 %y), !callees !0 + + ... + !0 = !{i64 (i64, i64)* @add, i64 (i64, i64)* @sub} + '``unpredictable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -8059,6 +8068,8 @@ The instructions in this category are the conversion instructions (casting) which all take a single operand and a type. They perform various bit conversions on the operand. +.. _i_trunc: + '``trunc .. to``' Instruction ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -8101,6 +8112,8 @@ Example: %Z = trunc i32 122 to i1 ; yields i1:false %W = trunc <2 x i16> to <2 x i8> ; yields +.. _i_zext: + '``zext .. to``' Instruction ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -8141,6 +8154,8 @@ Example: %Y = zext i1 true to i32 ; yields i32:1 %Z = zext <2 x i16> to <2 x i32> ; yields +.. _i_sext: + '``sext .. to``' Instruction ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -12262,7 +12277,7 @@ Debugger Intrinsics The LLVM debugger intrinsics (which all start with ``llvm.dbg.`` prefix), are described in the `LLVM Source Level -Debugging `_ +Debugging `_ document. Exception Handling Intrinsics @@ -12270,7 +12285,7 @@ Exception Handling Intrinsics The LLVM exception handling intrinsics (which all start with ``llvm.eh.`` prefix), are described in the `LLVM Exception -Handling `_ document. +Handling `_ document. .. _int_trampoline: diff --git a/docs/LibFuzzer.rst b/docs/LibFuzzer.rst index c4baa2127c18f..2ae84afeed84c 100644 --- a/docs/LibFuzzer.rst +++ b/docs/LibFuzzer.rst @@ -42,10 +42,10 @@ This installs the Clang binary as ``./third_party/llvm-build/Release+Asserts/bin/clang``) The libFuzzer code resides in the LLVM repository, and requires a recent Clang -compiler to build (and is used to `fuzz various parts of LLVM itself`_). -However the fuzzer itself does not (and should not) depend on any part of LLVM -infrastructure and can be used for other projects without requiring the rest -of LLVM. +compiler to build (and is used to :doc:`fuzz various parts of LLVM itself +`). However the fuzzer itself does not (and should not) depend on +any part of LLVM infrastructure and can be used for other projects without +requiring the rest of LLVM. Getting Started @@ -137,6 +137,8 @@ Finally, link with ``libFuzzer.a``:: clang -fsanitize-coverage=trace-pc-guard -fsanitize=address your_lib.cc fuzz_target.cc libFuzzer.a -o my_fuzzer +.. _libfuzzer-corpus: + Corpus ------ @@ -627,66 +629,6 @@ which was configured with ``-DLIBFUZZER_ENABLE_TESTS=ON`` flag. ninja check-fuzzer -Fuzzing components of LLVM -========================== -.. contents:: - :local: - :depth: 1 - -To build any of the LLVM fuzz targets use the build instructions above. - -clang-format-fuzzer -------------------- -The inputs are random pieces of C++-like text. - -.. code-block:: console - - ninja clang-format-fuzzer - mkdir CORPUS_DIR - ./bin/clang-format-fuzzer CORPUS_DIR - -Optionally build other kinds of binaries (ASan+Debug, MSan, UBSan, etc). 
- -Tracking bug: https://llvm.org/bugs/show_bug.cgi?id=23052 - -clang-fuzzer ------------- - -The behavior is very similar to ``clang-format-fuzzer``. - -Tracking bug: https://llvm.org/bugs/show_bug.cgi?id=23057 - -llvm-as-fuzzer --------------- - -Tracking bug: https://llvm.org/bugs/show_bug.cgi?id=24639 - -llvm-mc-fuzzer --------------- - -This tool fuzzes the MC layer. Currently it is only able to fuzz the -disassembler but it is hoped that assembly, and round-trip verification will be -added in future. - -When run in dissassembly mode, the inputs are opcodes to be disassembled. The -fuzzer will consume as many instructions as possible and will stop when it -finds an invalid instruction or runs out of data. - -Please note that the command line interface differs slightly from that of other -fuzzers. The fuzzer arguments should follow ``--fuzzer-args`` and should have -a single dash, while other arguments control the operation mode and target in a -similar manner to ``llvm-mc`` and should have two dashes. For example: - -.. code-block:: console - - llvm-mc-fuzzer --triple=aarch64-linux-gnu --disassemble --fuzzer-args -max_len=4 -jobs=10 - -Buildbot --------- - -A buildbot continuously runs the above fuzzers for LLVM components, with results -shown at http://lab.llvm.org:8011/builders/sanitizer-x86_64-linux-fuzzer . - FAQ ========================= @@ -808,4 +750,4 @@ Trophies .. _`value profile`: #value-profile .. _`caller-callee pairs`: http://clang.llvm.org/docs/SanitizerCoverage.html#caller-callee-coverage .. _BoringSSL: https://boringssl.googlesource.com/boringssl/ -.. _`fuzz various parts of LLVM itself`: `Fuzzing components of LLVM`_ + diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index d1ee80a7b8a44..719d3997594ea 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -495,7 +495,7 @@ that inherits from the ErrorInfo utility, E.g.: Error printFormattedFile(StringRef Path) { if () - return make_error(Path); + return make_error(Path); // print file contents. return Error::success(); } diff --git a/docs/XRay.rst b/docs/XRay.rst index 3009c8794a272..9e08c35880396 100644 --- a/docs/XRay.rst +++ b/docs/XRay.rst @@ -262,6 +262,8 @@ supports the following subcommands: only converts to YAML. - ``graph``: Generates a DOT graph of the function call relationships between functions found in an XRay trace. +- ``stack``: Reconstructs function call stacks from a timeline of function + calls in an XRay trace. These subcommands use various library components found as part of the XRay libraries, distributed with the LLVM distribution. These are: @@ -274,7 +276,7 @@ libraries, distributed with the LLVM distribution. These are: associated with edges and vertices. - ``llvm/XRay/InstrumentationMap.h``: A convenient tool for analyzing the instrumentation map in XRay-instrumented object files and binaries. The - ``extract`` subcommand uses this particular library. + ``extract`` and ``stack`` subcommands uses this particular library. Future Work =========== @@ -282,13 +284,17 @@ Future Work There are a number of ongoing efforts for expanding the toolset building around the XRay instrumentation system. -Trace Analysis --------------- - -We have more subcommands and modes that we're thinking of developing, in the -following forms: +Trace Analysis Tools +-------------------- -- ``stack``: Reconstruct the function call stacks in a timeline. +- Work is in progress to integrate with or develop tools to visualize findings + from an XRay trace. 
Particularly, the ``stack`` tool is being expanded to + output formats that allow graphing and exploring the duration of time in each + call stack. +- With a large instrumented binary, the size of generated XRay traces can + quickly become unwieldy. We are working on integrating pruning techniques and + heuristics for the analysis tools to sift through the traces and surface only + relevant information. More Platforms -------------- diff --git a/docs/XRayExample.rst b/docs/XRayExample.rst index fff5bbe623ed1..718b302a50327 100644 --- a/docs/XRayExample.rst +++ b/docs/XRayExample.rst @@ -195,6 +195,70 @@ Given the above two files we can re-build by providing those two files as arguments to clang as ``-fxray-always-instrument=always-instrument.txt`` or ``-fxray-never-instrument=never-instrument.txt``. +The XRay stack tool +------------------- + +Given a trace, and optionally an instrumentation map, the ``llvm-xray stack`` +command can be used to analyze a call stack graph constructed from the function +call timeline. + +The simplest way to use the command is simply to output the top stacks by call +count and time spent. + +:: + + $ llvm-xray stack xray-log.llc.5rqxkU -instr_map ./bin/llc + + Unique Stacks: 3069 + Top 10 Stacks by leaf sum: + + Sum: 9633790 + lvl function count sum + #0 main 1 58421550 + #1 compileModule(char**, llvm::LLVMContext&) 1 51440360 + #2 llvm::legacy::PassManagerImpl::run(llvm::Module&) 1 40535375 + #3 llvm::FPPassManager::runOnModule(llvm::Module&) 2 39337525 + #4 llvm::FPPassManager::runOnFunction(llvm::Function&) 6 39331465 + #5 llvm::PMDataManager::verifyPreservedAnalysis(llvm::Pass*) 399 16628590 + #6 llvm::PMTopLevelManager::findAnalysisPass(void const*) 4584 15155600 + #7 llvm::PMDataManager::findAnalysisPass(void const*, bool) 32088 9633790 + + ..etc.. + +In the default mode, identical stacks on different threads are independently +aggregated. In a multithreaded program, you may end up having identical call +stacks fill your list of top calls. + +To address this, you may specify the ``-aggregate-threads`` or +``-per-thread-stacks`` flags. ``-per-thread-stacks`` treats the thread id as an +implicit root in each call stack tree, while ``-aggregate-threads`` combines +identical stacks from all threads. + +Flame Graph Generation +---------------------- + +The ``llvm-xray stack`` tool may also be used to generate flamegraphs for +visualizing your instrumented invocations. The tool does not generate the graphs +themselves, but instead generates a format that can be used with Brendan Gregg's +FlameGraph tool, currently available on `github +`_. + +To generate output for a flamegraph, a few more options are necessary. + +- ``-all-stacks`` - Emits all of the stacks instead of just the top stacks. +- ``-stack-format`` - Choose the flamegraph output format 'flame'. +- ``-aggregation-type`` - Choose the metric to graph. + +You may pipe the command output directly to the flamegraph tool to obtain an +svg file. + +:: + + $llvm-xray stack xray-log.llc.5rqxkU -instr_map ./bin/llc -stack-format=flame -aggregation-type=time -all-stacks | \ + /path/to/FlameGraph/flamegraph.pl > flamegraph.svg + +If you open the svg in a browser, mouse events allow exploring the call stacks. + Further Exploration ------------------- diff --git a/docs/index.rst b/docs/index.rst index 212143ac79ead..955607a751cd9 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -183,6 +183,7 @@ For developers of applications which use LLVM as a library. 
ProgrammersManual Extensions LibFuzzer + FuzzingLLVM ScudoHardenedAllocator OptBisect @@ -228,6 +229,9 @@ For developers of applications which use LLVM as a library. :doc:`LibFuzzer` A library for writing in-process guided fuzzers. +:doc:`FuzzingLLVM` + Information on writing and using Fuzzers to find bugs in LLVM. + :doc:`ScudoHardenedAllocator` A library that implements a security-hardened `malloc()`. diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h index 3af7425dd268a..7705b1864dc30 100644 --- a/include/llvm-c/Transforms/IPO.h +++ b/include/llvm-c/Transforms/IPO.h @@ -34,6 +34,9 @@ void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM); /** See llvm::createConstantMergePass function. */ void LLVMAddConstantMergePass(LLVMPassManagerRef PM); +/** See llvm::createCalledValuePropagationPass function. */ +void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM); + /** See llvm::createDeadArgEliminationPass function. */ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM); diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index 9c5e392c48087..6c0b6ae78ae32 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -1119,6 +1119,21 @@ class APFloat : public APFloatBase { llvm_unreachable("Unexpected semantics"); } + /// We don't rely on operator== working on double values, as + /// it returns true for things that are clearly not equal, like -0.0 and 0.0. + /// As such, this method can be used to do an exact bit-for-bit comparison of + /// two floating point values. + /// + /// We leave the version with the double argument here because it's just so + /// convenient to write "2.0" and the like. Without this function we'd + /// have to duplicate its logic everywhere it's called. + bool isExactlyValue(double V) const { + bool ignored; + APFloat Tmp(V); + Tmp.convert(getSemantics(), APFloat::rmNearestTiesToEven, &ignored); + return bitwiseIsEqual(Tmp); + } + unsigned int convertToHexString(char *DST, unsigned int HexDigits, bool UpperCase, roundingMode RM) const { APFLOAT_DISPATCH_ON_SEMANTICS( diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h index 925ebafc3feda..5f7a769ddac44 100644 --- a/include/llvm/ADT/ArrayRef.h +++ b/include/llvm/ADT/ArrayRef.h @@ -294,7 +294,7 @@ namespace llvm { using reverse_iterator = std::reverse_iterator; /// Construct an empty MutableArrayRef. - /*implicit*/ MutableArrayRef() : ArrayRef() {} + /*implicit*/ MutableArrayRef() = default; /// Construct an empty MutableArrayRef from None. 
/*implicit*/ MutableArrayRef(NoneType) : ArrayRef() {} diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h index e68ef5f53d106..99147fec4d4c7 100644 --- a/include/llvm/ADT/BitVector.h +++ b/include/llvm/ADT/BitVector.h @@ -911,7 +911,7 @@ class BitVector { size_t getBitCapacity() const { return Bits.size() * BITWORD_SIZE; } }; -static inline size_t capacity_in_bytes(const BitVector &X) { +inline size_t capacity_in_bytes(const BitVector &X) { return X.getMemorySize(); } diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index 2c547e3b6e5f2..ba60b7972a8fc 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -1214,9 +1214,8 @@ class DenseMapIterator : DebugEpochBase::HandleBase { } }; -template -static inline size_t -capacity_in_bytes(const DenseMap &X) { +template +inline size_t capacity_in_bytes(const DenseMap &X) { return X.getMemorySize(); } diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h index f7e100bb4e12c..eb5a336990007 100644 --- a/include/llvm/ADT/PointerIntPair.h +++ b/include/llvm/ADT/PointerIntPair.h @@ -47,7 +47,7 @@ class PointerIntPair { intptr_t Value; public: - PointerIntPair() : Value(0) {} + constexpr PointerIntPair() : Value(0) {} PointerIntPair(PointerTy PtrVal, IntType IntVal) { setPointerAndInt(PtrVal, IntVal); } diff --git a/include/llvm/ADT/PointerSumType.h b/include/llvm/ADT/PointerSumType.h index 062544eedf84b..1a49e062dc2a6 100644 --- a/include/llvm/ADT/PointerSumType.h +++ b/include/llvm/ADT/PointerSumType.h @@ -65,7 +65,7 @@ template class PointerSumType { typedef detail::PointerSumTypeHelper HelperT; public: - PointerSumType() : Value(0) {} + constexpr PointerSumType() : Value(0) {} /// A typed constructor for a specific tagged member of the sum type. template diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index 6c238df284aa9..1d1eb601a334b 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -902,6 +902,13 @@ auto partition(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range)) { return std::partition(std::begin(Range), std::end(Range), P); } +/// Provide wrappers to std::lower_bound which take ranges instead of having to +/// pass begin/end explicitly. +template +auto lower_bound(R &&Range, ForwardIt I) -> decltype(std::begin(Range)) { + return std::lower_bound(std::begin(Range), std::end(Range), I); +} + /// \brief Given a range of type R, iterate the entire range and return a /// SmallVector with elements of the vector. This is useful, for example, /// when you want to iterate a range and then sort the results. diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index 87283729cb440..78ea613af693b 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -15,6 +15,7 @@ #ifndef LLVM_ADT_SMALLPTRSET_H #define LLVM_ADT_SMALLPTRSET_H +#include "llvm/ADT/EpochTracker.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ReverseIteration.h" #include "llvm/Support/type_traits.h" @@ -46,7 +47,7 @@ namespace llvm { /// (-2), to allow deletion. The hash table is resized when the table is 3/4 or /// more. When this happens, the table is doubled in size. 
/// -class SmallPtrSetImplBase { +class SmallPtrSetImplBase : public DebugEpochBase { friend class SmallPtrSetIteratorImpl; protected: @@ -92,6 +93,7 @@ class SmallPtrSetImplBase { size_type size() const { return NumNonEmpty - NumTombstones; } void clear() { + incrementEpoch(); // If the capacity of the array is huge, and the # elements used is small, // shrink the array. if (!isSmall()) { @@ -138,12 +140,14 @@ class SmallPtrSetImplBase { if (LastTombstone != nullptr) { *LastTombstone = Ptr; --NumTombstones; + incrementEpoch(); return std::make_pair(LastTombstone, true); } // Nope, there isn't. If we stay small, just 'pushback' now. if (NumNonEmpty < CurArraySize) { SmallArray[NumNonEmpty++] = Ptr; + incrementEpoch(); return std::make_pair(SmallArray + (NumNonEmpty - 1), true); } // Otherwise, hit the big set case, which will call grow. @@ -259,8 +263,9 @@ class SmallPtrSetIteratorImpl { }; /// SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet. -template -class SmallPtrSetIterator : public SmallPtrSetIteratorImpl { +template +class SmallPtrSetIterator : public SmallPtrSetIteratorImpl, + DebugEpochBase::HandleBase { using PtrTraits = PointerLikeTypeTraits; public: @@ -270,12 +275,14 @@ class SmallPtrSetIterator : public SmallPtrSetIteratorImpl { using difference_type = std::ptrdiff_t; using iterator_category = std::forward_iterator_tag; - explicit SmallPtrSetIterator(const void *const *BP, const void *const *E) - : SmallPtrSetIteratorImpl(BP, E) {} + explicit SmallPtrSetIterator(const void *const *BP, const void *const *E, + const DebugEpochBase &Epoch) + : SmallPtrSetIteratorImpl(BP, E), DebugEpochBase::HandleBase(&Epoch) {} // Most methods provided by baseclass. const PtrTy operator*() const { + assert(isHandleInSync() && "invalid iterator access!"); if (shouldReverseIterate()) { assert(Bucket > End); return PtrTraits::getFromVoidPointer(const_cast(Bucket[-1])); @@ -285,6 +292,7 @@ class SmallPtrSetIterator : public SmallPtrSetIteratorImpl { } inline SmallPtrSetIterator& operator++() { // Preincrement + assert(isHandleInSync() && "invalid iterator access!"); if (shouldReverseIterate()) { --Bucket; RetreatIfNotValid(); @@ -397,8 +405,8 @@ class SmallPtrSetImpl : public SmallPtrSetImplBase { /// Create an iterator that dereferences to same place as the given pointer. iterator makeIterator(const void *const *P) const { if (shouldReverseIterate()) - return iterator(P == EndPointer() ? CurArray : P + 1, CurArray); - return iterator(P, EndPointer()); + return iterator(P == EndPointer() ? CurArray : P + 1, CurArray, *this); + return iterator(P, EndPointer(), *this); } }; diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index b3c2d836d167e..a9ac98d1ad4c9 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -927,8 +927,8 @@ class SmallVector : public SmallVectorImpl { } }; -template -static inline size_t capacity_in_bytes(const SmallVector &X) { +template +inline size_t capacity_in_bytes(const SmallVector &X) { return X.capacity_in_bytes(); } diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index cc32bf43f29c8..a01246f910cbe 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -33,18 +33,16 @@ class raw_ostream; /// hexdigit - Return the hexadecimal character for the /// given number \p X (which should be less than 16). 
-static inline char hexdigit(unsigned X, bool LowerCase = false) { +inline char hexdigit(unsigned X, bool LowerCase = false) { const char HexChar = LowerCase ? 'a' : 'A'; return X < 10 ? '0' + X : HexChar + X - 10; } /// Construct a string ref from a boolean. -static inline StringRef toStringRef(bool B) { - return StringRef(B ? "true" : "false"); -} +inline StringRef toStringRef(bool B) { return StringRef(B ? "true" : "false"); } /// Construct a string ref from an array ref of unsigned chars. -static inline StringRef toStringRef(ArrayRef Input) { +inline StringRef toStringRef(ArrayRef Input) { return StringRef(reinterpret_cast(Input.begin()), Input.size()); } @@ -52,14 +50,29 @@ static inline StringRef toStringRef(ArrayRef Input) { /// value. /// /// If \p C is not a valid hex digit, -1U is returned. -static inline unsigned hexDigitValue(char C) { +inline unsigned hexDigitValue(char C) { if (C >= '0' && C <= '9') return C-'0'; if (C >= 'a' && C <= 'f') return C-'a'+10U; if (C >= 'A' && C <= 'F') return C-'A'+10U; return -1U; } -static inline std::string utohexstr(uint64_t X, bool LowerCase = false) { +/// Checks if character \p C is one of the 10 decimal digits. +inline bool isDigit(char C) { return C >= '0' && C <= '9'; } + +/// Checks if character \p C is a hexadecimal numeric character. +inline bool isHexDigit(char C) { return hexDigitValue(C) != -1U; } + +/// Checks if character \p C is a valid letter as classified by "C" locale. +inline bool isAlpha(char C) { + return ('a' <= C && C <= 'z') || ('A' <= C && C <= 'Z'); +} + +/// Checks whether character \p C is either a decimal digit or an uppercase or +/// lowercase letter as classified by "C" locale. +inline bool isAlnum(char C) { return isAlpha(C) || isDigit(C); } + +inline std::string utohexstr(uint64_t X, bool LowerCase = false) { char Buffer[17]; char *BufPtr = std::end(Buffer); @@ -94,7 +107,7 @@ inline std::string toHex(ArrayRef Input) { return toHex(toStringRef(Input)); } -static inline uint8_t hexFromNibbles(char MSB, char LSB) { +inline uint8_t hexFromNibbles(char MSB, char LSB) { unsigned U1 = hexDigitValue(MSB); unsigned U2 = hexDigitValue(LSB); assert(U1 != -1U && U2 != -1U); @@ -104,7 +117,7 @@ static inline uint8_t hexFromNibbles(char MSB, char LSB) { /// Convert hexadecimal string \p Input to its binary representation. /// The return string is half the size of \p Input. 
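The isDigit/isHexDigit/isAlpha/isAlnum predicates added above are locale-independent by construction, unlike their <cctype> counterparts. A small usage sketch; the helper itself is illustrative:

#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"

// True if S matches [A-Za-z_][A-Za-z0-9_]*, i.e. a C-style identifier.
static bool isCIdentifier(llvm::StringRef S) {
  if (S.empty() || (!llvm::isAlpha(S[0]) && S[0] != '_'))
    return false;
  for (char C : S.drop_front())
    if (!llvm::isAlnum(C) && C != '_')
      return false;
  return true;
}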
-static inline std::string fromHex(StringRef Input) { +inline std::string fromHex(StringRef Input) { if (Input.empty()) return std::string(); @@ -157,7 +170,7 @@ inline bool to_float(const Twine &T, long double &Num) { return detail::to_float(T, Num, strtold); } -static inline std::string utostr(uint64_t X, bool isNeg = false) { +inline std::string utostr(uint64_t X, bool isNeg = false) { char Buffer[21]; char *BufPtr = std::end(Buffer); @@ -172,7 +185,7 @@ static inline std::string utostr(uint64_t X, bool isNeg = false) { return std::string(BufPtr, std::end(Buffer)); } -static inline std::string itostr(int64_t X) { +inline std::string itostr(int64_t X) { if (X < 0) return utostr(static_cast(-X), true); else @@ -206,14 +219,14 @@ void SplitString(StringRef Source, // FIXME: Investigate whether a modified bernstein hash function performs // better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx // X*33+c -> X*33^c -static inline unsigned HashString(StringRef Str, unsigned Result = 0) { +inline unsigned HashString(StringRef Str, unsigned Result = 0) { for (StringRef::size_type i = 0, e = Str.size(); i != e; ++i) Result = Result * 33 + (unsigned char)Str[i]; return Result; } /// Returns the English suffix for an ordinal integer (-st, -nd, -rd, -th). -static inline StringRef getOrdinalSuffix(unsigned Val) { +inline StringRef getOrdinalSuffix(unsigned Val) { // It is critically important that we do this perfectly for // user-written sequences with over 100 elements. switch (Val % 100) { diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index 4e1e218da78d8..cb73dcabdbd64 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -179,7 +179,8 @@ class Triple { WatchOS, // Apple watchOS Mesa3D, Contiki, - LastOSType = Contiki + AMDPAL, // AMD PAL Runtime + LastOSType = AMDPAL }; enum EnvironmentType { UnknownEnvironment, @@ -204,7 +205,8 @@ class Triple { AMDOpenCL, CoreCLR, OpenCL, - LastEnvironmentType = OpenCL + Simulator, // Simulator variants of other systems, e.g., Apple's iOS + LastEnvironmentType = Simulator }; enum ObjectFormatType { UnknownObjectFormat, @@ -469,6 +471,10 @@ class Triple { return isMacOSX() || isiOS() || isWatchOS(); } + bool isSimulatorEnvironment() const { + return getEnvironment() == Triple::Simulator; + } + bool isOSNetBSD() const { return getOS() == Triple::NetBSD; } @@ -495,6 +501,8 @@ class Triple { return getOS() == Triple::ELFIAMCU; } + bool isOSUnknown() const { return getOS() == Triple::UnknownOS; } + bool isGNUEnvironment() const { EnvironmentType Env = getEnvironment(); return Env == Triple::GNU || Env == Triple::GNUABIN32 || diff --git a/include/llvm/ADT/iterator.h b/include/llvm/ADT/iterator.h index 15720a67c047b..711f8f2216209 100644 --- a/include/llvm/ADT/iterator.h +++ b/include/llvm/ADT/iterator.h @@ -70,10 +70,10 @@ class iterator_facade_base ReferenceT> { protected: enum { - IsRandomAccess = - std::is_base_of::value, - IsBidirectional = - std::is_base_of::value, + IsRandomAccess = std::is_base_of::value, + IsBidirectional = std::is_base_of::value, }; /// A proxy object for computing a reference via indirecting a copy of an diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h index 42034741b8e3c..cb314e3766cfe 100644 --- a/include/llvm/Analysis/ConstantFolding.h +++ b/include/llvm/Analysis/ConstantFolding.h @@ -79,6 +79,12 @@ ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *ConstantFoldBinaryOpOperands(unsigned Opcode, Constant 
*LHS, Constant *RHS, const DataLayout &DL); +/// \brief Attempt to constant fold a select instruction with the specified +/// operands. The constant result is returned if successful; if not, null is +/// returned. +Constant *ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, + Constant *V2); + /// \brief Attempt to constant fold a cast with the specified operand. If it /// fails, it returns a constant expression of the specified operand. Constant *ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, diff --git a/include/llvm/Analysis/IndirectCallSiteVisitor.h b/include/llvm/Analysis/IndirectCallSiteVisitor.h index 3c40cc0235cc0..dde56a143c510 100644 --- a/include/llvm/Analysis/IndirectCallSiteVisitor.h +++ b/include/llvm/Analysis/IndirectCallSiteVisitor.h @@ -27,7 +27,7 @@ struct PGOIndirectCallSiteVisitor }; // Helper function that finds all indirect call sites. -static inline std::vector findIndirectCallSites(Function &F) { +inline std::vector findIndirectCallSites(Function &F) { PGOIndirectCallSiteVisitor ICV; ICV.visit(F); return ICV.IndirectCallInsts; diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index de7247cb64a06..985f3880ed3a3 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -16,7 +16,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraphSCCPass.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include #include diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 4d3da873ac55e..ab2c847986337 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -76,7 +76,7 @@ template class LoopBase { SmallPtrSet DenseBlockSet; -#if !defined(NDEBUG) || !LLVM_ENABLE_ABI_BREAKING_CHECKS +#if LLVM_ENABLE_ABI_BREAKING_CHECKS /// Indicator that this loop is no longer a valid loop. bool IsInvalid = false; #endif @@ -165,15 +165,19 @@ template class LoopBase { return Blocks.size(); } -#ifndef NDEBUG /// Return true if this loop is no longer valid. The only valid use of this /// helper is "assert(L.isInvalid())" or equivalent, since IsInvalid is set to - /// false by the destructor. In other words, if this accessor returns false, + /// true by the destructor. In other words, if this accessor returns true, /// the caller has already triggered UB by calling this accessor; and so it - /// can only be called in a context where a return value of false indicates a + /// can only be called in a context where a return value of true indicates a /// programmer error. - bool isInvalid() const { return IsInvalid; } + bool isInvalid() const { +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + return IsInvalid; +#else + return false; #endif + } /// True if terminator in the block can branch to another block that is /// outside of the current loop. @@ -392,7 +396,9 @@ template class LoopBase { for (auto *SubLoop : SubLoops) SubLoop->~LoopT(); +#if LLVM_ENABLE_ABI_BREAKING_CHECKS IsInvalid = true; +#endif SubLoops.clear(); Blocks.clear(); DenseBlockSet.clear(); @@ -514,6 +520,14 @@ class Loop : public LoopBase { /// operand should be the node itself. void setLoopID(MDNode *LoopID) const; + /// Add llvm.loop.unroll.disable to this loop's loop id metadata. + /// + /// Remove existing unroll metadata and add unroll disable metadata to + /// indicate the loop has already been unrolled. 
This prevents a loop + /// from being unrolled more than is directed by a pragma if the loop + /// unrolling pass is run more than once (which it generally is). + void setLoopAlreadyUnrolled(); + /// Return true if no exit block for the loop has a predecessor that is /// outside the loop. bool hasDedicatedExits() const; diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index 67f0fecb5763c..7d53e34938b79 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -92,8 +92,7 @@ bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. const CallInst *extractMallocCall(const Value *I, const TargetLibraryInfo *TLI); -static inline CallInst *extractMallocCall(Value *I, - const TargetLibraryInfo *TLI) { +inline CallInst *extractMallocCall(Value *I, const TargetLibraryInfo *TLI) { return const_cast(extractMallocCall((const Value*)I, TLI)); } @@ -127,8 +126,7 @@ Value *getMallocArraySize(CallInst *CI, const DataLayout &DL, /// extractCallocCall - Returns the corresponding CallInst if the instruction /// is a calloc call. const CallInst *extractCallocCall(const Value *I, const TargetLibraryInfo *TLI); -static inline CallInst *extractCallocCall(Value *I, - const TargetLibraryInfo *TLI) { +inline CallInst *extractCallocCall(Value *I, const TargetLibraryInfo *TLI) { return const_cast(extractCallocCall((const Value*)I, TLI)); } @@ -140,7 +138,7 @@ static inline CallInst *extractCallocCall(Value *I, /// isFreeCall - Returns non-null if the value is a call to the builtin free() const CallInst *isFreeCall(const Value *I, const TargetLibraryInfo *TLI); -static inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { +inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) { return const_cast(isFreeCall((const Value*)I, TLI)); } diff --git a/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/include/llvm/Analysis/OptimizationRemarkEmitter.h similarity index 96% rename from include/llvm/Analysis/OptimizationDiagnosticInfo.h rename to include/llvm/Analysis/OptimizationRemarkEmitter.h index 750b5791760da..26f32acdcda54 100644 --- a/include/llvm/Analysis/OptimizationDiagnosticInfo.h +++ b/include/llvm/Analysis/OptimizationRemarkEmitter.h @@ -1,4 +1,4 @@ -//===- OptimizationDiagnosticInfo.h - Optimization Diagnostic ---*- C++ -*-===// +//===- OptimizationRemarkEmitter.h - Optimization Diagnostic ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -164,11 +164,5 @@ class OptimizationRemarkEmitterAnalysis /// \brief Run the analysis pass over a function and produce BFI. Result run(Function &F, FunctionAnalysisManager &AM); }; - -namespace yaml { -template <> struct MappingTraits { - static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag); -}; -} } #endif // LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 5409949c6fb1d..96309debd84a6 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -1741,6 +1741,16 @@ class ScalarEvolution { const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride, bool Equality); + /// Compute the maximum backedge count based on the range of values + /// permitted by Start, End, and Stride. This is for loops of the form + /// {Start, +, Stride} LT End. 
+ /// + /// Precondition: the induction variable is known to be positive. We *don't* + /// assert these preconditions so please be careful. + const SCEV *computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride, + const SCEV *End, unsigned BitWidth, + bool IsSigned); + /// Verify if an linear IV with positive stride can overflow when in a /// less-than comparison, knowing the invariant term of the comparison, /// the stride and the knowledge of NSW/NUW flags on the recurrence. @@ -1761,10 +1771,18 @@ class ScalarEvolution { const SCEV *getOrCreateMulExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags); + /// Find all of the loops transitively used in \p S, and update \c LoopUsers + /// accordingly. + void addToLoopUseLists(const SCEV *S); + FoldingSet UniqueSCEVs; FoldingSet UniquePreds; BumpPtrAllocator SCEVAllocator; + /// This maps loops to a list of SCEV expressions that (transitively) use said + /// loop. + DenseMap> LoopUsers; + /// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression /// they can be rewritten into under certain predicates. DenseMap, diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h index 015e0c44cb45d..1b8df03b3a1be 100644 --- a/include/llvm/Analysis/SparsePropagation.h +++ b/include/llvm/Analysis/SparsePropagation.h @@ -15,37 +15,35 @@ #ifndef LLVM_ANALYSIS_SPARSEPROPAGATION_H #define LLVM_ANALYSIS_SPARSEPROPAGATION_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" #include -#include -#include + +#define DEBUG_TYPE "sparseprop" namespace llvm { -class Argument; -class BasicBlock; -class Constant; -class Function; -class Instruction; -class PHINode; -class raw_ostream; +/// A template for translating between LLVM Values and LatticeKeys. Clients must +/// provide a specialization of LatticeKeyInfo for their LatticeKey type. +template struct LatticeKeyInfo { + // static inline Value *getValueFromLatticeKey(LatticeKey Key); + // static inline LatticeKey getLatticeKeyFromValue(Value *V); +}; + +template > class SparseSolver; -class TerminatorInst; -class Value; -template class SmallVectorImpl; /// AbstractLatticeFunction - This class is implemented by the dataflow instance -/// to specify what the lattice values are and how they handle merges etc. -/// This gives the client the power to compute lattice values from instructions, -/// constants, etc. The requirement is that lattice values must all fit into -/// a void*. If a void* is not sufficient, the implementation should use this -/// pointer to be a pointer into a uniquing set or something. -/// -class AbstractLatticeFunction { -public: - using LatticeVal = void *; - +/// to specify what the lattice values are and how they handle merges etc. This +/// gives the client the power to compute lattice values from instructions, +/// constants, etc. The current requirement is that lattice values must be +/// copyable. At the moment, nothing tries to avoid copying. Additionally, +/// lattice keys must be able to be used as keys of a mapping data structure. +/// Internally, the generic solver currently uses a DenseMap to map lattice keys +/// to lattice values. If the lattice key is a non-standard type, a +/// specialization of DenseMapInfo must be provided. 
+template class AbstractLatticeFunction { private: LatticeVal UndefVal, OverdefinedVal, UntrackedVal; @@ -57,40 +55,27 @@ class AbstractLatticeFunction { UntrackedVal = untrackedVal; } - virtual ~AbstractLatticeFunction(); + virtual ~AbstractLatticeFunction() = default; LatticeVal getUndefVal() const { return UndefVal; } LatticeVal getOverdefinedVal() const { return OverdefinedVal; } LatticeVal getUntrackedVal() const { return UntrackedVal; } - /// IsUntrackedValue - If the specified Value is something that is obviously - /// uninteresting to the analysis (and would always return UntrackedVal), - /// this function can return true to avoid pointless work. - virtual bool IsUntrackedValue(Value *V) { return false; } + /// IsUntrackedValue - If the specified LatticeKey is obviously uninteresting + /// to the analysis (i.e., it would always return UntrackedVal), this + /// function can return true to avoid pointless work. + virtual bool IsUntrackedValue(LatticeKey Key) { return false; } - /// ComputeConstant - Given a constant value, compute and return a lattice - /// value corresponding to the specified constant. - virtual LatticeVal ComputeConstant(Constant *C) { - return getOverdefinedVal(); // always safe + /// ComputeLatticeVal - Compute and return a LatticeVal corresponding to the + /// given LatticeKey. + virtual LatticeVal ComputeLatticeVal(LatticeKey Key) { + return getOverdefinedVal(); } /// IsSpecialCasedPHI - Given a PHI node, determine whether this PHI node is /// one that the we want to handle through ComputeInstructionState. virtual bool IsSpecialCasedPHI(PHINode *PN) { return false; } - /// GetConstant - If the specified lattice value is representable as an LLVM - /// constant value, return it. Otherwise return null. The returned value - /// must be in the same LLVM type as Val. - virtual Constant *GetConstant(LatticeVal LV, Value *Val, SparseSolver &SS) { - return nullptr; - } - - /// ComputeArgument - Given a formal argument value, compute and return a - /// lattice value corresponding to the specified argument. - virtual LatticeVal ComputeArgument(Argument *I) { - return getOverdefinedVal(); // always safe - } - /// MergeValues - Compute and return the merge of the two specified lattice /// values. Merging should only move one direction down the lattice to /// guarantee convergence (toward overdefined). @@ -98,63 +83,80 @@ class AbstractLatticeFunction { return getOverdefinedVal(); // always safe, never useful. } - /// ComputeInstructionState - Given an instruction and a vector of its operand - /// values, compute the result value of the instruction. - virtual LatticeVal ComputeInstructionState(Instruction &I, SparseSolver &SS) { - return getOverdefinedVal(); // always safe, never useful. + /// ComputeInstructionState - Compute the LatticeKeys that change as a result + /// of executing instruction \p I. Their associated LatticeVals are store in + /// \p ChangedValues. + virtual void + ComputeInstructionState(Instruction &I, + DenseMap &ChangedValues, + SparseSolver &SS) = 0; + + /// PrintLatticeVal - Render the given LatticeVal to the specified stream. + virtual void PrintLatticeVal(LatticeVal LV, raw_ostream &OS); + + /// PrintLatticeKey - Render the given LatticeKey to the specified stream. + virtual void PrintLatticeKey(LatticeKey Key, raw_ostream &OS); + + /// GetValueFromLatticeVal - If the given LatticeVal is representable as an + /// LLVM value, return it; otherwise, return nullptr. If a type is given, the + /// returned value must have the same type. 
This function is used by the + /// generic solver in attempting to resolve branch and switch conditions. + virtual Value *GetValueFromLatticeVal(LatticeVal LV, Type *Ty = nullptr) { + return nullptr; } - - /// PrintValue - Render the specified lattice value to the specified stream. - virtual void PrintValue(LatticeVal V, raw_ostream &OS); }; /// SparseSolver - This class is a general purpose solver for Sparse Conditional /// Propagation with a programmable lattice function. +template class SparseSolver { - using LatticeVal = AbstractLatticeFunction::LatticeVal; - /// LatticeFunc - This is the object that knows the lattice and how to do + /// LatticeFunc - This is the object that knows the lattice and how to /// compute transfer functions. - AbstractLatticeFunction *LatticeFunc; + AbstractLatticeFunction *LatticeFunc; - DenseMap ValueState; // The state each value is in. - SmallPtrSet BBExecutable; // The bbs that are executable. + /// ValueState - Holds the LatticeVals associated with LatticeKeys. + DenseMap ValueState; - std::vector InstWorkList; // Worklist of insts to process. + /// BBExecutable - Holds the basic blocks that are executable. + SmallPtrSet BBExecutable; - std::vector BBWorkList; // The BasicBlock work list + /// ValueWorkList - Holds values that should be processed. + SmallVector ValueWorkList; + + /// BBWorkList - Holds basic blocks that should be processed. + SmallVector BBWorkList; + + using Edge = std::pair; /// KnownFeasibleEdges - Entries in this set are edges which have already had /// PHI nodes retriggered. - using Edge = std::pair; std::set KnownFeasibleEdges; public: - explicit SparseSolver(AbstractLatticeFunction *Lattice) + explicit SparseSolver( + AbstractLatticeFunction *Lattice) : LatticeFunc(Lattice) {} SparseSolver(const SparseSolver &) = delete; SparseSolver &operator=(const SparseSolver &) = delete; - ~SparseSolver() { delete LatticeFunc; } /// Solve - Solve for constants and executable blocks. - void Solve(Function &F); + void Solve(); - void Print(Function &F, raw_ostream &OS) const; + void Print(raw_ostream &OS) const; - /// getLatticeState - Return the LatticeVal object that corresponds to the - /// value. If an value is not in the map, it is returned as untracked, - /// unlike the getOrInitValueState method. - LatticeVal getLatticeState(Value *V) const { - DenseMap::const_iterator I = ValueState.find(V); + /// getExistingValueState - Return the LatticeVal object corresponding to the + /// given value from the ValueState map. If the value is not in the map, + /// UntrackedVal is returned, unlike the getValueState method. + LatticeVal getExistingValueState(LatticeKey Key) const { + auto I = ValueState.find(Key); return I != ValueState.end() ? I->second : LatticeFunc->getUntrackedVal(); } - /// getOrInitValueState - Return the LatticeVal object that corresponds to the - /// value, initializing the value's state if it hasn't been entered into the - /// map yet. This function is necessary because not all values should start - /// out in the underdefined state... Arguments should be overdefined, and - /// constants should be marked as constants. - LatticeVal getOrInitValueState(Value *V); + /// getValueState - Return the LatticeVal object corresponding to the given + /// value from the ValueState map. If the value is not in the map, its state + /// is initialized. + LatticeVal getValueState(LatticeKey Key); /// isEdgeFeasible - Return true if the control flow edge from the 'From' /// basic block to the 'To' basic block is currently feasible. 
If @@ -171,15 +173,16 @@ class SparseSolver { return BBExecutable.count(BB); } -private: - /// UpdateState - When the state for some instruction is potentially updated, - /// this function notices and adds I to the worklist if needed. - void UpdateState(Instruction &Inst, LatticeVal V); - /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. void MarkBlockExecutable(BasicBlock *BB); +private: + /// UpdateState - When the state of some LatticeKey is potentially updated to + /// the given LatticeVal, this function notices and adds the LLVM value + /// corresponding the key to the work list, if needed. + void UpdateState(LatticeKey Key, LatticeVal LV); + /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB /// work list if it is not already executable. void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest); @@ -194,6 +197,334 @@ class SparseSolver { void visitTerminatorInst(TerminatorInst &TI); }; +//===----------------------------------------------------------------------===// +// AbstractLatticeFunction Implementation +//===----------------------------------------------------------------------===// + +template +void AbstractLatticeFunction::PrintLatticeVal( + LatticeVal V, raw_ostream &OS) { + if (V == UndefVal) + OS << "undefined"; + else if (V == OverdefinedVal) + OS << "overdefined"; + else if (V == UntrackedVal) + OS << "untracked"; + else + OS << "unknown lattice value"; +} + +template +void AbstractLatticeFunction::PrintLatticeKey( + LatticeKey Key, raw_ostream &OS) { + OS << "unknown lattice key"; +} + +//===----------------------------------------------------------------------===// +// SparseSolver Implementation +//===----------------------------------------------------------------------===// + +template +LatticeVal +SparseSolver::getValueState(LatticeKey Key) { + auto I = ValueState.find(Key); + if (I != ValueState.end()) + return I->second; // Common case, in the map + + if (LatticeFunc->IsUntrackedValue(Key)) + return LatticeFunc->getUntrackedVal(); + LatticeVal LV = LatticeFunc->ComputeLatticeVal(Key); + + // If this value is untracked, don't add it to the map. + if (LV == LatticeFunc->getUntrackedVal()) + return LV; + return ValueState[Key] = LV; +} + +template +void SparseSolver::UpdateState(LatticeKey Key, + LatticeVal LV) { + auto I = ValueState.find(Key); + if (I != ValueState.end() && I->second == LV) + return; // No change. + + // Update the state of the given LatticeKey and add its corresponding LLVM + // value to the work list. + ValueState[Key] = LV; + if (Value *V = KeyInfo::getValueFromLatticeKey(Key)) + ValueWorkList.push_back(V); +} + +template +void SparseSolver::MarkBlockExecutable( + BasicBlock *BB) { + if (!BBExecutable.insert(BB).second) + return; + DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n"); + BBWorkList.push_back(BB); // Add the block to the work list! +} + +template +void SparseSolver::markEdgeExecutable( + BasicBlock *Source, BasicBlock *Dest) { + if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) + return; // This edge is already known to be executable! + + DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName() << " -> " + << Dest->getName() << "\n"); + + if (BBExecutable.count(Dest)) { + // The destination is already executable, but we just made an edge + // feasible that wasn't before. Revisit the PHI nodes in the block + // because they have potentially new operands. 
+ for (BasicBlock::iterator I = Dest->begin(); isa(I); ++I) + visitPHINode(*cast(I)); + } else { + MarkBlockExecutable(Dest); + } +} + +template +void SparseSolver::getFeasibleSuccessors( + TerminatorInst &TI, SmallVectorImpl &Succs, bool AggressiveUndef) { + Succs.resize(TI.getNumSuccessors()); + if (TI.getNumSuccessors() == 0) + return; + + if (BranchInst *BI = dyn_cast(&TI)) { + if (BI->isUnconditional()) { + Succs[0] = true; + return; + } + + LatticeVal BCValue; + if (AggressiveUndef) + BCValue = + getValueState(KeyInfo::getLatticeKeyFromValue(BI->getCondition())); + else + BCValue = getExistingValueState( + KeyInfo::getLatticeKeyFromValue(BI->getCondition())); + + if (BCValue == LatticeFunc->getOverdefinedVal() || + BCValue == LatticeFunc->getUntrackedVal()) { + // Overdefined condition variables can branch either way. + Succs[0] = Succs[1] = true; + return; + } + + // If undefined, neither is feasible yet. + if (BCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = + dyn_cast_or_null(LatticeFunc->GetValueFromLatticeVal( + BCValue, BI->getCondition()->getType())); + if (!C || !isa(C)) { + // Non-constant values can go either way. + Succs[0] = Succs[1] = true; + return; + } + + // Constant condition variables mean the branch can only go a single way + Succs[C->isNullValue()] = true; + return; + } + + if (TI.isExceptional()) { + Succs.assign(Succs.size(), true); + return; + } + + if (isa(TI)) { + Succs.assign(Succs.size(), true); + return; + } + + SwitchInst &SI = cast(TI); + LatticeVal SCValue; + if (AggressiveUndef) + SCValue = getValueState(KeyInfo::getLatticeKeyFromValue(SI.getCondition())); + else + SCValue = getExistingValueState( + KeyInfo::getLatticeKeyFromValue(SI.getCondition())); + + if (SCValue == LatticeFunc->getOverdefinedVal() || + SCValue == LatticeFunc->getUntrackedVal()) { + // All destinations are executable! + Succs.assign(TI.getNumSuccessors(), true); + return; + } + + // If undefined, neither is feasible yet. + if (SCValue == LatticeFunc->getUndefVal()) + return; + + Constant *C = dyn_cast_or_null(LatticeFunc->GetValueFromLatticeVal( + SCValue, SI.getCondition()->getType())); + if (!C || !isa(C)) { + // All destinations are executable! + Succs.assign(TI.getNumSuccessors(), true); + return; + } + SwitchInst::CaseHandle Case = *SI.findCaseValue(cast(C)); + Succs[Case.getSuccessorIndex()] = true; +} + +template +bool SparseSolver::isEdgeFeasible( + BasicBlock *From, BasicBlock *To, bool AggressiveUndef) { + SmallVector SuccFeasible; + TerminatorInst *TI = From->getTerminator(); + getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef); + + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + if (TI->getSuccessor(i) == To && SuccFeasible[i]) + return true; + + return false; +} + +template +void SparseSolver::visitTerminatorInst( + TerminatorInst &TI) { + SmallVector SuccFeasible; + getFeasibleSuccessors(TI, SuccFeasible, true); + + BasicBlock *BB = TI.getParent(); + + // Mark all feasible successors executable... + for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i) + if (SuccFeasible[i]) + markEdgeExecutable(BB, TI.getSuccessor(i)); +} + +template +void SparseSolver::visitPHINode(PHINode &PN) { + // The lattice function may store more information on a PHINode than could be + // computed from its incoming values. For example, SSI form stores its sigma + // functions as PHINodes with a single incoming value. 
+ if (LatticeFunc->IsSpecialCasedPHI(&PN)) { + DenseMap ChangedValues; + LatticeFunc->ComputeInstructionState(PN, ChangedValues, *this); + for (auto &ChangedValue : ChangedValues) + if (ChangedValue.second != LatticeFunc->getUntrackedVal()) + UpdateState(ChangedValue.first, ChangedValue.second); + return; + } + + LatticeKey Key = KeyInfo::getLatticeKeyFromValue(&PN); + LatticeVal PNIV = getValueState(Key); + LatticeVal Overdefined = LatticeFunc->getOverdefinedVal(); + + // If this value is already overdefined (common) just return. + if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal()) + return; // Quick exit + + // Super-extra-high-degree PHI nodes are unlikely to ever be interesting, + // and slow us down a lot. Just mark them overdefined. + if (PN.getNumIncomingValues() > 64) { + UpdateState(Key, Overdefined); + return; + } + + // Look at all of the executable operands of the PHI node. If any of them + // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the + // transfer function to give us the merge of the incoming values. + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + // If the edge is not yet known to be feasible, it doesn't impact the PHI. + if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true)) + continue; + + // Merge in this value. + LatticeVal OpVal = + getValueState(KeyInfo::getLatticeKeyFromValue(PN.getIncomingValue(i))); + if (OpVal != PNIV) + PNIV = LatticeFunc->MergeValues(PNIV, OpVal); + + if (PNIV == Overdefined) + break; // Rest of input values don't matter. + } + + // Update the PHI with the compute value, which is the merge of the inputs. + UpdateState(Key, PNIV); +} + +template +void SparseSolver::visitInst(Instruction &I) { + // PHIs are handled by the propagation logic, they are never passed into the + // transfer functions. + if (PHINode *PN = dyn_cast(&I)) + return visitPHINode(*PN); + + // Otherwise, ask the transfer function what the result is. If this is + // something that we care about, remember it. + DenseMap ChangedValues; + LatticeFunc->ComputeInstructionState(I, ChangedValues, *this); + for (auto &ChangedValue : ChangedValues) + if (ChangedValue.second != LatticeFunc->getUntrackedVal()) + UpdateState(ChangedValue.first, ChangedValue.second); + + if (TerminatorInst *TI = dyn_cast(&I)) + visitTerminatorInst(*TI); +} + +template +void SparseSolver::Solve() { + // Process the work lists until they are empty! + while (!BBWorkList.empty() || !ValueWorkList.empty()) { + // Process the value work list. + while (!ValueWorkList.empty()) { + Value *V = ValueWorkList.back(); + ValueWorkList.pop_back(); + + DEBUG(dbgs() << "\nPopped off V-WL: " << *V << "\n"); + + // "V" got into the work list because it made a transition. See if any + // users are both live and in need of updating. + for (User *U : V->users()) + if (Instruction *Inst = dyn_cast(U)) + if (BBExecutable.count(Inst->getParent())) // Inst is executable? + visitInst(*Inst); + } + + // Process the basic block work list. + while (!BBWorkList.empty()) { + BasicBlock *BB = BBWorkList.back(); + BBWorkList.pop_back(); + + DEBUG(dbgs() << "\nPopped off BBWL: " << *BB); + + // Notify all instructions in this basic block that they are newly + // executable. 
+ for (Instruction &I : *BB) + visitInst(I); + } + } +} + +template +void SparseSolver::Print( + raw_ostream &OS) const { + if (ValueState.empty()) + return; + + LatticeKey Key; + LatticeVal LV; + + OS << "ValueState:\n"; + for (auto &Entry : ValueState) { + std::tie(Key, LV) = Entry; + if (LV == LatticeFunc->getUntrackedVal()) + continue; + OS << "\t"; + LatticeFunc->PrintLatticeVal(LV, OS); + OS << ": "; + LatticeFunc->PrintLatticeKey(Key, OS); + OS << "\n"; + } +} } // end namespace llvm +#undef DEBUG_TYPE + #endif // LLVM_ANALYSIS_SPARSEPROPAGATION_H diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index afc16e89da6d8..f4808b2df0017 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -489,6 +489,13 @@ class TargetTransformInfo { /// would typically be allowed using throughput or size cost models. bool hasDivRemOp(Type *DataType, bool IsSigned) const; + /// Return true if the given instruction (assumed to be a memory access + /// instruction) has a volatile variant. If that's the case then we can avoid + /// addrspacecast to generic AS for volatile loads/stores. Default + /// implementation returns false, which prevents address space inference for + /// volatile loads/stores. + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const; + /// Return true if target doesn't mind addresses in vectors. bool prefersVectorizedAddressing() const; @@ -967,6 +974,7 @@ class TargetTransformInfo::Concept { virtual bool isLegalMaskedScatter(Type *DataType) = 0; virtual bool isLegalMaskedGather(Type *DataType) = 0; virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; + virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; virtual bool prefersVectorizedAddressing() = 0; virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, @@ -1192,6 +1200,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { bool hasDivRemOp(Type *DataType, bool IsSigned) override { return Impl.hasDivRemOp(DataType, IsSigned); } + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override { + return Impl.hasVolatileVariant(I, AddrSpace); + } bool prefersVectorizedAddressing() override { return Impl.prefersVectorizedAddressing(); } diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index b3b3e07b4dcdf..2a1664e1487cc 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -188,6 +188,8 @@ class TargetTransformInfoImplBase { } bool isLoweredToCall(const Function *F) { + assert(F && "A concrete function must be provided to this routine."); + // FIXME: These should almost certainly not be handled here, and instead // handled with the help of TLI or the target itself. This was largely // ported from existing analysis heuristics here so that such refactorings @@ -253,6 +255,8 @@ class TargetTransformInfoImplBase { bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; } + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; } + bool prefersVectorizedAddressing() { return true; } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, @@ -718,10 +722,10 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { // Assumes the address space is 0 when Ptr is nullptr. unsigned AS = (Ptr == nullptr ? 
0 : Ptr->getType()->getPointerAddressSpace()); + if (static_cast(this)->isLegalAddressingMode( TargetType, const_cast(BaseGV), - static_cast(BaseOffset.getLimitedValue()), HasBaseReg, - Scale, AS)) + BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS)) return TTI::TCC_Free; return TTI::TCC_Basic; } @@ -798,7 +802,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { // A real function call is much slower. if (auto *CI = dyn_cast(I)) { const Function *F = CI->getCalledFunction(); - if (static_cast(this)->isLoweredToCall(F)) + if (!F || static_cast(this)->isLoweredToCall(F)) return 40; // Some intrinsics return a value and a flag, we use the value type // to decide its latency. diff --git a/include/llvm/Analysis/ValueLatticeUtils.h b/include/llvm/Analysis/ValueLatticeUtils.h new file mode 100644 index 0000000000000..02072672e56ed --- /dev/null +++ b/include/llvm/Analysis/ValueLatticeUtils.h @@ -0,0 +1,41 @@ +//===-- ValueLatticeUtils.h - Utils for solving lattices --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares common functions useful for performing data-flow analyses +// that propagate values across function boundaries. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_VALUELATTICEUTILS_H +#define LLVM_ANALYSIS_VALUELATTICEUTILS_H + +namespace llvm { + +class Function; +class GlobalVariable; + +/// Determine if the values of the given function's arguments can be tracked +/// interprocedurally. The value of an argument can be tracked if the function +/// has local linkage and its address is not taken. +bool canTrackArgumentsInterprocedurally(Function *F); + +/// Determine if the values of the given function's returns can be tracked +/// interprocedurally. Return values can be tracked if the function has an +/// exact definition and it doesn't have the "naked" attribute. Naked functions +/// may contain assembly code that returns untrackable values. +bool canTrackReturnsInterprocedurally(Function *F); + +/// Determine if the value maintained in the given global variable can be +/// tracked interprocedurally. A value can be tracked if the global variable +/// has local linkage and is only used by non-volatile loads and stores. +bool canTrackGlobalVariableInterprocedurally(GlobalVariable *GV); + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_VALUELATTICEUTILS_H diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index 603b3a210b82c..2fbfd3d2ffcda 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -220,9 +220,9 @@ class Value; /// pointer plus a constant offset. Return the base and offset to the caller. Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL); - static inline const Value * - GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset, - const DataLayout &DL) { + inline const Value *GetPointerBaseWithConstantOffset(const Value *Ptr, + int64_t &Offset, + const DataLayout &DL) { return GetPointerBaseWithConstantOffset(const_cast(Ptr), Offset, DL); } @@ -283,9 +283,8 @@ class Value; /// be stripped off. 
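Back to the new ValueLatticeUtils header above: a hypothetical driver showing how such predicates gate what an interprocedural solver models (the function name and container are illustrative):

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/IR/Module.h"

// Globals a solver may track: local linkage, used only by non-volatile
// loads and stores.
llvm::SmallVector<llvm::GlobalVariable *, 8> trackableGlobals(llvm::Module &M) {
  llvm::SmallVector<llvm::GlobalVariable *, 8> Out;
  for (llvm::GlobalVariable &GV : M.globals())
    if (llvm::canTrackGlobalVariableInterprocedurally(&GV))
      Out.push_back(&GV);
  return Out;
}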
Value *GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup = 6); - static inline const Value *GetUnderlyingObject(const Value *V, - const DataLayout &DL, - unsigned MaxLookup = 6) { + inline const Value *GetUnderlyingObject(const Value *V, const DataLayout &DL, + unsigned MaxLookup = 6) { return GetUnderlyingObject(const_cast(V), DL, MaxLookup); } @@ -323,7 +322,7 @@ class Value; /// This is a wrapper around GetUnderlyingObjects and adds support for basic /// ptrtoint+arithmetic+inttoptr sequences. - void getUnderlyingObjectsForCodeGen(const Value *V, + bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl &Objects, const DataLayout &DL); @@ -506,7 +505,7 @@ class Value; /// SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp = nullptr); - static inline SelectPatternResult + inline SelectPatternResult matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS, Instruction::CastOps *CastOp = nullptr) { Value *L = const_cast(LHS); diff --git a/include/llvm/AsmParser/Parser.h b/include/llvm/AsmParser/Parser.h index 768b089b8a2a6..5f02e488e5b1b 100644 --- a/include/llvm/AsmParser/Parser.h +++ b/include/llvm/AsmParser/Parser.h @@ -36,10 +36,12 @@ class Type; /// \param Context Context in which to allocate globals info. /// \param Slots The optional slot mapping that will be initialized during /// parsing. -std::unique_ptr parseAssemblyFile(StringRef Filename, - SMDiagnostic &Error, - LLVMContext &Context, - SlotMapping *Slots = nullptr); +/// \param UpgradeDebugInfo Run UpgradeDebugInfo, which runs the Verifier. +/// This option should only be set to false by llvm-as +/// for use inside the LLVM testuite! +std::unique_ptr +parseAssemblyFile(StringRef Filename, SMDiagnostic &Error, LLVMContext &Context, + SlotMapping *Slots = nullptr, bool UpgradeDebugInfo = true); /// The function is a secondary interface to the LLVM Assembly Parser. It parses /// an ASCII string that (presumably) contains LLVM Assembly code. It returns a @@ -52,10 +54,14 @@ std::unique_ptr parseAssemblyFile(StringRef Filename, /// \param Context Context in which to allocate globals info. /// \param Slots The optional slot mapping that will be initialized during /// parsing. +/// \param UpgradeDebugInfo Run UpgradeDebugInfo, which runs the Verifier. +/// This option should only be set to false by llvm-as +/// for use inside the LLVM testuite! std::unique_ptr parseAssemblyString(StringRef AsmString, SMDiagnostic &Error, LLVMContext &Context, - SlotMapping *Slots = nullptr); + SlotMapping *Slots = nullptr, + bool UpgradeDebugInfo = true); /// parseAssemblyFile and parseAssemblyString are wrappers around this function. /// \brief Parse LLVM Assembly from a MemoryBuffer. @@ -63,9 +69,13 @@ std::unique_ptr parseAssemblyString(StringRef AsmString, /// \param Err Error result info. /// \param Slots The optional slot mapping that will be initialized during /// parsing. +/// \param UpgradeDebugInfo Run UpgradeDebugInfo, which runs the Verifier. +/// This option should only be set to false by llvm-as +/// for use inside the LLVM testuite! std::unique_ptr parseAssembly(MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context, - SlotMapping *Slots = nullptr); + SlotMapping *Slots = nullptr, + bool UpgradeDebugInfo = true); /// This function is the low-level interface to the LLVM Assembly Parser. 
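The new UpgradeDebugInfo flag threads through each parse entry point above; a sketch of an ordinary caller, which should keep the default of true so UpgradeDebugInfo and the Verifier still run:

#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"

std::unique_ptr<llvm::Module> parseIR(llvm::StringRef Text,
                                      llvm::LLVMContext &Ctx) {
  llvm::SMDiagnostic Err;
  // Everything except llvm-as should leave the new flag at its default.
  return llvm::parseAssemblyString(Text, Err, Ctx, /*Slots=*/nullptr,
                                   /*UpgradeDebugInfo=*/true);
}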
/// This is kept as an independent function instead of being inlined into @@ -78,8 +88,12 @@ std::unique_ptr parseAssembly(MemoryBufferRef F, SMDiagnostic &Err, /// \param Slots The optional slot mapping that will be initialized during /// parsing. /// \return true on error. +/// \param UpgradeDebugInfo Run UpgradeDebugInfo, which runs the Verifier. +/// This option should only be set to false by llvm-as +/// for use inside the LLVM testuite! bool parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err, - SlotMapping *Slots = nullptr); + SlotMapping *Slots = nullptr, + bool UpgradeDebugInfo = true); /// Parse a type and a constant value in the given string. /// diff --git a/include/llvm/BinaryFormat/Dwarf.def b/include/llvm/BinaryFormat/Dwarf.def index 8214fe2e1209d..3ade3ea0d338e 100644 --- a/include/llvm/BinaryFormat/Dwarf.def +++ b/include/llvm/BinaryFormat/Dwarf.def @@ -176,6 +176,8 @@ HANDLE_DW_TAG(0x4103, class_template, 0, GNU) HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU) HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU) HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, GNU) +HANDLE_DW_TAG(0x4109, GNU_call_site, 0, GNU) +HANDLE_DW_TAG(0x410a, GNU_call_site_parameter, 0, GNU) HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE) HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND) HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND) @@ -338,6 +340,8 @@ HANDLE_DW_AT(0x2106, body_end, 0, GNU) HANDLE_DW_AT(0x2107, GNU_vector, 0, GNU) HANDLE_DW_AT(0x2110, GNU_template_name, 0, GNU) HANDLE_DW_AT(0x210f, GNU_odr_signature, 0, GNU) +HANDLE_DW_AT(0x2111, GNU_call_site_value, 0, GNU) +HANDLE_DW_AT(0x2117, GNU_all_call_sites, 0, GNU) HANDLE_DW_AT(0x2119, GNU_macros, 0, GNU) // Extensions for Fission proposal. HANDLE_DW_AT(0x2130, GNU_dwo_name, 0, GNU) diff --git a/include/llvm/BinaryFormat/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h index 37651f4c66705..a0e5367b412c3 100644 --- a/include/llvm/BinaryFormat/Dwarf.h +++ b/include/llvm/BinaryFormat/Dwarf.h @@ -325,6 +325,32 @@ enum UnitType : unsigned char { DW_UT_hi_user = 0xff }; +inline bool isUnitType(uint8_t UnitType) { + switch (UnitType) { + case DW_UT_compile: + case DW_UT_type: + case DW_UT_partial: + case DW_UT_skeleton: + case DW_UT_split_compile: + case DW_UT_split_type: + return true; + default: + return false; + } +} + +inline bool isUnitType(dwarf::Tag T) { + switch (T) { + case DW_TAG_compile_unit: + case DW_TAG_type_unit: + case DW_TAG_partial_unit: + case DW_TAG_skeleton_unit: + return true; + default: + return false; + } +} + // Constants for the DWARF v5 Accelerator Table Proposal enum AcceleratorTable { // Data layout descriptors. @@ -491,6 +517,9 @@ struct PubIndexEntryDescriptor { /// Constants that define the DWARF format as 32 or 64 bit. enum DwarfFormat : uint8_t { DWARF32, DWARF64 }; +/// The Bernstein hash function used by the accelerator tables. +uint32_t djbHash(StringRef Buffer); + } // End of namespace dwarf } // End of namespace llvm diff --git a/include/llvm/BinaryFormat/ELF.h b/include/llvm/BinaryFormat/ELF.h index dba0367da7167..5cedd99fdc012 100644 --- a/include/llvm/BinaryFormat/ELF.h +++ b/include/llvm/BinaryFormat/ELF.h @@ -335,29 +335,33 @@ enum { // OS ABI identification. enum { - ELFOSABI_NONE = 0, // UNIX System V ABI - ELFOSABI_HPUX = 1, // HP-UX operating system - ELFOSABI_NETBSD = 2, // NetBSD - ELFOSABI_GNU = 3, // GNU/Linux - ELFOSABI_LINUX = 3, // Historical alias for ELFOSABI_GNU. 
- ELFOSABI_HURD = 4, // GNU/Hurd - ELFOSABI_SOLARIS = 6, // Solaris - ELFOSABI_AIX = 7, // AIX - ELFOSABI_IRIX = 8, // IRIX - ELFOSABI_FREEBSD = 9, // FreeBSD - ELFOSABI_TRU64 = 10, // TRU64 UNIX - ELFOSABI_MODESTO = 11, // Novell Modesto - ELFOSABI_OPENBSD = 12, // OpenBSD - ELFOSABI_OPENVMS = 13, // OpenVMS - ELFOSABI_NSK = 14, // Hewlett-Packard Non-Stop Kernel - ELFOSABI_AROS = 15, // AROS - ELFOSABI_FENIXOS = 16, // FenixOS - ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI - ELFOSABI_C6000_ELFABI = 64, // Bare-metal TMS320C6000 - ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime - ELFOSABI_C6000_LINUX = 65, // Linux TMS320C6000 - ELFOSABI_ARM = 97, // ARM - ELFOSABI_STANDALONE = 255 // Standalone (embedded) application + ELFOSABI_NONE = 0, // UNIX System V ABI + ELFOSABI_HPUX = 1, // HP-UX operating system + ELFOSABI_NETBSD = 2, // NetBSD + ELFOSABI_GNU = 3, // GNU/Linux + ELFOSABI_LINUX = 3, // Historical alias for ELFOSABI_GNU. + ELFOSABI_HURD = 4, // GNU/Hurd + ELFOSABI_SOLARIS = 6, // Solaris + ELFOSABI_AIX = 7, // AIX + ELFOSABI_IRIX = 8, // IRIX + ELFOSABI_FREEBSD = 9, // FreeBSD + ELFOSABI_TRU64 = 10, // TRU64 UNIX + ELFOSABI_MODESTO = 11, // Novell Modesto + ELFOSABI_OPENBSD = 12, // OpenBSD + ELFOSABI_OPENVMS = 13, // OpenVMS + ELFOSABI_NSK = 14, // Hewlett-Packard Non-Stop Kernel + ELFOSABI_AROS = 15, // AROS + ELFOSABI_FENIXOS = 16, // FenixOS + ELFOSABI_CLOUDABI = 17, // Nuxi CloudABI + ELFOSABI_FIRST_ARCH = 64, // First architecture-specific OS ABI + ELFOSABI_AMDGPU_HSA = 64, // AMD HSA runtime + ELFOSABI_AMDGPU_PAL = 65, // AMD PAL runtime + ELFOSABI_AMDGPU_MESA3D = 66, // AMD GCN GPUs (GFX6+) for MESA runtime + ELFOSABI_ARM = 97, // ARM + ELFOSABI_C6000_ELFABI = 64, // Bare-metal TMS320C6000 + ELFOSABI_C6000_LINUX = 65, // Linux TMS320C6000 + ELFOSABI_STANDALONE = 255, // Standalone (embedded) application + ELFOSABI_LAST_ARCH = 255 // Last Architecture-specific OS ABI }; #define ELF_RELOC(name, value) name = value, @@ -643,6 +647,15 @@ enum { #include "ELFRelocs/WebAssembly.def" }; +// AMDGPU specific e_flags. +enum : unsigned { + // AMDGPU machine architectures. + EF_AMDGPU_ARCH_NONE = 0x00000000, // None/unknown. + EF_AMDGPU_ARCH_R600 = 0x00000001, // AMD HD2XXX-HD6XXX GPUs. + EF_AMDGPU_ARCH_GCN = 0x00000002, // AMD GCN GFX6+ GPUs. + EF_AMDGPU_ARCH = 0x0000000f // EF_AMDGPU_ARCH_XXX selection mask. +}; + // ELF Relocation types for AMDGPU enum { #include "ELFRelocs/AMDGPU.def" @@ -717,6 +730,10 @@ enum : unsigned { SHT_GROUP = 17, // Section group. SHT_SYMTAB_SHNDX = 18, // Indices for SHN_XINDEX entries. SHT_LOOS = 0x60000000, // Lowest operating system-specific type. + // Android packed relocation section types. + // https://android.googlesource.com/platform/bionic/+/6f12bfece5dcc01325e0abba56a46b1bcf991c69/tools/relocation_packer/src/elf_file.cc#37 + SHT_ANDROID_REL = 0x60000001, + SHT_ANDROID_RELA = 0x60000002, SHT_LLVM_ODRTAB = 0x6fff4c00, // LLVM ODR table. SHT_GNU_ATTRIBUTES = 0x6ffffff5, // Object attributes. SHT_GNU_HASH = 0x6ffffff6, // GNU-style hash table. @@ -1153,6 +1170,13 @@ enum { DT_LOPROC = 0x70000000, // Start of processor specific tags. DT_HIPROC = 0x7FFFFFFF, // End of processor specific tags. + // Android packed relocation section tags. 
+ // https://android.googlesource.com/platform/bionic/+/6f12bfece5dcc01325e0abba56a46b1bcf991c69/tools/relocation_packer/src/elf_file.cc#31 + DT_ANDROID_REL = 0x6000000F, + DT_ANDROID_RELSZ = 0x60000010, + DT_ANDROID_RELA = 0x60000011, + DT_ANDROID_RELASZ = 0x60000012, + DT_GNU_HASH = 0x6FFFFEF5, // Reference to the GNU hash table. DT_TLSDESC_PLT = 0x6FFFFEF6, // Location of PLT entry for TLS descriptor resolver calls. @@ -1356,6 +1380,14 @@ enum { NT_GNU_GOLD_VERSION = 4, }; +// AMDGPU specific notes. +enum { + // Note types with values between 0 and 9 (inclusive) are reserved. + NT_AMD_AMDGPU_HSA_METADATA = 10, + NT_AMD_AMDGPU_ISA = 11, + NT_AMD_AMDGPU_PAL_METADATA = 12 +}; + enum { GNU_ABI_TAG_LINUX = 0, GNU_ABI_TAG_HURD = 1, @@ -1366,6 +1398,14 @@ enum { GNU_ABI_TAG_NACL = 6, }; +// Android packed relocation group flags. +enum { + RELOCATION_GROUPED_BY_INFO_FLAG = 1, + RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG = 2, + RELOCATION_GROUPED_BY_ADDEND_FLAG = 4, + RELOCATION_GROUP_HAS_ADDEND_FLAG = 8, +}; + // Compressed section header for ELF32. struct Elf32_Chdr { Elf32_Word ch_type; diff --git a/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def b/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def index c66f88d14ec71..00b19c4161d08 100644 --- a/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def +++ b/include/llvm/BinaryFormat/ELFRelocs/AMDGPU.def @@ -14,3 +14,4 @@ ELF_RELOC(R_AMDGPU_GOTPCREL32_LO, 8) ELF_RELOC(R_AMDGPU_GOTPCREL32_HI, 9) ELF_RELOC(R_AMDGPU_REL32_LO, 10) ELF_RELOC(R_AMDGPU_REL32_HI, 11) +ELF_RELOC(R_AMDGPU_RELATIVE64, 13) diff --git a/include/llvm/BinaryFormat/MachO.h b/include/llvm/BinaryFormat/MachO.h index 3529c72acd9d6..7293ed78dfd3c 100644 --- a/include/llvm/BinaryFormat/MachO.h +++ b/include/llvm/BinaryFormat/MachO.h @@ -1373,19 +1373,19 @@ inline void swapStruct(fvmlib_command &C) { // Get/Set functions from -static inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc) { +inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc) { return (((n_desc) >> 8u) & 0xffu); } -static inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal) { +inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal) { n_desc = (((n_desc)&0x00ff) | (((ordinal)&0xff) << 8)); } -static inline uint8_t GET_COMM_ALIGN(uint16_t n_desc) { +inline uint8_t GET_COMM_ALIGN(uint16_t n_desc) { return (n_desc >> 8u) & 0x0fu; } -static inline void SET_COMM_ALIGN(uint16_t &n_desc, uint8_t align) { +inline void SET_COMM_ALIGN(uint16_t &n_desc, uint8_t align) { n_desc = ((n_desc & 0xf0ffu) | ((align & 0x0fu) << 8u)); } @@ -1449,15 +1449,13 @@ enum CPUSubTypeX86 { CPU_SUBTYPE_X86_ARCH1 = 4, CPU_SUBTYPE_X86_64_H = 8 }; -static inline int CPU_SUBTYPE_INTEL(int Family, int Model) { +inline int CPU_SUBTYPE_INTEL(int Family, int Model) { return Family | (Model << 4); } -static inline int CPU_SUBTYPE_INTEL_FAMILY(CPUSubTypeX86 ST) { +inline int CPU_SUBTYPE_INTEL_FAMILY(CPUSubTypeX86 ST) { return ((int)ST) & 0x0f; } -static inline int CPU_SUBTYPE_INTEL_MODEL(CPUSubTypeX86 ST) { - return ((int)ST) >> 4; -} +inline int CPU_SUBTYPE_INTEL_MODEL(CPUSubTypeX86 ST) { return ((int)ST) >> 4; } enum { CPU_SUBTYPE_INTEL_FAMILY_MAX = 15, CPU_SUBTYPE_INTEL_MODEL_ALL = 0 }; enum CPUSubTypeARM { diff --git a/include/llvm/BinaryFormat/Wasm.h b/include/llvm/BinaryFormat/Wasm.h index ffb453513ccea..26475c27df382 100644 --- a/include/llvm/BinaryFormat/Wasm.h +++ b/include/llvm/BinaryFormat/Wasm.h @@ -98,6 +98,8 @@ struct WasmDataSegment { WasmInitExpr Offset; ArrayRef Content; StringRef Name; + uint32_t Alignment; + uint32_t Flags; }; 
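The two fields just added give each Wasm data segment its own layout information (the single WasmLinkingData::DataAlignment is dropped just below in exchange). A sketch of populating the extended record; all values are illustrative:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/BinaryFormat/Wasm.h"

llvm::wasm::WasmDataSegment makeSegment(llvm::ArrayRef<uint8_t> Bytes) {
  llvm::wasm::WasmDataSegment Seg{};
  Seg.Content = Bytes;
  Seg.Name = ".data";
  Seg.Alignment = 4; // new field: per-segment alignment
  Seg.Flags = 0;     // new field: per-segment flags
  return Seg;
}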
struct WasmElemSegment { @@ -115,7 +117,6 @@ struct WasmRelocation { struct WasmLinkingData { uint32_t DataSize; - uint32_t DataAlignment; }; enum : unsigned { @@ -185,7 +186,7 @@ enum : unsigned { WASM_SYMBOL_INFO = 0x2, WASM_DATA_SIZE = 0x3, WASM_DATA_ALIGNMENT = 0x4, - WASM_SEGMENT_NAMES = 0x5, + WASM_SEGMENT_INFO = 0x5, }; const unsigned WASM_SYMBOL_BINDING_MASK = 0x3; diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index 0847f7cb6d9cd..e0788e02d515a 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -43,11 +43,11 @@ class DIE; class DIEAbbrev; class DwarfDebug; class GCMetadataPrinter; +class GCStrategy; class GlobalIndirectSymbol; class GlobalObject; class GlobalValue; class GlobalVariable; -class GCStrategy; class MachineBasicBlock; class MachineConstantPoolValue; class MachineFunction; @@ -58,6 +58,7 @@ class MachineModuleInfo; class MachineOptimizationRemarkEmitter; class MCAsmInfo; class MCCFIInstruction; +struct MCCodePaddingContext; class MCContext; class MCExpr; class MCInst; @@ -76,11 +77,9 @@ class TargetMachine; class AsmPrinter : public MachineFunctionPass { public: /// Target machine description. - /// TargetMachine &TM; /// Target Asm Printer information. - /// const MCAsmInfo *MAI; /// This is the context for the output file that we are streaming. This owns @@ -103,7 +102,6 @@ class AsmPrinter : public MachineFunctionPass { /// The symbol for the current function. This is recalculated at the beginning /// of each call to runOnMachineFunction(). - /// MCSymbol *CurrentFnSym = nullptr; /// The symbol used to represent the start of the current function for the @@ -128,8 +126,8 @@ class AsmPrinter : public MachineFunctionPass { void *GCMetadataPrinters = nullptr; // Really a DenseMap. /// Emit comments in assembly output if this is true. - /// bool VerboseAsm; + static char ID; /// If VerboseAsm is set, a pointer to the loop info for this function. @@ -149,6 +147,7 @@ class AsmPrinter : public MachineFunctionPass { TimerDescription(TimerDescription), TimerGroupName(TimerGroupName), TimerGroupDescription(TimerGroupDescription) {} }; + /// A vector of all debug/EH info emitters we should use. This vector /// maintains ownership of the emitters. SmallVector Handlers; @@ -187,11 +186,9 @@ class AsmPrinter : public MachineFunctionPass { bool isPositionIndependent() const; /// Return true if assembly output should contain comments. - /// bool isVerbose() const { return VerboseAsm; } /// Return a unique ID for the current function. - /// unsigned getFunctionNumber() const; MCSymbol *getFunctionBegin() const { return CurrentFnBegin; } @@ -266,7 +263,6 @@ class AsmPrinter : public MachineFunctionPass { //===------------------------------------------------------------------===// /// Record analysis usage. - /// void getAnalysisUsage(AnalysisUsage &AU) const override; /// Set up the AsmPrinter when we are working on a new module. If your pass @@ -311,12 +307,10 @@ class AsmPrinter : public MachineFunctionPass { /// Print to the current output stream assembly representations of the /// constants in the constant pool MCP. This is used to print out constants /// which have been "spilled to memory" by the code generator. - /// virtual void EmitConstantPool(); /// Print assembly representations of the jump tables used by the current /// function to the current output stream. - /// virtual void EmitJumpTableInfo(); /// Emit the specified global variable to the .s file. 
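One practical consequence of the AsmPrinter hunk above: EmitBasicBlockEnd is no longer an empty inline stub (presumably so the base class can drive the new MCCodePaddingContext setup), so targets overriding it should chain to the base implementation. A hypothetical target sketch:

#include "llvm/CodeGen/AsmPrinter.h"

class MyTargetAsmPrinter : public llvm::AsmPrinter {
public:
  void EmitBasicBlockEnd(const llvm::MachineBasicBlock &MBB) override {
    // Run the (now non-trivial) base bookkeeping first...
    llvm::AsmPrinter::EmitBasicBlockEnd(MBB);
    // ...then any target-specific end-of-block emission.
  }
};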
@@ -331,7 +325,6 @@ class AsmPrinter : public MachineFunctionPass { /// global value is specified, and if that global has an explicit alignment /// requested, it will override the alignment request if required for /// correctness. - /// void EmitAlignment(unsigned NumBits, const GlobalObject *GO = nullptr) const; /// Lower the specified LLVM Constant to an MCExpr. @@ -385,7 +378,7 @@ class AsmPrinter : public MachineFunctionPass { virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const; /// Targets can override this to emit stuff at the end of a basic block. - virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB) {} + virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB); /// Targets should implement this to emit instructions. virtual void EmitInstruction(const MachineInstr *) { @@ -449,15 +442,12 @@ class AsmPrinter : public MachineFunctionPass { void printOffset(int64_t Offset, raw_ostream &OS) const; /// Emit a byte directive and value. - /// void EmitInt8(int Value) const; /// Emit a short directive and value. - /// void EmitInt16(int Value) const; /// Emit a long directive and value. - /// void EmitInt32(int Value) const; /// Emit something like ".long Hi-Lo" where the size in bytes of the directive @@ -632,10 +622,13 @@ class AsmPrinter : public MachineFunctionPass { void EmitModuleIdents(Module &M); void EmitXXStructorList(const DataLayout &DL, const Constant *List, bool isCtor); + GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C); /// Emit GlobalAlias or GlobalIFunc. void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol& GIS); + void setupCodePaddingContext(const MachineBasicBlock &MBB, + MCCodePaddingContext &Context) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 14dfc0886271d..0334ed9eacbb0 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -6,25 +6,63 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file provides a helper that implements much of the TTI interface in /// terms of the target-independent code generator and TargetLowering /// interfaces. 
-/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_BASICTTIIMPL_H #define LLVM_CODEGEN_BASICTTIIMPL_H +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include namespace llvm { +class Function; +class GlobalValue; +class LLVMContext; +class ScalarEvolution; +class SCEV; +class TargetMachine; + extern cl::opt PartialUnrollingThreshold; /// \brief Base class which can be used to help build a TTI implementation. @@ -39,8 +77,8 @@ extern cl::opt PartialUnrollingThreshold; template class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { private: - typedef TargetTransformInfoImplCRTPBase BaseT; - typedef TargetTransformInfo TTI; + using BaseT = TargetTransformInfoImplCRTPBase; + using TTI = TargetTransformInfo; /// Estimate a cost of shuffle as a sequence of extract and insert /// operations. @@ -231,7 +269,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (N < 2 || N < TLI->getMinimumJumpTableEntries()) return N; uint64_t Range = - (MaxCaseVal - MinCaseVal).getLimitedValue(UINT64_MAX - 1) + 1; + (MaxCaseVal - MinCaseVal) + .getLimitedValue(std::numeric_limits::max() - 1) + 1; // Check whether a range of clusters is dense enough for a jump table if (TLI->isSuitableForJumpTable(&SI, N, Range)) { JumpTableSize = Range; @@ -268,17 +307,15 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { const TargetLoweringBase *TLI = getTLI(); switch (Opcode) { default: break; - case Instruction::Trunc: { + case Instruction::Trunc: if (TLI->isTruncateFree(OpTy, Ty)) return TargetTransformInfo::TCC_Free; return TargetTransformInfo::TCC_Basic; - } - case Instruction::ZExt: { + case Instruction::ZExt: if (TLI->isZExtFree(OpTy, Ty)) return TargetTransformInfo::TCC_Free; return TargetTransformInfo::TCC_Basic; } - } return BaseT::getOperationCost(Opcode, Ty, OpTy); } @@ -397,8 +434,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (A->getType()->isVectorTy()) { VecTy = A->getType(); // If A is a vector operand, VF should be 1 or correspond to A. 
- assert ((VF == 1 || VF == VecTy->getVectorNumElements()) && - "Vector argument does not match VF"); + assert((VF == 1 || VF == VecTy->getVectorNumElements()) && + "Vector argument does not match VF"); } else VecTy = VectorType::get(A->getType(), VF); @@ -411,7 +448,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } unsigned getScalarizationOverhead(Type *VecTy, ArrayRef Args) { - assert (VecTy->isVectorTy()); + assert(VecTy->isVectorTy()); unsigned Cost = 0; @@ -534,7 +571,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // Handle scalar conversions. if (!Src->isVectorTy() && !Dst->isVectorTy()) { - // Scalar bitcasts are usually free. if (Opcode == Instruction::BitCast) return 0; @@ -550,7 +586,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // Check vector-to-vector casts. if (Dst->isVectorTy() && Src->isVectorTy()) { - // If the cast is between same-sized registers, then the check is simple. if (SrcLT.first == DstLT.first && SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { @@ -746,7 +781,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // We only scale the cost of loads since interleaved store groups aren't // allowed to have gaps. if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { - // The number of loads of a legal type it will take to represent a load // of the unlegalized vector type. unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); @@ -824,7 +858,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { ArrayRef Args, FastMathFlags FMF, unsigned VF = 1) { unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1); - assert ((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); + assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); switch (IID) { default: { @@ -832,7 +866,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { SmallVector Types; for (Value *Op : Args) { Type *OpTy = Op->getType(); - assert (VF == 1 || !OpTy->isVectorTy()); + assert(VF == 1 || !OpTy->isVectorTy()); Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF)); } @@ -842,7 +876,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // Compute the scalarization overhead based on Args for a vector // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while // CostModel will pass a vector RetTy and VF is 1. - unsigned ScalarizationCost = UINT_MAX; + unsigned ScalarizationCost = std::numeric_limits::max(); if (RetVF > 1 || VF > 1) { ScalarizationCost = 0; if (!RetTy->isVoidTy()) @@ -854,7 +888,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { getIntrinsicInstrCost(IID, RetTy, Types, FMF, ScalarizationCost); } case Intrinsic::masked_scatter: { - assert (VF == 1 && "Can't vectorize types here."); + assert(VF == 1 && "Can't vectorize types here."); Value *Mask = Args[3]; bool VarMask = !isa(Mask); unsigned Alignment = cast(Args[2])->getZExtValue(); @@ -865,7 +899,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { Alignment); } case Intrinsic::masked_gather: { - assert (VF == 1 && "Can't vectorize types here."); + assert(VF == 1 && "Can't vectorize types here."); Value *Mask = Args[2]; bool VarMask = !isa(Mask); unsigned Alignment = cast(Args[1])->getZExtValue(); @@ -878,11 +912,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } /// Get intrinsic cost based on argument types. 
- /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the - /// arguments and the return value will be computed based on types. - unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Tys, FastMathFlags FMF, - unsigned ScalarizationCostPassed = UINT_MAX) { + /// If ScalarizationCostPassed is std::numeric_limits::max(), the + /// cost of scalarizing the arguments and the return value will be computed + /// based on types. + unsigned getIntrinsicInstrCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = std::numeric_limits::max()) { SmallVector ISDs; unsigned SingleCallCost = 10; // Library call cost. Make it expensive. switch (IID) { @@ -892,7 +927,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned ScalarCalls = 1; Type *ScalarRetTy = RetTy; if (RetTy->isVectorTy()) { - if (ScalarizationCostPassed == UINT_MAX) + if (ScalarizationCostPassed == std::numeric_limits::max()) ScalarizationCost = getScalarizationOverhead(RetTy, true, false); ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); ScalarRetTy = RetTy->getScalarType(); @@ -901,7 +936,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { Type *Ty = Tys[i]; if (Ty->isVectorTy()) { - if (ScalarizationCostPassed == UINT_MAX) + if (ScalarizationCostPassed == std::numeric_limits::max()) ScalarizationCost += getScalarizationOverhead(Ty, false, true); ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements()); Ty = Ty->getScalarType(); @@ -1050,8 +1085,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { // this will emit a costly libcall, adding call overhead and spills. Make it // very expensive. if (RetTy->isVectorTy()) { - unsigned ScalarizationCost = ((ScalarizationCostPassed != UINT_MAX) ? - ScalarizationCostPassed : getScalarizationOverhead(RetTy, true, false)); + unsigned ScalarizationCost = + ((ScalarizationCostPassed != std::numeric_limits::max()) + ? ScalarizationCostPassed + : getScalarizationOverhead(RetTy, true, false)); unsigned ScalarCalls = RetTy->getVectorNumElements(); SmallVector ScalarTys; for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { @@ -1064,7 +1101,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { IID, RetTy->getScalarType(), ScalarTys, FMF); for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { if (Tys[i]->isVectorTy()) { - if (ScalarizationCostPassed == UINT_MAX) + if (ScalarizationCostPassed == std::numeric_limits::max()) ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements()); } @@ -1241,7 +1278,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { /// \brief Concrete BasicTTIImpl that can be used if no further customization /// is needed. 
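/// It is constructed from a TargetMachine and a Function (see the explicit
/// constructor below) and holds the subtarget info it queries.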
class BasicTTIImpl : public BasicTTIImplBase { - typedef BasicTTIImplBase BaseT; + using BaseT = BasicTTIImplBase; + friend class BasicTTIImplBase; const TargetSubtargetInfo *ST; @@ -1254,6 +1292,6 @@ class BasicTTIImpl : public BasicTTIImplBase { explicit BasicTTIImpl(const TargetMachine *ST, const Function &F); }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_BASICTTIIMPL_H diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h index 17c9415a81cbd..d9e8206408a78 100644 --- a/include/llvm/CodeGen/CalcSpillWeights.h +++ b/include/llvm/CodeGen/CalcSpillWeights.h @@ -1,4 +1,4 @@ -//===---------------- lib/CodeGen/CalcSpillWeights.h ------------*- C++ -*-===// +//===- lib/CodeGen/CalcSpillWeights.h ---------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H #define LLVM_CODEGEN_CALCSPILLWEIGHTS_H @@ -16,11 +15,12 @@ namespace llvm { - class LiveInterval; - class LiveIntervals; - class MachineBlockFrequencyInfo; - class MachineLoopInfo; - class VirtRegMap; +class LiveInterval; +class LiveIntervals; +class MachineBlockFrequencyInfo; +class MachineFunction; +class MachineLoopInfo; +class VirtRegMap; /// \brief Normalize the spill weight of a live interval /// @@ -32,7 +32,6 @@ namespace llvm { /// per function call. Derived from block frequencies. /// @param Size Size of live interval as returned by getSize() /// @param NumInstr Number of instructions using this live interval - /// static inline float normalizeSpillWeight(float UseDefFreq, unsigned Size, unsigned NumInstr) { // The constant 25 instructions is added to avoid depending too much on @@ -47,7 +46,7 @@ namespace llvm { /// spill weight and allocation hint. class VirtRegAuxInfo { public: - typedef float (*NormalizingFn)(float, unsigned, unsigned); + using NormalizingFn = float (*)(float, unsigned, unsigned); private: MachineFunction &MF; @@ -67,6 +66,32 @@ namespace llvm { /// \brief (re)compute li's spill weight and allocation hint. void calculateSpillWeightAndHint(LiveInterval &li); + + /// \brief Compute future expected spill weight of a split artifact of li + /// that will span between start and end slot indexes. + /// \param li The live interval to be split. + /// \param start The expected beginning of the split artifact. Instructions + /// before start will not affect the weight. + /// \param end The expected end of the split artifact. Instructions + /// after end will not affect the weight. + /// \return The expected spill weight of the split artifact. Returns + /// negative weight for unspillable li. + float futureWeight(LiveInterval &li, SlotIndex start, SlotIndex end); + + /// \brief Helper function for weight calculations. + /// (Re)compute li's spill weight and allocation hint, or, for non-null + /// start and end, compute future expected spill weight of a split + /// artifact of li that will span between start and end slot indexes. + /// \param li The live interval for which to compute the weight. + /// \param start The expected beginning of the split artifact. Instructions + /// before start will not affect the weight. Relevant for + /// weight calculation of future split artifact. + /// \param end The expected end of the split artifact. Instructions + /// after end will not affect the weight. Relevant for + /// weight calculation of future split artifact. + /// \return The spill weight.
Returns negative weight for unspillable li. + float weightCalcHelper(LiveInterval &li, SlotIndex *start = nullptr, + SlotIndex *end = nullptr); }; /// \brief Compute spill weights and allocation hints for all virtual register @@ -77,6 +102,7 @@ namespace llvm { const MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo::NormalizingFn norm = normalizeSpillWeight); -} + +} // end namespace llvm #endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 50e464ebb9b80..6d65b87854050 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -1,4 +1,4 @@ -//===-- llvm/CallingConvLower.h - Calling Conventions -----------*- C++ -*-===// +//===- llvm/CallingConvLower.h - Calling Conventions ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -23,6 +23,7 @@ #include "llvm/Target/TargetCallingConv.h" namespace llvm { + class CCState; class MVT; class TargetMachine; @@ -503,7 +504,7 @@ class CCState { } // Get list of pending assignments - SmallVectorImpl &getPendingLocs() { + SmallVectorImpl &getPendingLocs() { return PendingLocs; } @@ -564,8 +565,6 @@ class CCState { void MarkAllocated(unsigned Reg); }; - - } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_CALLINGCONVLOWER_H diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index 77c37ac7abeae..d3aabe22f2165 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -208,6 +208,13 @@ class VLIWPacketizerList { // Add a DAG mutation to be done before the packetization begins. void addMutation(std::unique_ptr Mutation); + + bool alias(const MachineInstr &MI1, const MachineInstr &MI2, + bool UseTBAA = true) const; + +private: + bool alias(const MachineMemOperand &Op1, const MachineMemOperand &Op2, + bool UseTBAA = true) const; }; } // end namespace llvm diff --git a/include/llvm/CodeGen/FaultMaps.h b/include/llvm/CodeGen/FaultMaps.h index 98ff526dfe946..55e25c9823b17 100644 --- a/include/llvm/CodeGen/FaultMaps.h +++ b/include/llvm/CodeGen/FaultMaps.h @@ -39,6 +39,9 @@ class FaultMaps { void recordFaultingOp(FaultKind FaultTy, const MCSymbol *HandlerLabel); void serializeToFaultMapSection(); + void reset() { + FunctionInfos.clear(); + } private: static const char *WFMP; diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h index 4ab6ceb35a7a8..0a3f133d24181 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -17,6 +17,7 @@ #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Optional.h" #include #include #include @@ -26,6 +27,8 @@ namespace llvm { +class APInt; +class APFloat; class LLT; class MachineInstr; class MachineInstrBuilder; @@ -96,13 +99,29 @@ enum { /// Check an immediate predicate on the specified instruction /// - InsnID - Instruction ID /// - The predicate to test - GIM_CheckImmPredicate, + GIM_CheckI64ImmPredicate, + /// Check an immediate predicate on the specified instruction via an APInt. + /// - InsnID - Instruction ID + /// - The predicate to test + GIM_CheckAPIntImmPredicate, + /// Check a floating point immediate predicate on the specified instruction. + /// - InsnID - Instruction ID + /// - The predicate to test + GIM_CheckAPFloatImmPredicate, + /// Check a memory operation is non-atomic. 
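+ /// Rejects the match when any memory operand of the load/store has an
+ /// atomic ordering (see the interpreter case below).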
+ /// - InsnID - Instruction ID + GIM_CheckNonAtomic, /// Check the type for the specified operand /// - InsnID - Instruction ID /// - OpIdx - Operand index /// - Expected type GIM_CheckType, + /// Check the type of a pointer to any address space. + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - SizeInBits - The size of the pointer value in bits. + GIM_CheckPointerToAny, /// Check the register bank for the specified operand /// - InsnID - Instruction ID /// - OpIdx - Operand index @@ -140,6 +159,13 @@ enum { /// - InsnID - Instruction ID GIM_CheckIsSafeToFold, + /// Check the specified operands are identical. + /// - InsnID - Instruction ID + /// - OpIdx - Operand index + /// - OtherInsnID - Other instruction ID + /// - OtherOpIdx - Other operand index + GIM_CheckIsSameOperand, + /// Fail the current try-block, or completely fail to match if there is no /// current try-block. GIM_Reject, @@ -161,6 +187,13 @@ enum { /// - OldInsnID - Instruction ID to copy from /// - OpIdx - The operand to copy GIR_Copy, + /// Copy an operand to the specified instruction or add a zero register if the + /// operand is a zero immediate. + /// - NewInsnID - Instruction ID to modify + /// - OldInsnID - Instruction ID to copy from + /// - OpIdx - The operand to copy + /// - ZeroReg - The zero register to use + GIR_CopyOrAddZeroReg, /// Copy an operand to the specified instruction /// - NewInsnID - Instruction ID to modify /// - OldInsnID - Instruction ID to copy from @@ -187,6 +220,11 @@ enum { /// - InsnID - Instruction ID to modify /// - RendererID - The renderer to call GIR_ComplexRenderer, + /// Render sub-operands of complex operands to the specified instruction + /// - InsnID - Instruction ID to modify + /// - RendererID - The renderer to call + /// - RenderOpID - The suboperand to render. + GIR_ComplexSubOperandRenderer, /// Render a G_CONSTANT operator as a sign-extended immediate. /// - NewInsnID - Instruction ID to modify @@ -226,7 +264,9 @@ enum { /// Provides the logic to select generic machine instructions. 
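/// A target provides a subclass that implements select(), replacing generic
/// (pre-ISel) instructions with target-specific ones.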
class InstructionSelector { public: - typedef bool(*ImmediatePredicateFn)(int64_t); + using I64ImmediatePredicateFn = bool (*)(int64_t); + using APIntImmediatePredicateFn = bool (*)(const APInt &); + using APFloatImmediatePredicateFn = bool (*)(const APFloat &); virtual ~InstructionSelector() = default; @@ -240,16 +280,16 @@ class InstructionSelector { /// if returns true: /// for I in all mutated/inserted instructions: /// !isPreISelGenericOpcode(I.getOpcode()) - /// virtual bool select(MachineInstr &I) const = 0; protected: - using ComplexRendererFn = std::function; + using ComplexRendererFns = + Optional, 4>>; using RecordedMIVector = SmallVector; using NewMIVector = SmallVector; struct MatcherState { - std::vector Renderers; + std::vector Renderers; RecordedMIVector MIs; MatcherState(unsigned MaxRenderers); @@ -260,8 +300,10 @@ class InstructionSelector { struct MatcherInfoTy { const LLT *TypeObjects; const PredicateBitset *FeatureBitsets; - const ImmediatePredicateFn *ImmPredicateFns; - const std::vector ComplexPredicates; + const I64ImmediatePredicateFn *I64ImmPredicateFns; + const APIntImmediatePredicateFn *APIntImmPredicateFns; + const APFloatImmediatePredicateFn *APFloatImmPredicateFns; + const ComplexMatcherMemFn *ComplexPredicates; }; protected: @@ -306,6 +348,12 @@ class InstructionSelector { bool isOperandImmEqual(const MachineOperand &MO, int64_t Value, const MachineRegisterInfo &MRI) const; + /// Return true if the specified operand is a G_GEP with a G_CONSTANT on the + /// right-hand side. GlobalISel's separation of pointer and integer types + /// means that we don't need to worry about G_OR with equivalent semantics. + bool isBaseWithConstantOffset(const MachineOperand &Root, + const MachineRegisterInfo &MRI) const; + bool isObviouslySafeToFold(MachineInstr &MI) const; }; diff --git a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h index 72de9815eb5c7..29cc90364018e 100644 --- a/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h +++ b/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h @@ -1,4 +1,4 @@ -//==-- llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h ---------*- C++ -*-==// +//===- llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,11 +16,29 @@ #ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H #define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include + namespace llvm { /// GlobalISel PatFrag Predicates enum { - GIPFP_Invalid, + GIPFP_I64_Invalid = 0, + GIPFP_APInt_Invalid = 0, + GIPFP_APFloat_Invalid = 0, }; template getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); - assert(Predicate > GIPFP_Invalid && "Expected a valid predicate"); + assert(Predicate > GIPFP_I64_Invalid && "Expected a valid predicate"); int64_t Value = 0; if (State.MIs[InsnID]->getOperand(1).isCImm()) Value = State.MIs[InsnID]->getOperand(1).getCImm()->getSExtValue(); @@ 
-149,11 +168,66 @@ bool InstructionSelector::executeMatchTable( else llvm_unreachable("Expected Imm or CImm operand"); - if (!MatcherInfo.ImmPredicateFns[Predicate](Value)) + if (!MatcherInfo.I64ImmPredicateFns[Predicate](Value)) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + case GIM_CheckAPIntImmPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Predicate = MatchTable[CurrentIdx++]; + DEBUG(dbgs() << CurrentIdx << ": GIM_CheckAPIntImmPredicate(MIs[" + << InsnID << "], Predicate=" << Predicate << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(State.MIs[InsnID]->getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT"); + assert(Predicate > GIPFP_APInt_Invalid && "Expected a valid predicate"); + APInt Value; + if (State.MIs[InsnID]->getOperand(1).isCImm()) + Value = State.MIs[InsnID]->getOperand(1).getCImm()->getValue(); + else + llvm_unreachable("Expected Imm or CImm operand"); + + if (!MatcherInfo.APIntImmPredicateFns[Predicate](Value)) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } + case GIM_CheckAPFloatImmPredicate: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t Predicate = MatchTable[CurrentIdx++]; + DEBUG(dbgs() << CurrentIdx << ": GIM_CheckAPFloatImmPredicate(MIs[" << InsnID + << "], Predicate=" << Predicate << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(State.MIs[InsnID]->getOpcode() == TargetOpcode::G_FCONSTANT && + "Expected G_FCONSTANT"); + assert(State.MIs[InsnID]->getOperand(1).isFPImm() && "Expected FPImm operand"); + assert(Predicate > GIPFP_APFloat_Invalid && "Expected a valid predicate"); + APFloat Value = State.MIs[InsnID]->getOperand(1).getFPImm()->getValueAPF(); + + if (!MatcherInfo.APFloatImmPredicateFns[Predicate](Value)) if (handleReject() == RejectAndGiveUp) return false; break; } + case GIM_CheckNonAtomic: { + int64_t InsnID = MatchTable[CurrentIdx++]; + DEBUG(dbgs() << CurrentIdx << ": GIM_CheckNonAtomic(MIs[" << InsnID + << "])\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert((State.MIs[InsnID]->getOpcode() == TargetOpcode::G_LOAD || + State.MIs[InsnID]->getOpcode() == TargetOpcode::G_STORE) && + "Expected G_LOAD/G_STORE"); + + if (!State.MIs[InsnID]->hasOneMemOperand()) + if (handleReject() == RejectAndGiveUp) + return false; + + for (const auto &MMO : State.MIs[InsnID]->memoperands()) + if (MMO->getOrdering() != AtomicOrdering::NotAtomic) + if (handleReject() == RejectAndGiveUp) + return false; + break; + } case GIM_CheckType: { int64_t InsnID = MatchTable[CurrentIdx++]; @@ -170,6 +244,31 @@ bool InstructionSelector::executeMatchTable( } break; } + case GIM_CheckPointerToAny: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t SizeInBits = MatchTable[CurrentIdx++]; + + DEBUG(dbgs() << CurrentIdx << ": GIM_CheckPointerToAny(MIs[" << InsnID + << "]->getOperand(" << OpIdx + << "), SizeInBits=" << SizeInBits << ")\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + + // iPTR must be looked up in the target.
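+ // (The emitted table encodes iPTR as SizeInBits == 0, since the pointer
+ // width is only known per-target.)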
+ if (SizeInBits == 0) { + MachineFunction *MF = State.MIs[InsnID]->getParent()->getParent(); + SizeInBits = MF->getDataLayout().getPointerSizeInBits(0); + } + + assert(SizeInBits != 0 && "Pointer size must be known"); + + const LLT &Ty = MRI.getType(State.MIs[InsnID]->getOperand(OpIdx).getReg()); + if (!Ty.isPointer() || Ty.getSizeInBits() != SizeInBits) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } case GIM_CheckRegBankForClass: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -186,6 +285,7 @@ bool InstructionSelector::executeMatchTable( } break; } + case GIM_CheckComplexPattern: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -197,14 +297,17 @@ bool InstructionSelector::executeMatchTable( << "), ComplexPredicateID=" << ComplexPredicateID << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); // FIXME: Use std::invoke() when it's available. - if (!(State.Renderers[RendererID] = - (ISel.*MatcherInfo.ComplexPredicates[ComplexPredicateID])( - State.MIs[InsnID]->getOperand(OpIdx)))) { + ComplexRendererFns Renderer = + (ISel.*MatcherInfo.ComplexPredicates[ComplexPredicateID])( + State.MIs[InsnID]->getOperand(OpIdx)); + if (Renderer.hasValue()) + State.Renderers[RendererID] = Renderer.getValue(); + else if (handleReject() == RejectAndGiveUp) return false; - } break; } + case GIM_CheckConstantInt: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -220,6 +323,7 @@ bool InstructionSelector::executeMatchTable( } break; } + case GIM_CheckLiteralInt: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -228,13 +332,14 @@ bool InstructionSelector::executeMatchTable( << "]->getOperand(" << OpIdx << "), Value=" << Value << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - MachineOperand &OM = State.MIs[InsnID]->getOperand(OpIdx); - if (!OM.isCImm() || !OM.getCImm()->equalsInt(Value)) { + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isCImm() || !MO.getCImm()->equalsInt(Value)) { if (handleReject() == RejectAndGiveUp) return false; } break; } + case GIM_CheckIntrinsicID: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -243,12 +348,13 @@ bool InstructionSelector::executeMatchTable( << "]->getOperand(" << OpIdx << "), Value=" << Value << ")\n"); assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); - MachineOperand &OM = State.MIs[InsnID]->getOperand(OpIdx); - if (!OM.isIntrinsicID() || OM.getIntrinsicID() != Value) + MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx); + if (!MO.isIntrinsicID() || MO.getIntrinsicID() != Value) if (handleReject() == RejectAndGiveUp) return false; break; } + case GIM_CheckIsMBB: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t OpIdx = MatchTable[CurrentIdx++]; @@ -261,6 +367,7 @@ bool InstructionSelector::executeMatchTable( } break; } + case GIM_CheckIsSafeToFold: { int64_t InsnID = MatchTable[CurrentIdx++]; DEBUG(dbgs() << CurrentIdx << ": GIM_CheckIsSafeToFold(MIs[" << InsnID @@ -272,6 +379,23 @@ bool InstructionSelector::executeMatchTable( } break; } + case GIM_CheckIsSameOperand: { + int64_t InsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t OtherInsnID = MatchTable[CurrentIdx++]; + int64_t OtherOpIdx = MatchTable[CurrentIdx++]; + DEBUG(dbgs() << CurrentIdx << ": GIM_CheckIsSameOperand(MIs[" << InsnID + << 
"][" << OpIdx << "], MIs[" << OtherInsnID << "][" + << OtherOpIdx << "])\n"); + assert(State.MIs[InsnID] != nullptr && "Used insn before defined"); + assert(State.MIs[OtherInsnID] != nullptr && "Used insn before defined"); + if (!State.MIs[InsnID]->getOperand(OpIdx).isIdenticalTo( + State.MIs[OtherInsnID]->getOperand(OtherOpIdx))) { + if (handleReject() == RejectAndGiveUp) + return false; + } + break; + } case GIM_Reject: DEBUG(dbgs() << CurrentIdx << ": GIM_Reject"); if (handleReject() == RejectAndGiveUp) @@ -284,14 +408,14 @@ bool InstructionSelector::executeMatchTable( int64_t NewOpcode = MatchTable[CurrentIdx++]; assert((size_t)NewInsnID == OutMIs.size() && "Expected to store MIs in order"); - OutMIs.push_back( - MachineInstrBuilder(*State.MIs[OldInsnID]->getParent()->getParent(), - State.MIs[OldInsnID])); + OutMIs.push_back(MachineInstrBuilder(*State.MIs[OldInsnID]->getMF(), + State.MIs[OldInsnID])); OutMIs[NewInsnID]->setDesc(TII.get(NewOpcode)); DEBUG(dbgs() << CurrentIdx << ": GIR_MutateOpcode(OutMIs[" << NewInsnID << "], MIs[" << OldInsnID << "], " << NewOpcode << ")\n"); break; } + case GIR_BuildMI: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t Opcode = MatchTable[CurrentIdx++]; @@ -315,6 +439,24 @@ bool InstructionSelector::executeMatchTable( << "], MIs[" << OldInsnID << "], " << OpIdx << ")\n"); break; } + + case GIR_CopyOrAddZeroReg: { + int64_t NewInsnID = MatchTable[CurrentIdx++]; + int64_t OldInsnID = MatchTable[CurrentIdx++]; + int64_t OpIdx = MatchTable[CurrentIdx++]; + int64_t ZeroReg = MatchTable[CurrentIdx++]; + assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction"); + MachineOperand &MO = State.MIs[OldInsnID]->getOperand(OpIdx); + if (isOperandImmEqual(MO, 0, MRI)) + OutMIs[NewInsnID].addReg(ZeroReg); + else + OutMIs[NewInsnID].add(MO); + DEBUG(dbgs() << CurrentIdx << ": GIR_CopyOrAddZeroReg(OutMIs[" + << NewInsnID << "], MIs[" << OldInsnID << "], " << OpIdx + << ", " << ZeroReg << ")\n"); + break; + } + case GIR_CopySubReg: { int64_t NewInsnID = MatchTable[CurrentIdx++]; int64_t OldInsnID = MatchTable[CurrentIdx++]; @@ -328,6 +470,7 @@ bool InstructionSelector::executeMatchTable( << SubRegIdx << ")\n"); break; } + case GIR_AddImplicitDef: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t RegNum = MatchTable[CurrentIdx++]; @@ -337,6 +480,7 @@ bool InstructionSelector::executeMatchTable( << "], " << RegNum << ")\n"); break; } + case GIR_AddImplicitUse: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t RegNum = MatchTable[CurrentIdx++]; @@ -346,6 +490,7 @@ bool InstructionSelector::executeMatchTable( << "], " << RegNum << ")\n"); break; } + case GIR_AddRegister: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t RegNum = MatchTable[CurrentIdx++]; @@ -355,6 +500,7 @@ bool InstructionSelector::executeMatchTable( << "], " << RegNum << ")\n"); break; } + case GIR_AddImm: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t Imm = MatchTable[CurrentIdx++]; @@ -364,15 +510,28 @@ bool InstructionSelector::executeMatchTable( << Imm << ")\n"); break; } + case GIR_ComplexRenderer: { int64_t InsnID = MatchTable[CurrentIdx++]; int64_t RendererID = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); - State.Renderers[RendererID](OutMIs[InsnID]); + for (const auto &RenderOpFn : State.Renderers[RendererID]) + RenderOpFn(OutMIs[InsnID]); DEBUG(dbgs() << CurrentIdx << ": GIR_ComplexRenderer(OutMIs[" << InsnID << "], " << RendererID << ")\n"); break; } + case GIR_ComplexSubOperandRenderer: { + int64_t InsnID = 
MatchTable[CurrentIdx++]; + int64_t RendererID = MatchTable[CurrentIdx++]; + int64_t RenderOpID = MatchTable[CurrentIdx++]; + assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); + State.Renderers[RendererID][RenderOpID](OutMIs[InsnID]); + DEBUG(dbgs() << CurrentIdx << ": GIR_ComplexSubOperandRenderer(OutMIs[" + << InsnID << "], " << RendererID << ", " << RenderOpID + << ")\n"); + break; + } case GIR_CopyConstantAsSImm: { int64_t NewInsnID = MatchTable[CurrentIdx++]; @@ -402,6 +561,7 @@ bool InstructionSelector::executeMatchTable( << "], " << OpIdx << ", " << RCEnum << ")\n"); break; } + case GIR_ConstrainSelectedInstOperands: { int64_t InsnID = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); @@ -412,6 +572,7 @@ bool InstructionSelector::executeMatchTable( << "])\n"); break; } + case GIR_MergeMemOperands: { int64_t InsnID = MatchTable[CurrentIdx++]; assert(OutMIs[InsnID] && "Attempted to add to undefined instruction"); @@ -428,6 +589,7 @@ bool InstructionSelector::executeMatchTable( DEBUG(dbgs() << ")\n"); break; } + case GIR_EraseFromParent: { int64_t InsnID = MatchTable[CurrentIdx++]; assert(State.MIs[InsnID] && diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerCombiner.h b/include/llvm/CodeGen/GlobalISel/LegalizerCombiner.h index 607e86d722668..c22b31302e552 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerCombiner.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerCombiner.h @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -24,10 +25,12 @@ namespace llvm { class LegalizerCombiner { MachineIRBuilder &Builder; MachineRegisterInfo &MRI; + const LegalizerInfo &LI; public: - LegalizerCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI) - : Builder(B), MRI(MRI) {} + LegalizerCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI, + const LegalizerInfo &LI) + : Builder(B), MRI(MRI), LI(LI) {} bool tryCombineAnyExt(MachineInstr &MI, SmallVectorImpl &DeadInsts) { @@ -41,9 +44,7 @@ class LegalizerCombiner { Builder.setInstr(MI); // We get a copy/trunc/extend depending on the sizes Builder.buildAnyExtOrTrunc(DstReg, SrcReg); - MI.eraseFromParent(); - if (MRI.use_empty(DefMI->getOperand(0).getReg())) - DeadInsts.push_back(DefMI); + markInstAndDefDead(MI, *DefMI, DeadInsts); return true; } return false; @@ -56,21 +57,22 @@ class LegalizerCombiner { return false; MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg()); if (DefMI->getOpcode() == TargetOpcode::G_TRUNC) { + unsigned DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (isInstUnsupported(TargetOpcode::G_AND, DstTy) || + isInstUnsupported(TargetOpcode::G_CONSTANT, DstTy)) + return false; DEBUG(dbgs() << ".. 
Combine MI: " << MI;); Builder.setInstr(MI); - unsigned DstReg = MI.getOperand(0).getReg(); unsigned ZExtSrc = MI.getOperand(1).getReg(); LLT ZExtSrcTy = MRI.getType(ZExtSrc); - LLT DstTy = MRI.getType(DstReg); APInt Mask = APInt::getAllOnesValue(ZExtSrcTy.getSizeInBits()); auto MaskCstMIB = Builder.buildConstant(DstTy, Mask.getZExtValue()); unsigned TruncSrc = DefMI->getOperand(1).getReg(); // We get a copy/trunc/extend depending on the sizes auto SrcCopyOrTrunc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc); Builder.buildAnd(DstReg, SrcCopyOrTrunc, MaskCstMIB); - MI.eraseFromParent(); - if (MRI.use_empty(DefMI->getOperand(0).getReg())) - DeadInsts.push_back(DefMI); + markInstAndDefDead(MI, *DefMI, DeadInsts); return true; } return false; @@ -83,10 +85,13 @@ class LegalizerCombiner { return false; MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg()); if (DefMI->getOpcode() == TargetOpcode::G_TRUNC) { - DEBUG(dbgs() << ".. Combine MI: " << MI;); - Builder.setInstr(MI); unsigned DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); + if (isInstUnsupported(TargetOpcode::G_SHL, DstTy) || + isInstUnsupported(TargetOpcode::G_ASHR, DstTy)) + return false; + DEBUG(dbgs() << ".. Combine MI: " << MI;); + Builder.setInstr(MI); unsigned SExtSrc = MI.getOperand(1).getReg(); LLT SExtSrcTy = MRI.getType(SExtSrc); unsigned SizeDiff = DstTy.getSizeInBits() - SExtSrcTy.getSizeInBits(); @@ -97,9 +102,7 @@ class LegalizerCombiner { auto ShlMIB = Builder.buildInstr(TargetOpcode::G_SHL, DstTy, SrcCopyExtOrTrunc, SizeDiffMIB); Builder.buildInstr(TargetOpcode::G_ASHR, DstReg, ShlMIB, SizeDiffMIB); - MI.eraseFromParent(); - if (MRI.use_empty(DefMI->getOperand(0).getReg())) - DeadInsts.push_back(DefMI); + markInstAndDefDead(MI, *DefMI, DeadInsts); return true; } return false; @@ -175,17 +178,14 @@ class LegalizerCombiner { MergeI->getOperand(Idx + 1).getReg()); } - MI.eraseFromParent(); - if (MRI.use_empty(MergeI->getOperand(0).getReg())) - DeadInsts.push_back(MergeI); + markInstAndDefDead(MI, *MergeI, DeadInsts); return true; } /// Try to combine away MI. /// Returns true if it combined away the MI. - /// Caller should not rely in MI existing as it may be deleted. /// Adds instructions that are dead as a result of the combine - // into DeadInsts + /// into DeadInsts, which can include MI. bool tryCombineInstruction(MachineInstr &MI, SmallVectorImpl &DeadInsts) { switch (MI.getOpcode()) { @@ -201,6 +201,23 @@ class LegalizerCombiner { return tryCombineMerges(MI, DeadInsts); } } + +private: + /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be + /// dead due to MI being killed, then mark DefMI as dead too. + void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI, + SmallVectorImpl &DeadInsts) { + DeadInsts.push_back(&MI); + if (MRI.hasOneUse(DefMI.getOperand(0).getReg())) + DeadInsts.push_back(&DefMI); + } + /// Checks if the target legalizer info has specified anything about the + /// instruction, or if unsupported. 
+ bool isInstUnsupported(unsigned Opcode, const LLT &DstTy) const { + auto Action = LI.getAction({Opcode, 0, DstTy}); + return Action.first == LegalizerInfo::LegalizeAction::Unsupported || + Action.first == LegalizerInfo::LegalizeAction::NotFound; + } }; } // namespace llvm diff --git a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 1fd45b52e3ac7..8bd8a9dcd0e24 100644 --- a/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -89,6 +89,9 @@ class LegalizerHelper { /// functions MachineIRBuilder MIRBuilder; + /// Expose LegalizerInfo so the clients can re-use. + const LegalizerInfo &getLegalizerInfo() const { return LI; } + private: /// Helper function to split a wide generic register into bitwise blocks with diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 5534c39a1eafb..5fe3137d6d704 100644 --- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -93,8 +93,7 @@ class MachineIRBuilder { /// Some constructors for easy use. MachineIRBuilder() = default; MachineIRBuilder(MachineFunction &MF) { setMF(MF); } - MachineIRBuilder(MachineInstr &MI) - : MachineIRBuilder(*MI.getParent()->getParent()) { + MachineIRBuilder(MachineInstr &MI) : MachineIRBuilder(*MI.getMF()) { setInstr(MI); } @@ -543,6 +542,10 @@ class MachineIRBuilder { /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildCopy(unsigned Res, unsigned Op); + template + MachineInstrBuilder buildCopy(DstType &&Res, SrcType &&Src) { + return buildCopy(getDestFromArg(Res), getRegFromArg(Src)); + } /// Build and insert `Res = G_LOAD Addr, MMO`. /// @@ -660,6 +663,10 @@ class MachineIRBuilder { /// /// \return The newly created instruction. MachineInstrBuilder buildTrunc(unsigned Res, unsigned Op); + template + MachineInstrBuilder buildTrunc(DstType &&Res, SrcType &&Src) { + return buildTrunc(getDestFromArg(Res), getRegFromArg(Src)); + } /// Build and insert a \p Res = G_ICMP \p Pred, \p Op0, \p Op1 /// diff --git a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h index 60905c7ec226d..02868b220984d 100644 --- a/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h +++ b/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h @@ -407,6 +407,10 @@ class RegisterBankInfo { mutable DenseMap> MapOfInstructionMappings; + /// Getting the minimal register class of a physreg is expensive. + /// Cache this information as we get it. + mutable DenseMap PhysRegMinimalRCs; + /// Create a RegisterBankInfo that can accommodate up to \p NumRegBanks /// RegisterBank instances. RegisterBankInfo(RegisterBank **RegBanks, unsigned NumRegBanks); @@ -427,6 +431,11 @@ class RegisterBankInfo { return *RegBanks[ID]; } + /// Get the MinimalPhysRegClass for Reg. + /// \pre Reg is a physical register. + const TargetRegisterClass & + getMinimalPhysRegClass(unsigned Reg, const TargetRegisterInfo &TRI) const; + /// Try to get the mapping of \p MI. /// See getInstrMapping for more details on what a mapping represents. /// @@ -699,8 +708,8 @@ class RegisterBankInfo { /// virtual register. /// /// \pre \p Reg != 0 (NoRegister). 
- static unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI); + unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI) const; /// Check that information hold by this instance make sense for the /// given \p TRI. diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index 820e883624837..fbcfeb227f1b4 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -107,6 +107,11 @@ class VirtRegMap; const MachineBlockFrequencyInfo *MBFI, const MachineInstr &Instr); + /// Calculate the spill weight to assign to a single instruction. + static float getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineBasicBlock *MBB); + LiveInterval &getInterval(unsigned Reg) { if (hasInterval(Reg)) return *VirtRegIntervals[Reg]; diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index 94578e1d2404e..a8ea1407a4e7c 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -214,6 +214,7 @@ struct MachineStackObject { unsigned Alignment = 0; uint8_t StackID = 0; StringValue CalleeSavedRegister; + bool CalleeSavedRestored = true; Optional LocalOffset; StringValue DebugVar; StringValue DebugExpr; @@ -225,6 +226,7 @@ struct MachineStackObject { Alignment == Other.Alignment && StackID == Other.StackID && CalleeSavedRegister == Other.CalleeSavedRegister && + CalleeSavedRestored == Other.CalleeSavedRestored && LocalOffset == Other.LocalOffset && DebugVar == Other.DebugVar && DebugExpr == Other.DebugExpr && DebugLoc == Other.DebugLoc; } @@ -253,6 +255,8 @@ template <> struct MappingTraits { YamlIO.mapOptional("stack-id", Object.StackID); YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored, + true); YamlIO.mapOptional("local-offset", Object.LocalOffset, Optional()); YamlIO.mapOptional("di-variable", Object.DebugVar, StringValue()); // Don't print it out when it's empty. @@ -278,13 +282,15 @@ struct FixedMachineStackObject { bool IsImmutable = false; bool IsAliased = false; StringValue CalleeSavedRegister; + bool CalleeSavedRestored = true; bool operator==(const FixedMachineStackObject &Other) const { return ID == Other.ID && Type == Other.Type && Offset == Other.Offset && Size == Other.Size && Alignment == Other.Alignment && StackID == Other.StackID && IsImmutable == Other.IsImmutable && IsAliased == Other.IsAliased && - CalleeSavedRegister == Other.CalleeSavedRegister; + CalleeSavedRegister == Other.CalleeSavedRegister && + CalleeSavedRestored == Other.CalleeSavedRestored; } }; @@ -313,6 +319,8 @@ template <> struct MappingTraits { } YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, StringValue()); // Don't print it out when it's empty. 
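+ // Defaults to true; only printed for a callee-saved register that is not
+ // restored on function return.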
+ YamlIO.mapOptional("callee-saved-restored", Object.CalleeSavedRestored, + true); } static const bool flow = true; diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index f4aa893c45dcf..7523825285a62 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -139,6 +139,17 @@ class MachineInstr const MachineBasicBlock* getParent() const { return Parent; } MachineBasicBlock* getParent() { return Parent; } + /// Return the function that contains the basic block that this instruction + /// belongs to. + /// + /// Note: this is undefined behaviour if the instruction does not have a + /// parent. + const MachineFunction *getMF() const; + MachineFunction *getMF() { + return const_cast( + static_cast(this)->getMF()); + } + /// Return the asm printer flags bitvector. uint8_t getAsmPrinterFlags() const { return AsmPrinterFlags; } diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h index 34b21ceddd434..6a87fa2fbf009 100644 --- a/include/llvm/CodeGen/MachineModuleInfoImpls.h +++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineModuleInfoImpls.h -------------------*- C++ -*-===// +//===- llvm/CodeGen/MachineModuleInfoImpls.h --------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,11 +15,12 @@ #ifndef LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H #define LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H -#include "llvm/BinaryFormat/Wasm.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/ValueTypes.h" +#include namespace llvm { + class MCSymbol; /// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation @@ -36,6 +37,7 @@ class MachineModuleInfoMachO : public MachineModuleInfoImpl { DenseMap ThreadLocalGVStubs; virtual void anchor(); // Out of line virtual method. + public: MachineModuleInfoMachO(const MachineModuleInfo &) {} @@ -64,6 +66,7 @@ class MachineModuleInfoELF : public MachineModuleInfoImpl { DenseMap GVStubs; virtual void anchor(); // Out of line virtual method. + public: MachineModuleInfoELF(const MachineModuleInfo &) {} @@ -79,4 +82,4 @@ class MachineModuleInfoELF : public MachineModuleInfoImpl { } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H diff --git a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h index 6b11c7aea4fe9..887752b6d389c 100644 --- a/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ b/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -16,7 +16,7 @@ #ifndef LLVM_CODEGEN_MACHINEOPTIMIZATIONREMARKEMITTER_H #define LLVM_CODEGEN_MACHINEOPTIMIZATIONREMARKEMITTER_H -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { @@ -164,6 +164,21 @@ class MachineOptimizationRemarkEmitter { .getDiagHandlerPtr()->isAnyRemarkEnabled(PassName)); } + /// \brief Take a lambda that returns a remark which will be emitted. Second + /// argument is only used to restrict this to functions. + template + void emit(T RemarkBuilder, decltype(RemarkBuilder()) * = nullptr) { + // Avoid building the remark unless we know there are at least *some* + // remarks enabled. We can't currently check whether remarks are requested + // for the calling pass since that requires actually building the remark. 
+ + if (MF.getFunction()->getContext().getDiagnosticsOutputFile() || + MF.getFunction()->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) { + auto R = RemarkBuilder(); + emit((DiagnosticInfoOptimizationBase &)R); + } + } + private: MachineFunction &MF; diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 74fd81c143950..e761ef2f7c33e 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -842,6 +842,10 @@ class MachineRegisterInfo { livein_iterator livein_end() const { return LiveIns.end(); } bool livein_empty() const { return LiveIns.empty(); } + ArrayRef> liveins() const { + return LiveIns; + } + bool isLiveIn(unsigned Reg) const; /// getLiveInPhysReg - If VReg is a live-in virtual register, return the diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 1377a6dd6aa7f..8e6b1570e4a37 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -411,7 +411,7 @@ namespace llvm { /// This pass performs outlining on machine instructions directly before /// printing assembly. - ModulePass *createMachineOutlinerPass(); + ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs = false); /// This pass expands the experimental reduction intrinsics into sequences of /// shuffles. diff --git a/include/llvm/CodeGen/RegisterUsageInfo.h b/include/llvm/CodeGen/RegisterUsageInfo.h index 0a04bc6a89f4d..eabadd8d784a8 100644 --- a/include/llvm/CodeGen/RegisterUsageInfo.h +++ b/include/llvm/CodeGen/RegisterUsageInfo.h @@ -20,6 +20,7 @@ #define LLVM_CODEGEN_PHYSICALREGISTERUSAGEINFO_H #include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include #include diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 61ae1c91073cb..460e58c9dea0e 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -1286,6 +1286,10 @@ class SelectionDAG { return DbgInfo->ByvalParmDbgEnd(); } + /// To be invoked on an SDNode that is slated to be erased. This + /// function mirrors \c llvm::salvageDebugInfo. + void salvageDebugInfo(SDNode &N); + void dump() const; /// Create a stack temporary, suitable for holding the specified value type. diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 02ea4eacfe8c6..001ac9811692e 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1490,11 +1490,7 @@ class ConstantFPSDNode : public SDNode { /// convenient to write "2.0" and the like. Without this function we'd /// have to duplicate its logic everywhere it's called. bool isExactlyValue(double V) const { - bool ignored; - APFloat Tmp(V); - Tmp.convert(Value->getValueAPF().getSemantics(), - APFloat::rmNearestTiesToEven, &ignored); - return isExactlyValue(Tmp); + return Value->getValueAPF().isExactlyValue(V); } bool isExactlyValue(const APFloat& V) const; diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index a7b16e7a9ed22..3a91e363f9231 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -139,7 +139,7 @@ class raw_ostream; }; /// Construct an invalid index. - SlotIndex() : lie(nullptr, 0) {} + SlotIndex() = default; // Construct a new slot index from the given one, and set the slot. 
SlotIndex(const SlotIndex &li, Slot s) : lie(li.listEntry(), unsigned(s)) { diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index d0d1e0985ccae..9bfe0891916c6 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -113,6 +113,12 @@ /* Define to 1 if you have the `pthread' library (-lpthread). */ #cmakedefine HAVE_LIBPTHREAD ${HAVE_LIBPTHREAD} +/* Define to 1 if you have the `pthread_getname_np' function. */ +#cmakedefine HAVE_PTHREAD_GETNAME_NP ${HAVE_PTHREAD_GETNAME_NP} + +/* Define to 1 if you have the `pthread_setname_np' function. */ +#cmakedefine HAVE_PTHREAD_SETNAME_NP ${HAVE_PTHREAD_SETNAME_NP} + /* Define to 1 if you have the `shell32' library (-lshell32). */ #cmakedefine HAVE_LIBSHELL32 ${HAVE_LIBSHELL32} @@ -185,6 +191,12 @@ /* Define to 1 if you have the `setenv' function. */ #cmakedefine HAVE_SETENV ${HAVE_SETENV} +/* Define to 1 if you have the `sched_getaffinity' function. */ +#cmakedefine HAVE_SCHED_GETAFFINITY ${HAVE_SCHED_GETAFFINITY} + +/* Define to 1 if you have the `CPU_COUNT' macro. */ +#cmakedefine HAVE_CPU_COUNT ${HAVE_CPU_COUNT} + /* Define to 1 if you have the `setrlimit' function. */ #cmakedefine HAVE_SETRLIMIT ${HAVE_SETRLIMIT} diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h index 08874b16ed010..1a4f510c24abe 100644 --- a/include/llvm/DebugInfo/CodeView/CodeView.h +++ b/include/llvm/DebugInfo/CodeView/CodeView.h @@ -505,55 +505,9 @@ enum class FrameCookieKind : uint8_t { // Corresponds to CV_HREG_e enum. enum class RegisterId : uint16_t { - Unknown = 0, - VFrame = 30006, - AL = 1, - CL = 2, - DL = 3, - BL = 4, - AH = 5, - CH = 6, - DH = 7, - BH = 8, - AX = 9, - CX = 10, - DX = 11, - BX = 12, - SP = 13, - BP = 14, - SI = 15, - DI = 16, - EAX = 17, - ECX = 18, - EDX = 19, - EBX = 20, - ESP = 21, - EBP = 22, - ESI = 23, - EDI = 24, - ES = 25, - CS = 26, - SS = 27, - DS = 28, - FS = 29, - GS = 30, - IP = 31, - RAX = 328, - RBX = 329, - RCX = 330, - RDX = 331, - RSI = 332, - RDI = 333, - RBP = 334, - RSP = 335, - R8 = 336, - R9 = 337, - R10 = 338, - R11 = 339, - R12 = 340, - R13 = 341, - R14 = 342, - R15 = 343, +#define CV_REGISTER(name, value) name = value, +#include "CodeViewRegisters.def" +#undef CV_REGISTER }; /// These values correspond to the THUNK_ORDINAL enumeration. diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def new file mode 100644 index 0000000000000..3f06602948668 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def @@ -0,0 +1,268 @@ +//===-- CodeViewRegisters.def - CodeView registers --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// See CV_HREG_e in cvconst.h. This should match the constants there. +// +//===----------------------------------------------------------------------===// + +#ifndef CV_REGISTER +#define CV_REGISTER(name, value) +#endif + +// This currently only contains the "register subset shared by all processor +// types" (ERR etc.) and the x86 registers.
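+//
+// Consumers define CV_REGISTER before including this file; CodeView.h above
+// expands each entry into a RegisterId enumerator. A second typical expansion
+// (a sketch, not part of this patch) would build a name/value table:
+//
+//   static const std::pair<const char *, uint16_t> RegisterNames[] = {
+//   #define CV_REGISTER(name, value) {#name, value},
+//   #include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+//   #undef CV_REGISTER
+//   };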
+ +CV_REGISTER(ERR, 30000) +CV_REGISTER(TEB, 30001) +CV_REGISTER(TIMER, 30002) +CV_REGISTER(EFAD1, 30003) +CV_REGISTER(EFAD2, 30004) +CV_REGISTER(EFAD3, 30005) +CV_REGISTER(VFRAME, 30006) +CV_REGISTER(HANDLE, 30007) +CV_REGISTER(PARAMS, 30008) +CV_REGISTER(LOCALS, 30009) +CV_REGISTER(TID, 30010) +CV_REGISTER(ENV, 30011) +CV_REGISTER(CMDLN, 30012) + +CV_REGISTER(NONE, 0) +CV_REGISTER(AL, 1) +CV_REGISTER(CL, 2) +CV_REGISTER(DL, 3) +CV_REGISTER(BL, 4) +CV_REGISTER(AH, 5) +CV_REGISTER(CH, 6) +CV_REGISTER(DH, 7) +CV_REGISTER(BH, 8) +CV_REGISTER(AX, 9) +CV_REGISTER(CX, 10) +CV_REGISTER(DX, 11) +CV_REGISTER(BX, 12) +CV_REGISTER(SP, 13) +CV_REGISTER(BP, 14) +CV_REGISTER(SI, 15) +CV_REGISTER(DI, 16) +CV_REGISTER(EAX, 17) +CV_REGISTER(ECX, 18) +CV_REGISTER(EDX, 19) +CV_REGISTER(EBX, 20) +CV_REGISTER(ESP, 21) +CV_REGISTER(EBP, 22) +CV_REGISTER(ESI, 23) +CV_REGISTER(EDI, 24) +CV_REGISTER(ES, 25) +CV_REGISTER(CS, 26) +CV_REGISTER(SS, 27) +CV_REGISTER(DS, 28) +CV_REGISTER(FS, 29) +CV_REGISTER(GS, 30) +CV_REGISTER(IP, 31) +CV_REGISTER(FLAGS, 32) +CV_REGISTER(EIP, 33) +CV_REGISTER(EFLAGS, 34) +CV_REGISTER(TEMP, 40) +CV_REGISTER(TEMPH, 41) +CV_REGISTER(QUOTE, 42) +CV_REGISTER(PCDR3, 43) +CV_REGISTER(PCDR4, 44) +CV_REGISTER(PCDR5, 45) +CV_REGISTER(PCDR6, 46) +CV_REGISTER(PCDR7, 47) +CV_REGISTER(CR0, 80) +CV_REGISTER(CR1, 81) +CV_REGISTER(CR2, 82) +CV_REGISTER(CR3, 83) +CV_REGISTER(CR4, 84) +CV_REGISTER(DR0, 90) +CV_REGISTER(DR1, 91) +CV_REGISTER(DR2, 92) +CV_REGISTER(DR3, 93) +CV_REGISTER(DR4, 94) +CV_REGISTER(DR5, 95) +CV_REGISTER(DR6, 96) +CV_REGISTER(DR7, 97) +CV_REGISTER(GDTR, 110) +CV_REGISTER(GDTL, 111) +CV_REGISTER(IDTR, 112) +CV_REGISTER(IDTL, 113) +CV_REGISTER(LDTR, 114) +CV_REGISTER(TR, 115) + +CV_REGISTER(PSEUDO1, 116) +CV_REGISTER(PSEUDO2, 117) +CV_REGISTER(PSEUDO3, 118) +CV_REGISTER(PSEUDO4, 119) +CV_REGISTER(PSEUDO5, 120) +CV_REGISTER(PSEUDO6, 121) +CV_REGISTER(PSEUDO7, 122) +CV_REGISTER(PSEUDO8, 123) +CV_REGISTER(PSEUDO9, 124) + +CV_REGISTER(ST0, 128) +CV_REGISTER(ST1, 129) +CV_REGISTER(ST2, 130) +CV_REGISTER(ST3, 131) +CV_REGISTER(ST4, 132) +CV_REGISTER(ST5, 133) +CV_REGISTER(ST6, 134) +CV_REGISTER(ST7, 135) +CV_REGISTER(CTRL, 136) +CV_REGISTER(STAT, 137) +CV_REGISTER(TAG, 138) +CV_REGISTER(FPIP, 139) +CV_REGISTER(FPCS, 140) +CV_REGISTER(FPDO, 141) +CV_REGISTER(FPDS, 142) +CV_REGISTER(ISEM, 143) +CV_REGISTER(FPEIP, 144) +CV_REGISTER(FPEDO, 145) + +CV_REGISTER(MM0, 146) +CV_REGISTER(MM1, 147) +CV_REGISTER(MM2, 148) +CV_REGISTER(MM3, 149) +CV_REGISTER(MM4, 150) +CV_REGISTER(MM5, 151) +CV_REGISTER(MM6, 152) +CV_REGISTER(MM7, 153) + +CV_REGISTER(XMM0, 154) +CV_REGISTER(XMM1, 155) +CV_REGISTER(XMM2, 156) +CV_REGISTER(XMM3, 157) +CV_REGISTER(XMM4, 158) +CV_REGISTER(XMM5, 159) +CV_REGISTER(XMM6, 160) +CV_REGISTER(XMM7, 161) + +CV_REGISTER(MXCSR, 211) + +CV_REGISTER(EDXEAX, 212) + +CV_REGISTER(EMM0L, 220) +CV_REGISTER(EMM1L, 221) +CV_REGISTER(EMM2L, 222) +CV_REGISTER(EMM3L, 223) +CV_REGISTER(EMM4L, 224) +CV_REGISTER(EMM5L, 225) +CV_REGISTER(EMM6L, 226) +CV_REGISTER(EMM7L, 227) + +CV_REGISTER(EMM0H, 228) +CV_REGISTER(EMM1H, 229) +CV_REGISTER(EMM2H, 230) +CV_REGISTER(EMM3H, 231) +CV_REGISTER(EMM4H, 232) +CV_REGISTER(EMM5H, 233) +CV_REGISTER(EMM6H, 234) +CV_REGISTER(EMM7H, 235) + +CV_REGISTER(MM00, 236) +CV_REGISTER(MM01, 237) +CV_REGISTER(MM10, 238) +CV_REGISTER(MM11, 239) +CV_REGISTER(MM20, 240) +CV_REGISTER(MM21, 241) +CV_REGISTER(MM30, 242) +CV_REGISTER(MM31, 243) +CV_REGISTER(MM40, 244) +CV_REGISTER(MM41, 245) +CV_REGISTER(MM50, 246) +CV_REGISTER(MM51, 247) +CV_REGISTER(MM60, 248) 
+CV_REGISTER(MM61, 249) +CV_REGISTER(MM70, 250) +CV_REGISTER(MM71, 251) + +CV_REGISTER(BND0, 396) +CV_REGISTER(BND1, 397) +CV_REGISTER(BND2, 398) + + +CV_REGISTER(XMM8, 252) +CV_REGISTER(XMM9, 253) +CV_REGISTER(XMM10, 254) +CV_REGISTER(XMM11, 255) +CV_REGISTER(XMM12, 256) +CV_REGISTER(XMM13, 257) +CV_REGISTER(XMM14, 258) +CV_REGISTER(XMM15, 259) + + +CV_REGISTER(SIL, 324) +CV_REGISTER(DIL, 325) +CV_REGISTER(BPL, 326) +CV_REGISTER(SPL, 327) + +CV_REGISTER(RAX, 328) +CV_REGISTER(RBX, 329) +CV_REGISTER(RCX, 330) +CV_REGISTER(RDX, 331) +CV_REGISTER(RSI, 332) +CV_REGISTER(RDI, 333) +CV_REGISTER(RBP, 334) +CV_REGISTER(RSP, 335) + +CV_REGISTER(R8, 336) +CV_REGISTER(R9, 337) +CV_REGISTER(R10, 338) +CV_REGISTER(R11, 339) +CV_REGISTER(R12, 340) +CV_REGISTER(R13, 341) +CV_REGISTER(R14, 342) +CV_REGISTER(R15, 343) + +CV_REGISTER(R8B, 344) +CV_REGISTER(R9B, 345) +CV_REGISTER(R10B, 346) +CV_REGISTER(R11B, 347) +CV_REGISTER(R12B, 348) +CV_REGISTER(R13B, 349) +CV_REGISTER(R14B, 350) +CV_REGISTER(R15B, 351) + +CV_REGISTER(R8W, 352) +CV_REGISTER(R9W, 353) +CV_REGISTER(R10W, 354) +CV_REGISTER(R11W, 355) +CV_REGISTER(R12W, 356) +CV_REGISTER(R13W, 357) +CV_REGISTER(R14W, 358) +CV_REGISTER(R15W, 359) + +CV_REGISTER(R8D, 360) +CV_REGISTER(R9D, 361) +CV_REGISTER(R10D, 362) +CV_REGISTER(R11D, 363) +CV_REGISTER(R12D, 364) +CV_REGISTER(R13D, 365) +CV_REGISTER(R14D, 366) +CV_REGISTER(R15D, 367) + + +// cvconst.h defines both CV_REG_YMM0 (252) and CV_AMD64_YMM0 (368). Keep the +// original prefix to distinguish them. + +CV_REGISTER(AMD64_YMM0, 368) +CV_REGISTER(AMD64_YMM1, 369) +CV_REGISTER(AMD64_YMM2, 370) +CV_REGISTER(AMD64_YMM3, 371) +CV_REGISTER(AMD64_YMM4, 372) +CV_REGISTER(AMD64_YMM5, 373) +CV_REGISTER(AMD64_YMM6, 374) +CV_REGISTER(AMD64_YMM7, 375) +CV_REGISTER(AMD64_YMM8, 376) +CV_REGISTER(AMD64_YMM9, 377) +CV_REGISTER(AMD64_YMM10, 378) +CV_REGISTER(AMD64_YMM11, 379) +CV_REGISTER(AMD64_YMM12, 380) +CV_REGISTER(AMD64_YMM13, 381) +CV_REGISTER(AMD64_YMM14, 382) +CV_REGISTER(AMD64_YMM15, 383) diff --git a/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def b/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def index f6b1b54d8630c..41c5380767983 100644 --- a/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def +++ b/include/llvm/DebugInfo/CodeView/CodeViewSymbols.def @@ -1,4 +1,4 @@ -//===-- CVLeafTypes.def - All CodeView leaf types ---------------*- C++ -*-===// +//===-- CodeViewSymbols.def - All CodeView leaf types -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -184,7 +184,8 @@ CV_SYMBOL(S_LDATA_HLSL32 , 0x1163) CV_SYMBOL(S_GDATA_HLSL32_EX, 0x1164) CV_SYMBOL(S_LDATA_HLSL32_EX, 0x1165) -CV_SYMBOL(S_FASTLINK, 0x1167) +CV_SYMBOL(S_FASTLINK, 0x1167) // Undocumented +SYMBOL_RECORD_ALIAS(S_INLINEES, 0x1168, InlineesSym, CallerSym) // Undocumented // Known symbol types SYMBOL_RECORD(S_END , 0x0006, ScopeEndSym) @@ -234,7 +235,7 @@ SYMBOL_RECORD(S_HEAPALLOCSITE , 0x115e, HeapAllocationSiteSym) SYMBOL_RECORD(S_FRAMECOOKIE , 0x113a, FrameCookieSym) SYMBOL_RECORD(S_CALLEES , 0x115a, CallerSym) -SYMBOL_RECORD_ALIAS(S_CALLERS , 0x115b, CalleeSym, CallerSym) +SYMBOL_RECORD_ALIAS(S_CALLERS, 0x115b, CalleeSym, CallerSym) SYMBOL_RECORD(S_UDT , 0x1108, UDTSym) SYMBOL_RECORD_ALIAS(S_COBOLUDT , 0x1109, CobolUDT, UDTSym) diff --git a/include/llvm/DebugInfo/CodeView/CodeViewTypes.def b/include/llvm/DebugInfo/CodeView/CodeViewTypes.def index 8c193bb13cb7e..69ce9606a670f 100644 --- a/include/llvm/DebugInfo/CodeView/CodeViewTypes.def +++ b/include/llvm/DebugInfo/CodeView/CodeViewTypes.def @@ -1,5 +1,4 @@ - -//===-- 
CVLeafTypes.def - All CodeView leaf types ---------------*- C++ -*-===//
+//===-- CodeViewTypes.def - All CodeView leaf types -------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index d5ff7cb35bf7e..4a368bec85cd9 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -17,6 +17,7 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstdint>
 #include <memory>
@@ -26,8 +27,6 @@
 
 namespace llvm {
 
-class raw_ostream;
-
 /// A format-neutral container for source line information.
 struct DILineInfo {
   std::string FileName;
@@ -46,15 +45,30 @@ struct DILineInfo {
            FileName == RHS.FileName && FunctionName == RHS.FunctionName &&
            StartLine == RHS.StartLine && Discriminator == RHS.Discriminator;
   }
+
+  bool operator!=(const DILineInfo &RHS) const { return !(*this == RHS); }
+
   bool operator<(const DILineInfo &RHS) const {
     return std::tie(FileName, FunctionName, Line, Column, StartLine,
                     Discriminator) <
            std::tie(RHS.FileName, RHS.FunctionName, RHS.Line, RHS.Column,
                     RHS.StartLine, RHS.Discriminator);
   }
+
+  explicit operator bool() const { return *this != DILineInfo(); }
+
+  void dump(raw_ostream &OS) {
+    OS << "Line info: ";
+    if (FileName != "")
+      OS << "file '" << FileName << "', ";
+    if (FunctionName != "")
+      OS << "function '" << FunctionName << "', ";
+    OS << "line " << Line << ", ";
+    OS << "column " << Column << ", ";
+    OS << "start line " << StartLine << '\n';
+  }
 };
 
 using DILineInfoTable = SmallVector<std::pair<uint64_t, DILineInfo>, 16>;
 
@@ -141,6 +155,7 @@ struct DIDumpOptions {
   unsigned RecurseDepth = -1U;
   bool ShowChildren = false;
   bool ShowParents = false;
+  bool ShowForm = false;
   bool SummarizeTypes = false;
   bool Verbose = false;
 
diff --git a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
index 5c304340c13c8..e8abd3151e55d 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -13,7 +13,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
-#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
 #include <cstdint>
 #include <utility>
 
@@ -21,6 +21,9 @@
 
 namespace llvm {
 
 class raw_ostream;
 
+/// This implements the Apple accelerator table format, a precursor of the
+/// DWARF 5 accelerator table format.
+/// TODO: Factor out a common base class for both formats.
 class DWARFAcceleratorTable {
   struct Header {
     uint32_t Magic;
@@ -43,8 +46,46 @@ class DWARFAcceleratorTable {
   struct HeaderData HdrData;
   DWARFDataExtractor AccelSection;
   DataExtractor StringSection;
+  bool IsValid = false;
 
 public:
+  /// An iterator for the entries associated with one key. Each entry can have
+  /// multiple DWARFFormValues.
+  class ValueIterator : public std::iterator<std::input_iterator_tag,
+                                             ArrayRef<DWARFFormValue>> {
+    const DWARFAcceleratorTable *AccelTable = nullptr;
+    SmallVector<DWARFFormValue, 3> AtomForms; ///< The decoded data entry.
+
+    unsigned DataOffset = 0; ///< Offset into the section.
+    unsigned Data = 0; ///< Current data entry.
+    unsigned NumData = 0; ///< Number of data entries.
+
+    /// Advance the iterator.
+    void Next();
+  public:
+    /// Construct a new iterator for the entries at \p DataOffset.
+    ValueIterator(const DWARFAcceleratorTable &AccelTable, unsigned DataOffset);
+    /// End marker.
+    ValueIterator() = default;
+
+    const ArrayRef<DWARFFormValue> operator*() const {
+      return AtomForms;
+    }
+    ValueIterator &operator++() { Next(); return *this; }
+    ValueIterator operator++(int) {
+      ValueIterator I = *this;
+      Next();
+      return I;
+    }
+    friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
+      return A.NumData == B.NumData && A.DataOffset == B.DataOffset;
+    }
+    friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
+      return !(A == B);
+    }
+  };
+
+
   DWARFAcceleratorTable(const DWARFDataExtractor &AccelSection,
                         DataExtractor StringSection)
       : AccelSection(AccelSection), StringSection(StringSection) {}
 
@@ -67,6 +108,9 @@ class DWARFAcceleratorTable {
   /// DieTag is the tag of the DIE
   std::pair<uint32_t, dwarf::Tag> readAtoms(uint32_t &HashDataOffset);
   void dump(raw_ostream &OS) const;
+
+  /// Look up all entries in the accelerator table matching \c Key.
+  iterator_range<ValueIterator> equal_range(StringRef Key) const;
 };
 
 } // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 7701f4ab6213a..2ddbc4b91ba2e 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -17,6 +17,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
@@ -68,6 +69,10 @@ class DWARFContext : public DIContext {
   std::unique_ptr<DWARFDebugFrame> DebugFrame;
   std::unique_ptr<DWARFDebugFrame> EHFrame;
   std::unique_ptr<DWARFDebugMacro> Macro;
+  std::unique_ptr<DWARFAcceleratorTable> AppleNames;
+  std::unique_ptr<DWARFAcceleratorTable> AppleTypes;
+  std::unique_ptr<DWARFAcceleratorTable> AppleNamespaces;
+  std::unique_ptr<DWARFAcceleratorTable> AppleObjC;
 
   DWARFUnitSection<DWARFCompileUnit> DWOCUs;
   std::deque<DWARFUnitSection<DWARFTypeUnit>> DWOTUs;
 
@@ -237,9 +242,33 @@ class DWARFContext : public DIContext {
   /// Get a pointer to the parsed DebugMacro object.
   const DWARFDebugMacro *getDebugMacro();
 
+  /// Get a reference to the parsed accelerator table object.
+  const DWARFAcceleratorTable &getAppleNames();
+
+  /// Get a reference to the parsed accelerator table object.
+  const DWARFAcceleratorTable &getAppleTypes();
+
+  /// Get a reference to the parsed accelerator table object.
+  const DWARFAcceleratorTable &getAppleNamespaces();
+
+  /// Get a reference to the parsed accelerator table object.
+  const DWARFAcceleratorTable &getAppleObjC();
+
   /// Get a pointer to a parsed line table corresponding to a compile unit.
   const DWARFDebugLine::LineTable *getLineTableForUnit(DWARFUnit *cu);
 
+  /// Wraps the returned DIEs for a given address.
+  struct DIEsForAddress {
+    DWARFCompileUnit *CompileUnit = nullptr;
+    DWARFDie FunctionDIE;
+    DWARFDie BlockDIE;
+    explicit operator bool() const { return CompileUnit != nullptr; }
+  };
+
+  /// Get the compilation unit, the function DIE and lexical block DIE for the
+  /// given address where applicable.
+ DIEsForAddress getDIEsForAddress(uint64_t Address); + DILineInfo getLineInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; DILineInfoTable getLineInfoForAddressRange(uint64_t Address, uint64_t Size, diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index 0d97c2169e898..f9ec96366a538 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -57,6 +57,8 @@ static inline bool operator<(const DWARFAddressRange &LHS, return std::tie(LHS.LowPC, LHS.HighPC) < std::tie(RHS.LowPC, RHS.HighPC); } +raw_ostream &operator<<(raw_ostream &OS, const DWARFAddressRange &R); + /// DWARFAddressRangesVector - represents a set of absolute address ranges. using DWARFAddressRangesVector = std::vector; diff --git a/include/llvm/DebugInfo/DWARF/DWARFDie.h b/include/llvm/DebugInfo/DWARF/DWARFDie.h index 9cb067a01298f..75fc5995c5b22 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -108,11 +108,7 @@ class DWARFDie { /// /// \returns a valid DWARFDie instance if this object has children or an /// invalid DWARFDie instance if it doesn't. - DWARFDie getFirstChild() const { - if (isValid() && Die->hasChildren()) - return DWARFDie(U, Die + 1); - return DWARFDie(); - } + DWARFDie getFirstChild() const; /// Dump the DIE and all of its attributes to the supplied stream. /// diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 0df5c16e4a23f..e9178e03fa8a2 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -285,12 +285,21 @@ class DWARFUnit { uint8_t getUnitType() const { return UnitType; } - static bool isValidUnitType(uint8_t UnitType) { - return UnitType == dwarf::DW_UT_compile || UnitType == dwarf::DW_UT_type || - UnitType == dwarf::DW_UT_partial || - UnitType == dwarf::DW_UT_skeleton || - UnitType == dwarf::DW_UT_split_compile || - UnitType == dwarf::DW_UT_split_type; + static bool isMatchingUnitTypeAndTag(uint8_t UnitType, dwarf::Tag Tag) { + switch (UnitType) { + case dwarf::DW_UT_compile: + return Tag == dwarf::DW_TAG_compile_unit; + case dwarf::DW_UT_type: + return Tag == dwarf::DW_TAG_type_unit; + case dwarf::DW_UT_partial: + return Tag == dwarf::DW_TAG_partial_unit; + case dwarf::DW_UT_skeleton: + return Tag == dwarf::DW_TAG_skeleton_unit; + case dwarf::DW_UT_split_compile: + case dwarf::DW_UT_split_type: + return dwarf::isUnitType(Tag); + } + return false; } /// \brief Return the number of bytes for the header of a unit of @@ -329,6 +338,11 @@ class DWARFUnit { void collectAddressRanges(DWARFAddressRangesVector &CURanges); + /// Returns subprogram DIE with address range encompassing the provided + /// address. The pointer is alive as long as parsed compile unit DIEs are not + /// cleared. + DWARFDie getSubroutineForAddress(uint64_t Address); + /// getInlinedChainForAddress - fetches inlined chain for a given address. /// Returns empty chain if there is no subprogram containing address. The /// chain is valid as long as parsed compile unit DIEs are not cleared. @@ -363,6 +377,7 @@ class DWARFUnit { DWARFDie getParent(const DWARFDebugInfoEntry *Die); DWARFDie getSibling(const DWARFDebugInfoEntry *Die); + DWARFDie getFirstChild(const DWARFDebugInfoEntry *Die); /// \brief Return the DIE object for a given offset inside the /// unit's DIE vector. 
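Between the two DWARFUnit hunks it is worth seeing how the new DWARFContext surface composes. A minimal sketch under stated assumptions: Ctx is a DWARFContext built for a loaded binary that carries Apple accelerator tables, and dumpLookups is a hypothetical caller, not part of this patch:

    #include "llvm/DebugInfo/DWARF/DWARFContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void dumpLookups(DWARFContext &Ctx, StringRef Name, uint64_t Addr) {
      // Name-based lookup through .apple_names. Each entry is an
      // ArrayRef<DWARFFormValue> holding the atoms for one match, typically
      // a DIE offset as the first atom.
      for (ArrayRef<DWARFFormValue> Entry : Ctx.getAppleNames().equal_range(Name))
        for (const DWARFFormValue &Atom : Entry)
          if (auto SectionOffset = Atom.getAsSectionOffset())
            outs() << "match at DIE offset " << *SectionOffset << "\n";

      // Address-based lookup: compile unit, enclosing function DIE and,
      // where applicable, the lexical block DIE.
      if (auto DIEs = Ctx.getDIEsForAddress(Addr)) {
        outs() << "function DIE at offset " << DIEs.FunctionDIE.getOffset() << "\n";
        if (DIEs.BlockDIE)
          outs() << "block DIE at offset " << DIEs.BlockDIE.getOffset() << "\n";
      }
    }

If the binary has no accelerator tables, equal_range should simply yield an empty range (the IsValid flag added above guards the parse), so callers do not need a separate existence check.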
@@ -411,11 +426,6 @@ class DWARFUnit {
   /// parseDWO - Parses .dwo file for current compile unit. Returns true if
   /// it was actually constructed.
   bool parseDWO();
-
-  /// getSubroutineForAddress - Returns subprogram DIE with address range
-  /// encompassing the provided address. The pointer is alive as long as parsed
-  /// compile unit DIEs are not cleared.
-  DWARFDie getSubroutineForAddress(uint64_t Address);
 };
 
 } // end namespace llvm
diff --git a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index b4add789b1f15..0d920abe32315 100644
--- a/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -96,6 +96,10 @@ class DWARFVerifier {
   std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets;
   uint32_t NumDebugLineErrors = 0;
 
+  raw_ostream &error() const;
+  raw_ostream &warn() const;
+  raw_ostream &note() const;
+
   /// Verifies the abbreviations section.
   ///
   /// This function currently checks that:
@@ -132,8 +136,22 @@ class DWARFVerifier {
                            uint32_t *Offset, unsigned UnitIndex,
                            uint8_t &UnitType, bool &isUnitDWARF64);
-
-  bool verifyUnitContents(DWARFUnit Unit);
+  /// Verifies the contents of a unit in the .debug_info section.
+  ///
+  /// This function currently verifies:
+  /// - The debug info attributes.
+  /// - The debug info forms.
+  /// - The presence of a root DIE.
+  /// - That the root DIE is a unit DIE.
+  /// - If a unit type is provided, that the unit DIE matches the unit type.
+  /// - The DIE ranges.
+  ///
+  /// \param Unit The DWARF Unit to verify.
+  /// \param UnitType An optional unit type which will be used to verify the
+  /// type of the unit DIE.
+  ///
+  /// \returns true if the content is verified successfully, false otherwise.
+  bool verifyUnitContents(DWARFUnit Unit, uint8_t UnitType = 0);
 
   /// Verify that all Die ranges are valid.
   ///
diff --git a/include/llvm/DebugInfo/PDB/Native/NativeSession.h b/include/llvm/DebugInfo/PDB/Native/NativeSession.h
index 770673115506b..c2344d5648e35 100644
--- a/include/llvm/DebugInfo/PDB/Native/NativeSession.h
+++ b/include/llvm/DebugInfo/PDB/Native/NativeSession.h
@@ -31,7 +31,7 @@ class NativeSession : public IPDBSession {
                 std::unique_ptr<BumpPtrAllocator> Allocator);
   ~NativeSession() override;
 
-  static Error createFromPdb(StringRef Path,
+  static Error createFromPdb(std::unique_ptr<MemoryBuffer> MB,
                              std::unique_ptr<IPDBSession> &Session);
   static Error createFromExe(StringRef Path,
                              std::unique_ptr<IPDBSession> &Session);
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index 70ee843095f28..77c23b46d3201 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -541,6 +541,7 @@ class EngineBuilder {
   SmallVector<std::string, 4> MAttrs;
   bool VerifyModules;
   bool UseOrcMCJITReplacement;
+  bool EmulatedTLS = true;
 
 public:
   /// Default constructor for EngineBuilder.
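The next hunk adds the setter that pairs with the EmulatedTLS field introduced above. A hedged sketch of client code, assuming the builder forwards the flag into the TargetMachine it creates (buildEngine is a placeholder, not part of this patch):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/IR/Module.h"

    // EmulatedTLS now defaults to true, so a JIT client that wants the native
    // TLS model must opt out explicitly.
    llvm::ExecutionEngine *buildEngine(std::unique_ptr<llvm::Module> M) {
      llvm::EngineBuilder Builder(std::move(M));
      Builder.setEmulatedTLS(false); // request native thread-local storage
      return Builder.create();
    }

Defaulting to true keeps the previous behavior for existing MCJIT users while making the choice visible at the API surface.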
@@ -641,6 +642,10 @@ class EngineBuilder { this->UseOrcMCJITReplacement = UseOrcMCJITReplacement; } + void setEmulatedTLS(bool EmulatedTLS) { + this->EmulatedTLS = EmulatedTLS; + } + TargetMachine *selectTarget(); /// selectTarget - Pick a target either via -march or by guessing the native diff --git a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h index 633713c38ae48..246c57341f359 100644 --- a/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -99,8 +99,9 @@ class RTDyldObjectLinkingLayer : public RTDyldObjectLinkingLayerBase { using RTDyldObjectLinkingLayerBase::ObjectPtr; /// @brief Functor for receiving object-loaded notifications. - using NotifyLoadedFtor = std::function; + using NotifyLoadedFtor = + std::function; /// @brief Functor for receiving finalization notifications. using NotifyFinalizedFtor = std::function; diff --git a/include/llvm/FuzzMutate/FuzzerCLI.h b/include/llvm/FuzzMutate/FuzzerCLI.h index 83c8356247e18..756c744018d00 100644 --- a/include/llvm/FuzzMutate/FuzzerCLI.h +++ b/include/llvm/FuzzMutate/FuzzerCLI.h @@ -15,6 +15,7 @@ #ifndef LLVM_FUZZMUTATE_FUZZER_CLI_H #define LLVM_FUZZMUTATE_FUZZER_CLI_H +#include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -24,6 +25,17 @@ namespace llvm { /// This handles all arguments after -ignore_remaining_args=1 as cl::opts. void parseFuzzerCLOpts(int ArgC, char *ArgV[]); +/// Handle backend options that are encoded in the executable name. +/// +/// Parses some common backend options out of a specially crafted executable +/// name (argv[0]). For example, a name like llvm-foo-fuzzer--aarch64-gisel +/// might set up an AArch64 triple and the Global ISel selector. This should be +/// called *before* parseFuzzerCLOpts if calling both. +/// +/// This is meant to be used for environments like OSS-Fuzz that aren't capable +/// of passing in command line arguments in the normal way. +void handleExecNameEncodedBEOpts(StringRef ExecName); + using FuzzerTestFun = int (*)(const uint8_t *Data, size_t Size); using FuzzerInitFun = int (*)(int *argc, char ***argv); diff --git a/include/llvm/IR/AutoUpgrade.h b/include/llvm/IR/AutoUpgrade.h index b42a3d3ad9550..3f406f0cf1969 100644 --- a/include/llvm/IR/AutoUpgrade.h +++ b/include/llvm/IR/AutoUpgrade.h @@ -51,6 +51,8 @@ namespace llvm { /// module is modified. bool UpgradeModuleFlags(Module &M); + void UpgradeSectionAttributes(Module &M); + /// If the given TBAA tag uses the scalar TBAA format, create a new node /// corresponding to the upgrade to the struct-path aware TBAA format. /// Otherwise return the \p TBAANode itself. diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index 42c25e25c1cf2..f1af2e436631e 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -62,7 +62,7 @@ class CallSiteBase { protected: PointerIntPair I; - CallSiteBase() : I(nullptr, false) {} + CallSiteBase() = default; CallSiteBase(CallTy *CI) : I(CI, true) { assert(CI); } CallSiteBase(InvokeTy *II) : I(II, false) { assert(II); } explicit CallSiteBase(ValTy *II) { *this = get(II); } @@ -110,12 +110,12 @@ class CallSiteBase { /// Return true if the callsite is an indirect call. 
 bool isIndirectCall() const {
-    Value *V = getCalledValue();
+    const Value *V = getCalledValue();
     if (!V)
       return false;
     if (isa<Function>(V) || isa<Constant>(V))
       return false;
-    if (CallInst *CI = dyn_cast<CallInst>(getInstruction())) {
+    if (const CallInst *CI = dyn_cast<CallInst>(getInstruction())) {
       if (CI->isInlineAsm())
         return false;
     }
diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h
index 850964afc3076..84fe836adc353 100644
--- a/include/llvm/IR/CallingConv.h
+++ b/include/llvm/IR/CallingConv.h
@@ -183,16 +183,18 @@ namespace CallingConv {
     /// which have an "optimized" convention to preserve registers.
     AVR_BUILTIN = 86,
 
-    /// Calling convention used for Mesa vertex shaders.
+    /// Calling convention used for Mesa vertex shaders, or AMDPAL last shader
+    /// stage before rasterization (vertex shader if tessellation and geometry
+    /// are not in use, or otherwise copy shader if one is needed).
     AMDGPU_VS = 87,
 
-    /// Calling convention used for Mesa geometry shaders.
+    /// Calling convention used for Mesa/AMDPAL geometry shaders.
     AMDGPU_GS = 88,
 
-    /// Calling convention used for Mesa pixel shaders.
+    /// Calling convention used for Mesa/AMDPAL pixel shaders.
     AMDGPU_PS = 89,
 
-    /// Calling convention used for Mesa compute shaders.
+    /// Calling convention used for Mesa/AMDPAL compute shaders.
     AMDGPU_CS = 90,
 
     /// Calling convention for AMDGPU code object kernels.
@@ -201,14 +203,23 @@ namespace CallingConv {
     /// Register calling convention used for parameters transfer optimization
     X86_RegCall = 92,
 
-    /// Calling convention used for Mesa hull shaders. (= tessellation control
-    /// shaders)
+    /// Calling convention used for Mesa/AMDPAL hull shaders (= tessellation
+    /// control shaders).
     AMDGPU_HS = 93,
 
     /// Calling convention used for special MSP430 rtlib functions
     /// which have an "optimized" convention using additional registers.
     MSP430_BUILTIN = 94,
 
+    /// Calling convention used for AMDPAL vertex shader if tessellation is in
+    /// use.
+    AMDGPU_LS = 95,
+
+    /// Calling convention used for AMDPAL shader stage before geometry shader
+    /// if geometry is in use. So either the domain (= tessellation evaluation)
+    /// shader if tessellation is in use, or otherwise the vertex shader.
+    AMDGPU_ES = 96,
+
     /// The highest possible calling convention ID. Must be some 2^k - 1.
     MaxID = 1023
   };
diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h
index dd6cc44c9465d..eac48d9f727be 100644
--- a/include/llvm/IR/DIBuilder.h
+++ b/include/llvm/IR/DIBuilder.h
@@ -74,6 +74,17 @@ namespace llvm {
     /// Create an \a temporary node and track it in \a UnresolvedNodes.
     void trackIfUnresolved(MDNode *N);
 
+    /// Internal helper for insertDeclare.
+    Instruction *insertDeclare(llvm::Value *Storage, DILocalVariable *VarInfo,
+                               DIExpression *Expr, const DILocation *DL,
+                               BasicBlock *InsertBB, Instruction *InsertBefore);
+
+    /// Internal helper for insertDbgValueIntrinsic.
+    Instruction *
+    insertDbgValueIntrinsic(llvm::Value *Val, DILocalVariable *VarInfo,
+                            DIExpression *Expr, const DILocation *DL,
+                            BasicBlock *InsertBB, Instruction *InsertBefore);
+
   public:
     /// Construct a builder for a module.
     ///
diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h
index e18395781e98d..bee8cf8a39d9d 100644
--- a/include/llvm/IR/DebugInfoMetadata.h
+++ b/include/llvm/IR/DebugInfoMetadata.h
@@ -1417,11 +1417,15 @@ class DILocation : public MDNode {
   /// could create a location with a new discriminator.
If they are from
   /// different files/lines the location is ambiguous and can't be
   /// represented in a single line entry. In this case, no location
-  /// should be set.
+  /// should be set, unless the merged instruction is a call. For calls we
+  /// set the merged debug location to line 0 of the nearest common scope
+  /// that the two locations are inlined from. This only applies to
+  /// Instruction; for MachineInstruction, which is post-inlining, a call is
+  /// treated the same way as any other instruction.
   ///
-  /// Currently the function does not create a new location. If the locations
-  /// are the same, or cannot be discriminated, the first location is returned.
-  /// Otherwise an empty location will be used.
+  /// This should only be used by MachineInstruction, where a call can be
+  /// treated the same as other instructions. Otherwise, use
+  /// \p applyMergedLocation instead.
   static const DILocation *getMergedLocation(const DILocation *LocA,
                                              const DILocation *LocB) {
     if (LocA && LocB && (LocA == LocB || !LocA->canDiscriminate(*LocB)))
diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h
index 1e9bcb67e2848..020b67d6b7110 100644
--- a/include/llvm/IR/DiagnosticInfo.h
+++ b/include/llvm/IR/DiagnosticInfo.h
@@ -987,6 +987,12 @@ class DiagnosticInfoUnsupported : public DiagnosticInfoWithLocationBase {
   void print(DiagnosticPrinter &DP) const override;
 };
 
+namespace yaml {
+template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> {
+  static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag);
+};
+} // namespace yaml
+
 } // end namespace llvm
 
 #endif // LLVM_IR_DIAGNOSTICINFO_H
diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h
index 59874b05b0cef..1519a45d59e91 100644
--- a/include/llvm/IR/InlineAsm.h
+++ b/include/llvm/IR/InlineAsm.h
@@ -101,7 +101,7 @@ class InlineAsm final : public Value {
     /// input constraint is required to match it (e.g. "0"). The value is the
     /// constraint number that matches this one (for example, if this is
     /// constraint #0 and constraint #4 has the value "0", this will be 4).
-    signed char MatchingInput = -1;
+    int MatchingInput = -1;
 
     /// Code - The constraint code, either the register name (in braces) or the
     /// constraint letter/number.
@@ -128,7 +128,7 @@ class InlineAsm final : public Value {
     /// input constraint is required to match it (e.g. "0"). The value is the
     /// constraint number that matches this one (for example, if this is
     /// constraint #0 and constraint #4 has the value "0", this will be 4).
-    signed char MatchingInput = -1;
+    int MatchingInput = -1;
 
     /// hasMatchingInput - Return true if this is an output constraint that has
     /// a matching input constraint.
diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h
index d749077fd34a1..063e4baef4608 100644
--- a/include/llvm/IR/InstrTypes.h
+++ b/include/llvm/IR/InstrTypes.h
@@ -775,28 +775,21 @@ class CastInst : public UnaryInstruction {
 
   /// A no-op cast is one that can be effected without changing any bits.
   /// It implies that the source and destination types are the same size. The
-  /// IntPtrTy argument is used to make accurate determinations for casts
+  /// DataLayout argument is used to determine the pointer size when examining
+  /// casts
   /// involving Integer and Pointer types. They are no-op casts if the integer
   /// is the same size as the pointer. However, pointer size varies with
-  /// platform. Generally, the result of DataLayout::getIntPtrType() should be
-  /// passed in.
If that's not available, use Type::Int64Ty, which will make
-  /// the isNoopCast call conservative.
+  /// platform.
   /// @brief Determine if the described cast is a no-op cast.
   static bool isNoopCast(
-    Instruction::CastOps Opcode,  ///< Opcode of cast
-    Type *SrcTy,   ///< SrcTy of cast
-    Type *DstTy,   ///< DstTy of cast
-    Type *IntPtrTy ///< Integer type corresponding to Ptr types
+    Instruction::CastOps Opcode, ///< Opcode of cast
+    Type *SrcTy,                 ///< SrcTy of cast
+    Type *DstTy,                 ///< DstTy of cast
+    const DataLayout &DL         ///< DataLayout to get the Int Ptr type from.
   );
 
-  /// @brief Determine if this cast is a no-op cast.
-  bool isNoopCast(
-    Type *IntPtrTy ///< Integer type corresponding to pointer
-  ) const;
-
   /// @brief Determine if this cast is a no-op cast.
   ///
-  /// \param DL is the DataLayout to get the Int Ptr type from.
+  /// \param DL is the DataLayout to determine pointer size.
   bool isNoopCast(const DataLayout &DL) const;
 
   /// Determine how a pair of casts can be eliminated, if they can be at all.
diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
index 0cf8003423f98..66b1e7e01fe42 100644
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@@ -377,6 +377,21 @@ class Instruction : public User,
   /// V and this instruction.
   void andIRFlags(const Value *V);
 
+  /// Merge two debug locations and apply the result to this Instruction. If
+  /// the instruction is a CallInst, we need to traverse the inline chain to
+  /// find the common scope. This is not efficient for N-way merging as each
+  /// time you merge two locations you need to rebuild the hashmap to find
+  /// the common scope. However, we still choose this API because:
+  ///  1) Simplicity: it takes 2 locations instead of a list of locations.
+  ///  2) In worst case, it increases the complexity from O(N*I) to
+  ///     O(2*N*I), where N is # of Instructions to merge, and I is the
+  ///     maximum level of inline stack. So it is still linear.
+  ///  3) Merging of call instructions should be extremely rare in real
+  ///     applications, thus N-way merging should not be on a hot code path.
+  /// The DebugLoc attached to this instruction will be overwritten by the
+  /// merged DebugLoc.
+  void applyMergedLocation(const DILocation *LocA, const DILocation *LocB);
+
 private:
   /// Return true if we have an entry in the on-the-side metadata hash.
   bool hasMetadataHashEntry() const {
diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index f2203470008f6..f507f9c166894 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -747,6 +747,15 @@ def int_amdgcn_wqm : Intrinsic<[llvm_any_ty],
   [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
 >;
 
+// Return true if at least one thread within the pixel quad passes true into
+// the function.
+def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
+  [llvm_i1_ty], [IntrNoMem, IntrConvergent]
+>;
+
+// If false, set EXEC=0 for the current thread until the end of program.
+def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;
+
 // Copies the active channels of the source value to the destination value,
 // with the guarantee that the source value is computed as if the entire
 // program were executed in Whole Wavefront Mode, i.e.
with all channels diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td index d17a9fc6ccaed..7ba1a3eb2e5ba 100644 --- a/include/llvm/IR/IntrinsicsNVVM.td +++ b/include/llvm/IR/IntrinsicsNVVM.td @@ -3869,4 +3869,150 @@ def int_nvvm_match_all_sync_i64p : Intrinsic<[llvm_i64_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent], "llvm.nvvm.match.all.sync.i64p">; +// +// WMMA instructions +// + +// WMMA.LOAD +class NVVM_WMMA_LD_ALSTS + : Intrinsic; + +multiclass NVVM_WMMA_LD_ALST { + def _stride: NVVM_WMMA_LD_ALSTS; + def NAME : NVVM_WMMA_LD_ALSTS; +} + +multiclass NVVM_WMMA_LD_ALT { + defm _global: NVVM_WMMA_LD_ALST; + defm _shared: NVVM_WMMA_LD_ALST; + defm NAME: NVVM_WMMA_LD_ALST; +} + +multiclass NVVM_WMMA_LD_AT { + defm _row: NVVM_WMMA_LD_ALT; + defm _col: NVVM_WMMA_LD_ALT; +} + +// For some reason ReadOnly and NoCapture confuses tblgen if they are +// passed to Intrinsic<> form inside of a multiclass. Setting them globally +// outside of the multiclass works. +let IntrProperties = [IntrReadMem, IntrArgMemOnly, + ReadOnly<0>, NoCapture<0>] in { + defm int_nvvm_wmma_load_a_f16: NVVM_WMMA_LD_AT<"a", "f16", llvm_v2f16_ty>; + defm int_nvvm_wmma_load_b_f16: NVVM_WMMA_LD_AT<"b", "f16", llvm_v2f16_ty>; + defm int_nvvm_wmma_load_c_f16: NVVM_WMMA_LD_AT<"c", "f16", llvm_v2f16_ty>; + defm int_nvvm_wmma_load_c_f32: NVVM_WMMA_LD_AT<"c", "f32", llvm_float_ty>; +} + +// WMMA.STORE.D +class NVVM_WMMA_STD_LSTSEmpty=[]> + : Intrinsic<[], + !listconcat( + [llvm_ptr_ty], + !if(!eq(Type,"f16"), + [regty, regty, regty, regty], + [regty, regty, regty, regty, + regty, regty, regty, regty]), + !if(WithStride, [llvm_i32_ty], Empty)), + [], // Properties must be set during instantiation. + "llvm.nvvm.wmma.store.d.sync."#Layout + #".m16n16k16"#Space + #!if(WithStride,".stride","") + #"."#Type>; + +multiclass NVVM_WMMA_STD_LST { + def _stride: NVVM_WMMA_STD_LSTS; + def NAME: NVVM_WMMA_STD_LSTS; +} + +multiclass NVVM_WMMA_STD_LT { + defm _global: NVVM_WMMA_STD_LST; + defm _shared: NVVM_WMMA_STD_LST; + defm NAME: NVVM_WMMA_STD_LST; +} + +multiclass NVVM_WMMA_STD_T { + defm _row: NVVM_WMMA_STD_LT<"row", Type, regty>; + defm _col: NVVM_WMMA_STD_LT<"col", Type, regty>; +} + +let IntrProperties = [IntrWriteMem, IntrArgMemOnly, + WriteOnly<0>, NoCapture<0>] in { + defm int_nvvm_wmma_store_d_f16: NVVM_WMMA_STD_T<"f16", llvm_v2f16_ty>; + defm int_nvvm_wmma_store_d_f32: NVVM_WMMA_STD_T<"f32", llvm_float_ty>; +} + +// WMMA.MMA +class NVVM_WMMA_MMA_ABDCS + : Intrinsic; + +multiclass NVVM_WMMA_MMA_ABDC { + def NAME : NVVM_WMMA_MMA_ABDCS; + def _satfinite: NVVM_WMMA_MMA_ABDCS; +} + +multiclass NVVM_WMMA_MMA_ABD { + defm _f16: NVVM_WMMA_MMA_ABDC; + defm _f32: NVVM_WMMA_MMA_ABDC; +} + +multiclass NVVM_WMMA_MMA_AB { + defm _f16: NVVM_WMMA_MMA_ABD; + defm _f32: NVVM_WMMA_MMA_ABD; +} + +multiclass NVVM_WMMA_MMA_A { + defm _col: NVVM_WMMA_MMA_AB; + defm _row: NVVM_WMMA_MMA_AB; +} + +defm int_nvvm_wmma_mma_sync_col: NVVM_WMMA_MMA_A<"col">; +defm int_nvvm_wmma_mma_sync_row: NVVM_WMMA_MMA_A<"row">; + } // let TargetPrefix = "nvvm" diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index eae8564fdf952..e31db99cede7f 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -3473,10 +3473,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
} //===----------------------------------------------------------------------===// -// CLFLUSHOPT +// CLFLUSHOPT and CLWB let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_clflushopt : GCCBuiltin<"__builtin_ia32_clflushopt">, Intrinsic<[], [llvm_ptr_ty], []>; + + def int_x86_clwb : GCCBuiltin<"__builtin_ia32_clwb">, + Intrinsic<[], [llvm_ptr_ty], []>; } //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h index 2de3e5f651aaf..9e935823c775c 100644 --- a/include/llvm/IR/LLVMContext.h +++ b/include/llvm/IR/LLVMContext.h @@ -100,6 +100,7 @@ class LLVMContext { MD_section_prefix = 20, // "section_prefix" MD_absolute_symbol = 21, // "absolute_symbol" MD_associated = 22, // "associated" + MD_callees = 23, // "callees" }; /// Known operand bundle tag IDs, which always have the same value. All diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h index 899976a87bc7e..d679cef95b68b 100644 --- a/include/llvm/IR/MDBuilder.h +++ b/include/llvm/IR/MDBuilder.h @@ -84,6 +84,14 @@ class MDBuilder { /// \brief Return metadata describing the range [Lo, Hi). MDNode *createRange(Constant *Lo, Constant *Hi); + //===------------------------------------------------------------------===// + // Callees metadata. + //===------------------------------------------------------------------===// + + /// \brief Return metadata indicating the possible callees of indirect + /// calls. + MDNode *createCallees(ArrayRef Callees); + //===------------------------------------------------------------------===// // AA metadata. //===------------------------------------------------------------------===// diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h index d1564c1e2cec7..92dcebe48b01f 100644 --- a/include/llvm/IR/ModuleSummaryIndex.h +++ b/include/llvm/IR/ModuleSummaryIndex.h @@ -743,7 +743,7 @@ class ModuleSummaryIndex { static std::string getGlobalNameForLocal(StringRef Name, ModuleHash ModHash) { SmallString<256> NewName(Name); NewName += ".llvm."; - NewName += utohexstr(ModHash[0]); // Take the first 32 bits + NewName += utostr(ModHash[0]); // Take the first 32 bits return NewName.str(); } diff --git a/include/llvm/IR/Operator.h b/include/llvm/IR/Operator.h index 54e1165a111cc..ae9255174a315 100644 --- a/include/llvm/IR/Operator.h +++ b/include/llvm/IR/Operator.h @@ -61,9 +61,9 @@ class Operator : public User { } }; -/// Utility class for integer arithmetic operators which may exhibit overflow - -/// Add, Sub, and Mul. It does not include SDiv, despite that operator having -/// the potential for overflow. +/// Utility class for integer operators which may exhibit overflow - Add, Sub, +/// Mul, and Shl. It does not include SDiv, despite that operator having the +/// potential for overflow. class OverflowingBinaryOperator : public Operator { public: enum { diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h index 577100c7af732..4f838a719512f 100644 --- a/include/llvm/IR/PassManager.h +++ b/include/llvm/IR/PassManager.h @@ -470,7 +470,7 @@ class PassManager : public PassInfoMixin< //IR.getContext().yield(); } - // Invaliadtion was handled after each pass in the above loop for the + // Invalidation was handled after each pass in the above loop for the // current unit of IR. Therefore, the remaining analysis results in the // AnalysisManager are preserved. 
We mark this with a set so that we don't
       // need to inspect each one individually.
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h
index ef7801266777c..1574fc334ffc3 100644
--- a/include/llvm/IR/Type.h
+++ b/include/llvm/IR/Type.h
@@ -438,7 +438,7 @@ class Type {
 };
 
 // Printing of types.
-static inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
+inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
   T.print(OS);
   return OS;
 }
diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h
index 7b24ec11fb646..f5621647db069 100644
--- a/include/llvm/IRReader/IRReader.h
+++ b/include/llvm/IRReader/IRReader.h
@@ -37,14 +37,22 @@ getLazyIRFileModule(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context,
 /// If the given MemoryBuffer holds a bitcode image, return a Module
 /// for it. Otherwise, attempt to parse it as LLVM Assembly and return
 /// a Module for it.
+/// \param UpgradeDebugInfo Run UpgradeDebugInfo, which runs the Verifier.
+///                         This option should only be set to false by llvm-as
+///                         for use inside the LLVM test suite!
 std::unique_ptr<Module> parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err,
-                                LLVMContext &Context);
+                                LLVMContext &Context,
+                                bool UpgradeDebugInfo = true);
 
 /// If the given file holds a bitcode image, return a Module for it.
 /// Otherwise, attempt to parse it as LLVM Assembly and return a Module
 /// for it.
+/// \param UpgradeDebugInfo Run UpgradeDebugInfo, which runs the Verifier.
+///                         This option should only be set to false by llvm-as
+///                         for use inside the LLVM test suite!
 std::unique_ptr<Module> parseIRFile(StringRef Filename, SMDiagnostic &Err,
-                                    LLVMContext &Context);
+                                    LLVMContext &Context,
+                                    bool UpgradeDebugInfo = true);
 
 }
 
 #endif
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index bf54b6471f460..6b0e6acadad95 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -93,6 +93,7 @@ void initializeCallGraphViewerPass(PassRegistry&);
 void initializeCallGraphWrapperPassPass(PassRegistry&);
 void initializeCodeGenPreparePass(PassRegistry&);
 void initializeConstantHoistingLegacyPassPass(PassRegistry&);
+void initializeCalledValuePropagationLegacyPassPass(PassRegistry &);
 void initializeConstantMergeLegacyPassPass(PassRegistry&);
 void initializeConstantPropagationPass(PassRegistry&);
 void initializeCorrelatedValuePropagationPass(PassRegistry&);
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 293146171775d..abc3bac936736 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -80,6 +80,7 @@ namespace {
       (void) llvm::createCFLSteensAAWrapperPass();
       (void) llvm::createStructurizeCFGPass();
       (void) llvm::createLibCallsShrinkWrapPass();
+      (void) llvm::createCalledValuePropagationPass();
      (void) llvm::createConstantMergePass();
       (void) llvm::createConstantPropagationPass();
       (void) llvm::createCostModelAnalysisPass();
diff --git a/include/llvm/MC/LaneBitmask.h b/include/llvm/MC/LaneBitmask.h
index 35f472d817a08..a2bdcd4e69c7f 100644
--- a/include/llvm/MC/LaneBitmask.h
+++ b/include/llvm/MC/LaneBitmask.h
@@ -91,7 +91,7 @@ namespace llvm {
   };
 
   /// Create Printable object to print LaneBitmasks on a \ref raw_ostream.
-  static LLVM_ATTRIBUTE_UNUSED Printable PrintLaneMask(LaneBitmask LaneMask) {
+  inline Printable PrintLaneMask(LaneBitmask LaneMask) {
     return Printable([LaneMask](raw_ostream &OS) {
       OS << format(LaneBitmask::FormatStr, LaneMask.getAsInteger());
     });
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index 5a8e29d08ad23..ef2007ff69209 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -15,17 +15,22 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFragment.h"
 #include <cstdint>
+#include <memory>
 
 namespace llvm {
 
 class MCAsmLayout;
 class MCAssembler;
 class MCCFIInstruction;
+class MCCodePadder;
 struct MCFixupKindInfo;
 class MCFragment;
 class MCInst;
+class MCObjectStreamer;
 class MCObjectWriter;
+struct MCCodePaddingContext;
 class MCRelaxableFragment;
 class MCSubtargetInfo;
 class MCValue;
@@ -33,8 +38,11 @@ class raw_pwrite_stream;
 
 /// Generic interface to target specific assembler backends.
 class MCAsmBackend {
+  std::unique_ptr<MCCodePadder> CodePadder;
+
 protected: // Can only create subclasses.
   MCAsmBackend();
+  MCAsmBackend(std::unique_ptr<MCCodePadder> TargetCodePadder);
 
 public:
   MCAsmBackend(const MCAsmBackend &) = delete;
@@ -46,7 +54,8 @@ class MCAsmBackend {
 
   /// Create a new MCObjectWriter instance for use by the assembler backend to
   /// emit the final object file.
-  virtual MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const = 0;
+  virtual std::unique_ptr<MCObjectWriter>
+  createObjectWriter(raw_pwrite_stream &OS) const = 0;
 
   /// \name Target Fixup Interfaces
   /// @{
@@ -132,6 +141,40 @@ class MCAsmBackend {
   generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction>) const {
     return 0;
   }
+
+  /// Handles all target related code padding when starting to write a new
+  /// basic block to an object file.
+  ///
+  /// \param OS The streamer used for writing the padding data and function.
+  /// \param Context the context of the padding, which embeds the basic
+  /// block's parameters.
+  void handleCodePaddingBasicBlockStart(MCObjectStreamer *OS,
+                                        const MCCodePaddingContext &Context);
+  /// Handles all target related code padding after writing a block to an
+  /// object file.
+  ///
+  /// \param Context the context of the padding, which embeds the basic
+  /// block's parameters.
+  void handleCodePaddingBasicBlockEnd(const MCCodePaddingContext &Context);
+  /// Handles all target related code padding before writing a new instruction
+  /// to an object file.
+  ///
+  /// \param Inst the instruction.
+  void handleCodePaddingInstructionBegin(const MCInst &Inst);
+  /// Handles all target related code padding after writing an instruction to
+  /// an object file.
+  ///
+  /// \param Inst the instruction.
+  void handleCodePaddingInstructionEnd(const MCInst &Inst);
+
+  /// Relaxes a fragment (changes the size of the padding) according to target
+  /// requirements. The new size computation is done w.r.t. a layout.
+  ///
+  /// \param PF The fragment to relax.
+  /// \param Layout Code layout information.
+  ///
+  /// \returns true iff any relaxation occurred.
+  bool relaxFragment(MCPaddingFragment *PF, MCAsmLayout &Layout);
 };
 
 } // end namespace llvm
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 4f1b5a8b3d72e..1ce6b09355d61 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -183,6 +183,8 @@ class MCAssembler {
 
   bool relaxInstruction(MCAsmLayout &Layout, MCRelaxableFragment &IF);
 
+  bool relaxPaddingFragment(MCAsmLayout &Layout, MCPaddingFragment &PF);
+
   bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF);
 
   bool relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF);
diff --git a/include/llvm/MC/MCCodePadder.h b/include/llvm/MC/MCCodePadder.h
new file mode 100644
index 0000000000000..b590773d2ede1
--- /dev/null
+++ b/include/llvm/MC/MCCodePadder.h
@@ -0,0 +1,243 @@
+//===- llvm/MC/MCCodePadder.h - MC Code Padder ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCCODEPADDER_H
+#define LLVM_MC_MCCODEPADDER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class MCAsmLayout;
+class MCCodePaddingPolicy;
+class MCFragment;
+class MCInst;
+class MCObjectStreamer;
+class MCPaddingFragment;
+class MCSection;
+
+typedef SmallVector<MCPaddingFragment *, 8> MCPFRange;
+
+struct MCCodePaddingContext {
+  bool IsPaddingActive;
+  bool IsBasicBlockInsideInnermostLoop;
+  bool IsBasicBlockReachableViaFallthrough;
+  bool IsBasicBlockReachableViaBranch;
+};
+
+/// Target-independent base class in charge of all code padding decisions for
+/// a target. During encoding it determines if and where MCPaddingFragments
+/// will be located; later on, when layout information is available, it
+/// determines their sizes.
+class MCCodePadder {
+  MCCodePadder(const MCCodePadder &) = delete;
+  void operator=(const MCCodePadder &) = delete;
+
+  /// Determines if the MCCodePaddingPolicies are active.
+  bool ArePoliciesActive;
+
+  /// All the supported MCCodePaddingPolicies.
+  SmallPtrSet<MCCodePaddingPolicy *, 4> CodePaddingPolicies;
+
+  /// A pointer to the fragment of the instruction whose padding is currently
+  /// being handled.
+  MCPaddingFragment *CurrHandledInstFragment;
+
+  /// A map holding the jurisdiction for each padding fragment. Key: padding
+  /// fragment. Value: The fragment's jurisdiction. A jurisdiction is a vector
+  /// of padding fragments whose conditions are being controlled by another
+  /// fragment, the key fragment.
+  DenseMap<MCPaddingFragment *, MCPFRange> FragmentToJurisdiction;
+  MCPFRange &getJurisdiction(MCPaddingFragment *Fragment, MCAsmLayout &Layout);
+
+  /// A map holding the maximal instruction window size relevant for a padding
+  /// fragment.
+  DenseMap<MCPaddingFragment *, uint64_t> FragmentToMaxWindowSize;
+  uint64_t getMaxWindowSize(MCPaddingFragment *Fragment, MCAsmLayout &Layout);
+
+protected:
+  /// The current streamer, used to stream code padding.
+  MCObjectStreamer *OS;
+
+  bool addPolicy(MCCodePaddingPolicy *Policy);
+
+  virtual bool
+  basicBlockRequiresInsertionPoint(const MCCodePaddingContext &Context) {
+    return false;
+  }
+
+  virtual bool instructionRequiresInsertionPoint(const MCInst &Inst) {
+    return false;
+  }
+
+  virtual bool usePoliciesForBasicBlock(const MCCodePaddingContext &Context) {
+    return Context.IsPaddingActive;
+  }
+
+public:
+  MCCodePadder()
+      : ArePoliciesActive(false), CurrHandledInstFragment(nullptr),
+        OS(nullptr) {}
+  virtual ~MCCodePadder();
+
+  /// Handles all target related code padding when starting to write a new
+  /// basic block to an object file.
+  ///
+  /// \param OS The streamer used for writing the padding data and function.
+  /// \param Context the context of the padding, which embeds the basic
+  /// block's parameters.
+  void handleBasicBlockStart(MCObjectStreamer *OS,
+                             const MCCodePaddingContext &Context);
+  /// Handles all target related code padding when done writing a block to an
+  /// object file.
+  ///
+  /// \param Context the context of the padding, which embeds the basic
+  /// block's parameters.
+  void handleBasicBlockEnd(const MCCodePaddingContext &Context);
+  /// Handles all target related code padding before writing a new instruction
+  /// to an object file.
+  ///
+  /// \param Inst the instruction.
+  void handleInstructionBegin(const MCInst &Inst);
+  /// Handles all target related code padding after writing an instruction to
+  /// an object file.
+  ///
+  /// \param Inst the instruction.
+  void handleInstructionEnd(const MCInst &Inst);
+
+  /// Relaxes a fragment (changes the size of the padding) according to target
+  /// requirements. The new size computation is done w.r.t. a layout.
+  ///
+  /// \param Fragment The fragment to relax.
+  /// \param Layout Code layout information.
+  ///
+  /// \returns true iff any relaxation occurred.
+  bool relaxFragment(MCPaddingFragment *Fragment, MCAsmLayout &Layout);
+};
+
+/// The base class for all padding policies, i.e. a rule or set of rules to
+/// pad the generated code.
+class MCCodePaddingPolicy {
+  MCCodePaddingPolicy() = delete;
+  MCCodePaddingPolicy(const MCCodePaddingPolicy &) = delete;
+  void operator=(const MCCodePaddingPolicy &) = delete;
+
+protected:
+  /// A mask holding the kind of this policy, i.e. only the i'th bit will be
+  /// set where i is the kind number.
+  const uint64_t KindMask;
+  /// Instruction window size relevant to this policy.
+  const uint64_t WindowSize;
+  /// A boolean indicating which byte of the instruction determines its
+  /// instruction window. If true - the last byte of the instruction,
+  /// otherwise - the first byte of the instruction.
+  const bool InstByteIsLastByte;
+
+  MCCodePaddingPolicy(uint64_t Kind, uint64_t WindowSize,
+                      bool InstByteIsLastByte)
+      : KindMask(UINT64_C(1) << Kind), WindowSize(WindowSize),
+        InstByteIsLastByte(InstByteIsLastByte) {}
+
+  /// Computes and returns the offset of the consecutive fragment of a given
+  /// fragment.
+  ///
+  /// \param Fragment The fragment whose consecutive offset will be computed.
+  /// \param Layout Code layout information.
+  ///
+  /// \returns the offset of the consecutive fragment of \p Fragment.
+  static uint64_t getNextFragmentOffset(const MCFragment *Fragment,
+                                        const MCAsmLayout &Layout);
+  /// Returns the instruction byte of an instruction pointed by a given
+  /// MCPaddingFragment. An instruction byte is the address of the byte of an
+  /// instruction which determines its instruction window.
+  ///
+  /// \param Fragment The fragment pointing to the instruction.
+  /// \param Layout Code layout information.
+  ///
+  /// \returns the instruction byte of an instruction pointed by \p Fragment.
+  uint64_t getFragmentInstByte(const MCPaddingFragment *Fragment,
+                               MCAsmLayout &Layout) const;
+  uint64_t computeWindowEndAddress(const MCPaddingFragment *Fragment,
+                                   uint64_t Offset, MCAsmLayout &Layout) const;
+
+  /// Computes and returns the penalty weight of a first instruction window in
+  /// a range. This requires a special function since the first window does not
+  /// contain all the padding fragments in that window. It only contains all
+  /// the padding fragments starting from the relevant insertion point.
+  ///
+  /// \param Window The first window.
+  /// \param Offset The offset of the parent section relative to the beginning
+  /// of the file, mod the window size.
+  /// \param Layout Code layout information.
+  ///
+  /// \returns the penalty weight of a first instruction window in a range, \p
+  /// Window.
+  double computeFirstWindowPenaltyWeight(const MCPFRange &Window,
+                                         uint64_t Offset,
+                                         MCAsmLayout &Layout) const;
+  /// Computes and returns the penalty caused by an instruction window.
+  ///
+  /// \param Window The instruction window.
+  /// \param Offset The offset of the parent section relative to the beginning
+  /// of the file, mod the window size.
+  /// \param Layout Code layout information.
+  ///
+  /// \returns the penalty caused by \p Window.
+  virtual double computeWindowPenaltyWeight(const MCPFRange &Window,
+                                            uint64_t Offset,
+                                            MCAsmLayout &Layout) const = 0;
+
+public:
+  virtual ~MCCodePaddingPolicy() {}
+
+  /// Returns the kind mask of this policy - A mask holding the kind of this
+  /// policy, i.e. only the i'th bit will be set where i is the kind number.
+  uint64_t getKindMask() const { return KindMask; }
+  /// Returns the instruction window size relevant to this policy.
+  uint64_t getWindowSize() const { return WindowSize; }
+  /// Returns true if the last byte of an instruction determines its
+  /// instruction window, or false if the first byte of an instruction
+  /// determines it.
+  bool isInstByteLastByte() const { return InstByteIsLastByte; }
+
+  /// Returns true iff this policy needs padding for a given basic block.
+  ///
+  /// \param Context the context of the padding, which embeds the basic
+  /// block's parameters.
+  ///
+  /// \returns true iff this policy needs padding for the basic block.
+  virtual bool
+  basicBlockRequiresPaddingFragment(const MCCodePaddingContext &Context) const {
+    return false;
+  }
+  /// Returns true iff this policy needs padding for a given instruction.
+  ///
+  /// \param Inst The given instruction.
+  ///
+  /// \returns true iff this policy needs padding for \p Inst.
+  virtual bool instructionRequiresPaddingFragment(const MCInst &Inst) const {
+    return false;
+  }
+  /// Computes and returns the penalty caused by a range of instruction
+  /// windows. The weight is computed for each window separately and then
+  /// accumulated.
+  ///
+  /// \param Range The range.
+  /// \param Offset The offset of the parent section relative to the beginning
+  /// of the file, mod the window size.
+  /// \param Layout Code layout information.
+  ///
+  /// \returns the penalty caused by \p Range.
+ double computeRangePenaltyWeight(const MCPFRange &Range, uint64_t Offset, + MCAsmLayout &Layout) const; +}; + +} // namespace llvm + +#endif // LLVM_MC_MCCODEPADDER_H diff --git a/include/llvm/MC/MCCodeView.h b/include/llvm/MC/MCCodeView.h index 265ed2303c018..e2249f49c86cb 100644 --- a/include/llvm/MC/MCCodeView.h +++ b/include/llvm/MC/MCCodeView.h @@ -276,6 +276,10 @@ class CodeViewContext { /// Emits the offset into the checksum table of the given file number. void emitFileChecksumOffset(MCObjectStreamer &OS, unsigned FileNo); + /// Add something to the string table. Returns the final string as well as + /// offset into the string table. + std::pair addToStringTable(StringRef S); + private: /// The current CodeView line information from the last .cv_loc directive. MCCVLoc CurrentCVLoc = MCCVLoc(0, 0, 0, 0, false, true); @@ -290,10 +294,6 @@ class CodeViewContext { MCDataFragment *getStringTableFragment(); - /// Add something to the string table. Returns the final string as well as - /// offset into the string table. - std::pair addToStringTable(StringRef S); - /// Get a string table offset. unsigned getStringTableOffset(StringRef S); diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 92d419887d2a6..432fc0ede0720 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -441,25 +441,25 @@ namespace llvm { getAssociativeCOFFSection(MCSectionCOFF *Sec, const MCSymbol *KeySym, unsigned UniqueID = GenericSectionID); - MCSectionWasm *getWasmSection(const Twine &Section, unsigned Type) { - return getWasmSection(Section, Type, nullptr); + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K) { + return getWasmSection(Section, K, nullptr); } - MCSectionWasm *getWasmSection(const Twine &Section, unsigned Type, + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, const char *BeginSymName) { - return getWasmSection(Section, Type, "", ~0, BeginSymName); + return getWasmSection(Section, K, "", ~0, BeginSymName); } - MCSectionWasm *getWasmSection(const Twine &Section, unsigned Type, + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, const Twine &Group, unsigned UniqueID) { - return getWasmSection(Section, Type, Group, UniqueID, nullptr); + return getWasmSection(Section, K, Group, UniqueID, nullptr); } - MCSectionWasm *getWasmSection(const Twine &Section, unsigned Type, + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, const Twine &Group, unsigned UniqueID, const char *BeginSymName); - MCSectionWasm *getWasmSection(const Twine &Section, unsigned Type, + MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, const MCSymbolWasm *Group, unsigned UniqueID, const char *BeginSymName); diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index d32b56a4ba087..fd8d118ccdc5e 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -137,9 +137,9 @@ class MCELFObjectTargetWriter { /// \param MOTW - The target specific ELF writer subclass. /// \param OS - The stream to write to. /// \returns The constructed object writer. 
-MCObjectWriter *createELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_pwrite_stream &OS, - bool IsLittleEndian); +std::unique_ptr<MCObjectWriter> +createELFObjectWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW, + raw_pwrite_stream &OS, bool IsLittleEndian); } // end namespace llvm diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h index 90434f34da5f1..c5b66a163c851 100644 --- a/include/llvm/MC/MCELFStreamer.h +++ b/include/llvm/MC/MCELFStreamer.h @@ -23,9 +23,8 @@ class MCInst; class MCELFStreamer : public MCObjectStreamer { public: - MCELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter) - : MCObjectStreamer(Context, TAB, OS, Emitter) {} + MCELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, + raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> Emitter); ~MCELFStreamer() override = default; @@ -90,10 +89,11 @@ class MCELFStreamer : public MCObjectStreamer { SmallVector<MCDataFragment *, 4> BundleGroups; }; -MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, +MCELFStreamer *createARMELFStreamer(MCContext &Context, + std::unique_ptr<MCAsmBackend> TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll, - bool IsThumb); + std::unique_ptr<MCCodeEmitter> Emitter, + bool RelaxAll, bool IsThumb); } // end namespace llvm diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h index 284ca50e19d5b..7c66b2126cd59 100644 --- a/include/llvm/MC/MCFragment.h +++ b/include/llvm/MC/MCFragment.h @@ -41,6 +41,7 @@ class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> { FT_Dwarf, FT_DwarfFrame, FT_LEB, + FT_Padding, FT_SafeSEH, FT_CVInlineLines, FT_CVDefRange, @@ -323,6 +324,98 @@ class MCAlignFragment : public MCFragment { } }; +/// Fragment for adding required padding. +/// This fragment is always inserted before an instruction, and holds that +/// instruction as context information (as well as a mask of kinds) for +/// determining the padding size. +/// +class MCPaddingFragment : public MCFragment { + /// A mask containing all the kinds relevant to this fragment. i.e. the i'th + /// bit will be set iff kind i is relevant to this fragment. + uint64_t PaddingPoliciesMask; + /// A boolean indicating if this fragment will actually hold padding. If its + /// value is false, then this fragment serves only as a placeholder, + /// containing data to assist other insertion points in their decision making. + bool IsInsertionPoint; + + uint64_t Size; + + struct MCInstInfo { + bool IsInitialized; + MCInst Inst; + /// A boolean indicating whether the instruction pointed to by this fragment + /// is a fixed size instruction or a relaxable instruction held by a + /// MCRelaxableFragment. + bool IsImmutableSizedInst; + union { + /// If the instruction is a fixed size instruction, hold its size. + size_t InstSize; + /// Otherwise, hold a pointer to the MCRelaxableFragment holding it. + MCRelaxableFragment *InstFragment; + }; + }; + MCInstInfo InstInfo; + +public: + static const uint64_t PFK_None = UINT64_C(0); + + enum MCPaddingFragmentKind { + // values 0-7 are reserved for future target-independent values. 
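The new createELFObjectWriter signature above transfers ownership of the target writer into the ELF writer. A hedged sketch of a caller under that contract; makeWriter is a hypothetical helper:

    #include "llvm/MC/MCELFObjectWriter.h"
    #include <memory>
    using namespace llvm;

    // Sketch: the factory consumes MOTW; the caller keeps only the result.
    std::unique_ptr<MCObjectWriter>
    makeWriter(std::unique_ptr<MCELFObjectTargetWriter> MOTW,
               raw_pwrite_stream &OS) {
      return createELFObjectWriter(std::move(MOTW), OS,
                                   /*IsLittleEndian=*/true);
    }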
+ + FirstTargetPerfNopFragmentKind = 8, + + /// Limit range of target MCPerfNopFragment kinds to fit in uint64_t + MaxTargetPerfNopFragmentKind = 63 + }; + + MCPaddingFragment(MCSection *Sec = nullptr) + : MCFragment(FT_Padding, false, 0, Sec), PaddingPoliciesMask(PFK_None), + IsInsertionPoint(false), Size(UINT64_C(0)), + InstInfo({false, MCInst(), false, {0}}) {} + + bool isInsertionPoint() const { return IsInsertionPoint; } + void setAsInsertionPoint() { IsInsertionPoint = true; } + uint64_t getPaddingPoliciesMask() const { return PaddingPoliciesMask; } + void setPaddingPoliciesMask(uint64_t Value) { PaddingPoliciesMask = Value; } + bool hasPaddingPolicy(uint64_t PolicyMask) const { + assert(isPowerOf2_64(PolicyMask) && + "Policy mask must contain exactly one policy"); + return (getPaddingPoliciesMask() & PolicyMask) != PFK_None; + } + const MCInst &getInst() const { + assert(isInstructionInitialized() && "Fragment has no instruction!"); + return InstInfo.Inst; + } + size_t getInstSize() const { + assert(isInstructionInitialized() && "Fragment has no instruction!"); + if (InstInfo.IsImmutableSizedInst) + return InstInfo.InstSize; + assert(InstInfo.InstFragment != nullptr && + "Must have a valid InstFragment to retrieve InstSize from"); + return InstInfo.InstFragment->getContents().size(); + } + void setInstAndInstSize(const MCInst &Inst, size_t InstSize) { + InstInfo.IsInitialized = true; + InstInfo.IsImmutableSizedInst = true; + InstInfo.Inst = Inst; + InstInfo.InstSize = InstSize; + } + void setInstAndInstFragment(const MCInst &Inst, + MCRelaxableFragment *InstFragment) { + InstInfo.IsInitialized = true; + InstInfo.IsImmutableSizedInst = false; + InstInfo.Inst = Inst; + InstInfo.InstFragment = InstFragment; + } + uint64_t getSize() const { return Size; } + void setSize(uint64_t Value) { Size = Value; } + bool isInstructionInitialized() const { return InstInfo.IsInitialized; } + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Padding; + } +}; + class MCFillFragment : public MCFragment { /// Value to use for filling bytes. uint8_t Value; diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h index 9bf440ea96d21..db28fd0fd6d9d 100644 --- a/include/llvm/MC/MCInst.h +++ b/include/llvm/MC/MCInst.h @@ -160,6 +160,10 @@ class MCInst { unsigned Opcode = 0; SMLoc Loc; SmallVector<MCOperand, 8> Operands; + // These flags could be used to pass some info from one target subcomponent + // to another, for example, from disassembler to asm printer. The values of + // the flags are meaningful only at the target level (e.g. prefixes on x86). 
+ unsigned Flags = 0; public: MCInst() = default; @@ -167,6 +171,9 @@ class MCInst { void setOpcode(unsigned Op) { Opcode = Op; } unsigned getOpcode() const { return Opcode; } + void setFlags(unsigned F) { Flags = F; } + unsigned getFlags() const { return Flags; } + void setLoc(SMLoc loc) { Loc = loc; } SMLoc getLoc() const { return Loc; } diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h index 42dc90da3049a..594869f74632d 100644 --- a/include/llvm/MC/MCMachObjectWriter.h +++ b/include/llvm/MC/MCMachObjectWriter.h @@ -117,9 +117,10 @@ class MachObjectWriter : public MCObjectWriter { MachSymbolData *findSymbolData(const MCSymbol &Sym); public: - MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_pwrite_stream &OS, - bool IsLittleEndian) - : MCObjectWriter(OS, IsLittleEndian), TargetObjectWriter(MOTW) {} + MachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW, + raw_pwrite_stream &OS, bool IsLittleEndian) + : MCObjectWriter(OS, IsLittleEndian), + TargetObjectWriter(std::move(MOTW)) {} const MCSymbol &findAliasedSymbol(const MCSymbol &Sym) const; @@ -269,9 +270,9 @@ class MachObjectWriter : public MCObjectWriter { /// \param MOTW - The target specific Mach-O writer subclass. /// \param OS - The stream to write to. /// \returns The constructed object writer. -MCObjectWriter *createMachObjectWriter(MCMachObjectTargetWriter *MOTW, - raw_pwrite_stream &OS, - bool IsLittleEndian); +std::unique_ptr<MCObjectWriter> +createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW, + raw_pwrite_stream &OS, bool IsLittleEndian); } // end namespace llvm diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h index b03fd099c1d9b..d95f84d1d816e 100644 --- a/include/llvm/MC/MCObjectFileInfo.h +++ b/include/llvm/MC/MCObjectFileInfo.h @@ -123,6 +123,9 @@ class MCObjectFileInfo { /// Section for newer gnu pubtypes. MCSection *DwarfGnuPubTypesSection; + // Section for Swift AST + MCSection *DwarfSwiftASTSection; + MCSection *COFFDebugSymbolsSection; MCSection *COFFDebugTypesSection; @@ -267,6 +270,7 @@ class MCObjectFileInfo { MCSection *getDwarfAddrSection() const { return DwarfAddrSection; } MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; } MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; } + MCSection *getDwarfSwiftASTSection() const { return DwarfSwiftASTSection; } MCSection *getCOFFDebugSymbolsSection() const { return COFFDebugSymbolsSection; diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index 284af06e1a58b..a3dbc56ebc10f 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -34,7 +34,10 @@ class raw_pwrite_stream; /// to that file format or custom semantics expected by the object writer /// implementation. 
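The MCInst flag accessors added above are an opaque, target-defined channel. A hedged sketch of the round trip; the bit value is a made-up target encoding:

    #include "llvm/MC/MCInst.h"
    using namespace llvm;

    // Hypothetical target flag bit (real values are target-specific).
    static const unsigned RepPrefixBit = 1u << 0;

    void tagRepPrefix(MCInst &MI) {
      MI.setFlags(MI.getFlags() | RepPrefixBit); // preserve other bits
    }

    bool hasRepPrefix(const MCInst &MI) {
      return (MI.getFlags() & RepPrefixBit) != 0;
    }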
class MCObjectStreamer : public MCStreamer { - MCAssembler *Assembler; + std::unique_ptr<MCObjectWriter> ObjectWriter; + std::unique_ptr<MCAsmBackend> TAB; + std::unique_ptr<MCCodeEmitter> Emitter; + std::unique_ptr<MCAssembler> Assembler; MCSection::iterator CurInsertionPoint; bool EmitEHFrame; bool EmitDebugFrame; @@ -43,11 +46,14 @@ class MCObjectStreamer : public MCStreamer { virtual void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo&) = 0; void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; + MCSymbol *EmitCFILabel() override; + void EmitInstructionImpl(const MCInst &Inst, const MCSubtargetInfo &STI); protected: - MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter); - ~MCObjectStreamer() override; + MCObjectStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, + raw_pwrite_stream &OS, + std::unique_ptr<MCCodeEmitter> Emitter); + ~MCObjectStreamer(); public: /// state management @@ -71,6 +77,7 @@ class MCObjectStreamer : public MCStreamer { /// Get a data fragment to write into, creating a new one if the current /// fragment is not a data fragment. MCDataFragment *getOrCreateDataFragment(); + MCPaddingFragment *getOrCreatePaddingFragment(); protected: bool changeSectionImpl(MCSection *Section, const MCExpr *Subsection); @@ -116,6 +123,10 @@ class MCObjectStreamer : public MCStreamer { unsigned MaxBytesToEmit = 0) override; void emitValueToOffset(const MCExpr *Offset, unsigned char Value, SMLoc Loc) override; + void + EmitCodePaddingBasicBlockStart(const MCCodePaddingContext &Context) override; + void + EmitCodePaddingBasicBlockEnd(const MCCodePaddingContext &Context) override; void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, unsigned Isa, unsigned Discriminator, diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h index 3a659f048ccf6..55bd435a9b2fe 100644 --- a/include/llvm/MC/MCParser/MCAsmParser.h +++ b/include/llvm/MC/MCParser/MCAsmParser.h @@ -34,19 +34,61 @@ class MCStreamer; class MCTargetAsmParser; class SourceMgr; -class InlineAsmIdentifierInfo { -public: - void *OpDecl; - bool IsVarDecl; - unsigned Length, Size, Type; - - void clear() { - OpDecl = nullptr; - IsVarDecl = false; - Length = 1; - Size = 0; - Type = 0; +struct InlineAsmIdentifierInfo { + enum IdKind { + IK_Invalid, // Initial state. Unexpected after successful parsing. + IK_Label, // Function/Label reference. + IK_EnumVal, // Value of enumeration type. + IK_Var // Variable. 
+ }; + // Represents an Enum value + struct EnumIdentifier { + int64_t EnumVal; + }; + // Represents a label/function reference + struct LabelIdentifier { + void *Decl; + }; + // Represents a variable + struct VariableIdentifier { + void *Decl; + bool IsGlobalLV; + unsigned Length; + unsigned Size; + unsigned Type; + }; + // An InlineAsm identifier can only be one of these + union { + EnumIdentifier Enum; + LabelIdentifier Label; + VariableIdentifier Var; + }; + bool isKind(IdKind kind) const { return Kind == kind; } + // Initializers + void setEnum(int64_t enumVal) { + assert(isKind(IK_Invalid) && "should be initialized only once"); + Kind = IK_EnumVal; + Enum.EnumVal = enumVal; + } + void setLabel(void *decl) { + assert(isKind(IK_Invalid) && "should be initialized only once"); + Kind = IK_Label; + Label.Decl = decl; + } + void setVar(void *decl, bool isGlobalLV, unsigned size, unsigned type) { + assert(isKind(IK_Invalid) && "should be initialized only once"); + Kind = IK_Var; + Var.Decl = decl; + Var.IsGlobalLV = isGlobalLV; + Var.Size = size; + Var.Type = type; + Var.Length = size / type; + } + InlineAsmIdentifierInfo() : Kind(IK_Invalid) {} + +private: + // Discriminate using the current kind. + IdKind Kind; }; /// \brief Generic Sema callback for assembly parser. @@ -54,9 +96,9 @@ class MCAsmParserSemaCallback { public: virtual ~MCAsmParserSemaCallback(); - virtual void *LookupInlineAsmIdentifier(StringRef &LineBuf, - InlineAsmIdentifierInfo &Info, - bool IsUnevaluatedContext) = 0; + virtual void LookupInlineAsmIdentifier(StringRef &LineBuf, + InlineAsmIdentifierInfo &Info, + bool IsUnevaluatedContext) = 0; virtual StringRef LookupInlineAsmLabel(StringRef Identifier, SourceMgr &SM, SMLoc Location, bool Create) = 0; virtual bool LookupInlineAsmField(StringRef Base, StringRef Member, diff --git a/include/llvm/MC/MCParser/MCTargetAsmParser.h b/include/llvm/MC/MCParser/MCTargetAsmParser.h index e5d5a2a4e06e9..9f8550c3887c8 100644 --- a/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -12,6 +12,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParserExtension.h" #include "llvm/MC/MCTargetOptions.h" @@ -132,6 +133,139 @@ enum OperandMatchResultTy { MatchOperand_ParseFail // operand matched but had errors }; +// When matching an assembly instruction fails, there may be multiple +// encodings that are close to being a match. It's often ambiguous which one +// the programmer intended to use, so we want to report an error which mentions +// each of these "near-miss" encodings. This struct contains information about +// one such encoding, and why it did not match the parsed instruction. +class NearMissInfo { +public: + enum NearMissKind { + NoNearMiss, + NearMissOperand, + NearMissFeature, + NearMissPredicate, + NearMissTooFewOperands, + }; + + // The encoding is valid for the parsed assembly string. This is only used + // internally to the table-generated assembly matcher. + static NearMissInfo getSuccess() { return NearMissInfo(); } + + // The instruction encoding is not valid because it requires some target + // features that are not currently enabled. MissingFeatures has a bit set for + // each feature that the encoding needs but which is not enabled. 
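The struct above is a tagged union that must be initialized exactly once. A hedged sketch of how a Sema callback might fill and query it; the sizes are illustrative:

    #include "llvm/MC/MCParser/MCAsmParser.h"
    #include <cassert>
    using namespace llvm;

    // Sketch: initialize once, then discriminate on the kind.
    void describeVariable(InlineAsmIdentifierInfo &Info, void *FrontendDecl) {
      assert(Info.isKind(InlineAsmIdentifierInfo::IK_Invalid));
      // e.g. a global lvalue of 8 bytes with a 4-byte element type.
      Info.setVar(FrontendDecl, /*isGlobalLV=*/true, /*size=*/8, /*type=*/4);
      if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
        (void)Info.Var.Length; // == size / type == 2
    }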
+ static NearMissInfo getMissedFeature(uint64_t MissingFeatures) { + NearMissInfo Result; + Result.Kind = NearMissFeature; + Result.Features = MissingFeatures; + return Result; + } + + // The instruction encoding is not valid because the target-specific + // predicate function returned an error code. FailureCode is the + // target-specific error code returned by the predicate. + static NearMissInfo getMissedPredicate(unsigned FailureCode) { + NearMissInfo Result; + Result.Kind = NearMissPredicate; + Result.PredicateError = FailureCode; + return Result; + } + + // The instruction encoding is not valid because one (and only one) parsed + // operand is not of the correct type. OperandError is the error code + // relating to the operand class expected by the encoding. OperandClass is + // the type of the expected operand. Opcode is the opcode of the encoding. + // OperandIndex is the index into the parsed operand list. + static NearMissInfo getMissedOperand(unsigned OperandError, + unsigned OperandClass, unsigned Opcode, + unsigned OperandIndex) { + NearMissInfo Result; + Result.Kind = NearMissOperand; + Result.MissedOperand.Error = OperandError; + Result.MissedOperand.Class = OperandClass; + Result.MissedOperand.Opcode = Opcode; + Result.MissedOperand.Index = OperandIndex; + return Result; + } + + // The instruction encoding is not valid because it expects more operands + // than were parsed. OperandClass is the class of the expected operand that + // was not provided. Opcode is the instruction encoding. + static NearMissInfo getTooFewOperands(unsigned OperandClass, + unsigned Opcode) { + NearMissInfo Result; + Result.Kind = NearMissTooFewOperands; + Result.TooFewOperands.Class = OperandClass; + Result.TooFewOperands.Opcode = Opcode; + return Result; + } + + operator bool() const { return Kind != NoNearMiss; } + + NearMissKind getKind() const { return Kind; } + + // Feature flags required by the instruction, that the current target does + // not have. + uint64_t getFeatures() const { + assert(Kind == NearMissFeature); + return Features; + } + // Error code returned by the target predicate when validating this + // instruction encoding. + unsigned getPredicateError() const { + assert(Kind == NearMissPredicate); + return PredicateError; + } + // MatchClassKind of the operand that we expected to see. + unsigned getOperandClass() const { + assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); + return MissedOperand.Class; + } + // Opcode of the encoding we were trying to match. + unsigned getOpcode() const { + assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); + return MissedOperand.Opcode; + } + // Error code returned when validating the operand. + unsigned getOperandError() const { + assert(Kind == NearMissOperand); + return MissedOperand.Error; + } + // Index of the actual operand we were trying to match in the list of parsed + // operands. + unsigned getOperandIndex() const { + assert(Kind == NearMissOperand); + return MissedOperand.Index; + } + +private: + NearMissKind Kind; + + // These two structs share a common prefix, so we can safely rely on the fact + // that they overlap in the union. 
+ struct MissedOpInfo { + unsigned Class; + unsigned Opcode; + unsigned Error; + unsigned Index; + }; + + struct TooFewOperandsInfo { + unsigned Class; + unsigned Opcode; + }; + + union { + uint64_t Features; + unsigned PredicateError; + MissedOpInfo MissedOperand; + TooFewOperandsInfo TooFewOperands; + }; + + NearMissInfo() : Kind(NoNearMiss) {} +}; + /// MCTargetAsmParser - Generic interface to target specific assembly parsers. class MCTargetAsmParser : public MCAsmParserExtension { public: @@ -140,11 +274,13 @@ class MCTargetAsmParser : public MCAsmParserExtension { Match_MissingFeature, Match_MnemonicFail, Match_Success, + Match_NearMisses, FIRST_TARGET_MATCH_RESULT_TY }; protected: // Can only create subclasses. - MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI); + MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI, + const MCInstrInfo &MII); /// Create a copy of STI and return a non-const reference to it. MCSubtargetInfo ©STI(); @@ -165,6 +301,8 @@ class MCTargetAsmParser : public MCAsmParserExtension { /// Current STI. const MCSubtargetInfo *STI; + const MCInstrInfo &MII; + public: MCTargetAsmParser(const MCTargetAsmParser &) = delete; MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete; diff --git a/include/llvm/MC/MCSectionWasm.h b/include/llvm/MC/MCSectionWasm.h index 66ae8d68d336e..cc467ed9837ab 100644 --- a/include/llvm/MC/MCSectionWasm.h +++ b/include/llvm/MC/MCSectionWasm.h @@ -27,13 +27,11 @@ class MCSymbol; /// This represents a section on wasm. class MCSectionWasm final : public MCSection { private: + /// This is the name of the section. The referenced memory is owned by /// TargetLoweringObjectFileWasm's WasmUniqueMap. StringRef SectionName; - /// This is the type of the section, from the enums in BinaryFormat/Wasm.h - unsigned Type; - unsigned UniqueID; const MCSymbolWasm *Group; @@ -48,12 +46,10 @@ class MCSectionWasm final : public MCSection { uint64_t MemoryOffset; friend class MCContext; - MCSectionWasm(StringRef Section, unsigned type, SectionKind K, - const MCSymbolWasm *group, unsigned UniqueID, MCSymbol *Begin) - : MCSection(SV_Wasm, K, Begin), SectionName(Section), Type(type), - UniqueID(UniqueID), Group(group), SectionOffset(0) { - assert(type == wasm::WASM_SEC_CODE || type == wasm::WASM_SEC_DATA); - } + MCSectionWasm(StringRef Section, SectionKind K, const MCSymbolWasm *group, + unsigned UniqueID, MCSymbol *Begin) + : MCSection(SV_Wasm, K, Begin), SectionName(Section), UniqueID(UniqueID), + Group(group), SectionOffset(0) {} void setSectionName(StringRef Name) { SectionName = Name; } @@ -65,7 +61,6 @@ class MCSectionWasm final : public MCSection { bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; StringRef getSectionName() const { return SectionName; } - unsigned getType() const { return Type; } const MCSymbolWasm *getGroup() const { return Group; } void PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T, @@ -74,6 +69,10 @@ class MCSectionWasm final : public MCSection { bool UseCodeAlign() const override; bool isVirtualSection() const override; + bool isWasmData() const { + return Kind.isGlobalWriteableData() || Kind.isReadOnly(); + } + bool isUnique() const { return UniqueID != ~0U; } unsigned getUniqueID() const { return UniqueID; } diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index d6f41ce5cfe8f..58003d7d596c6 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -38,6 +38,7 @@ class AssemblerConstantPools; class 
formatted_raw_ostream; class MCAsmBackend; class MCCodeEmitter; +struct MCCodePaddingContext; class MCContext; class MCExpr; class MCInst; @@ -171,14 +172,16 @@ class MCStreamer { std::vector<MCDwarfFrameInfo> DwarfFrameInfos; MCDwarfFrameInfo *getCurrentDwarfFrameInfo(); - void EnsureValidDwarfFrame(); - MCSymbol *EmitCFILabel(); - MCSymbol *EmitCFICommon(); + /// Similar to DwarfFrameInfos, but for SEH unwind info. Chained frames may + /// refer to each other, so use std::unique_ptr to provide pointer stability. + std::vector<std::unique_ptr<WinEH::FrameInfo>> WinFrameInfos; - std::vector<WinEH::FrameInfo *> WinFrameInfos; WinEH::FrameInfo *CurrentWinFrameInfo; - void EnsureValidWinFrameInfo(); + + /// Retrieve the current frame info if one is available and it is not yet + /// closed. Otherwise, issue an error and return null. + WinEH::FrameInfo *EnsureValidWinFrameInfo(SMLoc Loc); /// \brief Tracks an index to represent the order a symbol was emitted in. /// Zero means we did not emit that symbol. @@ -200,6 +203,10 @@ class MCStreamer { virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame); virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame); + /// When emitting an object file, create and emit a real label. When emitting + /// textual assembly, this should do nothing to avoid polluting our output. + virtual MCSymbol *EmitCFILabel(); + WinEH::FrameInfo *getCurrentWinFrameInfo() { return CurrentWinFrameInfo; } @@ -238,7 +245,7 @@ class MCStreamer { bool hasUnfinishedDwarfFrameInfo(); unsigned getNumWinFrameInfos() { return WinFrameInfos.size(); } - ArrayRef<WinEH::FrameInfo *> getWinFrameInfos() const { + ArrayRef<std::unique_ptr<WinEH::FrameInfo>> getWinFrameInfos() const { return WinFrameInfos; } @@ -710,6 +717,12 @@ class MCStreamer { virtual void emitValueToOffset(const MCExpr *Offset, unsigned char Value, SMLoc Loc); + virtual void + EmitCodePaddingBasicBlockStart(const MCCodePaddingContext &Context) {} + + virtual void + EmitCodePaddingBasicBlockEnd(const MCCodePaddingContext &Context) {} + /// @} /// \brief Switch to a new logical file. This is used to implement the '.file @@ -784,6 +797,9 @@ class MCStreamer { /// directive. virtual void EmitCVFileChecksumOffsetDirective(unsigned FileNo) {} + /// This implements the CodeView '.cv_fpo_data' assembler directive. + virtual void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc = {}) {} + /// Emit the absolute difference between two symbols. /// /// \pre Offset of \c Hi is greater than the offset \c Lo. 
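Tying the NearMissInfo factories from the preceding hunks together: the matcher records one near miss per failure mode and the caller dispatches on the kind. A hedged sketch; nearMissOpcode is a hypothetical helper and the return values stand in for real diagnostics:

    #include "llvm/MC/MCParser/MCTargetAsmParser.h"
    using namespace llvm;

    // Sketch: inspect a near-miss record produced by the matcher.
    unsigned nearMissOpcode(const NearMissInfo &NM) {
      if (!NM) // operator bool: NoNearMiss means the encoding matched
        return 0;
      switch (NM.getKind()) {
      case NearMissInfo::NearMissOperand:
      case NearMissInfo::NearMissTooFewOperands:
        return NM.getOpcode(); // getOperandClass() is valid for both, too
      default:
        return 0;
      }
    }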
@@ -814,20 +830,23 @@ class MCStreamer { virtual void EmitCFIRegister(int64_t Register1, int64_t Register2); virtual void EmitCFIWindowSave(); - virtual void EmitWinCFIStartProc(const MCSymbol *Symbol); - virtual void EmitWinCFIEndProc(); - virtual void EmitWinCFIStartChained(); - virtual void EmitWinCFIEndChained(); - virtual void EmitWinCFIPushReg(unsigned Register); - virtual void EmitWinCFISetFrame(unsigned Register, unsigned Offset); - virtual void EmitWinCFIAllocStack(unsigned Size); - virtual void EmitWinCFISaveReg(unsigned Register, unsigned Offset); - virtual void EmitWinCFISaveXMM(unsigned Register, unsigned Offset); - virtual void EmitWinCFIPushFrame(bool Code); - virtual void EmitWinCFIEndProlog(); - - virtual void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except); - virtual void EmitWinEHHandlerData(); + virtual void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc()); + virtual void EmitWinCFIEndProc(SMLoc Loc = SMLoc()); + virtual void EmitWinCFIStartChained(SMLoc Loc = SMLoc()); + virtual void EmitWinCFIEndChained(SMLoc Loc = SMLoc()); + virtual void EmitWinCFIPushReg(unsigned Register, SMLoc Loc = SMLoc()); + virtual void EmitWinCFISetFrame(unsigned Register, unsigned Offset, + SMLoc Loc = SMLoc()); + virtual void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc = SMLoc()); + virtual void EmitWinCFISaveReg(unsigned Register, unsigned Offset, + SMLoc Loc = SMLoc()); + virtual void EmitWinCFISaveXMM(unsigned Register, unsigned Offset, + SMLoc Loc = SMLoc()); + virtual void EmitWinCFIPushFrame(bool Code, SMLoc Loc = SMLoc()); + virtual void EmitWinCFIEndProlog(SMLoc Loc = SMLoc()); + virtual void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, + SMLoc Loc = SMLoc()); + virtual void EmitWinEHHandlerData(SMLoc Loc = SMLoc()); /// Get the .pdata section used for the given section. Typically the given /// section is either the main .text section or some other COMDAT .text diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h index aa1eaf022c555..ff223f70303bc 100644 --- a/include/llvm/MC/MCValue.h +++ b/include/llvm/MC/MCValue.h @@ -38,11 +38,12 @@ class raw_ostream; /// Note that this class must remain a simple POD value class, because we need /// it to live in unions etc. class MCValue { - const MCSymbolRefExpr *SymA, *SymB; - int64_t Cst; - uint32_t RefKind; + const MCSymbolRefExpr *SymA = nullptr, *SymB = nullptr; + int64_t Cst = 0; + uint32_t RefKind = 0; + public: - MCValue() : SymA(nullptr), SymB(nullptr), Cst(0), RefKind(0) {} + MCValue() = default; int64_t getConstant() const { return Cst; } const MCSymbolRefExpr *getSymA() const { return SymA; } const MCSymbolRefExpr *getSymB() const { return SymB; } diff --git a/include/llvm/MC/MCWasmObjectWriter.h b/include/llvm/MC/MCWasmObjectWriter.h index bebc0a8258100..a4d5eb857b393 100644 --- a/include/llvm/MC/MCWasmObjectWriter.h +++ b/include/llvm/MC/MCWasmObjectWriter.h @@ -44,8 +44,9 @@ class MCWasmObjectTargetWriter { /// \param MOTW - The target specific Wasm writer subclass. /// \param OS - The stream to write to. /// \returns The constructed object writer. 
-MCObjectWriter *createWasmObjectWriter(MCWasmObjectTargetWriter *MOTW, - raw_pwrite_stream &OS); +std::unique_ptr<MCObjectWriter> +createWasmObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW, + raw_pwrite_stream &OS); } // End llvm namespace diff --git a/include/llvm/MC/MCWasmStreamer.h b/include/llvm/MC/MCWasmStreamer.h index bdd6f103cd445..135d5e38bc937 100644 --- a/include/llvm/MC/MCWasmStreamer.h +++ b/include/llvm/MC/MCWasmStreamer.h @@ -26,9 +26,10 @@ class raw_ostream; class MCWasmStreamer : public MCObjectStreamer { public: - MCWasmStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter) - : MCObjectStreamer(Context, TAB, OS, Emitter), SeenIdent(false) {} + MCWasmStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, + raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> Emitter) + : MCObjectStreamer(Context, std::move(TAB), OS, std::move(Emitter)), + SeenIdent(false) {} ~MCWasmStreamer() override; diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h index 198a08b5f5394..3234bd93cad0a 100644 --- a/include/llvm/MC/MCWinCOFFObjectWriter.h +++ b/include/llvm/MC/MCWinCOFFObjectWriter.h @@ -10,6 +10,8 @@ #ifndef LLVM_MC_MCWINCOFFOBJECTWRITER_H #define LLVM_MC_MCWINCOFFOBJECTWRITER_H +#include <memory> + namespace llvm { class MCAsmBackend; @@ -42,8 +44,9 @@ class raw_pwrite_stream; /// \param MOTW - The target specific WinCOFF writer subclass. /// \param OS - The stream to write to. /// \returns The constructed object writer. - MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, - raw_pwrite_stream &OS); + std::unique_ptr<MCObjectWriter> + createWinCOFFObjectWriter(std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, + raw_pwrite_stream &OS); } // end namespace llvm #endif // LLVM_MC_MCWINCOFFOBJECTWRITER_H diff --git a/include/llvm/MC/MCWinCOFFStreamer.h b/include/llvm/MC/MCWinCOFFStreamer.h index 84e60b85be6a4..a2500c06efa1e 100644 --- a/include/llvm/MC/MCWinCOFFStreamer.h +++ b/include/llvm/MC/MCWinCOFFStreamer.h @@ -27,8 +27,8 @@ class raw_pwrite_stream; class MCWinCOFFStreamer : public MCObjectStreamer { public: - MCWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, MCCodeEmitter &CE, - raw_pwrite_stream &OS); + MCWinCOFFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB, + std::unique_ptr<MCCodeEmitter> CE, raw_pwrite_stream &OS); /// state management void reset() override { @@ -61,7 +61,7 @@ class MCWinCOFFStreamer : public MCObjectStreamer { void EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; void EmitIdent(StringRef IdentString) override; - void EmitWinEHHandlerData() override; + void EmitWinEHHandlerData(SMLoc Loc) override; void FinishImpl() override; /// \} diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h index fa5785562b704..c5b500d87e2a8 100644 --- a/include/llvm/Object/COFF.h +++ b/include/llvm/Object/COFF.h @@ -25,7 +25,6 @@ #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ErrorOr.h" #include <cassert> #include <cstddef> #include <cstdint> @@ -954,28 +953,28 @@ class COFFObjectFile : public ObjectFile { Res = reinterpret_cast<coff_symbol_type *>(getSymbolTable()) + Index; return std::error_code(); } - ErrorOr<COFFSymbolRef> getSymbol(uint32_t index) const { + Expected<COFFSymbolRef> getSymbol(uint32_t index) const { if (SymbolTable16) { const coff_symbol16 *Symb = nullptr; if (std::error_code EC = getSymbol(index, Symb)) - return EC; + return errorCodeToError(EC); return COFFSymbolRef(Symb); } if (SymbolTable32) { const coff_symbol32 *Symb = nullptr; if (std::error_code EC = getSymbol(index, Symb)) - 
return EC; + return errorCodeToError(EC); return COFFSymbolRef(Symb); } - return object_error::parse_failed; + return errorCodeToError(object_error::parse_failed); } template <typename T> std::error_code getAuxSymbol(uint32_t index, const T *&Res) const { - ErrorOr<COFFSymbolRef> s = getSymbol(index); - if (std::error_code EC = s.getError()) - return EC; - Res = reinterpret_cast<const T *>(s->getRawPtr()); + Expected<COFFSymbolRef> S = getSymbol(index); + if (Error E = S.takeError()) + return errorToErrorCode(std::move(E)); + Res = reinterpret_cast<const T *>(S->getRawPtr()); return std::error_code(); } @@ -1164,16 +1163,17 @@ class ResourceSectionRef { ResourceSectionRef() = default; explicit ResourceSectionRef(StringRef Ref) : BBS(Ref, support::little) {} - ErrorOr<ArrayRef<UTF16>> getEntryNameString(const coff_resource_dir_entry &Entry); - ErrorOr<const coff_resource_dir_table &> + Expected<ArrayRef<UTF16>> + getEntryNameString(const coff_resource_dir_entry &Entry); + Expected<const coff_resource_dir_table &> getEntrySubDir(const coff_resource_dir_entry &Entry); - ErrorOr<const coff_resource_dir_table &> getBaseTable(); + Expected<const coff_resource_dir_table &> getBaseTable(); private: BinaryByteStream BBS; - ErrorOr<const coff_resource_dir_table &> getTableAtOffset(uint32_t Offset); - ErrorOr<ArrayRef<UTF16>> getDirStringAtOffset(uint32_t Offset); + Expected<const coff_resource_dir_table &> getTableAtOffset(uint32_t Offset); + Expected<ArrayRef<UTF16>> getDirStringAtOffset(uint32_t Offset); }; // Corresponds to `_FPO_DATA` structure in the PE/COFF spec. diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index 670c0bbce3ac6..0774b9801049c 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -83,6 +83,8 @@ class ELFFile { private: StringRef Buf; + ELFFile(StringRef Object); + public: const Elf_Ehdr *getHeader() const { return reinterpret_cast<const Elf_Ehdr *>(base()); } @@ -102,8 +104,6 @@ class ELFFile { Expected<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section, Elf_Shdr_Range Sections) const; - void VerifyStrTab(const Elf_Shdr *sh) const; - StringRef getRelocationTypeName(uint32_t Type) const; void getRelocationTypeName(uint32_t Type, SmallVectorImpl<char> &Result) const; @@ -112,7 +112,7 @@ class ELFFile { Expected<const Elf_Sym *> getRelocationSymbol(const Elf_Rel *Rel, const Elf_Shdr *SymTab) const; - ELFFile(StringRef Object); + static Expected<ELFFile> create(StringRef Object); bool isMipsELF64() const { return getHeader()->e_machine == ELF::EM_MIPS && @@ -140,10 +140,16 @@ return getSectionContentsAsArray<uint8_t>(Sec); } + Expected<std::vector<Elf_Rela>> android_relas(const Elf_Shdr *Sec) const; + /// \brief Iterate over program header table. 
Expected<Elf_Phdr_Range> program_headers() const { if (getHeader()->e_phnum && getHeader()->e_phentsize != sizeof(Elf_Phdr)) return createError("invalid e_phentsize"); + if (getHeader()->e_phoff + + (getHeader()->e_phnum * getHeader()->e_phentsize) > + getBufSize()) + return createError("program headers longer than binary"); auto *Begin = reinterpret_cast<const Elf_Phdr *>(base() + getHeader()->e_phoff); return makeArrayRef(Begin, Begin + getHeader()->e_phnum); @@ -341,9 +347,13 @@ ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections) const { return getStringTable(&Sections[Index]); } +template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {} + template <class ELFT> -ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) { - assert(sizeof(Elf_Ehdr) <= Buf.size() && "Invalid buffer"); +Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) { + if (sizeof(Elf_Ehdr) > Object.size()) + return createError("Invalid buffer"); + return ELFFile(Object); } template <class ELFT> diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h index f436e4534ce16..905ce450f7f17 100644 --- a/include/llvm/Object/ELFObjectFile.h +++ b/include/llvm/Object/ELFObjectFile.h @@ -33,7 +33,6 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" #include <cassert> #include <cstdint> @@ -61,7 +60,7 @@ class ELFObjectFileBase : public ObjectFile { virtual uint64_t getSectionFlags(DataRefImpl Sec) const = 0; virtual uint64_t getSectionOffset(DataRefImpl Sec) const = 0; - virtual ErrorOr<int64_t> getRelocationAddend(DataRefImpl Rel) const = 0; + virtual Expected<int64_t> getRelocationAddend(DataRefImpl Rel) const = 0; public: using elf_symbol_iterator_range = iterator_range<elf_symbol_iterator>; @@ -167,7 +166,7 @@ class ELFRelocationRef : public RelocationRef { return cast<ELFObjectFileBase>(RelocationRef::getObject()); } - ErrorOr<int64_t> getAddend() const { + Expected<int64_t> getAddend() const { return getObject()->getRelocationAddend(getRawDataRefImpl()); } }; @@ -210,6 +209,11 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase { using Elf_Rela = typename ELFFile<ELFT>::Elf_Rela; using Elf_Dyn = typename ELFFile<ELFT>::Elf_Dyn; +private: + ELFObjectFile(MemoryBufferRef Object, ELFFile<ELFT> EF, + const Elf_Shdr *DotDynSymSec, const Elf_Shdr *DotSymtabSec, + ArrayRef<Elf_Word> ShndxTable); + protected: ELFFile<ELFT> EF; @@ -328,7 +332,8 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase { bool isDyldELFObject; public: - ELFObjectFile(MemoryBufferRef Object, std::error_code &EC); + ELFObjectFile(ELFObjectFile<ELFT> &&Other); + static Expected<ELFObjectFile<ELFT>> create(MemoryBufferRef Object); const Elf_Rel *getRel(DataRefImpl Rel) const; const Elf_Rela *getRela(DataRefImpl Rela) const; @@ -353,7 +358,7 @@ template <class ELFT> class ELFObjectFile : public ELFObjectFileBase { section_iterator section_begin() const override; section_iterator section_end() const override; - ErrorOr<int64_t> getRelocationAddend(DataRefImpl Rel) const override; + Expected<int64_t> getRelocationAddend(DataRefImpl Rel) const override; uint8_t getBytesInAddress() const override; StringRef getFileFormatName() const override; @@ -816,10 +821,10 @@ void ELFObjectFile<ELFT>::getRelocationTypeName( } template <class ELFT> -ErrorOr<int64_t> +Expected<int64_t> ELFObjectFile<ELFT>::getRelocationAddend(DataRefImpl Rel) const { if (getRelSection(Rel)->sh_type != ELF::SHT_RELA) - return object_error::parse_failed; + return createError("Section is not SHT_RELA"); return (int64_t)getRela(Rel)->r_addend; } @@ -844,49 +849,63 @@ ELFObjectFile<ELFT>::getRela(DataRefImpl Rela) const { } template <class ELFT> -ELFObjectFile<ELFT>::ELFObjectFile(MemoryBufferRef Object, std::error_code &EC) - : 
ELFObjectFileBase( - getELFType(ELFT::TargetEndianness == support::little, ELFT::Is64Bits), - Object), - EF(Data.getBuffer()) { +Expected<ELFObjectFile<ELFT>> +ELFObjectFile<ELFT>::create(MemoryBufferRef Object) { + auto EFOrErr = ELFFile<ELFT>::create(Object.getBuffer()); + if (Error E = EFOrErr.takeError()) + return std::move(E); + auto EF = std::move(*EFOrErr); + auto SectionsOrErr = EF.sections(); - if (!SectionsOrErr) { - EC = errorToErrorCode(SectionsOrErr.takeError()); - return; - } + if (!SectionsOrErr) + return SectionsOrErr.takeError(); + + const Elf_Shdr *DotDynSymSec = nullptr; + const Elf_Shdr *DotSymtabSec = nullptr; + ArrayRef<Elf_Word> ShndxTable; for (const Elf_Shdr &Sec : *SectionsOrErr) { switch (Sec.sh_type) { case ELF::SHT_DYNSYM: { - if (DotDynSymSec) { - // More than one .dynsym! - EC = object_error::parse_failed; - return; - } + if (DotDynSymSec) + return createError("More than one dynamic symbol table!"); DotDynSymSec = &Sec; break; } case ELF::SHT_SYMTAB: { - if (DotSymtabSec) { - // More than one .dynsym! - EC = object_error::parse_failed; - return; - } + if (DotSymtabSec) + return createError("More than one static symbol table!"); DotSymtabSec = &Sec; break; } case ELF::SHT_SYMTAB_SHNDX: { auto TableOrErr = EF.getSHNDXTable(Sec); - if (!TableOrErr) { - EC = errorToErrorCode(TableOrErr.takeError()); - return; - } + if (!TableOrErr) + return TableOrErr.takeError(); ShndxTable = *TableOrErr; break; } } } + return ELFObjectFile<ELFT>(Object, EF, DotDynSymSec, DotSymtabSec, + ShndxTable); } +template <class ELFT> +ELFObjectFile<ELFT>::ELFObjectFile(MemoryBufferRef Object, ELFFile<ELFT> EF, + const Elf_Shdr *DotDynSymSec, + const Elf_Shdr *DotSymtabSec, + ArrayRef<Elf_Word> ShndxTable) + : ELFObjectFileBase( + getELFType(ELFT::TargetEndianness == support::little, ELFT::Is64Bits), + Object), + EF(EF), DotDynSymSec(DotDynSymSec), DotSymtabSec(DotSymtabSec), + ShndxTable(ShndxTable) {} + +template <class ELFT> +ELFObjectFile<ELFT>::ELFObjectFile(ELFObjectFile<ELFT> &&Other) + : ELFObjectFile(Other.Data, Other.EF, Other.DotDynSymSec, + Other.DotSymtabSec, Other.ShndxTable) {} + template <class ELFT> basic_symbol_iterator ELFObjectFile<ELFT>::symbol_begin() const { DataRefImpl Sym = toDRI(DotSymtabSec, 0); @@ -995,9 +1014,7 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const { case ELF::EM_WEBASSEMBLY: return "ELF64-wasm"; case ELF::EM_AMDGPU: - return (EF.getHeader()->e_ident[ELF::EI_OSABI] == ELF::ELFOSABI_AMDGPU_HSA && IsLittleEndian) ? - "ELF64-amdgpu-hsacobj" : "ELF64-amdgpu"; + return "ELF64-amdgpu"; case ELF::EM_BPF: return "ELF64-BPF"; default: @@ -1065,11 +1082,20 @@ unsigned ELFObjectFile<ELFT>::getArch() const { default: return Triple::UnknownArch; } - case ELF::EM_AMDGPU: - return (EF.getHeader()->e_ident[ELF::EI_CLASS] == ELF::ELFCLASS64 && EF.getHeader()->e_ident[ELF::EI_OSABI] == ELF::ELFOSABI_AMDGPU_HSA && IsLittleEndian) ? - Triple::amdgcn : Triple::UnknownArch; + case ELF::EM_AMDGPU: { + if (!IsLittleEndian) + return Triple::UnknownArch; + + unsigned EFlags = EF.getHeader()->e_flags; + switch (EFlags & ELF::EF_AMDGPU_ARCH) { + case ELF::EF_AMDGPU_ARCH_R600: + return Triple::r600; + case ELF::EF_AMDGPU_ARCH_GCN: + return Triple::amdgcn; + default: + return Triple::UnknownArch; + } + } case ELF::EM_BPF: return IsLittleEndian ? 
Triple::bpfel : Triple::bpfeb; diff --git a/include/llvm/Object/IRObjectFile.h b/include/llvm/Object/IRObjectFile.h index 9a696bffd1f0b..6c271b1a1f44c 100644 --- a/include/llvm/Object/IRObjectFile.h +++ b/include/llvm/Object/IRObjectFile.h @@ -52,12 +52,12 @@ class IRObjectFile : public SymbolicFile { /// \brief Finds and returns bitcode embedded in the given object file, or an /// error code if not found. - static ErrorOr<MemoryBufferRef> findBitcodeInObject(const ObjectFile &Obj); + static Expected<MemoryBufferRef> findBitcodeInObject(const ObjectFile &Obj); /// \brief Finds and returns bitcode in the given memory buffer (which may /// be either a bitcode file or a native object file with embedded bitcode), /// or an error code if not found. - static ErrorOr<MemoryBufferRef> + static Expected<MemoryBufferRef> findBitcodeInMemBuffer(MemoryBufferRef Object); static Expected<std::unique_ptr<IRObjectFile>> create(MemoryBufferRef Object, diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index 57496373973c8..c7943512f0cf2 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -23,7 +23,6 @@ #include "llvm/Object/SymbolicFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Error.h" -#include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include <cassert> @@ -322,10 +321,10 @@ class ObjectFile : public SymbolicFile { return v->isObject(); } - static ErrorOr<std::unique_ptr<COFFObjectFile>> + static Expected<std::unique_ptr<COFFObjectFile>> createCOFFObjectFile(MemoryBufferRef Object); - static ErrorOr<std::unique_ptr<ObjectFile>> + static Expected<std::unique_ptr<ObjectFile>> createELFObjectFile(MemoryBufferRef Object); static Expected<std::unique_ptr<MachOObjectFile>> diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h index 35bfc24523644..c1e2a82c9f880 100644 --- a/include/llvm/Object/RelocVisitor.h +++ b/include/llvm/Object/RelocVisitor.h @@ -25,7 +25,6 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ErrorOr.h" #include <cstdint> #include <system_error> @@ -115,9 +114,10 @@ class RelocVisitor { } int64_t getELFAddend(RelocationRef R) { - ErrorOr<int64_t> AddendOrErr = ELFRelocationRef(R).getAddend(); - if (std::error_code EC = AddendOrErr.getError()) - report_fatal_error(EC.message()); + Expected<int64_t> AddendOrErr = ELFRelocationRef(R).getAddend(); + handleAllErrors(AddendOrErr.takeError(), [](const ErrorInfoBase &EI) { + report_fatal_error(EI.message()); + }); return *AddendOrErr; } diff --git a/include/llvm/ObjectYAML/WasmYAML.h b/include/llvm/ObjectYAML/WasmYAML.h index 171f823a27da4..d26faa1486231 100644 --- a/include/llvm/ObjectYAML/WasmYAML.h +++ b/include/llvm/ObjectYAML/WasmYAML.h @@ -109,6 +109,13 @@ struct NameEntry { StringRef Name; }; +struct SegmentInfo { + uint32_t Index; + StringRef Name; + uint32_t Alignment; + uint32_t Flags; +}; + struct Signature { uint32_t Index; SignatureForm Form = wasm::WASM_TYPE_FUNC; @@ -161,9 +168,8 @@ struct LinkingSection : CustomSection { } uint32_t DataSize; - uint32_t DataAlignment; std::vector<SymbolInfo> SymbolInfos; - std::vector<NameEntry> SegmentNames; + std::vector<SegmentInfo> SegmentInfos; }; struct TypeSection : Section { @@ -298,6 +304,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Function) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::LocalDecl) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Relocation) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::NameEntry) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SegmentInfo) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SymbolInfo) namespace llvm { @@ -355,6 +362,10 @@ template <> struct MappingTraits<WasmYAML::NameEntry> { static void mapping(IO &IO, WasmYAML::NameEntry &NameEntry); }; 
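Several hunks above migrate ErrorOr to Expected, which changes the consuming pattern from getError() checks to takeError()/consumeError(). A hedged sketch against the new ELFRelocationRef::getAddend; addendOrZero is a hypothetical helper:

    #include "llvm/Object/ELFObjectFile.h"
    #include "llvm/Support/Error.h"
    using namespace llvm;

    // Sketch: an Expected<T> error must be consumed exactly once.
    int64_t addendOrZero(const object::ELFRelocationRef &R) {
      Expected<int64_t> AddendOrErr = R.getAddend();
      if (!AddendOrErr) {
        consumeError(AddendOrErr.takeError()); // or propagate via std::move
        return 0;
      }
      return *AddendOrErr;
    }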
+template <> struct MappingTraits<WasmYAML::SegmentInfo> { + static void mapping(IO &IO, WasmYAML::SegmentInfo &SegmentInfo); +}; + template <> struct MappingTraits<WasmYAML::LocalDecl> { static void mapping(IO &IO, WasmYAML::LocalDecl &LocalDecl); }; diff --git a/include/llvm/ProfileData/Coverage/CoverageMapping.h b/include/llvm/ProfileData/Coverage/CoverageMapping.h index 22286f6d46882..5a4098cf666c4 100644 --- a/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -594,6 +594,89 @@ class CoverageMapping { getInstantiationGroups(StringRef Filename) const; }; +/// Coverage statistics for a single line. +class LineCoverageStats { + uint64_t ExecutionCount; + bool HasMultipleRegions; + bool Mapped; + unsigned Line; + ArrayRef<const CoverageSegment *> LineSegments; + const CoverageSegment *WrappedSegment; + + friend class LineCoverageIterator; + LineCoverageStats() = default; + +public: + LineCoverageStats(ArrayRef<const CoverageSegment *> LineSegments, + const CoverageSegment *WrappedSegment, unsigned Line); + + uint64_t getExecutionCount() const { return ExecutionCount; } + + bool hasMultipleRegions() const { return HasMultipleRegions; } + + bool isMapped() const { return Mapped; } + + unsigned getLine() const { return Line; } + + ArrayRef<const CoverageSegment *> getLineSegments() const { + return LineSegments; + } + + const CoverageSegment *getWrappedSegment() const { return WrappedSegment; } +}; + +/// An iterator over the \c LineCoverageStats objects for lines described by +/// a \c CoverageData instance. +class LineCoverageIterator + : public iterator_facade_base< + LineCoverageIterator, std::forward_iterator_tag, LineCoverageStats> { +public: + LineCoverageIterator(const CoverageData &CD) + : LineCoverageIterator(CD, CD.begin()->Line) {} + + LineCoverageIterator(const CoverageData &CD, unsigned Line) + : CD(CD), WrappedSegment(nullptr), Next(CD.begin()), Ended(false), + Line(Line), Segments(), Stats() { + this->operator++(); + } + + LineCoverageIterator &operator=(const LineCoverageIterator &R) = default; + + bool operator==(const LineCoverageIterator &R) const { + return &CD == &R.CD && Next == R.Next && Ended == R.Ended; + } + + const LineCoverageStats &operator*() const { return Stats; } + + LineCoverageStats &operator*() { return Stats; } + + LineCoverageIterator &operator++(); + + LineCoverageIterator getEnd() const { + auto EndIt = *this; + EndIt.Next = CD.end(); + EndIt.Ended = true; + return EndIt; + } + +private: + const CoverageData &CD; + const CoverageSegment *WrappedSegment; + std::vector<CoverageSegment>::const_iterator Next; + bool Ended; + unsigned Line; + SmallVector<const CoverageSegment *, 4> Segments; + LineCoverageStats Stats; +}; + +/// Get a \c LineCoverageIterator range for the lines described by \p CD. +static inline iterator_range<LineCoverageIterator> +getLineCoverageStats(const coverage::CoverageData &CD) { + auto Begin = LineCoverageIterator(CD); + auto End = Begin.getEnd(); + return make_range(Begin, End); +} + // Profile coverage map has the following layout: // [CoverageMapFileHeader] // [ArrayStart] diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index 424360e0f7655..aa58ead1eda19 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -397,6 +397,8 @@ class IndexedInstrProfReader : public InstrProfReader { std::unique_ptr<InstrProfReaderIndexBase> Index; /// Profile summary data. std::unique_ptr<ProfileSummary> Summary; + // Index to the current record in the record array. + unsigned RecordIndex; // Read the profile summary. 
Return a pointer pointing to one byte past the // end of the summary data if it exists or the input \c Cur. @@ -405,7 +407,7 @@ public: IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) - : DataBuffer(std::move(DataBuffer)) {} + : DataBuffer(std::move(DataBuffer)), RecordIndex(0) {} IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index 7fc258831be88..3aba12344d024 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -296,10 +296,33 @@ class FunctionSamples { /// Return the total number of samples collected inside the function. uint64_t getTotalSamples() const { return TotalSamples; } - /// Return the total number of samples collected at the head of the - /// function. + /// Return the total number of branch samples that have the function as the + /// branch target. This should be equivalent to the sample count of the first + /// instruction of the symbol. But as we directly get this info from the raw + /// profile without referring to potentially inaccurate debug info, this + /// gives more accurate profile data and is preferred for standalone symbols. uint64_t getHeadSamples() const { return TotalHeadSamples; } + /// Return the sample count of the first instruction of the function. + /// The function can be either a standalone symbol or an inlined function. + uint64_t getEntrySamples() const { + // Use either BodySamples or CallsiteSamples, whichever has the smaller + // lineno. + if (!BodySamples.empty() && + (CallsiteSamples.empty() || + BodySamples.begin()->first < CallsiteSamples.begin()->first)) + return BodySamples.begin()->second.getSamples(); + if (!CallsiteSamples.empty()) { + uint64_t T = 0; + // An indirect callsite may be promoted to several inlined direct calls. + // We need to get the sum of them. + for (const auto &N_FS : CallsiteSamples.begin()->second) + T += N_FS.second.getEntrySamples(); + return T; + } + return 0; + } + /// Return all the samples collected in the body of the function. const BodySampleMap &getBodySamples() const { return BodySamples; } @@ -331,7 +354,8 @@ class FunctionSamples { /// Recursively traverses all children, if the corresponding function is /// not defined in module \p M, and its total sample is no less than - /// \p Threshold, add its corresponding GUID to \p S. + /// \p Threshold, add its corresponding GUID to \p S. Also traverse the + /// BodySamples to add hot CallTarget's GUID to \p S. void findImportedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M, uint64_t Threshold) const { if (TotalSamples <= Threshold) return; Function *F = M->getFunction(Name); if (!F || !F->getSubprogram()) S.insert(Function::getGUID(Name)); - for (auto CS : CallsiteSamples) + // Import hot CallTargets, which may not be available in IR because full + // profile annotation cannot be done until backend compilation in ThinLTO. 
+ for (const auto &BS : BodySamples) + for (const auto &TS : BS.second.getCallTargets()) + if (TS.getValue() > Threshold) { + Function *Callee = M->getFunction(TS.getKey()); + if (!Callee || !Callee->getSubprogram()) + S.insert(Function::getGUID(TS.getKey())); + } + for (const auto &CS : CallsiteSamples) for (const auto &NameFS : CS.second) NameFS.second.findImportedFunctions(S, M, Threshold); } diff --git a/include/llvm/Support/AMDGPUKernelDescriptor.h b/include/llvm/Support/AMDGPUKernelDescriptor.h new file mode 100644 index 0000000000000..ce2c0c1c959eb --- /dev/null +++ b/include/llvm/Support/AMDGPUKernelDescriptor.h @@ -0,0 +1,139 @@ +//===--- AMDGPUKernelDescriptor.h -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief AMDGPU kernel descriptor definitions. For more information, visit +/// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor-for-gfx6-gfx9 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_AMDGPUKERNELDESCRIPTOR_H +#define LLVM_SUPPORT_AMDGPUKERNELDESCRIPTOR_H + +#include <cstdint> + +// Creates enumeration entries used for packing bits into integers. Enumeration +// entries include bit shift amount, bit width, and bit mask. +#define AMDGPU_BITS_ENUM_ENTRY(name, shift, width) \ + name ## _SHIFT = (shift), \ + name ## _WIDTH = (width), \ + name = (((1 << (width)) - 1) << (shift)) \ + +// Gets bits for specified bit mask from specified source. +#define AMDGPU_BITS_GET(src, mask) \ + ((src & mask) >> mask ## _SHIFT) \ + +// Sets bits for specified bit mask in specified destination. +#define AMDGPU_BITS_SET(dst, mask, val) \ + dst &= (~(1 << mask ## _SHIFT) & ~mask); \ + dst |= (((val) << mask ## _SHIFT) & mask) \ + +namespace llvm { +namespace AMDGPU { +namespace HSAKD { + +/// \brief Floating point rounding modes. +enum : uint8_t { + AMDGPU_FLOAT_ROUND_MODE_NEAR_EVEN = 0, + AMDGPU_FLOAT_ROUND_MODE_PLUS_INFINITY = 1, + AMDGPU_FLOAT_ROUND_MODE_MINUS_INFINITY = 2, + AMDGPU_FLOAT_ROUND_MODE_ZERO = 3, +}; + +/// \brief Floating point denorm modes. +enum : uint8_t { + AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0, + AMDGPU_FLOAT_DENORM_MODE_FLUSH_DST = 1, + AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC = 2, + AMDGPU_FLOAT_DENORM_MODE_FLUSH_NONE = 3, +}; + +/// \brief System VGPR workitem IDs. +enum : uint8_t { + AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X = 0, + AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y = 1, + AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z = 2, + AMDGPU_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3, +}; + +/// \brief Compute program resource register one layout. 
+enum ComputePgmRsrc1 { + AMDGPU_BITS_ENUM_ENTRY(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6), + AMDGPU_BITS_ENUM_ENTRY(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4), + AMDGPU_BITS_ENUM_ENTRY(PRIORITY, 10, 2), + AMDGPU_BITS_ENUM_ENTRY(FLOAT_ROUND_MODE_32, 12, 2), + AMDGPU_BITS_ENUM_ENTRY(FLOAT_ROUND_MODE_16_64, 14, 2), + AMDGPU_BITS_ENUM_ENTRY(FLOAT_DENORM_MODE_32, 16, 2), + AMDGPU_BITS_ENUM_ENTRY(FLOAT_DENORM_MODE_16_64, 18, 2), + AMDGPU_BITS_ENUM_ENTRY(PRIV, 20, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_DX10_CLAMP, 21, 1), + AMDGPU_BITS_ENUM_ENTRY(DEBUG_MODE, 22, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_IEEE_MODE, 23, 1), + AMDGPU_BITS_ENUM_ENTRY(BULKY, 24, 1), + AMDGPU_BITS_ENUM_ENTRY(CDBG_USER, 25, 1), + AMDGPU_BITS_ENUM_ENTRY(FP16_OVFL, 26, 1), + AMDGPU_BITS_ENUM_ENTRY(RESERVED0, 27, 5), +}; + +/// \brief Compute program resource register two layout. +enum ComputePgmRsrc2 { + AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_OFFSET, 0, 1), + AMDGPU_BITS_ENUM_ENTRY(USER_SGPR_COUNT, 1, 5), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_TRAP_HANDLER, 6, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_INFO, 10, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_VGPR_WORKITEM_ID, 11, 2), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_MEMORY, 14, 1), + AMDGPU_BITS_ENUM_ENTRY(GRANULATED_LDS_SIZE, 15, 9), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1), + AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1), + AMDGPU_BITS_ENUM_ENTRY(RESERVED1, 31, 1), +}; + +/// \brief Kernel descriptor layout. This layout should be kept backwards +/// compatible as it is consumed by the command processor. 
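Each AMDGPU_BITS_ENUM_ENTRY above expands to NAME_SHIFT, NAME_WIDTH, and a NAME mask, which the companion GET/SET macros consume. A hedged sketch of packing and unpacking one ComputePgmRsrc1 field; encodeRoundMode is a hypothetical helper:

    #include "llvm/Support/AMDGPUKernelDescriptor.h"
    #include <cstdint>
    using namespace llvm::AMDGPU::HSAKD;

    uint32_t encodeRoundMode() {
      uint32_t Rsrc1 = 0;
      // Pack FLOAT_ROUND_MODE_32 (bits 13:12), then read it back.
      AMDGPU_BITS_SET(Rsrc1, FLOAT_ROUND_MODE_32,
                      AMDGPU_FLOAT_ROUND_MODE_PLUS_INFINITY);
      return AMDGPU_BITS_GET(Rsrc1, FLOAT_ROUND_MODE_32); // yields 1
    }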
+struct KernelDescriptor final { + uint32_t GroupSegmentFixedSize; + uint32_t PrivateSegmentFixedSize; + uint32_t MaxFlatWorkGroupSize; + uint64_t IsDynamicCallStack : 1; + uint64_t IsXNACKEnabled : 1; + uint64_t Reserved0 : 30; + int64_t KernelCodeEntryByteOffset; + uint64_t Reserved1[3]; + uint32_t ComputePgmRsrc1; + uint32_t ComputePgmRsrc2; + uint64_t EnableSGPRPrivateSegmentBuffer : 1; + uint64_t EnableSGPRDispatchPtr : 1; + uint64_t EnableSGPRQueuePtr : 1; + uint64_t EnableSGPRKernargSegmentPtr : 1; + uint64_t EnableSGPRDispatchID : 1; + uint64_t EnableSGPRFlatScratchInit : 1; + uint64_t EnableSGPRPrivateSegmentSize : 1; + uint64_t EnableSGPRGridWorkgroupCountX : 1; + uint64_t EnableSGPRGridWorkgroupCountY : 1; + uint64_t EnableSGPRGridWorkgroupCountZ : 1; + uint64_t Reserved2 : 54; + + KernelDescriptor() = default; +}; + +} // end namespace HSAKD +} // end namespace AMDGPU +} // end namespace llvm + +#endif // LLVM_SUPPORT_AMDGPUKERNELDESCRIPTOR_H diff --git a/include/llvm/Support/AMDGPUCodeObjectMetadata.h b/include/llvm/Support/AMDGPUMetadata.h similarity index 70% rename from include/llvm/Support/AMDGPUCodeObjectMetadata.h rename to include/llvm/Support/AMDGPUMetadata.h index d274c5ee91842..0e26a4a90838b 100644 --- a/include/llvm/Support/AMDGPUCodeObjectMetadata.h +++ b/include/llvm/Support/AMDGPUMetadata.h @@ -1,4 +1,4 @@ -//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===// +//===--- AMDGPUMetadata.h ---------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,14 +8,13 @@ //===----------------------------------------------------------------------===// // /// \file -/// \brief AMDGPU Code Object Metadata definitions and in-memory -/// representations. +/// \brief AMDGPU metadata definitions and in-memory representations. /// // //===----------------------------------------------------------------------===// -#ifndef LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H -#define LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H +#ifndef LLVM_SUPPORT_AMDGPUMETADATA_H +#define LLVM_SUPPORT_AMDGPUMETADATA_H #include <cstdint> #include <string> @@ -26,21 +25,19 @@ namespace llvm { namespace AMDGPU { //===----------------------------------------------------------------------===// -// Code Object Metadata. +// HSA metadata. //===----------------------------------------------------------------------===// -namespace CodeObject { +namespace HSAMD { -/// \brief Code object metadata major version. -constexpr uint32_t MetadataVersionMajor = 1; -/// \brief Code object metadata minor version. -constexpr uint32_t MetadataVersionMinor = 0; +/// \brief HSA metadata major version. +constexpr uint32_t VersionMajor = 1; +/// \brief HSA metadata minor version. +constexpr uint32_t VersionMinor = 0; -/// \brief Code object metadata beginning assembler directive. -constexpr char MetadataAssemblerDirectiveBegin[] = - ".amdgpu_code_object_metadata"; -/// \brief Code object metadata ending assembler directive. -constexpr char MetadataAssemblerDirectiveEnd[] = - ".end_amdgpu_code_object_metadata"; +/// \brief HSA metadata beginning assembler directive. +constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata"; +/// \brief HSA metadata ending assembler directive. +constexpr char AssemblerDirectiveEnd[] = ".end_amd_amdgpu_hsa_metadata"; /// \brief Access qualifiers. 
diff --git a/include/llvm/Support/AMDGPUCodeObjectMetadata.h b/include/llvm/Support/AMDGPUMetadata.h
similarity index 70%
rename from include/llvm/Support/AMDGPUCodeObjectMetadata.h
rename to include/llvm/Support/AMDGPUMetadata.h
index d274c5ee91842..0e26a4a90838b 100644
--- a/include/llvm/Support/AMDGPUCodeObjectMetadata.h
+++ b/include/llvm/Support/AMDGPUMetadata.h
@@ -1,4 +1,4 @@
-//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===//
+//===--- AMDGPUMetadata.h ---------------------------------------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -8,14 +8,13 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file
-/// \brief AMDGPU Code Object Metadata definitions and in-memory
-/// representations.
+/// \brief AMDGPU metadata definitions and in-memory representations.
 ///
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H
-#define LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H
+#ifndef LLVM_SUPPORT_AMDGPUMETADATA_H
+#define LLVM_SUPPORT_AMDGPUMETADATA_H
 
 #include <cstdint>
 #include <string>
@@ -26,21 +25,19 @@ namespace llvm {
 namespace AMDGPU {
 
 //===----------------------------------------------------------------------===//
-// Code Object Metadata.
+// HSA metadata.
 //===----------------------------------------------------------------------===//
-namespace CodeObject {
+namespace HSAMD {
 
-/// \brief Code object metadata major version.
-constexpr uint32_t MetadataVersionMajor = 1;
-/// \brief Code object metadata minor version.
-constexpr uint32_t MetadataVersionMinor = 0;
+/// \brief HSA metadata major version.
+constexpr uint32_t VersionMajor = 1;
+/// \brief HSA metadata minor version.
+constexpr uint32_t VersionMinor = 0;
 
-/// \brief Code object metadata beginning assembler directive.
-constexpr char MetadataAssemblerDirectiveBegin[] =
-    ".amdgpu_code_object_metadata";
-/// \brief Code object metadata ending assembler directive.
-constexpr char MetadataAssemblerDirectiveEnd[] =
-    ".end_amdgpu_code_object_metadata";
+/// \brief HSA metadata beginning assembler directive.
+constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
+/// \brief HSA metadata ending assembler directive.
+constexpr char AssemblerDirectiveEnd[] = ".end_amd_amdgpu_hsa_metadata";
 
 /// \brief Access qualifiers.
 enum class AccessQualifier : uint8_t {
@@ -115,6 +112,8 @@ constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize";
 constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint";
 /// \brief Key for Kernel::Attr::Metadata::mVecTypeHint.
 constexpr char VecTypeHint[] = "VecTypeHint";
+/// \brief Key for Kernel::Attr::Metadata::mRuntimeHandle.
+constexpr char RuntimeHandle[] = "RuntimeHandle";
 } // end namespace Key
 
 /// \brief In-memory representation of kernel attributes metadata.
@@ -125,20 +124,22 @@ struct Metadata final {
   std::vector<uint32_t> mWorkGroupSizeHint = std::vector<uint32_t>();
   /// \brief 'vec_type_hint' attribute. Optional.
   std::string mVecTypeHint = std::string();
+  /// \brief External symbol created by runtime to store the kernel address
+  /// for enqueued blocks.
+  std::string mRuntimeHandle = std::string();
 
   /// \brief Default constructor.
   Metadata() = default;
 
   /// \returns True if kernel attributes metadata is empty, false otherwise.
   bool empty() const {
-    return mReqdWorkGroupSize.empty() &&
-           mWorkGroupSizeHint.empty() &&
-           mVecTypeHint.empty();
+    return !notEmpty();
   }
 
   /// \returns True if kernel attributes metadata is not empty, false otherwise.
   bool notEmpty() const {
-    return !empty();
+    return !mReqdWorkGroupSize.empty() || !mWorkGroupSizeHint.empty() ||
+           !mVecTypeHint.empty() || !mRuntimeHandle.empty();
   }
 };
 
@@ -150,6 +151,10 @@ struct Metadata final {
 namespace Arg {
 
 namespace Key {
+/// \brief Key for Kernel::Arg::Metadata::mName.
+constexpr char Name[] = "Name";
+/// \brief Key for Kernel::Arg::Metadata::mTypeName.
+constexpr char TypeName[] = "TypeName";
 /// \brief Key for Kernel::Arg::Metadata::mSize.
 constexpr char Size[] = "Size";
 /// \brief Key for Kernel::Arg::Metadata::mAlign.
@@ -160,26 +165,28 @@ constexpr char ValueKind[] = "ValueKind";
 constexpr char ValueType[] = "ValueType";
 /// \brief Key for Kernel::Arg::Metadata::mPointeeAlign.
 constexpr char PointeeAlign[] = "PointeeAlign";
-/// \brief Key for Kernel::Arg::Metadata::mAccQual.
-constexpr char AccQual[] = "AccQual";
 /// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual.
 constexpr char AddrSpaceQual[] = "AddrSpaceQual";
+/// \brief Key for Kernel::Arg::Metadata::mAccQual.
+constexpr char AccQual[] = "AccQual";
+/// \brief Key for Kernel::Arg::Metadata::mActualAccQual.
+constexpr char ActualAccQual[] = "ActualAccQual";
 /// \brief Key for Kernel::Arg::Metadata::mIsConst.
 constexpr char IsConst[] = "IsConst";
-/// \brief Key for Kernel::Arg::Metadata::mIsPipe.
-constexpr char IsPipe[] = "IsPipe";
 /// \brief Key for Kernel::Arg::Metadata::mIsRestrict.
 constexpr char IsRestrict[] = "IsRestrict";
 /// \brief Key for Kernel::Arg::Metadata::mIsVolatile.
 constexpr char IsVolatile[] = "IsVolatile";
-/// \brief Key for Kernel::Arg::Metadata::mName.
-constexpr char Name[] = "Name";
-/// \brief Key for Kernel::Arg::Metadata::mTypeName.
-constexpr char TypeName[] = "TypeName";
+/// \brief Key for Kernel::Arg::Metadata::mIsPipe.
+constexpr char IsPipe[] = "IsPipe";
 } // end namespace Key
 
 /// \brief In-memory representation of kernel argument metadata.
 struct Metadata final {
+  /// \brief Name. Optional.
+  std::string mName = std::string();
+  /// \brief Type name. Optional.
+  std::string mTypeName = std::string();
   /// \brief Size in bytes. Required.
   uint32_t mSize = 0;
   /// \brief Alignment in bytes. Required.
@@ -190,22 +197,20 @@ struct Metadata final {
   ValueType mValueType = ValueType::Unknown;
   /// \brief Pointee alignment in bytes. Optional.
   uint32_t mPointeeAlign = 0;
-  /// \brief Access qualifier. Optional.
-  AccessQualifier mAccQual = AccessQualifier::Unknown;
   /// \brief Address space qualifier. Optional.
   AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown;
+  /// \brief Access qualifier. Optional.
+  AccessQualifier mAccQual = AccessQualifier::Unknown;
+  /// \brief Actual access qualifier. Optional.
+  AccessQualifier mActualAccQual = AccessQualifier::Unknown;
   /// \brief True if 'const' qualifier is specified. Optional.
   bool mIsConst = false;
-  /// \brief True if 'pipe' qualifier is specified. Optional.
-  bool mIsPipe = false;
   /// \brief True if 'restrict' qualifier is specified. Optional.
   bool mIsRestrict = false;
   /// \brief True if 'volatile' qualifier is specified. Optional.
   bool mIsVolatile = false;
-  /// \brief Name. Optional.
-  std::string mName = std::string();
-  /// \brief Type name. Optional.
-  std::string mTypeName = std::string();
+  /// \brief True if 'pipe' qualifier is specified. Optional.
+  bool mIsPipe = false;
 
   /// \brief Default constructor.
   Metadata() = default;
@@ -221,51 +226,55 @@ namespace CodeProps {
 
 namespace Key {
 /// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentSize.
 constexpr char KernargSegmentSize[] = "KernargSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkgroupGroupSegmentSize.
-constexpr char WorkgroupGroupSegmentSize[] = "WorkgroupGroupSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemPrivateSegmentSize.
-constexpr char WorkitemPrivateSegmentSize[] = "WorkitemPrivateSegmentSize";
-/// \brief Key for Kernel::CodeProps::Metadata::mWavefrontNumSGPRs.
-constexpr char WavefrontNumSGPRs[] = "WavefrontNumSGPRs";
-/// \brief Key for Kernel::CodeProps::Metadata::mWorkitemNumVGPRs.
-constexpr char WorkitemNumVGPRs[] = "WorkitemNumVGPRs";
+/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentFixedSize.
+constexpr char GroupSegmentFixedSize[] = "GroupSegmentFixedSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentFixedSize.
+constexpr char PrivateSegmentFixedSize[] = "PrivateSegmentFixedSize";
 /// \brief Key for Kernel::CodeProps::Metadata::mKernargSegmentAlign.
 constexpr char KernargSegmentAlign[] = "KernargSegmentAlign";
-/// \brief Key for Kernel::CodeProps::Metadata::mGroupSegmentAlign.
-constexpr char GroupSegmentAlign[] = "GroupSegmentAlign";
-/// \brief Key for Kernel::CodeProps::Metadata::mPrivateSegmentAlign.
-constexpr char PrivateSegmentAlign[] = "PrivateSegmentAlign";
 /// \brief Key for Kernel::CodeProps::Metadata::mWavefrontSize.
 constexpr char WavefrontSize[] = "WavefrontSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mNumSGPRs.
+constexpr char NumSGPRs[] = "NumSGPRs";
+/// \brief Key for Kernel::CodeProps::Metadata::mNumVGPRs.
+constexpr char NumVGPRs[] = "NumVGPRs";
+/// \brief Key for Kernel::CodeProps::Metadata::mMaxFlatWorkGroupSize.
+constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize";
+/// \brief Key for Kernel::CodeProps::Metadata::mIsDynamicCallStack.
+constexpr char IsDynamicCallStack[] = "IsDynamicCallStack";
+/// \brief Key for Kernel::CodeProps::Metadata::mIsXNACKEnabled.
+constexpr char IsXNACKEnabled[] = "IsXNACKEnabled";
 } // end namespace Key
 
 /// \brief In-memory representation of kernel code properties metadata.
 struct Metadata final {
   /// \brief Size in bytes of the kernarg segment memory. Kernarg segment memory
-  /// holds the values of the arguments to the kernel. Optional.
+  /// holds the values of the arguments to the kernel. Required.
   uint64_t mKernargSegmentSize = 0;
   /// \brief Size in bytes of the group segment memory required by a workgroup.
   /// This value does not include any dynamically allocated group segment memory
-  /// that may be added when the kernel is dispatched. Optional.
-  uint32_t mWorkgroupGroupSegmentSize = 0;
+  /// that may be added when the kernel is dispatched. Required.
+  uint32_t mGroupSegmentFixedSize = 0;
   /// \brief Size in bytes of the private segment memory required by a workitem.
-  /// Private segment memory includes arg, spill and private segments. Optional.
-  uint32_t mWorkitemPrivateSegmentSize = 0;
+  /// Private segment memory includes arg, spill and private segments. Required.
+  uint32_t mPrivateSegmentFixedSize = 0;
+  /// \brief Maximum byte alignment of variables used by the kernel in the
+  /// kernarg memory segment. Required.
+  uint32_t mKernargSegmentAlign = 0;
+  /// \brief Wavefront size. Required.
+  uint32_t mWavefrontSize = 0;
   /// \brief Total number of SGPRs used by a wavefront. Optional.
-  uint16_t mWavefrontNumSGPRs = 0;
+  uint16_t mNumSGPRs = 0;
   /// \brief Total number of VGPRs used by a workitem. Optional.
-  uint16_t mWorkitemNumVGPRs = 0;
-  /// \brief Maximum byte alignment of variables used by the kernel in the
-  /// kernarg memory segment. Expressed as a power of two. Optional.
-  uint8_t mKernargSegmentAlign = 0;
-  /// \brief Maximum byte alignment of variables used by the kernel in the
-  /// group memory segment. Expressed as a power of two. Optional.
-  uint8_t mGroupSegmentAlign = 0;
-  /// \brief Maximum byte alignment of variables used by the kernel in the
-  /// private memory segment. Expressed as a power of two. Optional.
-  uint8_t mPrivateSegmentAlign = 0;
-  /// \brief Wavefront size. Expressed as a power of two. Optional.
-  uint8_t mWavefrontSize = 0;
+  uint16_t mNumVGPRs = 0;
+  /// \brief Maximum flat work-group size supported by the kernel. Optional.
+  uint32_t mMaxFlatWorkGroupSize = 0;
+  /// \brief True if the generated machine code is using a dynamically sized
+  /// call stack. Optional.
+  bool mIsDynamicCallStack = false;
+  /// \brief True if the generated machine code is capable of supporting XNACK.
+  /// Optional.
+  bool mIsXNACKEnabled = false;
 
   /// \brief Default constructor.
   Metadata() = default;
@@ -279,10 +288,7 @@ struct Metadata final {
   /// \returns True if kernel code properties metadata is not empty, false
   /// otherwise.
   bool notEmpty() const {
-    return mKernargSegmentSize || mWorkgroupGroupSegmentSize ||
-           mWorkitemPrivateSegmentSize || mWavefrontNumSGPRs ||
-           mWorkitemNumVGPRs || mKernargSegmentAlign || mGroupSegmentAlign ||
-           mPrivateSegmentAlign || mWavefrontSize;
+    return true;
   }
 };
 
@@ -348,6 +354,8 @@ struct Metadata final {
 namespace Key {
 /// \brief Key for Kernel::Metadata::mName.
 constexpr char Name[] = "Name";
+/// \brief Key for Kernel::Metadata::mSymbolName.
+constexpr char SymbolName[] = "SymbolName";
 /// \brief Key for Kernel::Metadata::mLanguage.
 constexpr char Language[] = "Language";
 /// \brief Key for Kernel::Metadata::mLanguageVersion.
@@ -364,8 +372,10 @@ constexpr char DebugProps[] = "DebugProps";
 
 /// \brief In-memory representation of kernel metadata.
 struct Metadata final {
-  /// \brief Name. Required.
+  /// \brief Kernel source name. Required.
   std::string mName = std::string();
+  /// \brief Kernel descriptor name. Required.
+  std::string mSymbolName = std::string();
   /// \brief Language. Optional.
   std::string mLanguage = std::string();
   /// \brief Language version. Optional.
@@ -386,37 +396,78 @@ struct Metadata final {
 } // end namespace Kernel
 
 namespace Key {
-/// \brief Key for CodeObject::Metadata::mVersion.
+/// \brief Key for HSA::Metadata::mVersion.
 constexpr char Version[] = "Version";
-/// \brief Key for CodeObject::Metadata::mPrintf.
+/// \brief Key for HSA::Metadata::mPrintf.
 constexpr char Printf[] = "Printf";
-/// \brief Key for CodeObject::Metadata::mKernels.
+/// \brief Key for HSA::Metadata::mKernels.
 constexpr char Kernels[] = "Kernels";
 } // end namespace Key
 
-/// \brief In-memory representation of code object metadata.
+/// \brief In-memory representation of HSA metadata.
 struct Metadata final {
-  /// \brief Code object metadata version. Required.
+  /// \brief HSA metadata version. Required.
   std::vector<uint32_t> mVersion = std::vector<uint32_t>();
   /// \brief Printf metadata. Optional.
   std::vector<std::string> mPrintf = std::vector<std::string>();
-  /// \brief Kernels metadata. Optional.
+  /// \brief Kernels metadata. Required.
   std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>();
 
   /// \brief Default constructor.
   Metadata() = default;
+};
+
+/// \brief Converts \p String to \p HSAMetadata.
+std::error_code fromString(std::string String, Metadata &HSAMetadata);
 
-  /// \brief Converts \p YamlString to \p CodeObjectMetadata.
-  static std::error_code fromYamlString(std::string YamlString,
-                                        Metadata &CodeObjectMetadata);
+/// \brief Converts \p HSAMetadata to \p String.
+std::error_code toString(Metadata HSAMetadata, std::string &String);
 
-  /// \brief Converts \p CodeObjectMetadata to \p YamlString.
-  static std::error_code toYamlString(Metadata CodeObjectMetadata,
-                                      std::string &YamlString);
+} // end namespace HSAMD
+
+//===----------------------------------------------------------------------===//
+// PAL metadata.
+//===----------------------------------------------------------------------===//
+namespace PALMD {
+
+/// \brief PAL metadata assembler directive.
+constexpr char AssemblerDirective[] = ".amd_amdgpu_pal_metadata";
+
+/// \brief PAL metadata keys.
+enum Key : uint32_t {
+  LS_NUM_USED_VGPRS = 0x10000015,
+  HS_NUM_USED_VGPRS = 0x10000016,
+  ES_NUM_USED_VGPRS = 0x10000017,
+  GS_NUM_USED_VGPRS = 0x10000018,
+  VS_NUM_USED_VGPRS = 0x10000019,
+  PS_NUM_USED_VGPRS = 0x1000001a,
+  CS_NUM_USED_VGPRS = 0x1000001b,
+
+  LS_NUM_USED_SGPRS = 0x1000001c,
+  HS_NUM_USED_SGPRS = 0x1000001d,
+  ES_NUM_USED_SGPRS = 0x1000001e,
+  GS_NUM_USED_SGPRS = 0x1000001f,
+  VS_NUM_USED_SGPRS = 0x10000020,
+  PS_NUM_USED_SGPRS = 0x10000021,
+  CS_NUM_USED_SGPRS = 0x10000022,
+
+  LS_SCRATCH_SIZE = 0x10000038,
+  HS_SCRATCH_SIZE = 0x10000039,
+  ES_SCRATCH_SIZE = 0x1000003a,
+  GS_SCRATCH_SIZE = 0x1000003b,
+  VS_SCRATCH_SIZE = 0x1000003c,
+  PS_SCRATCH_SIZE = 0x1000003d,
+  CS_SCRATCH_SIZE = 0x1000003e
 };
 
-} // end namespace CodeObject
+/// \brief PAL metadata represented as a vector.
+typedef std::vector<uint32_t> Metadata;
+
+/// \brief Converts \p PALMetadata to \p String.
+std::error_code toString(const Metadata &PALMetadata, std::string &String);
+
+} // end namespace PALMD
 
 } // end namespace AMDGPU
 } // end namespace llvm
 
-#endif // LLVM_SUPPORT_AMDGPUCODEOBJECTMETADATA_H
+#endif // LLVM_SUPPORT_AMDGPUMETADATA_H
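[Editor's sketch] The former static member functions fromYamlString/toYamlString become the free functions fromString/toString above. A hedged example of a round trip through the string form; YamlIn is a placeholder, not a real metadata blob:

  #include "llvm/Support/AMDGPUMetadata.h"
  #include <cassert>
  #include <string>

  // Parse metadata from its YAML string form, then serialize it back.
  std::error_code roundTrip(const std::string &YamlIn, std::string &YamlOut) {
    llvm::AMDGPU::HSAMD::Metadata HSAMetadata;
    if (auto EC = llvm::AMDGPU::HSAMD::fromString(YamlIn, HSAMetadata))
      return EC; // Malformed or incomplete metadata.
    assert(!HSAMetadata.mKernels.empty() && "kernels are required");
    return llvm::AMDGPU::HSAMD::toString(HSAMetadata, YamlOut);
  }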
diff --git a/include/llvm/Support/AtomicOrdering.h b/include/llvm/Support/AtomicOrdering.h
index 001804248b85c..e93b755aa63b8 100644
--- a/include/llvm/Support/AtomicOrdering.h
+++ b/include/llvm/Support/AtomicOrdering.h
@@ -42,7 +42,7 @@ bool operator>=(AtomicOrderingCABI, AtomicOrderingCABI) = delete;
 
 // Validate an integral value which isn't known to fit within the enum's range
 // is a valid AtomicOrderingCABI.
-template <typename Int> static inline bool isValidAtomicOrderingCABI(Int I) {
+template <typename Int> inline bool isValidAtomicOrderingCABI(Int I) {
   return (Int)AtomicOrderingCABI::relaxed <= I &&
          I <= (Int)AtomicOrderingCABI::seq_cst;
 }
@@ -72,13 +72,13 @@ bool operator>=(AtomicOrdering, AtomicOrdering) = delete;
 
 // Validate an integral value which isn't known to fit within the enum's range
 // is a valid AtomicOrdering.
-template <typename Int> static inline bool isValidAtomicOrdering(Int I) {
+template <typename Int> inline bool isValidAtomicOrdering(Int I) {
   return static_cast<Int>(AtomicOrdering::NotAtomic) <= I &&
          I <= static_cast<Int>(AtomicOrdering::SequentiallyConsistent);
 }
 
 /// String used by LLVM IR to represent atomic ordering.
-static inline const char *toIRString(AtomicOrdering ao) {
+inline const char *toIRString(AtomicOrdering ao) {
   static const char *names[8] = {"not_atomic", "unordered", "monotonic",
                                  "consume", "acquire", "release",
                                  "acq_rel", "seq_cst"};
@@ -87,7 +87,7 @@ static inline const char *toIRString(AtomicOrdering ao) {
 
 /// Returns true if ao is stronger than other as defined by the AtomicOrdering
 /// lattice, which is based on C++'s definition.
-static inline bool isStrongerThan(AtomicOrdering ao, AtomicOrdering other) {
+inline bool isStrongerThan(AtomicOrdering ao, AtomicOrdering other) {
   static const bool lookup[8][8] = {
       //               NA     UN     RX     CO     AC     RE     AR     SC
       /* NotAtomic */ {false, false, false, false, false, false, false, false},
@@ -102,8 +102,7 @@ static inline bool isStrongerThan(AtomicOrdering ao, AtomicOrdering other) {
   return lookup[static_cast<size_t>(ao)][static_cast<size_t>(other)];
 }
 
-static inline bool isAtLeastOrStrongerThan(AtomicOrdering ao,
-                                           AtomicOrdering other) {
+inline bool isAtLeastOrStrongerThan(AtomicOrdering ao, AtomicOrdering other) {
   static const bool lookup[8][8] = {
       //               NA     UN     RX     CO     AC     RE     AR     SC
       /* NotAtomic */ { true, false, false, false, false, false, false, false},
@@ -118,23 +117,23 @@ static inline bool isAtLeastOrStrongerThan(AtomicOrdering ao,
   return lookup[static_cast<size_t>(ao)][static_cast<size_t>(other)];
 }
 
-static inline bool isStrongerThanUnordered(AtomicOrdering ao) {
+inline bool isStrongerThanUnordered(AtomicOrdering ao) {
   return isStrongerThan(ao, AtomicOrdering::Unordered);
 }
 
-static inline bool isStrongerThanMonotonic(AtomicOrdering ao) {
+inline bool isStrongerThanMonotonic(AtomicOrdering ao) {
   return isStrongerThan(ao, AtomicOrdering::Monotonic);
 }
 
-static inline bool isAcquireOrStronger(AtomicOrdering ao) {
+inline bool isAcquireOrStronger(AtomicOrdering ao) {
   return isAtLeastOrStrongerThan(ao, AtomicOrdering::Acquire);
 }
 
-static inline bool isReleaseOrStronger(AtomicOrdering ao) {
+inline bool isReleaseOrStronger(AtomicOrdering ao) {
  return isAtLeastOrStrongerThan(ao, AtomicOrdering::Release);
 }
 
-static inline AtomicOrderingCABI toCABI(AtomicOrdering ao) {
+inline AtomicOrderingCABI toCABI(AtomicOrdering ao) {
   static const AtomicOrderingCABI lookup[8] = {
       /* NotAtomic */ AtomicOrderingCABI::relaxed,
       /* Unordered */ AtomicOrderingCABI::relaxed,
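[Editor's sketch] With the redundant static qualifiers dropped, these helpers remain ordinary header-only inline functions. A minimal example exercising the lattice queries:

  #include "llvm/Support/AtomicOrdering.h"
  #include <cassert>

  using namespace llvm;

  void checkLattice() {
    assert(isStrongerThan(AtomicOrdering::SequentiallyConsistent,
                          AtomicOrdering::Acquire));
    assert(isAcquireOrStronger(AtomicOrdering::AcquireRelease));
    // Acquire and Release are incomparable in the lattice.
    assert(!isStrongerThan(AtomicOrdering::Acquire, AtomicOrdering::Release));
    assert(toCABI(AtomicOrdering::Monotonic) == AtomicOrderingCABI::relaxed);
  }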
%H:%M:%S.%N". +template <> +struct format_provider> { + static void format(const sys::TimePoint<> &TP, llvm::raw_ostream &OS, + StringRef Style); +}; + /// Implementation of format_provider for duration types. /// /// The options string of a duration type has the grammar: diff --git a/include/llvm/Support/ConvertUTF.h b/include/llvm/Support/ConvertUTF.h index bd439f3602169..99ae171aeabbb 100644 --- a/include/llvm/Support/ConvertUTF.h +++ b/include/llvm/Support/ConvertUTF.h @@ -242,10 +242,10 @@ bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr); * * \sa ConvertUTF8toUTF32 */ -static inline ConversionResult convertUTF8Sequence(const UTF8 **source, - const UTF8 *sourceEnd, - UTF32 *target, - ConversionFlags flags) { +inline ConversionResult convertUTF8Sequence(const UTF8 **source, + const UTF8 *sourceEnd, + UTF32 *target, + ConversionFlags flags) { if (*source == sourceEnd) return sourceExhausted; unsigned size = getNumBytesForUTF8(**source); diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index e8460ca0a31bb..03015a0ca3bfd 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -141,65 +141,48 @@ class UniqueID { uint64_t getFile() const { return File; } }; -/// file_status - Represents the result of a call to stat and friends. It has -/// a platform-specific member to store the result. -class file_status -{ - friend bool equivalent(file_status A, file_status B); - +/// Represents the result of a call to directory_iterator::status(). This is a +/// subset of the information returned by a regular sys::fs::status() call, and +/// represents the information provided by Windows FileFirstFile/FindNextFile. +class basic_file_status { +protected: #if defined(LLVM_ON_UNIX) - dev_t fs_st_dev = 0; - nlink_t fs_st_nlinks = 0; - ino_t fs_st_ino = 0; time_t fs_st_atime = 0; time_t fs_st_mtime = 0; uid_t fs_st_uid = 0; gid_t fs_st_gid = 0; off_t fs_st_size = 0; #elif defined (LLVM_ON_WIN32) - uint32_t NumLinks = 0; uint32_t LastAccessedTimeHigh = 0; uint32_t LastAccessedTimeLow = 0; uint32_t LastWriteTimeHigh = 0; uint32_t LastWriteTimeLow = 0; - uint32_t VolumeSerialNumber = 0; uint32_t FileSizeHigh = 0; uint32_t FileSizeLow = 0; - uint32_t FileIndexHigh = 0; - uint32_t FileIndexLow = 0; #endif file_type Type = file_type::status_error; perms Perms = perms_not_known; public: - #if defined(LLVM_ON_UNIX) - file_status() = default; - - file_status(file_type Type) : Type(Type) {} - - file_status(file_type Type, perms Perms, dev_t Dev, nlink_t Links, ino_t Ino, - time_t ATime, time_t MTime, uid_t UID, gid_t GID, off_t Size) - : fs_st_dev(Dev), fs_st_nlinks(Links), fs_st_ino(Ino), fs_st_atime(ATime), - fs_st_mtime(MTime), fs_st_uid(UID), fs_st_gid(GID), fs_st_size(Size), - Type(Type), Perms(Perms) {} - #elif defined(LLVM_ON_WIN32) - file_status() = default; + basic_file_status() = default; - file_status(file_type Type) : Type(Type) {} + explicit basic_file_status(file_type Type) : Type(Type) {} - file_status(file_type Type, perms Perms, uint32_t LinkCount, - uint32_t LastAccessTimeHigh, uint32_t LastAccessTimeLow, - uint32_t LastWriteTimeHigh, uint32_t LastWriteTimeLow, - uint32_t VolumeSerialNumber, uint32_t FileSizeHigh, - uint32_t FileSizeLow, uint32_t FileIndexHigh, - uint32_t FileIndexLow) - : NumLinks(LinkCount), LastAccessedTimeHigh(LastAccessTimeHigh), + #if defined(LLVM_ON_UNIX) + basic_file_status(file_type Type, perms Perms, time_t ATime, time_t MTime, + uid_t UID, gid_t GID, off_t Size) + : fs_st_atime(ATime), 
diff --git a/include/llvm/Support/ConvertUTF.h b/include/llvm/Support/ConvertUTF.h
index bd439f3602169..99ae171aeabbb 100644
--- a/include/llvm/Support/ConvertUTF.h
+++ b/include/llvm/Support/ConvertUTF.h
@@ -242,10 +242,10 @@ bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
  *
  * \sa ConvertUTF8toUTF32
  */
-static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
-                                                   const UTF8 *sourceEnd,
-                                                   UTF32 *target,
-                                                   ConversionFlags flags) {
+inline ConversionResult convertUTF8Sequence(const UTF8 **source,
+                                            const UTF8 *sourceEnd,
+                                            UTF32 *target,
+                                            ConversionFlags flags) {
   if (*source == sourceEnd)
     return sourceExhausted;
   unsigned size = getNumBytesForUTF8(**source);
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index e8460ca0a31bb..03015a0ca3bfd 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -141,65 +141,48 @@ class UniqueID {
   uint64_t getFile() const { return File; }
 };
 
-/// file_status - Represents the result of a call to stat and friends. It has
-/// a platform-specific member to store the result.
-class file_status
-{
-  friend bool equivalent(file_status A, file_status B);
-
+/// Represents the result of a call to directory_iterator::status(). This is a
+/// subset of the information returned by a regular sys::fs::status() call, and
+/// represents the information provided by Windows FileFirstFile/FindNextFile.
+class basic_file_status {
+protected:
 #if defined(LLVM_ON_UNIX)
-  dev_t fs_st_dev = 0;
-  nlink_t fs_st_nlinks = 0;
-  ino_t fs_st_ino = 0;
   time_t fs_st_atime = 0;
   time_t fs_st_mtime = 0;
   uid_t fs_st_uid = 0;
   gid_t fs_st_gid = 0;
   off_t fs_st_size = 0;
 #elif defined (LLVM_ON_WIN32)
-  uint32_t NumLinks = 0;
   uint32_t LastAccessedTimeHigh = 0;
   uint32_t LastAccessedTimeLow = 0;
   uint32_t LastWriteTimeHigh = 0;
   uint32_t LastWriteTimeLow = 0;
-  uint32_t VolumeSerialNumber = 0;
   uint32_t FileSizeHigh = 0;
   uint32_t FileSizeLow = 0;
-  uint32_t FileIndexHigh = 0;
-  uint32_t FileIndexLow = 0;
 #endif
   file_type Type = file_type::status_error;
   perms Perms = perms_not_known;
 
 public:
-  #if defined(LLVM_ON_UNIX)
-  file_status() = default;
-
-  file_status(file_type Type) : Type(Type) {}
-
-  file_status(file_type Type, perms Perms, dev_t Dev, nlink_t Links, ino_t Ino,
-              time_t ATime, time_t MTime, uid_t UID, gid_t GID, off_t Size)
-      : fs_st_dev(Dev), fs_st_nlinks(Links), fs_st_ino(Ino), fs_st_atime(ATime),
-        fs_st_mtime(MTime), fs_st_uid(UID), fs_st_gid(GID), fs_st_size(Size),
-        Type(Type), Perms(Perms) {}
-  #elif defined(LLVM_ON_WIN32)
-  file_status() = default;
+  basic_file_status() = default;
 
-  file_status(file_type Type) : Type(Type) {}
+  explicit basic_file_status(file_type Type) : Type(Type) {}
 
-  file_status(file_type Type, perms Perms, uint32_t LinkCount,
-              uint32_t LastAccessTimeHigh, uint32_t LastAccessTimeLow,
-              uint32_t LastWriteTimeHigh, uint32_t LastWriteTimeLow,
-              uint32_t VolumeSerialNumber, uint32_t FileSizeHigh,
-              uint32_t FileSizeLow, uint32_t FileIndexHigh,
-              uint32_t FileIndexLow)
-      : NumLinks(LinkCount), LastAccessedTimeHigh(LastAccessTimeHigh),
+  #if defined(LLVM_ON_UNIX)
+  basic_file_status(file_type Type, perms Perms, time_t ATime, time_t MTime,
+                    uid_t UID, gid_t GID, off_t Size)
+      : fs_st_atime(ATime), fs_st_mtime(MTime), fs_st_uid(UID), fs_st_gid(GID),
+        fs_st_size(Size), Type(Type), Perms(Perms) {}
+#elif defined(LLVM_ON_WIN32)
+  basic_file_status(file_type Type, perms Perms, uint32_t LastAccessTimeHigh,
+                    uint32_t LastAccessTimeLow, uint32_t LastWriteTimeHigh,
+                    uint32_t LastWriteTimeLow, uint32_t FileSizeHigh,
+                    uint32_t FileSizeLow)
+      : LastAccessedTimeHigh(LastAccessTimeHigh),
         LastAccessedTimeLow(LastAccessTimeLow),
         LastWriteTimeHigh(LastWriteTimeHigh),
-        LastWriteTimeLow(LastWriteTimeLow),
-        VolumeSerialNumber(VolumeSerialNumber), FileSizeHigh(FileSizeHigh),
-        FileSizeLow(FileSizeLow), FileIndexHigh(FileIndexHigh),
-        FileIndexLow(FileIndexLow), Type(Type), Perms(Perms) {}
+        LastWriteTimeLow(LastWriteTimeLow), FileSizeHigh(FileSizeHigh),
+        FileSizeLow(FileSizeLow), Type(Type), Perms(Perms) {}
 #endif
 
   // getters
@@ -207,8 +190,6 @@ class file_status
   perms permissions() const { return Perms; }
   TimePoint<> getLastAccessedTime() const;
   TimePoint<> getLastModificationTime() const;
-  UniqueID getUniqueID() const;
-  uint32_t getLinkCount() const;
 
 #if defined(LLVM_ON_UNIX)
   uint32_t getUser() const { return fs_st_uid; }
@@ -233,6 +214,49 @@ class file_status
   void permissions(perms p) { Perms = p; }
 };
 
+/// Represents the result of a call to sys::fs::status().
+class file_status : public basic_file_status {
+  friend bool equivalent(file_status A, file_status B);
+
+  #if defined(LLVM_ON_UNIX)
+  dev_t fs_st_dev = 0;
+  nlink_t fs_st_nlinks = 0;
+  ino_t fs_st_ino = 0;
+  #elif defined (LLVM_ON_WIN32)
+  uint32_t NumLinks = 0;
+  uint32_t VolumeSerialNumber = 0;
+  uint32_t FileIndexHigh = 0;
+  uint32_t FileIndexLow = 0;
+  #endif
+
+public:
+  file_status() = default;
+
+  explicit file_status(file_type Type) : basic_file_status(Type) {}
+
+  #if defined(LLVM_ON_UNIX)
+  file_status(file_type Type, perms Perms, dev_t Dev, nlink_t Links, ino_t Ino,
+              time_t ATime, time_t MTime, uid_t UID, gid_t GID, off_t Size)
+      : basic_file_status(Type, Perms, ATime, MTime, UID, GID, Size),
+        fs_st_dev(Dev), fs_st_nlinks(Links), fs_st_ino(Ino) {}
+  #elif defined(LLVM_ON_WIN32)
+  file_status(file_type Type, perms Perms, uint32_t LinkCount,
+              uint32_t LastAccessTimeHigh, uint32_t LastAccessTimeLow,
+              uint32_t LastWriteTimeHigh, uint32_t LastWriteTimeLow,
+              uint32_t VolumeSerialNumber, uint32_t FileSizeHigh,
+              uint32_t FileSizeLow, uint32_t FileIndexHigh,
+              uint32_t FileIndexLow)
+      : basic_file_status(Type, Perms, LastAccessTimeHigh, LastAccessTimeLow,
+                          LastWriteTimeHigh, LastWriteTimeLow, FileSizeHigh,
+                          FileSizeLow),
+        NumLinks(LinkCount), VolumeSerialNumber(VolumeSerialNumber),
+        FileIndexHigh(FileIndexHigh), FileIndexLow(FileIndexLow) {}
+  #endif
+
+  UniqueID getUniqueID() const;
+  uint32_t getLinkCount() const;
+};
+
 /// @}
 /// @name Physical Operators
 /// @{
@@ -343,7 +367,11 @@ std::error_code remove(const Twine &path, bool IgnoreNonExisting = true);
 /// platform-specific error code.
 std::error_code remove_directories(const Twine &path, bool IgnoreErrors = true);
 
-/// @brief Rename \a from to \a to. Files are renamed as if by POSIX rename().
+/// @brief Rename \a from to \a to.
+///
+/// Files are renamed as if by POSIX rename(), except that on Windows there may
+/// be a short interval of time during which the destination file does not
+/// exist.
 ///
 /// @param from The path to rename from.
 /// @param to The path to rename to. This is created.
@@ -379,10 +407,10 @@ ErrorOr<MD5::MD5Result> md5_contents(const Twine &Path);
 
 /// @brief Does file exist?
 ///
-/// @param status A file_status previously returned from stat.
+/// @param status A basic_file_status previously returned from stat.
 /// @returns True if the file represented by status exists, false if it does
 /// not.
-bool exists(file_status status);
+bool exists(const basic_file_status &status);
 
 enum class AccessMode { Exist, Write, Execute };
 
@@ -481,9 +509,9 @@ file_type get_file_type(const Twine &Path, bool Follow = true);
 
 /// @brief Does status represent a directory?
 ///
-/// @param status A file_status previously returned from status.
+/// @param status A basic_file_status previously returned from status.
 /// @returns status.type() == file_type::directory_file.
-bool is_directory(file_status status);
+bool is_directory(const basic_file_status &status);
 
 /// @brief Is path a directory?
 ///
@@ -503,9 +531,9 @@ inline bool is_directory(const Twine &Path) {
 
 /// @brief Does status represent a regular file?
 ///
-/// @param status A file_status previously returned from status.
+/// @param status A basic_file_status previously returned from status.
 /// @returns status_known(status) && status.type() == file_type::regular_file.
-bool is_regular_file(file_status status);
+bool is_regular_file(const basic_file_status &status);
 
 /// @brief Is path a regular file?
 ///
@@ -527,9 +555,9 @@ inline bool is_regular_file(const Twine &Path) {
 
 /// @brief Does status represent a symlink file?
 ///
-/// @param status A file_status previously returned from status.
+/// @param status A basic_file_status previously returned from status.
 /// @returns status_known(status) && status.type() == file_type::symlink_file.
-bool is_symlink_file(file_status status);
+bool is_symlink_file(const basic_file_status &status);
 
 /// @brief Is path a symlink file?
 ///
@@ -552,9 +580,9 @@ inline bool is_symlink_file(const Twine &Path) {
 /// @brief Does this status represent something that exists but is not a
 /// directory or regular file?
 ///
-/// @param status A file_status previously returned from status.
+/// @param status A basic_file_status previously returned from status.
 /// @returns exists(s) && !is_regular_file(s) && !is_directory(s)
-bool is_other(file_status status);
+bool is_other(const basic_file_status &status);
 
 /// @brief Is path something that exists but is not a directory,
 /// regular file, or symlink?
@@ -627,7 +655,7 @@ std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time);
 ///
 /// @param s Input file status.
 /// @returns True if status() != status_error.
-bool status_known(file_status s);
+bool status_known(const basic_file_status &s);
 
 /// @brief Is status available?
 ///
@@ -789,24 +817,25 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr);
 class directory_entry {
   std::string Path;
   bool FollowSymlinks;
-  mutable file_status Status;
+  basic_file_status Status;
 
 public:
   explicit directory_entry(const Twine &path, bool follow_symlinks = true,
-                           file_status st = file_status())
+                           basic_file_status st = basic_file_status())
       : Path(path.str()), FollowSymlinks(follow_symlinks), Status(st) {}
 
   directory_entry() = default;
 
-  void assign(const Twine &path, file_status st = file_status()) {
+  void assign(const Twine &path, basic_file_status st = basic_file_status()) {
     Path = path.str();
     Status = st;
   }
 
-  void replace_filename(const Twine &filename, file_status st = file_status());
+  void replace_filename(const Twine &filename,
+                        basic_file_status st = basic_file_status());
 
   const std::string &path() const { return Path; }
-  std::error_code status(file_status &result) const;
+  ErrorOr<basic_file_status> status() const;
 
   bool operator==(const directory_entry& rhs) const { return Path == rhs.Path; }
   bool operator!=(const directory_entry& rhs) const { return !(*this == rhs); }
@@ -925,9 +954,9 @@ class recursive_directory_iterator {
       if (State->HasNoPushRequest)
         State->HasNoPushRequest = false;
       else {
-        file_status st;
-        if ((ec = State->Stack.top()->status(st))) return *this;
-        if (is_directory(st)) {
+        ErrorOr<basic_file_status> st = State->Stack.top()->status();
+        if (!st) return *this;
+        if (is_directory(*st)) {
           State->Stack.push(directory_iterator(*State->Stack.top(), ec, Follow));
           if (ec) return *this;
           if (State->Stack.top() != end_itr) {
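[Editor's sketch] directory_entry::status() now returns a value instead of filling an out-parameter. A minimal example under that assumption; the error branch covers files that vanish mid-iteration:

  #include "llvm/Support/FileSystem.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  void visit(const sys::fs::directory_entry &Entry) {
    ErrorOr<sys::fs::basic_file_status> St = Entry.status();
    if (!St)
      return; // e.g. the file disappeared while iterating
    if (sys::fs::is_regular_file(*St))
      outs() << Entry.path() << ": " << St->getSize() << " bytes\n";
  }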
diff --git a/include/llvm/Support/FormatVariadic.h b/include/llvm/Support/FormatVariadic.h
index 408c6d8b2e0d2..8c08a7d9488f6 100644
--- a/include/llvm/Support/FormatVariadic.h
+++ b/include/llvm/Support/FormatVariadic.h
@@ -230,9 +230,8 @@ template <typename Tuple> class formatv_object : public formatv_object_base {
 // For a given parameter of type T, the following steps are executed in order
 // until a match is found:
 //
-//   1. If the parameter is of class type, and contains a method
-//      void format(raw_ostream &Stream, StringRef Options)
-//      Then this method is invoked to produce the formatted output. The
+//   1. If the parameter is of class type, and inherits from format_adapter,
+//      Then format() is invoked on it to produce the formatted output. The
 //      implementation should write the formatted text into `Stream`.
 //   2. If there is a suitable template specialization of format_provider<>
 //      for type T containing a method whose signature is:
@@ -259,6 +258,13 @@ inline auto formatv(const char *Fmt, Ts &&... Vals) -> formatv_object<decltype(
     std::make_tuple(detail::build_format_adapter(std::forward<Ts>(Vals))...));
 }
 
+// Allow a formatv_object to be formatted (no options supported).
+template <typename T> struct format_provider<formatv_object<T>> {
+  static void format(const formatv_object<T> &V, raw_ostream &OS, StringRef) {
+    OS << V;
+  }
+};
+
 } // end namespace llvm
 
 #endif // LLVM_SUPPORT_FORMATVARIADIC_H
diff --git a/include/llvm/Support/FormatVariadicDetails.h b/include/llvm/Support/FormatVariadicDetails.h
index b4a564ffc26c6..9b60462209dc7 100644
--- a/include/llvm/Support/FormatVariadicDetails.h
+++ b/include/llvm/Support/FormatVariadicDetails.h
@@ -31,7 +31,7 @@ template <typename T> class provider_format_adapter : public format_adapter {
   T Item;
 
 public:
-  explicit provider_format_adapter(T &&Item) : Item(Item) {}
+  explicit provider_format_adapter(T &&Item) : Item(std::forward<T>(Item)) {}
 
   void format(llvm::raw_ostream &S, StringRef Options) override {
     format_provider<typename std::decay<T>::type>::format(Item, S, Options);
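[Editor's sketch] The new specialization makes a formatv_object usable as an argument to another formatv call. A small example of that nesting; as the comment above notes, no per-argument options are supported:

  #include "llvm/Support/FormatVariadic.h"
  #include <string>

  // The inner formatv result is rendered in place of {0} in the outer call.
  std::string nested() {
    auto Inner = llvm::formatv("{0}+{1}", 1, 2);
    return llvm::formatv("sum: {0}", Inner).str();
  }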
diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h
index 066a61e1ec2ff..635c87a106f0b 100644
--- a/include/llvm/Support/GenericDomTree.h
+++ b/include/llvm/Support/GenericDomTree.h
@@ -522,7 +522,9 @@ class DominatorTreeBase {
   ///
   /// Batch updates should be generally faster when performing longer sequences
   /// of updates than calling insertEdge/deleteEdge manually multiple times, as
-  /// they can reorder the updates and remove redundant ones internally.
+  /// it can reorder the updates and remove redundant ones internally.
+  /// The batch updater is also able to detect sequences of zero and exactly one
+  /// update -- it's optimized to do less work in these cases.
   ///
   /// Note that for postdominators it automatically takes care of applying
   /// updates on reverse edges internally (so there's no need to swap the
@@ -637,11 +639,12 @@ class DominatorTreeBase {
     assert(Node && "Removing node that isn't in dominator tree.");
     assert(Node->getChildren().empty() && "Node is not a leaf node.");
 
+    DFSInfoValid = false;
+
     // Remove node from immediate dominator's children list.
     DomTreeNodeBase<NodeT> *IDom = Node->getIDom();
     if (IDom) {
-      typename std::vector<DomTreeNodeBase<NodeT> *>::iterator I =
-          find(IDom->Children, Node);
+      const auto I = find(IDom->Children, Node);
       assert(I != IDom->Children.end() &&
              "Not in immediate dominator children set!");
       // I am no longer your child...
@@ -702,28 +705,25 @@ class DominatorTreeBase {
       return;
     }
 
-    unsigned DFSNum = 0;
-
     SmallVector<std::pair<const DomTreeNodeBase<NodeT> *,
                           typename DomTreeNodeBase<NodeT>::const_iterator>,
                 32> WorkStack;
 
     const DomTreeNodeBase<NodeT> *ThisRoot = getRootNode();
-
+    assert((!Parent || ThisRoot) && "Empty constructed DomTree");
     if (!ThisRoot)
       return;
 
-    // Even in the case of multiple exits that form the post dominator root
-    // nodes, do not iterate over all exits, but start from the virtual root
-    // node. Otherwise bbs, that are not post dominated by any exit but by the
-    // virtual root node, will never be assigned a DFS number.
-    WorkStack.push_back(std::make_pair(ThisRoot, ThisRoot->begin()));
+    // Both dominators and postdominators have a single root node. In the
+    // case of PostDominatorTree, this node is a virtual root.
+    WorkStack.push_back({ThisRoot, ThisRoot->begin()});
+
+    unsigned DFSNum = 0;
     ThisRoot->DFSNumIn = DFSNum++;
 
     while (!WorkStack.empty()) {
       const DomTreeNodeBase<NodeT> *Node = WorkStack.back().first;
-      typename DomTreeNodeBase<NodeT>::const_iterator ChildIt =
-          WorkStack.back().second;
+      const auto ChildIt = WorkStack.back().second;
 
       // If we visited all of the children of this node, "recurse" back up the
       // stack setting the DFOutNum.
@@ -735,7 +735,7 @@ class DominatorTreeBase {
         const DomTreeNodeBase<NodeT> *Child = *ChildIt;
         ++WorkStack.back().second;
 
-        WorkStack.push_back(std::make_pair(Child, Child->begin()));
+        WorkStack.push_back({Child, Child->begin()});
         Child->DFSNumIn = DFSNum++;
       }
     }
diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h
index b0a1ffa31251f..8f801662d0fb6 100644
--- a/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/include/llvm/Support/GenericDomTreeConstruction.h
@@ -1122,6 +1122,22 @@ struct SemiNCAInfo {
   //~~
 
   static void ApplyUpdates(DomTreeT &DT, ArrayRef<UpdateT> Updates) {
+    const size_t NumUpdates = Updates.size();
+    if (NumUpdates == 0)
+      return;
+
+    // Take the fast path for a single update and avoid running the batch update
+    // machinery.
+    if (NumUpdates == 1) {
+      const auto &Update = Updates.front();
+      if (Update.getKind() == UpdateKind::Insert)
+        DT.insertEdge(Update.getFrom(), Update.getTo());
+      else
+        DT.deleteEdge(Update.getFrom(), Update.getTo());
+
+      return;
+    }
+
     BatchUpdateInfo BUI;
     LegalizeUpdates(Updates, BUI.Updates);
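[Editor's sketch] A hedged example of the batch API this fast path serves: a single-element update list now bypasses update legalization entirely, so callers need not special-case it themselves.

  #include "llvm/IR/Dominators.h"

  using namespace llvm;

  // Record a newly added CFG edge From -> To in the dominator tree.
  void addEdgeAndUpdate(DominatorTree &DT, BasicBlock *From, BasicBlock *To) {
    DT.applyUpdates({{DominatorTree::Insert, From, To}});
  }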
@@ -1349,35 +1365,97 @@ struct SemiNCAInfo {
     return true;
   }
 
-  // Checks if for every edge From -> To in the graph
-  //     NCD(From, To) == IDom(To) or To.
-  bool verifyNCD(const DomTreeT &DT) {
-    clear();
-    doFullDFSWalk(DT, AlwaysDescend);
+  // Check if the computed DFS numbers are correct. Note that DFS info may not
+  // be valid, and when that is the case, we don't verify the numbers.
+  static bool VerifyDFSNumbers(const DomTreeT &DT) {
+    if (!DT.DFSInfoValid || !DT.Parent)
+      return true;
 
-    for (auto &BlockToInfo : NodeToInfo) {
-      auto &Info = BlockToInfo.second;
+    const NodePtr RootBB = IsPostDom ? nullptr : DT.getRoots()[0];
+    const TreeNodePtr Root = DT.getNode(RootBB);
 
-      const NodePtr From = NumToNode[Info.Parent];
-      if (!From) continue;
+    auto PrintNodeAndDFSNums = [](const TreeNodePtr TN) {
+      errs() << BlockNamePrinter(TN) << " {" << TN->getDFSNumIn() << ", "
+             << TN->getDFSNumOut() << '}';
+    };
 
-      const NodePtr To = BlockToInfo.first;
-      const TreeNodePtr ToTN = DT.getNode(To);
-      assert(ToTN);
-
-      const NodePtr NCD = DT.findNearestCommonDominator(From, To);
-      const TreeNodePtr NCDTN = DT.getNode(NCD);
-      const TreeNodePtr ToIDom = ToTN->getIDom();
-      if (NCDTN != ToTN && NCDTN != ToIDom) {
-        errs() << "NearestCommonDominator verification failed:\n\tNCD(From:"
-               << BlockNamePrinter(From) << ", To:" << BlockNamePrinter(To)
-               << ") = " << BlockNamePrinter(NCD)
-               << ",\t (should be To or IDom[To]: " << BlockNamePrinter(ToIDom)
-               << ")\n";
+    // Verify the root's DFS In number. Although DFS numbering would also work
+    // if we started from some other value, we assume 0-based numbering.
+    if (Root->getDFSNumIn() != 0) {
+      errs() << "DFSIn number for the tree root is not:\n\t";
+      PrintNodeAndDFSNums(Root);
+      errs() << '\n';
+      errs().flush();
+      return false;
+    }
+
+    // For each tree node verify if children's DFS numbers cover their parent's
+    // DFS numbers with no gaps.
+    for (const auto &NodeToTN : DT.DomTreeNodes) {
+      const TreeNodePtr Node = NodeToTN.second.get();
+
+      // Handle tree leaves.
+      if (Node->getChildren().empty()) {
+        if (Node->getDFSNumIn() + 1 != Node->getDFSNumOut()) {
+          errs() << "Tree leaf should have DFSOut = DFSIn + 1:\n\t";
+          PrintNodeAndDFSNums(Node);
+          errs() << '\n';
+          errs().flush();
+          return false;
+        }
+
+        continue;
+      }
+
+      // Make a copy and sort it such that it is possible to check if there are
+      // no gaps between DFS numbers of adjacent children.
+      SmallVector<TreeNodePtr, 8> Children(Node->begin(), Node->end());
+      std::sort(Children.begin(), Children.end(),
+                [](const TreeNodePtr Ch1, const TreeNodePtr Ch2) {
+                  return Ch1->getDFSNumIn() < Ch2->getDFSNumIn();
+                });
+
+      auto PrintChildrenError = [Node, &Children, PrintNodeAndDFSNums](
+          const TreeNodePtr FirstCh, const TreeNodePtr SecondCh) {
+        assert(FirstCh);
+
+        errs() << "Incorrect DFS numbers for:\n\tParent ";
+        PrintNodeAndDFSNums(Node);
+
+        errs() << "\n\tChild ";
+        PrintNodeAndDFSNums(FirstCh);
+
+        if (SecondCh) {
+          errs() << "\n\tSecond child ";
+          PrintNodeAndDFSNums(SecondCh);
+        }
+
+        errs() << "\nAll children: ";
+        for (const TreeNodePtr Ch : Children) {
+          PrintNodeAndDFSNums(Ch);
+          errs() << ", ";
+        }
+
+        errs() << '\n';
         errs().flush();
+      };
+
+      if (Children.front()->getDFSNumIn() != Node->getDFSNumIn() + 1) {
+        PrintChildrenError(Children.front(), nullptr);
+        return false;
+      }
+      if (Children.back()->getDFSNumOut() + 1 != Node->getDFSNumOut()) {
+        PrintChildrenError(Children.back(), nullptr);
         return false;
       }
+
+      for (size_t i = 0, e = Children.size() - 1; i != e; ++i) {
+        if (Children[i]->getDFSNumOut() + 1 != Children[i + 1]->getDFSNumIn()) {
+          PrintChildrenError(Children[i], Children[i + 1]);
+          return false;
+        }
+      }
     }
 
     return true;
@@ -1520,8 +1598,8 @@ template <class DomTreeT>
 bool Verify(const DomTreeT &DT) {
   SemiNCAInfo<DomTreeT> SNCA(nullptr);
   return SNCA.verifyRoots(DT) && SNCA.verifyReachability(DT) &&
-         SNCA.VerifyLevels(DT) && SNCA.verifyNCD(DT) &&
-         SNCA.verifyParentProperty(DT) && SNCA.verifySiblingProperty(DT);
+         SNCA.VerifyLevels(DT) && SNCA.verifyParentProperty(DT) &&
+         SNCA.verifySiblingProperty(DT) && SNCA.VerifyDFSNumbers(DT);
 }
 
 } // namespace DomTreeBuilder
diff --git a/include/llvm/Support/LockFileManager.h b/include/llvm/Support/LockFileManager.h
index 13d252425b93a..f14ac1cee94f6 100644
--- a/include/llvm/Support/LockFileManager.h
+++ b/include/llvm/Support/LockFileManager.h
@@ -88,7 +88,7 @@ class LockFileManager {
   std::string getErrorMessage() const;
 
   /// \brief Set error and error message
-  void setError(std::error_code &EC, StringRef ErrorMsg = "") {
+  void setError(const std::error_code &EC, StringRef ErrorMsg = "") {
     Error = EC;
     ErrorDiagMsg = ErrorMsg.str();
   }
diff --git a/include/llvm/Support/Printable.h b/include/llvm/Support/Printable.h
index 28e875e8ff5e8..cb55d41316e3f 100644
--- a/include/llvm/Support/Printable.h
+++ b/include/llvm/Support/Printable.h
@@ -42,7 +42,7 @@ class Printable {
       : Print(std::move(Print)) {}
 };
 
-static inline raw_ostream &operator<<(raw_ostream &OS, const Printable &P) {
+inline raw_ostream &operator<<(raw_ostream &OS, const Printable &P) {
   P.Print(OS);
   return OS;
 }
diff --git a/include/llvm/Support/Process.h b/include/llvm/Support/Process.h
index 780c7e2ddd6f7..82b0d9f6ba280 100644
--- a/include/llvm/Support/Process.h
+++ b/include/llvm/Support/Process.h
@@ -80,9 +80,15 @@ class Process {
   /// This function searches for an existing file in the list of directories
   /// in a PATH like environment variable, and returns the first file found,
   /// according to the order of the entries in the PATH like environment
-  /// variable.
-  static Optional<std::string> FindInEnvPath(const std::string& EnvName,
-                                             const std::string& FileName);
+  /// variable. If an ignore list is specified, then any folder which is in
+  /// the PATH like environment variable but is also in IgnoreList is not
+  /// considered.
+  static Optional<std::string> FindInEnvPath(StringRef EnvName,
+                                             StringRef FileName,
+                                             ArrayRef<std::string> IgnoreList);
+
+  static Optional<std::string> FindInEnvPath(StringRef EnvName,
+                                             StringRef FileName);
 
   /// This function returns a SmallVector containing the arguments passed from
   /// the operating system to the program. This function expects to be handed
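[Editor's sketch] A short example of the new overload with illustrative values; directories named in the ignore list are skipped even when they appear in the PATH-like variable:

  #include "llvm/ADT/Optional.h"
  #include "llvm/Support/Process.h"
  #include <string>

  using namespace llvm;

  Optional<std::string> findClang() {
    const std::string IgnoreDirs[] = {"/tmp/stale-bin"}; // illustrative
    return sys::Process::FindInEnvPath("PATH", "clang", IgnoreDirs);
  }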
diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h
index 910174732994c..cfbdbc7516178 100644
--- a/include/llvm/Support/ScaledNumber.h
+++ b/include/llvm/Support/ScaledNumber.h
@@ -504,13 +504,13 @@ template <class DigitsT> class ScaledNumber : ScaledNumberBase {
   static_assert(Width <= 64, "invalid integer width for digits");
 
 private:
-  DigitsType Digits;
-  int16_t Scale;
+  DigitsType Digits = 0;
+  int16_t Scale = 0;
 
 public:
-  ScaledNumber() : Digits(0), Scale(0) {}
+  ScaledNumber() = default;
 
-  ScaledNumber(DigitsType Digits, int16_t Scale)
+  constexpr ScaledNumber(DigitsType Digits, int16_t Scale)
       : Digits(Digits), Scale(Scale) {}
 
 private:
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index 399f8dcd76fca..c08bf858760a1 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -43,7 +43,8 @@ class SourceMgr {
   enum DiagKind {
     DK_Error,
     DK_Warning,
-    DK_Note
+    DK_Remark,
+    DK_Note,
   };
 
   /// Clients that want to handle their own diagnostics in a custom way can
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index 9106e0856b11e..21913d5f01e3a 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -67,15 +67,21 @@ MCStreamer *createAsmStreamer(MCContext &Ctx, MCAsmBackend *TAB,
                               bool ShowInst);
 
 /// Takes ownership of \p TAB and \p CE.
-MCStreamer *createELFStreamer(MCContext &Ctx, MCAsmBackend &TAB,
-                              raw_pwrite_stream &OS, MCCodeEmitter *CE,
+MCStreamer *createELFStreamer(MCContext &Ctx,
+                              std::unique_ptr<MCAsmBackend> &&TAB,
+                              raw_pwrite_stream &OS,
+                              std::unique_ptr<MCCodeEmitter> &&CE,
                               bool RelaxAll);
-MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
-                                raw_pwrite_stream &OS, MCCodeEmitter *CE,
+MCStreamer *createMachOStreamer(MCContext &Ctx,
+                                std::unique_ptr<MCAsmBackend> &&TAB,
+                                raw_pwrite_stream &OS,
+                                std::unique_ptr<MCCodeEmitter> &&CE,
                                 bool RelaxAll, bool DWARFMustBeAtTheEnd,
                                 bool LabelSections = false);
-MCStreamer *createWasmStreamer(MCContext &Ctx, MCAsmBackend &TAB,
-                               raw_pwrite_stream &OS, MCCodeEmitter *CE,
+MCStreamer *createWasmStreamer(MCContext &Ctx,
+                               std::unique_ptr<MCAsmBackend> &&TAB,
+                               raw_pwrite_stream &OS,
+                               std::unique_ptr<MCCodeEmitter> &&CE,
                                bool RelaxAll);
 
 MCRelocationInfo *createMCRelocationInfo(const Triple &TT, MCContext &Ctx);
@@ -134,26 +140,26 @@ class Target {
   using MCCodeEmitterCtorTy = MCCodeEmitter *(*)(const MCInstrInfo &II,
                                                  const MCRegisterInfo &MRI,
                                                  MCContext &Ctx);
-  using ELFStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx,
-                                            MCAsmBackend &TAB,
-                                            raw_pwrite_stream &OS,
-                                            MCCodeEmitter *Emitter,
-                                            bool RelaxAll);
-  using MachOStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, MCAsmBackend &TAB,
-                                              raw_pwrite_stream &OS,
-                                              MCCodeEmitter *Emitter,
-                                              bool RelaxAll,
-                                              bool DWARFMustBeAtTheEnd);
-  using COFFStreamerCtorTy = MCStreamer *(*)(MCContext &Ctx, MCAsmBackend &TAB,
-                                             raw_pwrite_stream &OS,
-                                             MCCodeEmitter *Emitter,
-                                             bool RelaxAll,
-                                             bool IncrementalLinkerCompatible);
-  using WasmStreamerCtorTy = MCStreamer *(*)(const Triple &T, MCContext &Ctx,
-                                             MCAsmBackend &TAB,
-                                             raw_pwrite_stream &OS,
-                                             MCCodeEmitter *Emitter,
-                                             bool RelaxAll);
+  using ELFStreamerCtorTy =
+      MCStreamer *(*)(const Triple &T, MCContext &Ctx,
+                      std::unique_ptr<MCAsmBackend> &&TAB,
+                      raw_pwrite_stream &OS,
+                      std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll);
+  using MachOStreamerCtorTy =
+      MCStreamer *(*)(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB,
+                      raw_pwrite_stream &OS,
+                      std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll,
+                      bool DWARFMustBeAtTheEnd);
+  using COFFStreamerCtorTy =
+      MCStreamer *(*)(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&TAB,
+                      raw_pwrite_stream &OS,
+                      std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll,
+                      bool IncrementalLinkerCompatible);
+  using WasmStreamerCtorTy =
+      MCStreamer *(*)(const Triple &T, MCContext &Ctx,
+                      std::unique_ptr<MCAsmBackend> &&TAB,
+                      raw_pwrite_stream &OS,
+                      std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll);
   using NullTargetStreamerCtorTy = MCTargetStreamer *(*)(MCStreamer &S);
   using AsmTargetStreamerCtorTy = MCTargetStreamer *(*)(
       MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint,
@@ -435,8 +441,9 @@ class Target {
   /// \param Emitter The target independent assembler object.Takes ownership.
   /// \param RelaxAll Relax all fixups?
   MCStreamer *createMCObjectStreamer(const Triple &T, MCContext &Ctx,
-                                     MCAsmBackend &TAB, raw_pwrite_stream &OS,
-                                     MCCodeEmitter *Emitter,
+                                     std::unique_ptr<MCAsmBackend> &&TAB,
+                                     raw_pwrite_stream &OS,
+                                     std::unique_ptr<MCCodeEmitter> &&Emitter,
                                      const MCSubtargetInfo &STI, bool RelaxAll,
                                      bool IncrementalLinkerCompatible,
                                      bool DWARFMustBeAtTheEnd) const {
@@ -446,28 +453,32 @@ class Target {
       llvm_unreachable("Unknown object format");
     case Triple::COFF:
       assert(T.isOSWindows() && "only Windows COFF is supported");
-      S = COFFStreamerCtorFn(Ctx, TAB, OS, Emitter, RelaxAll,
-                             IncrementalLinkerCompatible);
+      S = COFFStreamerCtorFn(Ctx, std::move(TAB), OS, std::move(Emitter),
+                             RelaxAll, IncrementalLinkerCompatible);
       break;
     case Triple::MachO:
       if (MachOStreamerCtorFn)
-        S = MachOStreamerCtorFn(Ctx, TAB, OS, Emitter, RelaxAll,
-                                DWARFMustBeAtTheEnd);
+        S = MachOStreamerCtorFn(Ctx, std::move(TAB), OS, std::move(Emitter),
+                                RelaxAll, DWARFMustBeAtTheEnd);
       else
-        S = createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
-                                DWARFMustBeAtTheEnd);
+        S = createMachOStreamer(Ctx, std::move(TAB), OS, std::move(Emitter),
+                                RelaxAll, DWARFMustBeAtTheEnd);
       break;
     case Triple::ELF:
       if (ELFStreamerCtorFn)
-        S = ELFStreamerCtorFn(T, Ctx, TAB, OS, Emitter, RelaxAll);
+        S = ELFStreamerCtorFn(T, Ctx, std::move(TAB), OS, std::move(Emitter),
+                              RelaxAll);
       else
-        S = createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+        S = createELFStreamer(Ctx, std::move(TAB), OS, std::move(Emitter),
+                              RelaxAll);
       break;
     case Triple::Wasm:
      if (WasmStreamerCtorFn)
-        S = WasmStreamerCtorFn(T, Ctx, TAB, OS, Emitter, RelaxAll);
+        S = WasmStreamerCtorFn(T, Ctx, std::move(TAB), OS, std::move(Emitter),
+                               RelaxAll);
       else
-        S = createWasmStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
+        S = createWasmStreamer(Ctx, std::move(TAB), OS, std::move(Emitter),
+                               RelaxAll);
       break;
     }
     if (ObjectTargetStreamerCtorFn)
diff --git a/include/llvm/Support/ThreadPool.h b/include/llvm/Support/ThreadPool.h
index 9ada946c6dae3..fb82559005100 100644
--- a/include/llvm/Support/ThreadPool.h
+++ b/include/llvm/Support/ThreadPool.h
@@ -38,8 +38,8 @@ class ThreadPool {
   using TaskTy = std::function<void()>;
   using PackagedTaskTy = std::packaged_task<void()>;
 
-  /// Construct a pool with the number of core available on the system (or
-  /// whatever the value returned by std::thread::hardware_concurrency() is).
+  /// Construct a pool with the number of threads found by
+  /// hardware_concurrency().
   ThreadPool();
 
   /// Construct a pool of \p ThreadCount threads
diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h
index 03963a24c107e..6d813bccb93fc 100644
--- a/include/llvm/Support/Threading.h
+++ b/include/llvm/Support/Threading.h
@@ -131,6 +131,14 @@ void llvm_execute_on_thread(void (*UserFn)(void *), void *UserData,
   /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF
   unsigned heavyweight_hardware_concurrency();
 
+  /// Get the number of threads that the current program can execute
+  /// concurrently. On some systems std::thread::hardware_concurrency() returns
+  /// the total number of cores, without taking affinity into consideration.
+  /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF.
+  /// Fallback to std::thread::hardware_concurrency() if sched_getaffinity is
+  /// not available.
+  unsigned hardware_concurrency();
+
   /// \brief Return the current thread id, as used in various OS system calls.
   /// Note that not all platforms guarantee that the value returned will be
   /// unique across the entire system, so portable code should not assume
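[Editor's sketch] A minimal example combining the two additions: size a pool with the new affinity-aware thread count rather than std::thread::hardware_concurrency():

  #include "llvm/Support/ThreadPool.h"
  #include "llvm/Support/Threading.h"

  void parallelWork() {
    llvm::ThreadPool Pool(llvm::hardware_concurrency());
    for (int I = 0; I < 8; ++I)
      Pool.async([I] { /* per-chunk work goes here */ });
    Pool.wait(); // Block until all queued tasks have run.
  }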
diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h
index 549da3ccad51f..626507947a784 100644
--- a/include/llvm/Support/YAMLParser.h
+++ b/include/llvm/Support/YAMLParser.h
@@ -572,13 +572,15 @@ class document_iterator {
   document_iterator() = default;
   document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
 
-  bool operator==(const document_iterator &Other) {
+  bool operator==(const document_iterator &Other) const {
     if (isAtEnd() || Other.isAtEnd())
       return isAtEnd() && Other.isAtEnd();
 
     return Doc == Other.Doc;
   }
-  bool operator!=(const document_iterator &Other) { return !(*this == Other); }
+  bool operator!=(const document_iterator &Other) const {
+    return !(*this == Other);
+  }
 
   document_iterator operator++() {
     assert(Doc && "incrementing iterator past the end.");
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index 565833c95b70c..d11f5a837796a 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -362,9 +362,7 @@ class raw_fd_ostream : public raw_pwrite_stream {
   int FD;
   bool ShouldClose;
 
-  /// Error This flag is true if an error of any kind has been detected.
-  ///
-  bool Error;
+  std::error_code EC;
 
   uint64_t pos;
 
@@ -383,7 +381,7 @@ class raw_fd_ostream : public raw_pwrite_stream {
   size_t preferred_buffer_size() const override;
 
   /// Set the flag indicating that an output error has been encountered.
-  void error_detected() { Error = true; }
+  void error_detected(std::error_code EC) { this->EC = EC; }
 
 public:
   /// Open the specified file for writing. If an error occurs, information
@@ -424,13 +422,13 @@ class raw_fd_ostream : public raw_pwrite_stream {
 
   bool has_colors() const override;
 
+  std::error_code error() const { return EC; }
+
   /// Return the value of the flag in this raw_fd_ostream indicating whether an
   /// output error has been encountered.
   /// This doesn't implicitly flush any pending output. Also, it doesn't
   /// guarantee to detect all errors unless the stream has been closed.
-  bool has_error() const {
-    return Error;
-  }
+  bool has_error() const { return bool(EC); }
 
   /// Set the flag read by has_error() to false. If the error flag is set at the
   /// time when this raw_ostream's destructor is called, report_fatal_error is
@@ -441,9 +439,7 @@ class raw_fd_ostream : public raw_pwrite_stream {
   ///   "Unless explicitly silenced."
   ///   - from The Zen of Python, by Tim Peters
   ///
-  void clear_error() {
-    Error = false;
-  }
+  void clear_error() { EC = std::error_code(); }
 };
 
 /// This returns a reference to a raw_ostream for standard output. Use it like:
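[Editor's sketch] With the boolean flag replaced by a std::error_code, callers can now recover the specific failure reason rather than just a yes/no. A hedged example; the flag and path values are illustrative:

  #include "llvm/Support/FileSystem.h"
  #include "llvm/Support/raw_ostream.h"

  void writeFile(llvm::StringRef Path) {
    std::error_code EC;
    llvm::raw_fd_ostream OS(Path, EC, llvm::sys::fs::F_None);
    if (EC)
      return; // Could not open the file at all.
    OS << "hello\n";
    OS.flush();
    if (OS.has_error()) {
      std::error_code WriteEC = OS.error(); // Specific failure reason.
      (void)WriteEC;
      OS.clear_error(); // Silence the destructor's fatal-error check.
    }
  }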
+ string DiagnosticType = ""; + + // A diagnostic message to emit when an invalid value is provided for this + // register class when it is being used an an assembly operand. If this is + // non-empty, an anonymous diagnostic type enum value will be generated, and + // the assembly matcher will provide a function to map from diagnostic types + // to message strings. + string DiagnosticString = ""; } // The memberList in a RegisterClass is a dag of set operations. TableGen @@ -677,6 +692,10 @@ class AsmOperandClass { // diagnostic. The target AsmParser maps these codes to text. string DiagnosticType = ""; + /// A diagnostic message to emit when an invalid value is provided for this + /// operand. + string DiagnosticString = ""; + /// Set to 1 if this operand is optional and not always required. Typically, /// the AsmParser will emit an error when it finishes parsing an /// instruction if it hasn't matched all the operands yet. However, this @@ -749,6 +768,12 @@ class RegisterOperand AsmOperandClass ParserMatchClass; string OperandType = "OPERAND_REGISTER"; + + // When referenced in the result of a CodeGen pattern, GlobalISel will + // normally copy the matched operand to the result. When this is set, it will + // emit a special copy that will replace zero-immediates with the specified + // zero-register. + Register GIZeroRegister = ?; } let OperandType = "OPERAND_IMMEDIATE" in { @@ -1126,6 +1151,14 @@ class AsmParser { // HasMnemonicFirst - Set to false if target instructions don't always // start with a mnemonic as the first token. bit HasMnemonicFirst = 1; + + // ReportMultipleNearMisses - + // When 0, the assembly matcher reports an error for one encoding or operand + // that did not match the parsed instruction. + // When 1, the assmebly matcher returns a list of encodings that were close + // to matching the parsed instruction, so to allow more detailed error + // messages. + bit ReportMultipleNearMisses = 0; } def DefaultAsmParser : AsmParser; diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 8e7e6a7fbfd7d..5d230d820dbf2 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -1646,7 +1646,8 @@ class TargetInstrInfo : public MCInstrInfo { /// A function \p MF is considered safe for outlining if an outlined function /// produced from instructions in F will produce a program which produces the /// same output for any set of given inputs. - virtual bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const { + virtual bool isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const { llvm_unreachable("Target didn't implement " "TargetInstrInfo::isFunctionSafeToOutlineFrom!"); } diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index ea3e3f7b04988..c1d0b32f7d75f 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -1993,7 +1993,8 @@ class TargetLoweringBase { bool isExtFree(const Instruction *I) const { switch (I->getOpcode()) { case Instruction::FPExt: - if (isFPExtFree(EVT::getEVT(I->getType()))) + if (isFPExtFree(EVT::getEVT(I->getType()), + EVT::getEVT(I->getOperand(0)->getType()))) return true; break; case Instruction::ZExt: @@ -2120,11 +2121,21 @@ class TargetLoweringBase { /// Return true if an fpext operation is free (for instance, because /// single-precision floating-point numbers are implicitly extended to /// double-precision). 
- virtual bool isFPExtFree(EVT VT) const { - assert(VT.isFloatingPoint()); + virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { + assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && + "invalid fpext types"); return false; } + /// Return true if an fpext operation input to an \p Opcode operation is free + /// (for instance, because half-precision floating-point numbers are + /// implicitly extended to float-precision) for an FMA instruction. + virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const { + assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && + "invalid fpext types"); + return isFPExtFree(DestVT, SrcVT); + } + /// Return true if folding a vector load into ExtVal (a sign, zero, or any /// extend node) is profitable. virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } @@ -2654,7 +2665,7 @@ class TargetLowering : public TargetLoweringBase { bool AssumeSingleUse = false) const; /// Helper wrapper around SimplifyDemandedBits - bool SimplifyDemandedBits(SDValue Op, APInt &DemandedMask, + bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, DAGCombinerInfo &DCI) const; /// Determine which of the bits specified in Mask are known to be either zero @@ -2756,18 +2767,6 @@ class TargetLowering : public TargetLoweringBase { return true; } - // Return true if it is profitable to combine a BUILD_VECTOR to a TRUNCATE. - // Example of such a combine: - // v4i32 build_vector((extract_elt V, 0), - // (extract_elt V, 2), - // (extract_elt V, 4), - // (extract_elt V, 6)) - // --> - // v4i32 truncate (bitcast V to v4i64) - virtual bool isDesirableToCombineBuildVectorToTruncate() const { - return false; - } - // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern // to a shuffle and a truncate. // Example of such a combine: @@ -2907,7 +2906,7 @@ class TargetLowering : public TargetLoweringBase { RetTy = ResultType; Callee = Target; CallConv = CC; - NumFixedArgs = Args.size(); + NumFixedArgs = ArgsList.size(); Args = std::move(ArgsList); DAG.getTargetLoweringInfo().markLibCallAttributes( @@ -2920,7 +2919,7 @@ class TargetLowering : public TargetLoweringBase { RetTy = ResultType; Callee = Target; CallConv = CC; - NumFixedArgs = Args.size(); + NumFixedArgs = ArgsList.size(); Args = std::move(ArgsList); return *this; } diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h index 33df133a4d582..ca01a3acac6ef 100644 --- a/include/llvm/Target/TargetOpcodes.h +++ b/include/llvm/Target/TargetOpcodes.h @@ -28,13 +28,13 @@ enum { /// Check whether the given Opcode is a generic opcode that is not supposed /// to appear after ISel. -static inline bool isPreISelGenericOpcode(unsigned Opcode) { +inline bool isPreISelGenericOpcode(unsigned Opcode) { return Opcode >= TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START && Opcode <= TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; } /// Check whether the given Opcode is a target-specific opcode. 
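(Illustrative aside on the TargetLowering change above: passing the source type through lets a target discriminate between extension pairs. A hypothetical override, with MyTargetLowering an invented name, might read:)

    // Sketch: treat only the f32 -> f64 extension as free on this target.
    bool MyTargetLowering::isFPExtFree(llvm::EVT DestVT, llvm::EVT SrcVT) const {
      assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&
             "invalid fpext types");
      return SrcVT == llvm::MVT::f32 && DestVT == llvm::MVT::f64;
    }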
-static inline bool isTargetSpecificOpcode(unsigned Opcode) { +inline bool isTargetSpecificOpcode(unsigned Opcode) { return Opcode > TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END; } } // end namespace llvm diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index afa6a89a890e4..b2f6f991ae574 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -40,6 +40,7 @@ class MachineFunction; class MachineInstr; class RegScavenger; class VirtRegMap; +class LiveIntervals; class TargetRegisterClass { public: @@ -959,7 +960,8 @@ class TargetRegisterInfo : public MCRegisterInfo { unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const { return true; } //===--------------------------------------------------------------------===// diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 0db58ba7a644c..511b7655e5012 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -132,7 +132,7 @@ def SDTFPSignOp : SDTypeProfile<1, 2, [ // fcopysign. def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0> ]>; -def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // ctlz +def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz SDTCisSameAs<0, 1>, SDTCisInt<0> ]>; def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext @@ -649,6 +649,39 @@ class PatFrag<dag ops, dag frag, code pred = [{}], + // cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; + // cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; + bit IsUnindexed = ?; + + // cast<LoadSDNode>(N)->getExtensionType() != ISD::NON_EXTLOAD + bit IsNonExtLoad = ?; + // cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD; + bit IsAnyExtLoad = ?; + // cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; + bit IsSignExtLoad = ?; + // cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; + bit IsZeroExtLoad = ?; + // !cast<StoreSDNode>(N)->isTruncatingStore(); + // cast<StoreSDNode>(N)->isTruncatingStore(); + bit IsTruncStore = ?; + + // cast<LoadSDNode>(N)->getMemoryVT() == MVT::<VT>; + // cast<StoreSDNode>(N)->getMemoryVT() == MVT::<VT>; + ValueType MemoryVT = ?; + // cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>; + // cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::<VT>; + ValueType ScalarMemoryVT = ?; } // OutPatFrag is a pattern fragment that is used as part of an output pattern @@ -676,12 +709,41 @@ class PatLeaf // If FastIsel should ignore all instructions that have an operand of this type, // the FastIselShouldIgnore flag can be set. This is an optimization to reduce // the code size of the generated fast instruction selector. -class ImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm> - : PatFrag<(ops), (vt imm), [{}], xform> { +class ImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm, + SDNode ImmNode = imm> + : PatFrag<(ops), (vt ImmNode), [{}], xform> { let ImmediateCode = pred; bit FastIselShouldIgnore = 0; + + // Is the data type of the immediate an APInt? + bit IsAPInt = 0; + + // Is the data type of the immediate an APFloat? + bit IsAPFloat = 0; +} + +// An ImmLeaf except that Imm is an APInt. This is useful when you need to +// zero-extend the immediate instead of sign-extending it. +// +// Note that FastISel does not currently understand IntImmLeaf and will not +// generate code for rules that make use of it. As such, it does not make sense +// to replace ImmLeaf with IntImmLeaf. However, replacing PatLeaf with an +// IntImmLeaf will allow GlobalISel to import the rule.
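(Illustrative aside: the comments above name the SelectionDAG C++ checks that the old [{ ... }] predicate blocks spelled out by hand. For a plain unindexed, non-extending load the combined check is roughly:)

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    // What `IsLoad = 1`, `IsUnindexed = 1`, `IsNonExtLoad = 1` correspond to.
    static bool isPlainLoad(const llvm::SDNode *N) {
      const auto *LD = llvm::cast<llvm::LoadSDNode>(N);
      return LD->getAddressingMode() == llvm::ISD::UNINDEXED &&
             LD->getExtensionType() == llvm::ISD::NON_EXTLOAD;
    }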
+class IntImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm> + : ImmLeaf<vt, pred, xform> { + let IsAPInt = 1; + let FastIselShouldIgnore = 1; } +// An ImmLeaf except that Imm is an APFloat. +// +// Note that FastISel does not currently understand FPImmLeaf and will not +// generate code for rules that make use of it. +class FPImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm> + : ImmLeaf<vt, pred, xform, fpimm> { + let IsAPFloat = 1; + let FastIselShouldIgnore = 1; +} // Leaf fragments. @@ -710,170 +772,215 @@ def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>; def null_frag : SDPatternOperator; // load fragments. -def unindexedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{ - return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; -}]>; -def load : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ - return cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD; -}]>; +def unindexedload : PatFrag<(ops node:$ptr), (ld node:$ptr)> { + let IsLoad = 1; + let IsUnindexed = 1; +} +def load : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { + let IsLoad = 1; + let IsNonExtLoad = 1; +} // extending load fragments. -def extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ - return cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD; -}]>; -def sextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ - return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; -}]>; -def zextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ - return cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; -}]>; +def extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { + let IsLoad = 1; + let IsAnyExtLoad = 1; +} +def sextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { + let IsLoad = 1; + let IsSignExtLoad = 1; +} +def zextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> { + let IsLoad = 1; + let IsZeroExtLoad = 1; +} -def extloadi1 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i1; -}]>; -def extloadi8 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; -def extloadi16 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; -def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; -}]>; -def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::f32; -}]>; -def extloadf64 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::f64; -}]>; +def extloadi1 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i1; +} +def extloadi8 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i8; +} +def extloadi16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i16; +} +def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i32; +} +def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = f32; +} +def extloadf64 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = f64; +} -def sextloadi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i1; -}]>; -def sextloadi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; -def sextloadi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; -def sextloadi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ - return 
cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; -}]>; +def sextloadi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i1; +} +def sextloadi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i8; +} +def sextloadi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i16; +} +def sextloadi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i32; +} -def zextloadi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i1; -}]>; -def zextloadi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; -def zextloadi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; -def zextloadi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32; -}]>; +def zextloadi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i1; +} +def zextloadi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i8; +} +def zextloadi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i16; +} +def zextloadi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { + let IsLoad = 1; + let MemoryVT = i32; +} -def extloadvi1 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1; -}]>; -def extloadvi8 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; -}]>; -def extloadvi16 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; -}]>; -def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; -}]>; -def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::f32; -}]>; -def extloadvf64 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::f64; -}]>; +def extloadvi1 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i1; +} +def extloadvi8 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i8; +} +def extloadvi16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i16; +} +def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i32; +} +def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = f32; +} +def extloadvf64 : PatFrag<(ops node:$ptr), (extload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = f64; +} -def sextloadvi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1; -}]>; -def sextloadvi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; -}]>; -def sextloadvi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; -}]>; -def sextloadvi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; -}]>; +def sextloadvi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i1; 
+} +def sextloadvi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i8; +} +def sextloadvi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i16; +} +def sextloadvi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i32; +} -def zextloadvi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1; -}]>; -def zextloadvi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; -}]>; -def zextloadvi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; -}]>; -def zextloadvi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ - return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; -}]>; +def zextloadvi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i1; +} +def zextloadvi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i8; +} +def zextloadvi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i16; +} +def zextloadvi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr)> { + let IsLoad = 1; + let ScalarMemoryVT = i32; +} // store fragments. def unindexedstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; -}]>; + (st node:$val, node:$ptr)> { + let IsStore = 1; + let IsUnindexed = 1; +} def store : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore node:$val, node:$ptr), [{ - return !cast<StoreSDNode>(N)->isTruncatingStore(); -}]>; + (unindexedstore node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 0; +} // truncstore fragments. 
def truncstore : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->isTruncatingStore(); -}]>; + (unindexedstore node:$val, node:$ptr)> { + let IsStore = 1; + let IsTruncStore = 1; +} def truncstorei8 : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i8; +} def truncstorei16 : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i16; +} def truncstorei32 : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i32; -}]>; + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = i32; +} def truncstoref32 : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f32; -}]>; + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = f32; +} def truncstoref64 : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f64; -}]>; + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let MemoryVT = f64; +} def truncstorevi8 : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; -}]>; + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let ScalarMemoryVT = i8; +} def truncstorevi16 : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; -}]>; + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let ScalarMemoryVT = i16; +} def truncstorevi32 : PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; -}]>; + (truncstore node:$val, node:$ptr)> { + let IsStore = 1; + let ScalarMemoryVT = i32; +} // indexed store fragments. 
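(Illustrative aside for the store side, before the indexed store fragments that follow: a `let MemoryVT = i8` restriction on a truncating store corresponds to the removed C++ predicate, roughly:)

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    // E.g. truncstorei8's old predicate, written out directly.
    static bool isTruncStoreI8(const llvm::SDNode *N) {
      const auto *ST = llvm::cast<llvm::StoreSDNode>(N);
      return ST->isTruncatingStore() && ST->getMemoryVT() == llvm::MVT::i8;
    }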
def istore : PatFrag<(ops node:$val, node:$base, node:$offset), - (ist node:$val, node:$base, node:$offset), [{ - return !cast<StoreSDNode>(N)->isTruncatingStore(); -}]>; + (ist node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let IsTruncStore = 0; +} def pre_store : PatFrag<(ops node:$val, node:$base, node:$offset), (istore node:$val, node:$base, node:$offset), [{ @@ -882,34 +989,40 @@ def pre_store : PatFrag<(ops node:$val, node:$base, node:$offset), }]>; def itruncstore : PatFrag<(ops node:$val, node:$base, node:$offset), - (ist node:$val, node:$base, node:$offset), [{ - return cast<StoreSDNode>(N)->isTruncatingStore(); -}]>; + (ist node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let IsTruncStore = 1; +} def pre_truncst : PatFrag<(ops node:$val, node:$base, node:$offset), (itruncstore node:$val, node:$base, node:$offset), [{ ISD::MemIndexedMode AM = cast<StoreSDNode>(N)->getAddressingMode(); return AM == ISD::PRE_INC || AM == ISD::PRE_DEC; }]>; def pre_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset), - (pre_truncst node:$val, node:$base, node:$offset), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1; -}]>; + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let MemoryVT = i1; +} def pre_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset), - (pre_truncst node:$val, node:$base, node:$offset), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let MemoryVT = i8; +} def pre_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset), - (pre_truncst node:$val, node:$base, node:$offset), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let MemoryVT = i16; +} def pre_truncsti32 : PatFrag<(ops node:$val, node:$base, node:$offset), - (pre_truncst node:$val, node:$base, node:$offset), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i32; -}]>; + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let MemoryVT = i32; +} def pre_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), - (pre_truncst node:$val, node:$base, node:$offset), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f32; -}]>; + (pre_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let MemoryVT = f32; +} def post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset), (istore node:$val, node:$ptr, node:$offset), [{ @@ -923,25 +1036,30 @@ def post_truncst : PatFrag<(ops node:$val, node:$base, node:$offset), return AM == ISD::POST_INC || AM == ISD::POST_DEC; }]>; def post_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset), - (post_truncst node:$val, node:$base, node:$offset), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1; -}]>; + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let MemoryVT = i1; +} def post_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset), - (post_truncst node:$val, node:$base, node:$offset), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8; -}]>; + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let MemoryVT = i8; +} def post_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset), - (post_truncst node:$val, node:$base, node:$offset), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16; -}]>; + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let MemoryVT = i16; +} def post_truncsti32 : PatFrag<(ops node:$val, node:$base, - (post_truncst node:$val, node:$base, 
node:$offset), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i32; -}]>; + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let MemoryVT = i32; +} def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), - (post_truncst node:$val, node:$base, node:$offset), [{ - return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f32; -}]>; + (post_truncst node:$val, node:$base, node:$offset)> { + let IsStore = 1; + let MemoryVT = f32; +} // nontemporal store fragments. def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h index 0f42f39595eff..9d6b1b0fa209a 100644 --- a/include/llvm/Target/TargetSubtargetInfo.h +++ b/include/llvm/Target/TargetSubtargetInfo.h @@ -221,6 +221,11 @@ class TargetSubtargetInfo : public MCSubtargetInfo { /// a finer grain to tune the register allocator. virtual bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const; + /// \brief True if the subtarget should consider the cost of local intervals + /// created by a split candidate when choosing the best split candidate. This + /// heuristic may be compile time intensive. + virtual bool enableAdvancedRASplitCost() const; + /// \brief Enable use of alias analysis during code generation (during MI /// scheduling, DAGCombine, etc.). virtual bool useAA() const; diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 39ceb19525b3c..ce20a726b7832 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -216,6 +216,10 @@ ModulePass *createMetaRenamerPass(); /// manager. ModulePass *createBarrierNoopPass(); +/// createCalledValuePropagationPass - Attach metadata to indirect call sites +/// indicating the set of functions they may target at run-time. +ModulePass *createCalledValuePropagationPass(); + /// What to do with the summary when running passes that operate on it. enum class PassSummaryAction { None, ///< Do nothing. diff --git a/include/llvm/Transforms/IPO/ArgumentPromotion.h b/include/llvm/Transforms/IPO/ArgumentPromotion.h index 724ff72f3b5a1..82ffc69a166ee 100644 --- a/include/llvm/Transforms/IPO/ArgumentPromotion.h +++ b/include/llvm/Transforms/IPO/ArgumentPromotion.h @@ -12,6 +12,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/IR/PassManager.h" namespace llvm { @@ -26,6 +27,6 @@ class ArgumentPromotionPass : public PassInfoMixin<ArgumentPromotionPass> { LazyCallGraph &CG, CGSCCUpdateResult &UR); }; -} // end namespace llvm -#endif // LLVM_TRANSFORMS_IPO_ARGUMENTPROMOTION_H diff --git a/include/llvm/Transforms/IPO/CalledValuePropagation.h b/include/llvm/Transforms/IPO/CalledValuePropagation.h new file mode 100644 index 0000000000000..352bdc7ac17f1 --- /dev/null +++ b/include/llvm/Transforms/IPO/CalledValuePropagation.h @@ -0,0 +1,35 @@ +//===- CalledValuePropagation.h - Propagate called values -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a transformation that attaches !callees metadata to +// indirect call sites. For a given call site, the metadata, if present, +// indicates the set of functions the call site could possibly target at +// run-time. 
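(Illustrative aside, not from the patch: a sketch of how a later analysis might consume the !callees metadata this pass attaches; the visitor callback is a placeholder.)

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Metadata.h"

    // Enumerate the possible targets recorded on an indirect call, if any.
    static void forEachPossibleCallee(llvm::CallInst &CI,
                                      void (*Visit)(llvm::Function *)) {
      if (llvm::MDNode *Callees = CI.getMetadata("callees"))
        for (const llvm::MDOperand &Op : Callees->operands())
          if (auto *F = llvm::mdconst::dyn_extract<llvm::Function>(Op))
            Visit(F);
    }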
This metadata is added to indirect call sites when the set of +// possible targets can be determined by analysis and is known to be small. The +// analysis driving the transformation is similar to constant propagation and +// makes use of the generic sparse propagation solver. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H +#define LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class CalledValuePropagationPass + : public PassInfoMixin<CalledValuePropagationPass> { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_CALLEDVALUEPROPAGATION_H diff --git a/include/llvm/Transforms/IPO/ConstantMerge.h b/include/llvm/Transforms/IPO/ConstantMerge.h index 1d4da43f6a7bb..e04d3ae1a40ed 100644 --- a/include/llvm/Transforms/IPO/ConstantMerge.h +++ b/include/llvm/Transforms/IPO/ConstantMerge.h @@ -20,16 +20,18 @@ #ifndef LLVM_TRANSFORMS_IPO_CONSTANTMERGE_H #define LLVM_TRANSFORMS_IPO_CONSTANTMERGE_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Module; + /// A pass that merges duplicate global constants into a single constant. class ConstantMergePass : public PassInfoMixin<ConstantMergePass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_CONSTANTMERGE_H diff --git a/include/llvm/Transforms/IPO/DeadArgumentElimination.h b/include/llvm/Transforms/IPO/DeadArgumentElimination.h index e179afa956f6e..ba5666f20a9bf 100644 --- a/include/llvm/Transforms/IPO/DeadArgumentElimination.h +++ b/include/llvm/Transforms/IPO/DeadArgumentElimination.h @@ -20,15 +20,21 @@ #ifndef LLVM_TRANSFORMS_IPO_DEADARGUMENTELIMINATION_H #define LLVM_TRANSFORMS_IPO_DEADARGUMENTELIMINATION_H -#include "llvm/IR/Module.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" - #include <map> #include <set> #include <string> +#include <tuple> namespace llvm { +class Module; +class Use; +class Value; + /// Eliminate dead arguments (and return values) from functions. class DeadArgumentEliminationPass : public PassInfoMixin<DeadArgumentEliminationPass> { @@ -37,12 +43,13 @@ class DeadArgumentEliminationPass /// argument. Used so that arguments and return values can be used /// interchangeably. struct RetOrArg { - RetOrArg(const Function *F, unsigned Idx, bool IsArg) - : F(F), Idx(Idx), IsArg(IsArg) {} const Function *F; unsigned Idx; bool IsArg; + RetOrArg(const Function *F, unsigned Idx, bool IsArg) + : F(F), Idx(Idx), IsArg(IsArg) {} + /// Make RetOrArg comparable, so we can put it into a map. bool operator<(const RetOrArg &O) const { return std::tie(F, Idx, IsArg) < std::tie(O.F, O.Idx, O.IsArg); @@ -67,16 +74,23 @@ class DeadArgumentEliminationPass /// thus become dead in the end. enum Liveness { Live, MaybeLive }; + DeadArgumentEliminationPass(bool ShouldHackArguments_ = false) + : ShouldHackArguments(ShouldHackArguments_) {} + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); + /// Convenience wrapper RetOrArg CreateRet(const Function *F, unsigned Idx) { return RetOrArg(F, Idx, false); } + /// Convenience wrapper RetOrArg CreateArg(const Function *F, unsigned Idx) { return RetOrArg(F, Idx, true); } - typedef std::multimap<RetOrArg, RetOrArg> UseMap; + using UseMap = std::multimap<RetOrArg, RetOrArg>; + /// This maps a return value or argument to any MaybeLive return values or /// arguments it uses. 
This allows the MaybeLive values to be marked live /// when any of their users is marked live. @@ -93,25 +107,21 @@ class DeadArgumentEliminationPass /// directly to F. UseMap Uses; - typedef std::set<RetOrArg> LiveSet; - typedef std::set<const Function *> LiveFuncSet; + using LiveSet = std::set<RetOrArg>; + using LiveFuncSet = std::set<const Function *>; /// This set contains all values that have been determined to be live. LiveSet LiveValues; + /// This set contains all values that cannot be changed in any way. LiveFuncSet LiveFunctions; - typedef SmallVector<RetOrArg, 16> UseVector; + using UseVector = SmallVector<RetOrArg, 16>; /// This allows this pass to do double-duty as the dead arg hacking pass /// (used only by bugpoint). bool ShouldHackArguments = false; -public: - DeadArgumentEliminationPass(bool ShouldHackArguments_ = false) - : ShouldHackArguments(ShouldHackArguments_) {} - PreservedAnalyses run(Module &M, ModuleAnalysisManager &); - private: Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses); Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses, @@ -128,6 +138,7 @@ class DeadArgumentEliminationPass bool DeleteDeadVarargs(Function &Fn); bool RemoveDeadArgumentsFromCallers(Function &Fn); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_DEADARGUMENTELIMINATION_H diff --git a/include/llvm/Transforms/IPO/ElimAvailExtern.h b/include/llvm/Transforms/IPO/ElimAvailExtern.h index 88a0e9bd8ce0f..94cb954fd2d5a 100644 --- a/include/llvm/Transforms/IPO/ElimAvailExtern.h +++ b/include/llvm/Transforms/IPO/ElimAvailExtern.h @@ -15,17 +15,19 @@ #ifndef LLVM_TRANSFORMS_IPO_ELIMAVAILEXTERN_H #define LLVM_TRANSFORMS_IPO_ELIMAVAILEXTERN_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Module; + /// A pass that transforms external global definitions into declarations. class EliminateAvailableExternallyPass : public PassInfoMixin<EliminateAvailableExternallyPass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_ELIMAVAILEXTERN_H diff --git a/include/llvm/Transforms/IPO/FunctionAttrs.h b/include/llvm/Transforms/IPO/FunctionAttrs.h index 36dd06b85b417..dc9f18c794107 100644 --- a/include/llvm/Transforms/IPO/FunctionAttrs.h +++ b/include/llvm/Transforms/IPO/FunctionAttrs.h @@ -1,4 +1,4 @@ -//===-- FunctionAttrs.h - Compute function attrs --------------------------===// +//===- FunctionAttrs.h - Compute function attributes ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,9 +6,11 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// Provides passes for computing function attributes based on interprocedural /// analyses. +// //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H @@ -21,6 +23,9 @@ namespace llvm { class AAResults; +class Function; +class Module; +class Pass; /// The three kinds of memory access relevant to 'readonly' and /// 'readnone' attributes. 
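(Illustrative aside: conclusions of those kinds ultimately surface as IR attributes on the function. A minimal sketch, assuming the usual boolean summaries of a function body; the helper is invented here.)

    #include "llvm/IR/Function.h"

    // Map deduced memory behavior onto 'readnone'/'readonly'.
    void applyAccessKind(llvm::Function &F, bool ReadsMemory, bool WritesMemory) {
      if (!ReadsMemory && !WritesMemory)
        F.addFnAttr(llvm::Attribute::ReadNone);
      else if (!WritesMemory)
        F.addFnAttr(llvm::Attribute::ReadOnly);
    }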
@@ -66,6 +71,7 @@ class ReversePostOrderFunctionAttrsPass public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_FUNCTIONATTRS_H diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h index de35cdf052e1f..63c73af44e87f 100644 --- a/include/llvm/Transforms/IPO/FunctionImport.h +++ b/include/llvm/Transforms/IPO/FunctionImport.h @@ -7,23 +7,26 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_FUNCTIONIMPORT_H -#define LLVM_FUNCTIONIMPORT_H +#ifndef LLVM_TRANSFORMS_IPO_FUNCTIONIMPORT_H +#define LLVM_TRANSFORMS_IPO_FUNCTIONIMPORT_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Error.h" - #include <functional> #include <map> +#include <memory> +#include <string> +#include <system_error> #include <unordered_set> #include <utility> namespace llvm { -class LLVMContext; -class GlobalValueSummary; + class Module; /// The function importer is automatically importing functions from other modules @@ -34,19 +37,19 @@ class FunctionImporter { /// containing all the functions to import for a source module. /// The key is the GUID identifying a function to import, and the value /// is the threshold applied when deciding to import it. - typedef std::map<GlobalValue::GUID, unsigned> FunctionsToImportTy; + using FunctionsToImportTy = std::map<GlobalValue::GUID, unsigned>; /// The map contains an entry for every module to import from, the key being /// the module identifier to pass to the ModuleLoader. The value is the set of /// functions to import. - typedef StringMap<FunctionsToImportTy> ImportMapTy; + using ImportMapTy = StringMap<FunctionsToImportTy>; /// The set contains an entry for every global value the module exports. - typedef std::unordered_set<GlobalValue::GUID> ExportSetTy; + using ExportSetTy = std::unordered_set<GlobalValue::GUID>; /// A function of this type is used to load modules referenced by the index. - typedef std::function<Expected<std::unique_ptr<Module>>(StringRef Identifier)> - ModuleLoaderTy; + using ModuleLoaderTy = + std::function<Expected<std::unique_ptr<Module>>(StringRef Identifier)>; /// Create a Function Importer. FunctionImporter(const ModuleSummaryIndex &Index, ModuleLoaderTy ModuleLoader) @@ -132,6 +135,7 @@ void thinLTOResolveWeakForLinkerModule(Module &TheModule, /// during global summary-based analysis. void thinLTOInternalizeModule(Module &TheModule, const GVSummaryMapTy &DefinedGlobals); -} -#endif // LLVM_FUNCTIONIMPORT_H +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_FUNCTIONIMPORT_H diff --git a/include/llvm/Transforms/IPO/GlobalDCE.h b/include/llvm/Transforms/IPO/GlobalDCE.h index 9ca939c15b62e..7ca241f4645a9 100644 --- a/include/llvm/Transforms/IPO/GlobalDCE.h +++ b/include/llvm/Transforms/IPO/GlobalDCE.h @@ -35,7 +35,7 @@ class GlobalDCEPass : public PassInfoMixin<GlobalDCEPass> { SmallPtrSet<GlobalValue *, 8> AliveGlobals; /// Global -> Global that uses this global. - std::unordered_multimap<GlobalValue *, GlobalValue *> GVDependencies; + DenseMap<GlobalValue *, SmallPtrSet<GlobalValue *, 4>> GVDependencies; /// Constant -> Globals that use this global cache. std::unordered_map<Constant *, SmallPtrSet<GlobalValue *, 8>> diff --git a/include/llvm/Transforms/IPO/GlobalOpt.h b/include/llvm/Transforms/IPO/GlobalOpt.h index ab9116810be1b..5b4878604eab1 100644 --- a/include/llvm/Transforms/IPO/GlobalOpt.h +++ b/include/llvm/Transforms/IPO/GlobalOpt.h @@ -16,17 +16,18 @@ #ifndef LLVM_TRANSFORMS_IPO_GLOBALOPT_H #define LLVM_TRANSFORMS_IPO_GLOBALOPT_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Module; + /// Optimize globals that never have their address taken. 
class GlobalOptPass : public PassInfoMixin<GlobalOptPass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -} +} // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_GLOBALOPT_H diff --git a/include/llvm/Transforms/IPO/GlobalSplit.h b/include/llvm/Transforms/IPO/GlobalSplit.h index fb2c2d27338e0..56cefb7886fec 100644 --- a/include/llvm/Transforms/IPO/GlobalSplit.h +++ b/include/llvm/Transforms/IPO/GlobalSplit.h @@ -17,14 +17,18 @@ #ifndef LLVM_TRANSFORMS_IPO_GLOBALSPLIT_H #define LLVM_TRANSFORMS_IPO_GLOBALSPLIT_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { + +class Module; + /// Pass to perform split of global variables. class GlobalSplitPass : public PassInfoMixin<GlobalSplitPass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -} + +} // end namespace llvm + #endif // LLVM_TRANSFORMS_IPO_GLOBALSPLIT_H diff --git a/include/llvm/Transforms/IPO/Inliner.h b/include/llvm/Transforms/IPO/Inliner.h index b3ca5156e3883..eda8cf462b507 100644 --- a/include/llvm/Transforms/IPO/Inliner.h +++ b/include/llvm/Transforms/IPO/Inliner.h @@ -14,15 +14,15 @@ #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/PassManager.h" #include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" +#include <utility> namespace llvm { + class AssumptionCacheTracker; -class CallSite; -class DataLayout; -class InlineCost; -class OptimizationRemarkEmitter; +class CallGraph; class ProfileSummaryInfo; /// This class contains all of the helper code which is used to perform the @@ -44,6 +44,7 @@ struct LegacyInlinerBase : public CallGraphSCCPass { bool runOnSCC(CallGraphSCC &SCC) override; using llvm::Pass::doFinalization; + /// Remove now-dead linkonce functions at the end of processing to avoid /// breaking the SCC traversal. bool doFinalization(CallGraph &CG) override; @@ -69,7 +70,7 @@ struct LegacyInlinerBase : public CallGraphSCCPass { private: // Insert @llvm.lifetime intrinsics. 
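(Illustrative aside: the InsertLifetime flag, whose comment appears above and whose default is set just below, is configured through the LegacyInlinerBase constructor. A hypothetical subclass, with MyInliner and its ID invented here:)

    struct MyInliner : llvm::LegacyInlinerBase {
      static char ID;
      // Pass false to skip inserting @llvm.lifetime intrinsics when inlining.
      MyInliner() : llvm::LegacyInlinerBase(ID, /*InsertLifetime=*/false) {}
      llvm::InlineCost getInlineCost(llvm::CallSite CS) override;
    };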
- bool InsertLifetime; + bool InsertLifetime = true; protected: AssumptionCacheTracker *ACT; @@ -103,6 +104,6 @@ class InlinerPass : public PassInfoMixin<InlinerPass> { InlineParams Params; }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_TRANSFORMS_IPO_INLINER_H diff --git a/include/llvm/Transforms/IPO/LowerTypeTests.h b/include/llvm/Transforms/IPO/LowerTypeTests.h index a2b888ce9ffa3..3bcfe65df5502 100644 --- a/include/llvm/Transforms/IPO/LowerTypeTests.h +++ b/include/llvm/Transforms/IPO/LowerTypeTests.h @@ -16,7 +16,6 @@ #define LLVM_TRANSFORMS_IPO_LOWERTYPETESTS_H #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include #include @@ -26,6 +25,7 @@ namespace llvm { +class Module; class raw_ostream; namespace lowertypetests { diff --git a/include/llvm/Transforms/IPO/PartialInlining.h b/include/llvm/Transforms/IPO/PartialInlining.h index 15407fc36a225..ec6dd36dae06e 100644 --- a/include/llvm/Transforms/IPO/PartialInlining.h +++ b/include/llvm/Transforms/IPO/PartialInlining.h @@ -1,4 +1,4 @@ -//===- PartialInlining.h - Inline parts of functions --------------------===// +//===- PartialInlining.h - Inline parts of functions ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,15 +15,18 @@ #ifndef LLVM_TRANSFORMS_IPO_PARTIALINLINING_H #define LLVM_TRANSFORMS_IPO_PARTIALINLINING_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Module; + /// Pass to inline parts of functions. class PartialInlinerPass : public PassInfoMixin<PartialInlinerPass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &); }; -} + +} // end namespace llvm + #endif // LLVM_TRANSFORMS_IPO_PARTIALINLINING_H diff --git a/include/llvm/Transforms/PGOInstrumentation.h b/include/llvm/Transforms/PGOInstrumentation.h index 19263f0f8071d..fa7a68624ec82 100644 --- a/include/llvm/Transforms/PGOInstrumentation.h +++ b/include/llvm/Transforms/PGOInstrumentation.h @@ -1,4 +1,4 @@ -//===- Transforms/PGOInstrumentation.h - PGO gen/use passes ---*- C++ -*-===// +//===- Transforms/PGOInstrumentation.h - PGO gen/use passes -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,19 +6,27 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file provides the interface for IR based instrumentation passes ( /// (profile-gen, and profile-use). +// //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_PGOINSTRUMENTATION_H #define LLVM_TRANSFORMS_PGOINSTRUMENTATION_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Instrumentation.h" +#include <cstdint> +#include <string> namespace llvm { +class Function; +class Instruction; +class Module; + /// The instrumentation (profile-instr-gen) pass for IR based PGO. class PGOInstrumentationGen : public PassInfoMixin<PGOInstrumentationGen> { public: @@ -28,9 +36,10 @@ class PGOInstrumentationGen : public PassInfoMixin<PGOInstrumentationGen> { /// The profile annotation (profile-instr-use) pass for IR based PGO. 
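(Illustrative aside: a minimal new-pass-manager pipeline using the PGO passes declared here; "default.profdata" is a placeholder profile path.)

    llvm::ModulePassManager MPM;
    MPM.addPass(llvm::PGOInstrumentationGen());
    // ...run the instrumented program and merge profiles, then separately:
    llvm::ModulePassManager UsePM;
    UsePM.addPass(llvm::PGOInstrumentationUse("default.profdata"));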
class PGOInstrumentationUse : public PassInfoMixin<PGOInstrumentationUse> { public: - PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); PGOInstrumentationUse(std::string Filename = ""); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + private: std::string ProfileFileName; }; @@ -40,6 +49,7 @@ class PGOIndirectCallPromotion : public PassInfoMixin<PGOIndirectCallPromotion> public: PGOIndirectCallPromotion(bool IsInLTO = false, bool SamplePGO = false) : InLTO(IsInLTO), SamplePGO(SamplePGO) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: @@ -50,12 +60,14 @@ class PGOIndirectCallPromotion : public PassInfoMixin<PGOIndirectCallPromotion> /// The profile size based optimization pass for memory intrinsics. class PGOMemOPSizeOpt : public PassInfoMixin<PGOMemOPSizeOpt> { public: - PGOMemOPSizeOpt() {} + PGOMemOPSizeOpt() = default; + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount); -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_PGOINSTRUMENTATION_H diff --git a/include/llvm/Transforms/SampleProfile.h b/include/llvm/Transforms/SampleProfile.h index c984fe74ba939..8f9707835651f 100644 --- a/include/llvm/Transforms/SampleProfile.h +++ b/include/llvm/Transforms/SampleProfile.h @@ -21,10 +21,12 @@ namespace llvm { class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); - SampleProfileLoaderPass(std::string File = "") : ProfileFileName(File) {} + SampleProfileLoaderPass(std::string File = "", bool IsThinLTOPreLink = false) + : ProfileFileName(File), IsThinLTOPreLink(IsThinLTOPreLink) {} private: std::string ProfileFileName; + bool IsThinLTOPreLink; }; } // End llvm namespace diff --git a/include/llvm/Transforms/Scalar/ADCE.h b/include/llvm/Transforms/Scalar/ADCE.h index b9b7e1c0c99fd..f98af62c1a76f 100644 --- a/include/llvm/Transforms/Scalar/ADCE.h +++ b/include/llvm/Transforms/Scalar/ADCE.h @@ -1,4 +1,4 @@ -//===- ADCE.h - Aggressive dead code elimination --------------------------===// +//===- ADCE.h - Aggressive dead code elimination ----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,11 +17,12 @@ #ifndef LLVM_TRANSFORMS_SCALAR_ADCE_H #define LLVM_TRANSFORMS_SCALAR_ADCE_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + /// A DCE pass that assumes instructions are dead until proven otherwise. 
/// /// This pass eliminates dead code by optimistically assuming that all @@ -31,6 +32,7 @@ namespace llvm { struct ADCEPass : PassInfoMixin<ADCEPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_ADCE_H diff --git a/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h b/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h index 38816bbed0680..20930699b5578 100644 --- a/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h +++ b/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h @@ -1,4 +1,4 @@ -//===---- CorrelatedValuePropagation.h --------------------------*- C++ -*-===// +//===- CorrelatedValuePropagation.h -----------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,15 +10,17 @@ #ifndef LLVM_TRANSFORMS_SCALAR_CORRELATEDVALUEPROPAGATION_H #define LLVM_TRANSFORMS_SCALAR_CORRELATEDVALUEPROPAGATION_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + struct CorrelatedValuePropagationPass : PassInfoMixin<CorrelatedValuePropagationPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_CORRELATEDVALUEPROPAGATION_H diff --git a/include/llvm/Transforms/Scalar/DeadStoreElimination.h b/include/llvm/Transforms/Scalar/DeadStoreElimination.h index 3ae999dfb5424..cfeb218142321 100644 --- a/include/llvm/Transforms/Scalar/DeadStoreElimination.h +++ b/include/llvm/Transforms/Scalar/DeadStoreElimination.h @@ -1,4 +1,4 @@ -//===- DeadStoreElimination.h - Fast Dead Store Elimination -------------===// +//===- DeadStoreElimination.h - Fast Dead Store Elimination -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,20 +15,22 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_SCALAR_DSE_H -#define LLVM_TRANSFORMS_SCALAR_DSE_H +#ifndef LLVM_TRANSFORMS_SCALAR_DEADSTOREELIMINATION_H +#define LLVM_TRANSFORMS_SCALAR_DEADSTOREELIMINATION_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + /// This class implements a trivial dead store elimination. We consider /// only the redundant stores that are local to a single Basic Block. class DSEPass : public PassInfoMixin<DSEPass> { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); }; -} -#endif // LLVM_TRANSFORMS_SCALAR_DSE_H +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_DEADSTOREELIMINATION_H diff --git a/include/llvm/Transforms/Scalar/EarlyCSE.h b/include/llvm/Transforms/Scalar/EarlyCSE.h index 969ab78bfd19f..dca3b2dbf04f4 100644 --- a/include/llvm/Transforms/Scalar/EarlyCSE.h +++ b/include/llvm/Transforms/Scalar/EarlyCSE.h @@ -6,19 +6,21 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file provides the interface for a simple, fast CSE pass. -/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_SCALAR_EARLYCSE_H #define LLVM_TRANSFORMS_SCALAR_EARLYCSE_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + /// \brief A simple and fast domtree-based CSE pass. 
/// /// This pass does a simple depth-first walk over the dominator tree, @@ -35,6 +37,6 @@ struct EarlyCSEPass : PassInfoMixin<EarlyCSEPass> { bool UseMemorySSA; }; -} +} // end namespace llvm -#endif +#endif // LLVM_TRANSFORMS_SCALAR_EARLYCSE_H diff --git a/include/llvm/Transforms/Scalar/IndVarSimplify.h b/include/llvm/Transforms/Scalar/IndVarSimplify.h index 4a4683f1a07df..e321c8fc6e9cf 100644 --- a/include/llvm/Transforms/Scalar/IndVarSimplify.h +++ b/include/llvm/Transforms/Scalar/IndVarSimplify.h @@ -15,17 +15,20 @@ #ifndef LLVM_TRANSFORMS_SCALAR_INDVARSIMPLIFY_H #define LLVM_TRANSFORMS_SCALAR_INDVARSIMPLIFY_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class Loop; +class LPMUpdater; + class IndVarSimplifyPass : public PassInfoMixin<IndVarSimplifyPass> { public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_INDVARSIMPLIFY_H diff --git a/include/llvm/Transforms/Scalar/LoopDistribute.h b/include/llvm/Transforms/Scalar/LoopDistribute.h index ddde5954c2189..2bf1c9d696d5b 100644 --- a/include/llvm/Transforms/Scalar/LoopDistribute.h +++ b/include/llvm/Transforms/Scalar/LoopDistribute.h @@ -21,10 +21,13 @@ namespace llvm { +class Function; + class LoopDistributePass : public PassInfoMixin<LoopDistributePass> { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; + } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H diff --git a/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h index 40349e8f7fe06..7added8d2c617 100644 --- a/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h +++ b/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h @@ -1,4 +1,4 @@ -//===- LoopIdiomRecognize.h - Loop Idiom Recognize Pass -------*- C++ -*-===// +//===- LoopIdiomRecognize.h - Loop Idiom Recognize Pass ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,18 +16,21 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPIDIOMRECOGNIZE_H #define LLVM_TRANSFORMS_SCALAR_LOOPIDIOMRECOGNIZE_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class Loop; +class LPMUpdater; + /// Performs Loop Idiom Recognize Pass. 
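(Illustrative aside: these loop passes all share the run(Loop &, LoopAnalysisManager &, LoopStandardAnalysisResults &, LPMUpdater &) signature, so they are scheduled through the function-to-loop adaptor. Sketch:)

    llvm::FunctionPassManager FPM;
    FPM.addPass(
        llvm::createFunctionToLoopPassAdaptor(llvm::IndVarSimplifyPass()));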
class LoopIdiomRecognizePass : public PassInfoMixin<LoopIdiomRecognizePass> { public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; + } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPIDIOMRECOGNIZE_H diff --git a/include/llvm/Transforms/Scalar/LoopInstSimplify.h b/include/llvm/Transforms/Scalar/LoopInstSimplify.h index bb8bc29577a2d..04dc79c3fa573 100644 --- a/include/llvm/Transforms/Scalar/LoopInstSimplify.h +++ b/include/llvm/Transforms/Scalar/LoopInstSimplify.h @@ -1,4 +1,4 @@ -//===- LoopInstSimplify.h - Loop Inst Simplify Pass -------*- C++ -*-===// +//===- LoopInstSimplify.h - Loop Inst Simplify Pass -------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,18 +14,21 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPINSTSIMPLIFY_H #define LLVM_TRANSFORMS_SCALAR_LOOPINSTSIMPLIFY_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class Loop; +class LPMUpdater; + /// Performs Loop Inst Simplify Pass. class LoopInstSimplifyPass : public PassInfoMixin<LoopInstSimplifyPass> { public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; + } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPINSTSIMPLIFY_H diff --git a/include/llvm/Transforms/Scalar/LoopLoadElimination.h b/include/llvm/Transforms/Scalar/LoopLoadElimination.h index 7a007a7e822d2..b0514a4a7c989 100644 --- a/include/llvm/Transforms/Scalar/LoopLoadElimination.h +++ b/include/llvm/Transforms/Scalar/LoopLoadElimination.h @@ -1,4 +1,4 @@ -//===---- LoopLoadElimination.h ---------------------------------*- C++ -*-===// +//===- LoopLoadElimination.h ------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,11 +6,12 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This header defines the LoopLoadEliminationPass object. This pass forwards /// loaded values around loop backedges to allow their use in subsequent /// iterations. -/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_SCALAR_LOOPLOADELIMINATION_H @@ -20,11 +21,14 @@ namespace llvm { +class Function; + /// Pass to forward loads in a loop around the backedge to subsequent /// iterations. 
struct LoopLoadEliminationPass : public PassInfoMixin<LoopLoadEliminationPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPLOADELIMINATION_H diff --git a/include/llvm/Transforms/Scalar/LoopStrengthReduce.h b/include/llvm/Transforms/Scalar/LoopStrengthReduce.h index ebcb32125262b..62c038a3857d6 100644 --- a/include/llvm/Transforms/Scalar/LoopStrengthReduce.h +++ b/include/llvm/Transforms/Scalar/LoopStrengthReduce.h @@ -1,4 +1,4 @@ -//===- LoopStrengthReduce.h - Loop Strength Reduce Pass -------*- C++ -*-===// +//===- LoopStrengthReduce.h - Loop Strength Reduce Pass ---------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -22,18 +22,21 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPSTRENGTHREDUCE_H #define LLVM_TRANSFORMS_SCALAR_LOOPSTRENGTHREDUCE_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class Loop; +class LPMUpdater; + /// Performs Loop Strength Reduce Pass. class LoopStrengthReducePass : public PassInfoMixin<LoopStrengthReducePass> { public: PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; + } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPSTRENGTHREDUCE_H diff --git a/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/include/llvm/Transforms/Scalar/LoopUnrollPass.h index 64501837072c9..9848e0d54f2bf 100644 --- a/include/llvm/Transforms/Scalar/LoopUnrollPass.h +++ b/include/llvm/Transforms/Scalar/LoopUnrollPass.h @@ -10,12 +10,15 @@ #ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H #define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H -#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { +class Function; +class Loop; +class LPMUpdater; + /// Loop unroll pass that only does full loop unrolling. 
class LoopFullUnrollPass : public PassInfoMixin<LoopFullUnrollPass> { const int OptLevel; @@ -40,6 +43,7 @@ class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> { PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; + } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H diff --git a/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index f52872dd2ea78..046c808bd0511 100644 --- a/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -1,4 +1,4 @@ -//===---- MemCpyOptimizer.h - memcpy optimization ---------------*- C++ -*-===// +//===- MemCpyOptimizer.h - memcpy optimization ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,20 +16,27 @@ #define LLVM_TRANSFORMS_SCALAR_MEMCPYOPTIMIZER_H #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" #include #include namespace llvm { +class AssumptionCache; +class CallInst; +class DominatorTree; +class Function; +class Instruction; +class MemCpyInst; +class MemMoveInst; +class MemoryDependenceResults; +class MemSetInst; +class StoreInst; +class TargetLibraryInfo; +class Value; + class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> { MemoryDependenceResults *MD = nullptr; TargetLibraryInfo *TLI = nullptr; @@ -41,6 +48,7 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> { MemCpyOptPass() = default; PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + // Glue for the old PM. bool runImpl(Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, diff --git a/include/llvm/Transforms/Scalar/NaryReassociate.h b/include/llvm/Transforms/Scalar/NaryReassociate.h index f35707eeb3f04..e835bd5f0761c 100644 --- a/include/llvm/Transforms/Scalar/NaryReassociate.h +++ b/include/llvm/Transforms/Scalar/NaryReassociate.h @@ -1,4 +1,4 @@ -//===- NaryReassociate.h - Reassociate n-ary expressions ------------------===// +//===- NaryReassociate.h - Reassociate n-ary expressions --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -81,15 +81,25 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" namespace llvm { + +class AssumptionCache; +class BinaryOperator; +class DataLayout; +class DominatorTree; +class Function; +class GetElementPtrInst; +class Instruction; +class ScalarEvolution; +class SCEV; +class TargetLibraryInfo; +class TargetTransformInfo; +class Type; +class Value; + class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); @@ -109,6 +119,7 @@ class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> { // Reassociate GEP for better CSE. Instruction *tryReassociateGEP(GetElementPtrInst *GEP); + // Try splitting GEP at the I-th index and see whether either part can be + // CSE'ed. This is a helper function for tryReassociateGEP. 
// @@ -118,6 +129,7 @@ class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> { // ..., i-th index). GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I, Type *IndexedType); + // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly. GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP, @@ -146,6 +158,7 @@ class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> { // \c CandidateExpr. Returns null if not found. Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr, Instruction *Dominatee); + // GetElementPtrInst implicitly sign-extends an index if the index is shorter // than the pointer size. This function returns whether Index is shorter than // GEP's pointer size, i.e., whether Index needs to be sign-extended in order @@ -158,6 +171,7 @@ class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> { ScalarEvolution *SE; TargetLibraryInfo *TLI; TargetTransformInfo *TTI; + // A lookup table quickly telling which instructions compute the given SCEV. // Note that there can be multiple instructions at different locations // computing to the same SCEV, so we map a SCEV to an instruction list. For // example: // // if (p1) // foo(a + b); // if (p2) // bar(a + b); DenseMap<const SCEV *, SmallVector<WeakTrackingVH, 2>> SeenExprs; }; -} // namespace llvm + +} // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H diff --git a/include/llvm/Transforms/Scalar/NewGVN.h b/include/llvm/Transforms/Scalar/NewGVN.h index d0425aa4345ff..05db25502dc3c 100644 --- a/include/llvm/Transforms/Scalar/NewGVN.h +++ b/include/llvm/Transforms/Scalar/NewGVN.h @@ -1,4 +1,4 @@ -//===----- NewGVN.h - Global Value Numbering Pass ---------------*- C++ -*-===// +//===- NewGVN.h - Global Value Numbering Pass -------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,9 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file provides the interface for LLVM's Global Value Numbering pass. -/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_SCALAR_NEWGVN_H @@ -17,12 +18,16 @@ #include "llvm/IR/PassManager.h" namespace llvm { + +class Function; + class NewGVNPass : public PassInfoMixin<NewGVNPass> { public: /// \brief Run the pass over the function. PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_NEWGVN_H diff --git a/include/llvm/Transforms/Scalar/Reassociate.h b/include/llvm/Transforms/Scalar/Reassociate.h index a30a7176baa8b..fa87673e3e47b 100644 --- a/include/llvm/Transforms/Scalar/Reassociate.h +++ b/include/llvm/Transforms/Scalar/Reassociate.h @@ -23,22 +23,33 @@ #ifndef LLVM_TRANSFORMS_SCALAR_REASSOCIATE_H #define LLVM_TRANSFORMS_SCALAR_REASSOCIATE_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" namespace llvm { +class APInt; +class BasicBlock; +class BinaryOperator; +class Function; +class Instruction; +class Value; + /// A private "module" namespace for types and utilities used by Reassociate. /// These are implementation details and should not be used by clients. 
namespace reassociate { + struct ValueEntry { unsigned Rank; Value *Op; + ValueEntry(unsigned R, Value *O) : Rank(R), Op(O) {} }; + inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) { return LHS.Rank > RHS.Rank; // Sort so that highest rank goes to start. } @@ -48,11 +59,13 @@ inline bool operator<(const ValueEntry &LHS, const ValueEntry &RHS) { struct Factor { Value *Base; unsigned Power; + Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {} }; class XorOpnd; -} + +} // end namespace reassociate /// Reassociate commutative expressions. class ReassociatePass : public PassInfoMixin { @@ -93,6 +106,7 @@ class ReassociatePass : public PassInfoMixin { void OptimizeInst(Instruction *I); Instruction *canonicalizeNegConstExpr(Instruction *I); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_REASSOCIATE_H diff --git a/include/llvm/Transforms/Scalar/SCCP.h b/include/llvm/Transforms/Scalar/SCCP.h index 6e7f77fe2c501..b93287fff907a 100644 --- a/include/llvm/Transforms/Scalar/SCCP.h +++ b/include/llvm/Transforms/Scalar/SCCP.h @@ -6,7 +6,8 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// \file +// +// \file // This file implements sparse conditional constant propagation and merging: // // Specifically, this: @@ -15,22 +16,23 @@ // * Proves values to be constant, and replaces them with constants // * Proves conditional branches to be unconditional // -/// //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_SCALAR_SCCP_H #define LLVM_TRANSFORMS_SCALAR_SCCP_H -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { +class Function; + /// This pass performs function-level constant propagation and merging. 
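///
/// As a quick illustration (a sketch, not code from this header), SCCP can
/// reduce
///
///   int f() {
///     int x = 4;           // proven constant
///     if (x < 10)          // branch proven unconditional
///       return x * 2;
///     return -1;           // proven unreachable, removed
///   }
///
/// to a function that unconditionally returns 8.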
class SCCPPass : public PassInfoMixin { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_SCCP_H diff --git a/include/llvm/Transforms/Utils/AddDiscriminators.h b/include/llvm/Transforms/Utils/AddDiscriminators.h index a877583009922..4dad06e6c1254 100644 --- a/include/llvm/Transforms/Utils/AddDiscriminators.h +++ b/include/llvm/Transforms/Utils/AddDiscriminators.h @@ -1,4 +1,4 @@ -//===- AddDiscriminators.h -------------------------------------*- C++ -*-===// +//===- AddDiscriminators.h --------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,10 +20,13 @@ namespace llvm { +class Function; + class AddDiscriminatorsPass : public PassInfoMixin { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_ADDDISCRIMINATORS_H diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h index 663bef2594b02..6eca5ed2154e2 100644 --- a/include/llvm/Transforms/Utils/BypassSlowDivision.h +++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h @@ -1,4 +1,4 @@ -//===- llvm/Transforms/Utils/BypassSlowDivision.h --------------*- C++ -*-===// +//===- llvm/Transforms/Utils/BypassSlowDivision.h ---------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,10 +19,14 @@ #define LLVM_TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H #include "llvm/ADT/DenseMap.h" -#include "llvm/IR/Function.h" +#include "llvm/ADT/DenseMapInfo.h" +#include namespace llvm { +class BasicBlock; +class Value; + struct DivRemMapKey { bool SignedOp; Value *Dividend; @@ -61,6 +65,6 @@ template <> struct DenseMapInfo { bool bypassSlowDivision( BasicBlock *BB, const DenseMap &BypassWidth); -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H diff --git a/include/llvm/Transforms/Utils/CodeExtractor.h b/include/llvm/Transforms/Utils/CodeExtractor.h index 682b353ab5ae8..acbea80439242 100644 --- a/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/include/llvm/Transforms/Utils/CodeExtractor.h @@ -1,4 +1,4 @@ -//===-- Transform/Utils/CodeExtractor.h - Code extraction util --*- C++ -*-===// +//===- Transform/Utils/CodeExtractor.h - Code extraction util ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,22 +15,24 @@ #ifndef LLVM_TRANSFORMS_UTILS_CODEEXTRACTOR_H #define LLVM_TRANSFORMS_UTILS_CODEEXTRACTOR_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" +#include namespace llvm { -template class ArrayRef; - class BasicBlock; - class BlockFrequency; - class BlockFrequencyInfo; - class BranchProbabilityInfo; - class DominatorTree; - class Function; - class Instruction; - class Loop; - class Module; - class RegionNode; - class Type; - class Value; + +class BasicBlock; +class BlockFrequency; +class BlockFrequencyInfo; +class BranchProbabilityInfo; +class DominatorTree; +class Function; +class Instruction; +class Loop; +class Module; +class Type; +class Value; /// \brief Utility class for extracting code into a new function. /// @@ -46,7 +48,7 @@ template class ArrayRef; /// 3) Add allocas for any scalar outputs, adding all of the outputs' allocas /// as arguments, and inserting stores to the arguments for any scalars. class CodeExtractor { - typedef SetVector ValueSet; + using ValueSet = SetVector; // Various bits of state computed on construction. 
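  //
  // A minimal driver sketch (illustrative only; assumes Blocks holds the
  // single-entry region described above and DT is the caller's dominator
  // tree):
  //
  //   CodeExtractor CE(Blocks, &DT);
  //   if (CE.isEligible())
  //     Function *Outlined = CE.extractCodeRegion();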
DominatorTree *const DT; @@ -56,16 +58,10 @@ template class ArrayRef; // Bits of intermediate state computed at various phases of extraction. SetVector Blocks; - unsigned NumExitBlocks; + unsigned NumExitBlocks = std::numeric_limits::max(); Type *RetTy; public: - - /// \brief Check to see if a block is valid for extraction. - /// - /// Blocks containing EHPads, allocas, invokes, or vastarts are not valid. - static bool isBlockValidForExtraction(const BasicBlock &BB); - /// \brief Create a code extractor for a sequence of blocks. /// /// Given a sequence of basic blocks where the first block in the sequence @@ -84,6 +80,11 @@ template class ArrayRef; BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr); + /// \brief Check to see if a block is valid for extraction. + /// + /// Blocks containing EHPads, allocas, invokes, or vastarts are not valid. + static bool isBlockValidForExtraction(const BasicBlock &BB); + /// \brief Perform the extraction, returning the new function. /// /// Returns zero when called on a CodeExtractor instance where isEligible @@ -112,6 +113,7 @@ template class ArrayRef; /// /// Returns true if it is safe to do the code motion. bool isLegalToShrinkwrapLifetimeMarkers(Instruction *AllocaAddr) const; + /// Find the set of allocas whose life ranges are contained within the /// outlined region. /// @@ -155,6 +157,7 @@ template class ArrayRef; ValueSet &inputs, ValueSet &outputs); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_CODEEXTRACTOR_H diff --git a/include/llvm/Transforms/Utils/Evaluator.h b/include/llvm/Transforms/Utils/Evaluator.h index 07f12f41b3bcd..0e987b93177aa 100644 --- a/include/llvm/Transforms/Utils/Evaluator.h +++ b/include/llvm/Transforms/Utils/Evaluator.h @@ -1,4 +1,4 @@ -//===-- Evaluator.h - LLVM IR evaluator -------------------------*- C++ -*-===// +//===- Evaluator.h - LLVM IR evaluator --------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,9 +18,10 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constant.h" #include "llvm/IR/GlobalVariable.h" - +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include #include #include @@ -114,6 +115,6 @@ class Evaluator { const TargetLibraryInfo *TLI; }; -} +} // end namespace llvm -#endif +#endif // LLVM_TRANSFORMS_UTILS_EVALUATOR_H diff --git a/include/llvm/Transforms/Utils/FunctionComparator.h b/include/llvm/Transforms/Utils/FunctionComparator.h index b0f10eafaa95f..7698a068717a9 100644 --- a/include/llvm/Transforms/Utils/FunctionComparator.h +++ b/include/llvm/Transforms/Utils/FunctionComparator.h @@ -15,10 +15,10 @@ #ifndef LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H #define LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H -#include "llvm/ADT/APFloat.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueMap.h" #include "llvm/Support/AtomicOrdering.h" @@ -28,7 +28,17 @@ namespace llvm { -class GetElementPtrInst; +class APFloat; +class APInt; +class BasicBlock; +class Constant; +class Function; +class GlobalValue; +class InlineAsm; +class Instruction; +class MDNode; +class Type; +class Value; /// GlobalNumberState assigns an integer to each global value in the program, /// which is used by the comparison routine to order references to globals. 
This @@ -43,14 +53,16 @@ class GetElementPtrInst; /// compare those, but this would not work for stripped bitcodes or for those /// few symbols without a name. class GlobalNumberState { - struct Config : ValueMapConfig { + struct Config : ValueMapConfig { enum { FollowRAUW = false }; }; + // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW // occurs, the mapping does not change. Tracking changes is unnecessary, and // also problematic for weak symbols (which may be overwritten). - typedef ValueMap ValueNumberMap; + using ValueNumberMap = ValueMap; ValueNumberMap GlobalNumbers; + // The next unused serial number to assign to a global. uint64_t NextNumber = 0; @@ -66,6 +78,10 @@ class GlobalNumberState { return MapIter->second; } + void erase(GlobalValue *Global) { + GlobalNumbers.erase(Global); + } + void clear() { GlobalNumbers.clear(); } @@ -83,9 +99,10 @@ class FunctionComparator { /// Test whether the two functions have equivalent behaviour. int compare(); + /// Hash a function. Equivalent functions will have the same hash, and unequal /// functions will have different hashes with high probability. - typedef uint64_t FunctionHash; + using FunctionHash = uint64_t; static FunctionHash functionHash(Function &); protected: diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index b445bbd496671..fe344a7fac678 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -1,4 +1,4 @@ -//===-- Local.h - Functions to perform local transformations ----*- C++ -*-===// +//===- Local.h - Functions to perform local transformations -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,41 +15,44 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H #define LLVM_TRANSFORMS_UTILS_LOCAL_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include +#include namespace llvm { -class User; +class AllocaInst; +class AssumptionCache; class BasicBlock; -class Function; class BranchInst; -class Instruction; class CallInst; -class DbgDeclareInst; class DbgInfoIntrinsic; class DbgValueInst; -class StoreInst; +class DIBuilder; +class Function; +class Instruction; +class LazyValueInfo; class LoadInst; -class Value; +class MDNode; class PHINode; -class AllocaInst; -class AssumptionCache; -class ConstantExpr; -class DataLayout; +class StoreInst; class TargetLibraryInfo; class TargetTransformInfo; -class DIBuilder; -class DominatorTree; -class LazyValueInfo; - -template class SmallVectorImpl; /// A set of parameters used to control the transforms in the SimplifyCFG pass. /// Options may change depending on the position in the optimization pipeline. @@ -57,14 +60,18 @@ template class SmallVectorImpl; /// replaced by lookup tables and selects. 
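///
/// A construction sketch for the widened option set added below (illustrative
/// only; assumes the caller has an AssumptionCache AC in scope):
///
///   SimplifyCFGOptions Opts(/*BonusThreshold=*/1, /*ForwardSwitchCond=*/true,
///                           /*SwitchToLookup=*/true, /*CanonicalLoops=*/false,
///                           /*AssumpCache=*/&AC);
///   simplifyCFG(BB, TTI, Opts);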
 struct SimplifyCFGOptions {
   int BonusInstThreshold;
+  bool ForwardSwitchCondToPhi;
   bool ConvertSwitchToLookupTable;
   bool NeedCanonicalLoop;
+  AssumptionCache *AC;
 
-  SimplifyCFGOptions(int BonusThreshold = 1, bool SwitchToLookup = false,
-                     bool CanonicalLoops = true)
+  SimplifyCFGOptions(int BonusThreshold = 1, bool ForwardSwitchCond = false,
+                     bool SwitchToLookup = false, bool CanonicalLoops = true,
+                     AssumptionCache *AssumpCache = nullptr)
       : BonusInstThreshold(BonusThreshold),
+        ForwardSwitchCondToPhi(ForwardSwitchCond),
         ConvertSwitchToLookupTable(SwitchToLookup),
-        NeedCanonicalLoop(CanonicalLoops) {}
+        NeedCanonicalLoop(CanonicalLoops), AC(AssumpCache) {}
 };
 
 //===----------------------------------------------------------------------===//
@@ -157,8 +164,7 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB);
 /// It returns true if a modification was made, possibly deleting the basic
 /// block that was pointed to. LoopHeaders is an optional input parameter
 /// providing the set of loop headers that SimplifyCFG should not eliminate.
-bool SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
-                 AssumptionCache *AC = nullptr,
+bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
                  const SimplifyCFGOptions &Options = {},
                  SmallPtrSetImpl<BasicBlock *> *LoopHeaders = nullptr);
 
@@ -202,10 +208,10 @@ unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
                                     const DominatorTree *DT = nullptr);
 
 /// Try to infer an alignment for the specified pointer.
-static inline unsigned getKnownAlignment(Value *V, const DataLayout &DL,
-                                         const Instruction *CxtI = nullptr,
-                                         AssumptionCache *AC = nullptr,
-                                         const DominatorTree *DT = nullptr) {
+inline unsigned getKnownAlignment(Value *V, const DataLayout &DL,
+                                  const Instruction *CxtI = nullptr,
+                                  AssumptionCache *AC = nullptr,
+                                  const DominatorTree *DT = nullptr) {
   return getOrEnforceKnownAlignment(V, 0, DL, CxtI, AC, DT);
 }
 
@@ -227,7 +233,8 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
   // Build a mask for high order bits.
   unsigned IntPtrWidth = IntPtrTy->getScalarType()->getIntegerBitWidth();
-  uint64_t PtrSizeMask = ~0ULL >> (64 - IntPtrWidth);
+  uint64_t PtrSizeMask =
+      std::numeric_limits<uint64_t>::max() >> (64 - IntPtrWidth);
 
   gep_type_iterator GTI = gep_type_begin(GEP);
   for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
@@ -388,7 +395,6 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
 unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
                                   const BasicBlock *BB);
 
-
 /// Return true if the CallSite CS calls a gc leaf function.
 ///
 /// A leaf function is a function that does not safepoint the thread during its
@@ -450,6 +456,6 @@ void maybeMarkSanitizerLibraryCallNoBuiltin(CallInst *CI,
 /// value?
 bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx);
 
-} // End llvm namespace
+} // end namespace llvm
 
-#endif
+#endif // LLVM_TRANSFORMS_UTILS_LOCAL_H
diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h
index 75b32902f3022..650224610ad26 100644
--- a/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/include/llvm/Transforms/Utils/LoopUtils.h
@@ -439,6 +439,20 @@ bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                  TargetLibraryInfo *, Loop *, AliasSetTracker *,
                  LoopSafetyInfo *, OptimizationRemarkEmitter *ORE);
 
+/// This function deletes dead loops. The caller of this function needs to
+/// guarantee that the loop is in fact dead.
+/// The function requires a number of prerequisites to be present:
+///   - The loop needs to be in LCSSA form
+///   - The loop needs to have a Preheader
+///   - A unique dedicated exit block must exist
+///
+/// This also updates the relevant analysis information in \p DT, \p SE, and \p
+/// LI if pointers to those are provided.
+/// It also updates the loop PM if an updater struct is provided.
+
+void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
+                    LoopInfo *LI);
+
 /// \brief Try to promote memory values to scalars by sinking stores out of
 /// the loop and moving loads to before the loop. We do this by looping over
 /// the stores in the loop, looking for stores to Must pointers which are
diff --git a/include/llvm/Transforms/Utils/Mem2Reg.h b/include/llvm/Transforms/Utils/Mem2Reg.h
index 1fe186d6c3ad9..407684338a3b7 100644
--- a/include/llvm/Transforms/Utils/Mem2Reg.h
+++ b/include/llvm/Transforms/Utils/Mem2Reg.h
@@ -15,14 +15,17 @@
 #ifndef LLVM_TRANSFORMS_UTILS_MEM2REG_H
 #define LLVM_TRANSFORMS_UTILS_MEM2REG_H
 
-#include "llvm/IR/Function.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
+
+class Function;
+
 class PromotePass : public PassInfoMixin<PromotePass> {
 public:
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
-}
+
+} // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_MEM2REG_H
diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h
index e9793fe4b6666..4b9bc82938106 100644
--- a/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -85,7 +85,8 @@ void filterDeadComdatFunctions(
     Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions);
 
 /// \brief Produce a unique identifier for this module by taking the MD5 sum of
-/// the names of the module's strong external symbols.
+/// the names of the module's strong external symbols that are not comdat
+/// members.
 ///
 /// This identifier is normally guaranteed to be unique, or the program would
 /// fail to link due to multiply defined symbols.
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index 8cbcdf47156ea..6cd9f1539b0b3 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -1,4 +1,4 @@
-//===-- SSAUpdater.h - Unstructured SSA Update Tool -------------*- C++ -*-===//
+//===- SSAUpdater.h - Unstructured SSA Update Tool --------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,6 +14,7 @@
 #ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATER_H
 #define LLVM_TRANSFORMS_UTILS_SSAUPDATER_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include <string>
 
@@ -22,10 +23,9 @@ namespace llvm {
 class BasicBlock;
 class Instruction;
 class LoadInst;
-template <typename T> class ArrayRef;
+class PHINode;
 template <typename T> class SmallVectorImpl;
 template <typename T> class SSAUpdaterTraits;
-class PHINode;
 class Type;
 class Use;
 class Value;
 
@@ -42,7 +42,6 @@ class SSAUpdater {
 private:
   /// This keeps track of which value to use on a per-block basis. When we
   /// insert PHI nodes, we keep track of them here.
-  //typedef DenseMap AvailableValsTy;
   void *AV = nullptr;
 
   /// ProtoType holds the type of the values being rewritten.
@@ -53,12 +52,12 @@ class SSAUpdater {
 
   /// If this is non-null, the SSAUpdater adds all PHI nodes that it creates to
   /// the vector.
-  SmallVectorImpl<PHINode*> *InsertedPHIs;
+  SmallVectorImpl<PHINode *> *InsertedPHIs;
 
 public:
   /// If InsertedPHIs is specified, it will be filled
   /// in with all PHI Nodes created by rewriting.
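  ///
  /// A minimal usage sketch (illustrative only; the values V1/V2 and the
  /// blocks BB1/BB2/UseBB are assumed to be in scope):
  ///
  ///   SSAUpdater SSA;
  ///   SSA.Initialize(V1->getType(), "live");
  ///   SSA.AddAvailableValue(BB1, V1);
  ///   SSA.AddAvailableValue(BB2, V2);
  ///   Value *AtUse = SSA.GetValueInMiddleOfBlock(UseBB);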
- explicit SSAUpdater(SmallVectorImpl *InsertedPHIs = nullptr); + explicit SSAUpdater(SmallVectorImpl *InsertedPHIs = nullptr); SSAUpdater(const SSAUpdater &) = delete; SSAUpdater &operator=(const SSAUpdater &) = delete; ~SSAUpdater(); @@ -136,7 +135,7 @@ class LoadAndStorePromoter { SSAUpdater &SSA; public: - LoadAndStorePromoter(ArrayRef Insts, + LoadAndStorePromoter(ArrayRef Insts, SSAUpdater &S, StringRef Name = StringRef()); virtual ~LoadAndStorePromoter() = default; @@ -145,32 +144,28 @@ class LoadAndStorePromoter { /// Insts is a list of loads and stores to promote, and Name is the basename /// for the PHIs to insert. After this is complete, the loads and stores are /// removed from the code. - void run(const SmallVectorImpl &Insts) const; + void run(const SmallVectorImpl &Insts) const; /// \brief Return true if the specified instruction is in the Inst list. /// /// The Insts list is the one passed into the constructor. Clients should /// implement this with a more efficient version if possible. virtual bool isInstInList(Instruction *I, - const SmallVectorImpl &Insts) const; + const SmallVectorImpl &Insts) const; /// \brief This hook is invoked after all the stores are found and inserted as /// available values. - virtual void doExtraRewritesBeforeFinalDeletion() const { - } + virtual void doExtraRewritesBeforeFinalDeletion() const {} /// \brief Clients can choose to implement this to get notified right before /// a load is RAUW'd another value. - virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const { - } + virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {} /// \brief Called before each instruction is deleted. - virtual void instructionDeleted(Instruction *I) const { - } + virtual void instructionDeleted(Instruction *I) const {} /// \brief Called to update debug info associated with the instruction. - virtual void updateDebugInfo(Instruction *I) const { - } + virtual void updateDebugInfo(Instruction *I) const {} }; } // end namespace llvm diff --git a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h index 2dd205d8b2af2..b1611d49a456e 100644 --- a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h +++ b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h @@ -1,4 +1,4 @@ -//===-- SSAUpdaterImpl.h - SSA Updater Implementation -----------*- C++ -*-===// +//===- SSAUpdaterImpl.h - SSA Updater Implementation ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,17 +17,14 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #define DEBUG_TYPE "ssaupdater" namespace llvm { -class CastInst; -class PHINode; template class SSAUpdaterTraits; template @@ -35,51 +32,67 @@ class SSAUpdaterImpl { private: UpdaterT *Updater; - typedef SSAUpdaterTraits Traits; - typedef typename Traits::BlkT BlkT; - typedef typename Traits::ValT ValT; - typedef typename Traits::PhiT PhiT; + using Traits = SSAUpdaterTraits; + using BlkT = typename Traits::BlkT; + using ValT = typename Traits::ValT; + using PhiT = typename Traits::PhiT; /// BBInfo - Per-basic block information used internally by SSAUpdaterImpl. /// The predecessors of each block are cached here since pred_iterator is /// slow and we need to iterate over the blocks at least a few times. class BBInfo { public: - BlkT *BB; // Back-pointer to the corresponding block. 
- ValT AvailableVal; // Value to use in this block. - BBInfo *DefBB; // Block that defines the available value. - int BlkNum; // Postorder number. - BBInfo *IDom; // Immediate dominator. - unsigned NumPreds; // Number of predecessor blocks. - BBInfo **Preds; // Array[NumPreds] of predecessor blocks. - PhiT *PHITag; // Marker for existing PHIs that match. + // Back-pointer to the corresponding block. + BlkT *BB; + + // Value to use in this block. + ValT AvailableVal; + + // Block that defines the available value. + BBInfo *DefBB; + + // Postorder number. + int BlkNum = 0; + + // Immediate dominator. + BBInfo *IDom = nullptr; + + // Number of predecessor blocks. + unsigned NumPreds = 0; + + // Array[NumPreds] of predecessor blocks. + BBInfo **Preds = nullptr; + + // Marker for existing PHIs that match. + PhiT *PHITag = nullptr; BBInfo(BlkT *ThisBB, ValT V) - : BB(ThisBB), AvailableVal(V), DefBB(V ? this : nullptr), BlkNum(0), - IDom(nullptr), NumPreds(0), Preds(nullptr), PHITag(nullptr) {} + : BB(ThisBB), AvailableVal(V), DefBB(V ? this : nullptr) {} }; - typedef DenseMap AvailableValsTy; + using AvailableValsTy = DenseMap; + AvailableValsTy *AvailableVals; - SmallVectorImpl *InsertedPHIs; + SmallVectorImpl *InsertedPHIs; + + using BlockListTy = SmallVectorImpl; + using BBMapTy = DenseMap; - typedef SmallVectorImpl BlockListTy; - typedef DenseMap BBMapTy; BBMapTy BBMap; BumpPtrAllocator Allocator; public: explicit SSAUpdaterImpl(UpdaterT *U, AvailableValsTy *A, - SmallVectorImpl *Ins) : - Updater(U), AvailableVals(A), InsertedPHIs(Ins) { } + SmallVectorImpl *Ins) : + Updater(U), AvailableVals(A), InsertedPHIs(Ins) {} /// GetValue - Check to see if AvailableVals has an entry for the specified /// BB and if so, return it. If not, construct SSA form by first /// calculating the required placement of PHIs and then inserting new PHIs /// where needed. ValT GetValue(BlkT *BB) { - SmallVector BlockList; + SmallVector BlockList; BBInfo *PseudoEntry = BuildBlockList(BB, &BlockList); // Special case: bail out if BB is unreachable. @@ -101,8 +114,8 @@ class SSAUpdaterImpl { /// Create BBInfo structures for the blocks and append them to the block /// list. BBInfo *BuildBlockList(BlkT *BB, BlockListTy *BlockList) { - SmallVector RootList; - SmallVector WorkList; + SmallVector RootList; + SmallVector WorkList; BBInfo *Info = new (Allocator) BBInfo(BB, 0); BBMap[BB] = Info; @@ -111,7 +124,7 @@ class SSAUpdaterImpl { // Search backward from BB, creating BBInfos along the way and stopping // when reaching blocks that define the value. Record those defining // blocks on the RootList. - SmallVector Preds; + SmallVector Preds; while (!WorkList.empty()) { Info = WorkList.pop_back_val(); Preds.clear(); @@ -395,7 +408,7 @@ class SSAUpdaterImpl { /// CheckIfPHIMatches - Check if a PHI node matches the placement and values /// in the BBMap. bool CheckIfPHIMatches(PhiT *PHI) { - SmallVector WorkList; + SmallVector WorkList; WorkList.push_back(PHI); // Mark that the block containing this PHI has been visited. 
@@ -453,7 +466,7 @@ class SSAUpdaterImpl { } }; -} // end llvm namespace +} // end namespace llvm #undef DEBUG_TYPE // "ssaupdater" diff --git a/include/llvm/Transforms/Utils/SimplifyIndVar.h b/include/llvm/Transforms/Utils/SimplifyIndVar.h index 8d50aeb10d6eb..a1dfed29a22d3 100644 --- a/include/llvm/Transforms/Utils/SimplifyIndVar.h +++ b/include/llvm/Transforms/Utils/SimplifyIndVar.h @@ -26,6 +26,7 @@ class Loop; class LoopInfo; class PHINode; class ScalarEvolution; +class SCEVExpander; /// Interface for visiting interesting IV users that are recognized but not /// simplified by this utility. @@ -47,7 +48,7 @@ class IVVisitor { /// by using ScalarEvolution to analyze the IV's recurrence. bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl &Dead, - IVVisitor *V = nullptr); + SCEVExpander &Rewriter, IVVisitor *V = nullptr); /// SimplifyLoopIVs - Simplify users of induction variables within this /// loop. This does not actually change or add IVs. diff --git a/include/llvm/Transforms/Utils/SplitModule.h b/include/llvm/Transforms/Utils/SplitModule.h index b7a3bcf4f86a5..d2c31f2701acc 100644 --- a/include/llvm/Transforms/Utils/SplitModule.h +++ b/include/llvm/Transforms/Utils/SplitModule.h @@ -22,7 +22,6 @@ namespace llvm { class Module; -class StringRef; /// Splits the module M into N linkable partitions. The function ModuleCallback /// is called N times passing each individual partition as the MPart argument. @@ -39,6 +38,6 @@ void SplitModule( function_ref MPart)> ModuleCallback, bool PreserveLocals = false); -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_TRANSFORMS_UTILS_SPLITMODULE_H diff --git a/include/llvm/Transforms/Utils/SymbolRewriter.h b/include/llvm/Transforms/Utils/SymbolRewriter.h index 93658989fba57..e0caf7741ff39 100644 --- a/include/llvm/Transforms/Utils/SymbolRewriter.h +++ b/include/llvm/Transforms/Utils/SymbolRewriter.h @@ -1,4 +1,4 @@ -//===-- SymbolRewriter.h - Symbol Rewriting Pass ----------------*- C++ -*-===// +//===- SymbolRewriter.h - Symbol Rewriting Pass -----------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -33,7 +33,6 @@ #ifndef LLVM_TRANSFORMS_UTILS_SYMBOLREWRITER_H #define LLVM_TRANSFORMS_UTILS_SYMBOLREWRITER_H -#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include #include @@ -42,6 +41,8 @@ namespace llvm { class MemoryBuffer; +class Module; +class ModulePass; namespace yaml { @@ -89,7 +90,7 @@ class RewriteDescriptor { const Type Kind; }; -typedef std::list> RewriteDescriptorList; +using RewriteDescriptorList = std::list>; class RewriteMapParser { public: @@ -120,6 +121,7 @@ ModulePass *createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &); class RewriteSymbolPass : public PassInfoMixin { public: RewriteSymbolPass() { loadAndParseMapFiles(); } + RewriteSymbolPass(SymbolRewriter::RewriteDescriptorList &DL) { Descriptors.splice(Descriptors.begin(), DL); } diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h index 5893726710d05..5439207577868 100644 --- a/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/include/llvm/Transforms/Utils/UnrollLoop.h @@ -16,24 +16,22 @@ #ifndef LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H #define LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H -// Needed because we can't forward-declare the nested struct -// TargetTransformInfo::UnrollingPreferences +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetTransformInfo.h" namespace llvm 
{ -class StringRef; class AssumptionCache; +class BasicBlock; class DominatorTree; class Loop; class LoopInfo; -class LPPassManager; class MDNode; -class Pass; class OptimizationRemarkEmitter; class ScalarEvolution; -typedef SmallDenseMap NewLoopsMap; +using NewLoopsMap = SmallDenseMap; const Loop* addClonedBlockToLoopInfo(BasicBlock *OriginalBB, BasicBlock *ClonedBB, LoopInfo *LI, @@ -80,6 +78,7 @@ bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name); -} -#endif +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h index 45ef8246dcd16..4ecb23ea19518 100644 --- a/include/llvm/Transforms/Utils/ValueMapper.h +++ b/include/llvm/Transforms/Utils/ValueMapper.h @@ -21,9 +21,17 @@ namespace llvm { -class Value; +class Constant; +class Function; +class GlobalAlias; +class GlobalVariable; class Instruction; -typedef ValueMap ValueToValueMapTy; +class MDNode; +class Metadata; +class Type; +class Value; + +using ValueToValueMapTy = ValueMap; /// This is a class that can be implemented by clients to remap types when /// cloning constants and instructions. @@ -44,10 +52,10 @@ class ValueMaterializer { virtual void anchor(); // Out of line method. protected: - ~ValueMaterializer() = default; ValueMaterializer() = default; ValueMaterializer(const ValueMaterializer &) = default; ValueMaterializer &operator=(const ValueMaterializer &) = default; + ~ValueMaterializer() = default; public: /// This method can be implemented to generate a mapped Value on demand. For @@ -91,7 +99,7 @@ enum RemapFlags { RF_NullMapMissingGlobalValues = 8, }; -static inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) { +inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) { return RemapFlags(unsigned(LHS) | unsigned(RHS)); } diff --git a/include/llvm/Transforms/Vectorize/LoopVectorize.h b/include/llvm/Transforms/Vectorize/LoopVectorize.h index 57d10c4c74734..32b56d372ea16 100644 --- a/include/llvm/Transforms/Vectorize/LoopVectorize.h +++ b/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -1,4 +1,4 @@ -//===---- LoopVectorize.h ---------------------------------------*- C++ -*-===// +//===- LoopVectorize.h ------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -49,27 +49,29 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H -#include "llvm/ADT/MapVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/DemandedBits.h" -#include "llvm/Analysis/LoopAccessAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include namespace llvm { +class AssumptionCache; +class BlockFrequencyInfo; +class DemandedBits; +class DominatorTree; +class Function; +class Loop; +class LoopAccessInfo; +class LoopInfo; +class OptimizationRemarkEmitter; +class ScalarEvolution; +class TargetLibraryInfo; +class TargetTransformInfo; + /// The LoopVectorize Pass. 
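///
/// A scheduling sketch under the new pass manager (illustrative only):
///
///   FunctionPassManager FPM;
///   FPM.addPass(LoopVectorizePass());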
struct LoopVectorizePass : public PassInfoMixin { bool DisableUnrolling = false; + /// If true, consider all loops for vectorization. /// If false, only loops that explicitly request vectorization are /// considered. @@ -99,6 +101,7 @@ struct LoopVectorizePass : public PassInfoMixin { bool processLoop(Loop *L); }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZE_H diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap index 4b177540a500f..cb8ac04a867e4 100644 --- a/include/llvm/module.modulemap +++ b/include/llvm/module.modulemap @@ -123,6 +123,7 @@ module LLVM_DebugInfo_CodeView { module * { export * } // These are intended for (repeated) textual inclusion. + textual header "DebugInfo/CodeView/CodeViewRegisters.def" textual header "DebugInfo/CodeView/CodeViewTypes.def" textual header "DebugInfo/CodeView/CodeViewSymbols.def" } @@ -225,12 +226,6 @@ module LLVM_LTO { requires cplusplus umbrella "LTO" module * { export * } } module LLVM_MC { requires cplusplus - // FIXME: Mislayered? - module Support_TargetRegistry { - header "Support/TargetRegistry.h" - export * - } - umbrella "MC" module * { export * } @@ -238,6 +233,16 @@ module LLVM_MC { exclude header "MC/MCTargetOptionsCommandFlags.h" } +// Used by llvm-tblgen +module LLVM_MC_TableGen { + requires cplusplus + module MC_LaneBitmask { header "MC/LaneBitmask.h" export * } + module MC_FixedLenDisassembler { header "MC/MCFixedLenDisassembler.h" export * } + module MC_InstrItineraries { header "MC/MCInstrItineraries.h" export * } + module MC_Schedule { header "MC/MCSchedule.h" export * } + module MC_SubtargetFeature { header "MC/SubtargetFeature.h" export * } +} + module LLVM_Object { requires cplusplus umbrella "Object" @@ -255,6 +260,13 @@ module LLVM_ProfileData { textual header "ProfileData/InstrProfData.inc" } +// FIXME: Mislayered? 
+module LLVM_Support_TargetRegistry { + requires cplusplus + header "Support/TargetRegistry.h" + export * +} + module LLVM_TableGen { requires cplusplus umbrella "TableGen" module * { export * } } module LLVM_Transforms { diff --git a/include/llvm/module.modulemap.build b/include/llvm/module.modulemap.build index 0f6f82af6e125..162a262a00a78 100644 --- a/include/llvm/module.modulemap.build +++ b/include/llvm/module.modulemap.build @@ -7,3 +7,7 @@ module LLVM_Config_ABI_Breaking { header "Config/abi-breaking.h" export * } +module LLVM_Config_Config { + header "Config/llvm-config.h" + export * +} diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 1b2de163d6518..af2e30db2c127 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -61,7 +61,7 @@ add_llvm_library(LLVMAnalysis ObjCARCAliasAnalysis.cpp ObjCARCAnalysisUtils.cpp ObjCARCInstKind.cpp - OptimizationDiagnosticInfo.cpp + OptimizationRemarkEmitter.cpp OrderedBasicBlock.cpp PHITransAddr.cpp PostDominators.cpp @@ -74,7 +74,6 @@ add_llvm_library(LLVMAnalysis ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionExpander.cpp ScalarEvolutionNormalization.cpp - SparsePropagation.cpp TargetLibraryInfo.cpp TargetTransformInfo.cpp Trace.cpp @@ -82,6 +81,7 @@ add_llvm_library(LLVMAnalysis TypeMetadataUtils.cpp ScopedNoAliasAA.cpp ValueLattice.cpp + ValueLatticeUtils.cpp ValueTracking.cpp VectorUtils.cpp diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 2ee75c83d7076..b7fe884cc22c8 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -172,6 +172,7 @@ class CallAnalyzer : public InstVisitor { void accumulateSROACost(DenseMap::iterator CostIt, int InstructionCost); bool isGEPFree(GetElementPtrInst &GEP); + bool canFoldInboundsGEP(GetElementPtrInst &I); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); bool simplifyCallSite(Function *F, CallSite CS); template @@ -431,40 +432,34 @@ bool CallAnalyzer::visitPHI(PHINode &I) { return true; } +/// \brief Check we can fold GEPs of constant-offset call site argument pointers. +/// This requires target data and inbounds GEPs. +/// +/// \return true if the specified GEP can be folded. +bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) { + // Check if we have a base + offset for the pointer. + std::pair BaseAndOffset = + ConstantOffsetPtrs.lookup(I.getPointerOperand()); + if (!BaseAndOffset.first) + return false; + + // Check if the offset of this GEP is constant, and if so accumulate it + // into Offset. + if (!accumulateGEPOffset(cast(I), BaseAndOffset.second)) + return false; + + // Add the result as a new mapping to Base + Offset. + ConstantOffsetPtrs[&I] = BaseAndOffset; + + return true; +} + bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { Value *SROAArg; DenseMap::iterator CostIt; bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt); - // Try to fold GEPs of constant-offset call site argument pointers. This - // requires target data and inbounds GEPs. - if (I.isInBounds()) { - // Check if we have a base + offset for the pointer. - Value *Ptr = I.getPointerOperand(); - std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); - if (BaseAndOffset.first) { - // Check if the offset of this GEP is constant, and if so accumulate it - // into Offset. - if (!accumulateGEPOffset(cast(I), BaseAndOffset.second)) { - // Non-constant GEPs aren't folded, and disable SROA. 
- if (SROACandidate) - disableSROA(CostIt); - return isGEPFree(I); - } - - // Add the result as a new mapping to Base + Offset. - ConstantOffsetPtrs[&I] = BaseAndOffset; - - // Also handle SROA candidates here, we already know that the GEP is - // all-constant indexed. - if (SROACandidate) - SROAArgValues[&I] = SROAArg; - - return true; - } - } - // Lambda to check whether a GEP's indices are all constant. auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) { for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) @@ -473,7 +468,7 @@ bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { return true; }; - if (IsGEPOffsetConstant(I)) { + if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) { if (SROACandidate) SROAArgValues[&I] = SROAArg; @@ -1445,10 +1440,12 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || HasIndirectBr || HasFrameEscape) { if (ORE) - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", - CandidateCS.getInstruction()) - << NV("Callee", &F) - << " has uninlinable pattern and cost is not fully computed"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", + CandidateCS.getInstruction()) + << NV("Callee", &F) + << " has uninlinable pattern and cost is not fully computed"; + }); return false; } @@ -1458,12 +1455,13 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, if (IsCallerRecursive && AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) { if (ORE) - ORE->emit( - OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", - CandidateCS.getInstruction()) - << NV("Callee", &F) - << " is recursive and allocates too much stack space. Cost is " - "not fully computed"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", + CandidateCS.getInstruction()) + << NV("Callee", &F) + << " is recursive and allocates too much stack space. 
Cost is " + "not fully computed"; + }); return false; } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 05afc4f55012a..78ae0abf2a154 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -27,7 +27,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/ConstantRange.h" @@ -3580,6 +3580,9 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, // select true, X, Y -> X // select false, X, Y -> Y if (Constant *CB = dyn_cast(CondVal)) { + if (Constant *CT = dyn_cast(TrueVal)) + if (Constant *CF = dyn_cast(FalseVal)) + return ConstantFoldSelectInstruction(CB, CT, CF); if (CB->isAllOnesValue()) return TrueVal; if (CB->isNullValue()) diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index ada600a69b872..ae92f502f5c88 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -683,7 +683,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, if (Instruction::isCast(CE->getOpcode())) { if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), CE->getOperand(0)->getType(), CE->getType(), - DL->getIntPtrType(V->getType()))) + *DL)) return findValueImpl(CE->getOperand(0), OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { ArrayRef Indices = CE->getIndices(); diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index eb633196d338e..19889658b13c6 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -29,7 +29,7 @@ #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" diff --git a/lib/Analysis/LoopAnalysisManager.cpp b/lib/Analysis/LoopAnalysisManager.cpp index 84a891c3f4fab..7647f85019d5e 100644 --- a/lib/Analysis/LoopAnalysisManager.cpp +++ b/lib/Analysis/LoopAnalysisManager.cpp @@ -56,8 +56,10 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate( // analysis manager's cache. So we just walk the keys and forcibly clear // those results. Note that the order doesn't matter here as this will just // directly destroy the results without calling methods on them. - for (Loop *L : PreOrderLoops) - InnerAM->clear(*L, L->getName()); + for (Loop *L : PreOrderLoops) { + // NB! `L` may not be in a good enough state to run Loop::getName. + InnerAM->clear(*L, ""); + } // We also need to null out the inner AM so that when the object gets // destroyed as invalid we don't try to clear the inner AM again. At that diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 0471213b69c5a..c6019f267a30f 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -268,6 +268,39 @@ void Loop::setLoopID(MDNode *LoopID) const { } } +void Loop::setLoopAlreadyUnrolled() { + MDNode *LoopID = getLoopID(); + // First remove any existing loop unrolling metadata. + SmallVector MDs; + // Reserve first location for self reference to the LoopID metadata node. 
+ MDs.push_back(nullptr); + + if (LoopID) { + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + bool IsUnrollMetadata = false; + MDNode *MD = dyn_cast(LoopID->getOperand(i)); + if (MD) { + const MDString *S = dyn_cast(MD->getOperand(0)); + IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); + } + if (!IsUnrollMetadata) + MDs.push_back(LoopID->getOperand(i)); + } + } + + // Add unroll(disable) metadata to disable future unrolling. + LLVMContext &Context = getHeader()->getContext(); + SmallVector DisableOperands; + DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); + MDNode *DisableNode = MDNode::get(Context, DisableOperands); + MDs.push_back(DisableNode); + + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + setLoopID(NewLoopID); +} + bool Loop::isAnnotatedParallel() const { MDNode *DesiredLoopIdMetadata = getLoopID(); diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index a6ffe20b27f06..ba90f1cf2fbd3 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -196,14 +196,17 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom( // Walk backwards through the block, looking for dependencies. while (ScanIt != BB->begin()) { + Instruction *Inst = &*--ScanIt; + // Debug intrinsics don't cause dependences and should not affect Limit + if (isa(Inst)) + continue; + // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. --Limit; if (!Limit) return MemDepResult::getUnknown(); - Instruction *Inst = &*--ScanIt; - // If this inst is a memory op, get the pointer it accessed MemoryLocation Loc; ModRefInfo MR = GetLocation(Inst, Loc, TLI); @@ -215,9 +218,6 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom( } if (auto InstCS = CallSite(Inst)) { - // Debug intrinsics don't cause dependences. - if (isa(Inst)) - continue; // If these two calls do not interfere, look past it. switch (AA.getModRefInfo(CS, InstCS)) { case MRI_NoModRef: diff --git a/lib/Analysis/OptimizationDiagnosticInfo.cpp b/lib/Analysis/OptimizationRemarkEmitter.cpp similarity index 56% rename from lib/Analysis/OptimizationDiagnosticInfo.cpp rename to lib/Analysis/OptimizationRemarkEmitter.cpp index c31de0fc42e7b..cd6a93668010d 100644 --- a/lib/Analysis/OptimizationDiagnosticInfo.cpp +++ b/lib/Analysis/OptimizationRemarkEmitter.cpp @@ -1,4 +1,4 @@ -//===- OptimizationDiagnosticInfo.cpp - Optimization Diagnostic -*- C++ -*-===// +//===- OptimizationRemarkEmitter.cpp - Optimization Diagnostic --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,7 +12,7 @@ // used to compute the "hotness" of the diagnostic message. 
//===----------------------------------------------------------------------===// -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopInfo.h" @@ -64,86 +64,6 @@ Optional OptimizationRemarkEmitter::computeHotness(const Value *V) { return BFI->getBlockProfileCount(cast(V)); } -namespace llvm { -namespace yaml { - -void MappingTraits::mapping( - IO &io, DiagnosticInfoOptimizationBase *&OptDiag) { - assert(io.outputting() && "input not yet implemented"); - - if (io.mapTag("!Passed", - (OptDiag->getKind() == DK_OptimizationRemark || - OptDiag->getKind() == DK_MachineOptimizationRemark))) - ; - else if (io.mapTag( - "!Missed", - (OptDiag->getKind() == DK_OptimizationRemarkMissed || - OptDiag->getKind() == DK_MachineOptimizationRemarkMissed))) - ; - else if (io.mapTag( - "!Analysis", - (OptDiag->getKind() == DK_OptimizationRemarkAnalysis || - OptDiag->getKind() == DK_MachineOptimizationRemarkAnalysis))) - ; - else if (io.mapTag("!AnalysisFPCommute", - OptDiag->getKind() == - DK_OptimizationRemarkAnalysisFPCommute)) - ; - else if (io.mapTag("!AnalysisAliasing", - OptDiag->getKind() == - DK_OptimizationRemarkAnalysisAliasing)) - ; - else if (io.mapTag("!Failure", OptDiag->getKind() == DK_OptimizationFailure)) - ; - else - llvm_unreachable("Unknown remark type"); - - // These are read-only for now. - DiagnosticLocation DL = OptDiag->getLocation(); - StringRef FN = - GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName()); - - StringRef PassName(OptDiag->PassName); - io.mapRequired("Pass", PassName); - io.mapRequired("Name", OptDiag->RemarkName); - if (!io.outputting() || DL.isValid()) - io.mapOptional("DebugLoc", DL); - io.mapRequired("Function", FN); - io.mapOptional("Hotness", OptDiag->Hotness); - io.mapOptional("Args", OptDiag->Args); -} - -template <> struct MappingTraits { - static void mapping(IO &io, DiagnosticLocation &DL) { - assert(io.outputting() && "input not yet implemented"); - - StringRef File = DL.getFilename(); - unsigned Line = DL.getLine(); - unsigned Col = DL.getColumn(); - - io.mapRequired("File", File); - io.mapRequired("Line", Line); - io.mapRequired("Column", Col); - } - - static const bool flow = true; -}; - -// Implement this as a mapping for now to get proper quotation for the value. -template <> struct MappingTraits { - static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) { - assert(io.outputting() && "input not yet implemented"); - io.mapRequired(A.Key.data(), A.Val); - if (A.Loc.isValid()) - io.mapOptional("DebugLoc", A.Loc); - } -}; - -} // end namespace yaml -} // end namespace llvm - -LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument) - void OptimizationRemarkEmitter::computeHotness( DiagnosticInfoIROptimization &OptDiag) { const Value *V = OptDiag.getCodeRegion(); @@ -163,16 +83,7 @@ void OptimizationRemarkEmitter::emit( return; } - yaml::Output *Out = F->getContext().getDiagnosticsOutputFile(); - if (Out) { - // For remarks the << operator takes a reference to a pointer. - auto *P = &OptDiagBase; - *Out << P; - } - // FIXME: now that IsVerbose is part of DI, filtering for this will be moved - // from here to clang. 
- if (!OptDiag.isVerbose() || shouldEmitVerbose()) - F->getContext().diagnose(OptDiag); + F->getContext().diagnose(OptDiag); } OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass() diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 89897c6da5ec9..d48e8a57562cc 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -1290,6 +1290,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -1580,6 +1581,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -1766,6 +1768,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -1803,6 +1806,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -2014,6 +2018,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -2662,6 +2667,7 @@ ScalarEvolution::getOrCreateAddExpr(SmallVectorImpl &Ops, S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); } S->setNoWrapFlags(Flags); return S; @@ -2683,6 +2689,7 @@ ScalarEvolution::getOrCreateMulExpr(SmallVectorImpl &Ops, S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); } S->setNoWrapFlags(Flags); return S; @@ -3135,6 +3142,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -3315,6 +3323,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Operands.size(), L); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); } S->setNoWrapFlags(Flags); return S; @@ -3470,6 +3479,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -3571,6 +3581,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); return S; } @@ -4523,8 +4534,7 @@ ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI : SCEVWrapPredicate::IncrementNUSW; const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags); Predicates.push_back(AddRecPred); - } else - assert(isa(PHISCEV) && "Expected constant SCEV"); + } // Create the Equal Predicates P2,P3: @@ -6393,6 +6403,13 @@ void ScalarEvolution::forgetLoop(const Loop *L) { ++I; } + auto LoopUsersItr = 
LoopUsers.find(CurrL); + if (LoopUsersItr != LoopUsers.end()) { + for (auto *S : LoopUsersItr->second) + forgetMemoizedResults(S); + LoopUsers.erase(LoopUsersItr); + } + // Drop information about expressions based on loop-header PHIs. PushLoopPHIs(CurrL, Worklist); @@ -9672,14 +9689,54 @@ const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, return getUDivExpr(Delta, Step); } +const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start, + const SCEV *Stride, + const SCEV *End, + unsigned BitWidth, + bool IsSigned) { + + assert(!isKnownNonPositive(Stride) && + "Stride is expected strictly positive!"); + // Calculate the maximum backedge count based on the range of values + // permitted by Start, End, and Stride. + const SCEV *MaxBECount; + APInt MinStart = + IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start); + + APInt StrideForMaxBECount = + IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride); + + // We already know that the stride is positive, so we paper over conservatism + // in our range computation by forcing StrideForMaxBECount to be at least one. + // In theory this is unnecessary, but we expect MaxBECount to be a + // SCEVConstant, and (udiv 0) is not constant folded by SCEV (there + // is nothing to constant fold it to). + APInt One(BitWidth, 1, IsSigned); + StrideForMaxBECount = APIntOps::smax(One, StrideForMaxBECount); + + APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth) + : APInt::getMaxValue(BitWidth); + APInt Limit = MaxValue - (StrideForMaxBECount - 1); + + // Although End can be a MAX expression we estimate MaxEnd considering only + // the case End = RHS of the loop termination condition. This is safe because + // in the other case (End - Start) is zero, leading to a zero maximum backedge + // taken count. + APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit) + : APIntOps::umin(getUnsignedRangeMax(End), Limit); + + MaxBECount = computeBECount(getConstant(MaxEnd - MinStart) /* Delta */, + getConstant(StrideForMaxBECount) /* Step */, + false /* Equality */); + + return MaxBECount; +} + ScalarEvolution::ExitLimit ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned, bool ControlsExit, bool AllowPredicates) { SmallPtrSet Predicates; - // We handle only IV < Invariant - if (!isLoopInvariant(RHS, L)) - return getCouldNotCompute(); const SCEVAddRecExpr *IV = dyn_cast(LHS); bool PredicatedIV = false; @@ -9762,6 +9819,17 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, : ICmpInst::ICMP_ULT; const SCEV *Start = IV->getStart(); const SCEV *End = RHS; + // When the RHS is not invariant, we do not know the end bound of the loop and + // cannot calculate the ExactBECount needed by ExitLimit. However, we can + // calculate the MaxBECount, given the start, stride and max value for the end + // bound of the loop (RHS), and the fact that IV does not overflow (which is + // checked above). 
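  // Worked example (illustrative): for an unsigned i8 IV with MinStart = 0, a
  // stride known to be >= 2, and an unknown End, computeMaxBECountForLT uses
  // StrideForMaxBECount = 2 and Limit = 255 - (2 - 1) = 254, so
  // MaxEnd = umin(range-max of End, 254) and the reported maximum backedge
  // count is ceil((MaxEnd - MinStart) / 2).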
+ if (!isLoopInvariant(RHS, L)) { + const SCEV *MaxBECount = computeMaxBECountForLT( + Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); + return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount, + false /*MaxOrZero*/, Predicates); + } // If the backedge is taken at least once, then it will be taken // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start // is the LHS value of the less-than comparison the first time it is evaluated @@ -9794,37 +9862,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, MaxBECount = BECountIfBackedgeTaken; MaxOrZero = true; } else { - // Calculate the maximum backedge count based on the range of values - // permitted by Start, End, and Stride. - APInt MinStart = IsSigned ? getSignedRangeMin(Start) - : getUnsignedRangeMin(Start); - - unsigned BitWidth = getTypeSizeInBits(LHS->getType()); - - APInt StrideForMaxBECount; - - if (PositiveStride) - StrideForMaxBECount = - IsSigned ? getSignedRangeMin(Stride) - : getUnsignedRangeMin(Stride); - else - // Using a stride of 1 is safe when computing max backedge taken count for - // a loop with unknown stride. - StrideForMaxBECount = APInt(BitWidth, 1, IsSigned); - - APInt Limit = - IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1) - : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1); - - // Although End can be a MAX expression we estimate MaxEnd considering only - // the case End = RHS. This is safe because in the other case (End - Start) - // is zero, leading to a zero maximum backedge taken count. - APInt MaxEnd = - IsSigned ? APIntOps::smin(getSignedRangeMax(RHS), Limit) - : APIntOps::umin(getUnsignedRangeMax(RHS), Limit); - - MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), - getConstant(StrideForMaxBECount), false); + MaxBECount = computeMaxBECountForLT( + Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); } if (isa(MaxBECount) && @@ -10575,6 +10614,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) UniqueSCEVs(std::move(Arg.UniqueSCEVs)), UniquePreds(std::move(Arg.UniquePreds)), SCEVAllocator(std::move(Arg.SCEVAllocator)), + LoopUsers(std::move(Arg.LoopUsers)), PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)), FirstUnknown(Arg.FirstUnknown) { Arg.FirstUnknown = nullptr; @@ -11017,6 +11057,25 @@ ScalarEvolution::forgetMemoizedResults(const SCEV *S, bool EraseExitLimit) { ExitLimits.erase(I); } +void ScalarEvolution::addToLoopUseLists(const SCEV *S) { + struct FindUsedLoops { + SmallPtrSet LoopsUsed; + bool follow(const SCEV *S) { + if (auto *AR = dyn_cast(S)) + LoopsUsed.insert(AR->getLoop()); + return true; + } + + bool isDone() const { return false; } + }; + + FindUsedLoops F; + SCEVTraversal(F).visitAll(S); + + for (auto *L : F.LoopsUsed) + LoopUsers[L].push_back(S); +} + void ScalarEvolution::verify() const { ScalarEvolution &SE = *const_cast(this); ScalarEvolution SE2(F, TLI, AC, DT, LI); diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 47bdac00ae1f3..964a79803fa9d 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -2250,10 +2250,6 @@ namespace { // only needed when the expression includes some subexpression that is not IV // derived. // -// Currently, we only allow division by a nonzero constant here. If this is -// inadequate, we could easily allow division by SCEVUnknown by using -// ValueTracking to check isKnownNonZero(). 
-// // We cannot generally expand recurrences unless the step dominates the loop // header. The expander handles the special case of affine recurrences by // scaling the recurrence outside the loop, but this technique isn't generally @@ -2268,13 +2264,11 @@ struct SCEVFindUnsafe { bool follow(const SCEV *S) { if (const SCEVUDivExpr *D = dyn_cast(S)) { - const SCEVConstant *SC = dyn_cast(D->getRHS()); - if (!SC || SC->getValue()->isZero()) { + if (!SE.isKnownNonZero(D->getRHS())) { IsUnsafe = true; return false; } - } - if (const SCEVAddRecExpr *AR = dyn_cast(S)) { + } else if (const SCEVAddRecExpr *AR = dyn_cast(S)) { const SCEV *Step = AR->getStepRecurrence(SE); if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { IsUnsafe = true; diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp deleted file mode 100644 index 91e49b4e6a282..0000000000000 --- a/lib/Analysis/SparsePropagation.cpp +++ /dev/null @@ -1,351 +0,0 @@ -//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements an abstract sparse conditional propagation algorithm, -// modeled after SCCP, but with a customizable lattice function. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/SparsePropagation.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Argument.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constant.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/User.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -#define DEBUG_TYPE "sparseprop" - -//===----------------------------------------------------------------------===// -// AbstractLatticeFunction Implementation -//===----------------------------------------------------------------------===// - -AbstractLatticeFunction::~AbstractLatticeFunction() = default; - -/// PrintValue - Render the specified lattice value to the specified stream. -void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) { - if (V == UndefVal) - OS << "undefined"; - else if (V == OverdefinedVal) - OS << "overdefined"; - else if (V == UntrackedVal) - OS << "untracked"; - else - OS << "unknown lattice value"; -} - -//===----------------------------------------------------------------------===// -// SparseSolver Implementation -//===----------------------------------------------------------------------===// - -/// getOrInitValueState - Return the LatticeVal object that corresponds to the -/// value, initializing the value's state if it hasn't been entered into the -/// map yet. This function is necessary because not all values should start -/// out in the underdefined state... Arguments should be overdefined, and -/// constants should be marked as constants. 
-SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) { - DenseMap::iterator I = ValueState.find(V); - if (I != ValueState.end()) return I->second; // Common case, in the map - - LatticeVal LV; - if (LatticeFunc->IsUntrackedValue(V)) - return LatticeFunc->getUntrackedVal(); - else if (Constant *C = dyn_cast(V)) - LV = LatticeFunc->ComputeConstant(C); - else if (Argument *A = dyn_cast(V)) - LV = LatticeFunc->ComputeArgument(A); - else if (!isa(V)) - // All other non-instructions are overdefined. - LV = LatticeFunc->getOverdefinedVal(); - else - // All instructions are underdefined by default. - LV = LatticeFunc->getUndefVal(); - - // If this value is untracked, don't add it to the map. - if (LV == LatticeFunc->getUntrackedVal()) - return LV; - return ValueState[V] = LV; -} - -/// UpdateState - When the state for some instruction is potentially updated, -/// this function notices and adds I to the worklist if needed. -void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) { - DenseMap::iterator I = ValueState.find(&Inst); - if (I != ValueState.end() && I->second == V) - return; // No change. - - // An update. Visit uses of I. - ValueState[&Inst] = V; - InstWorkList.push_back(&Inst); -} - -/// MarkBlockExecutable - This method can be used by clients to mark all of -/// the blocks that are known to be intrinsically live in the processed unit. -void SparseSolver::MarkBlockExecutable(BasicBlock *BB) { - DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n"); - BBExecutable.insert(BB); // Basic block is executable! - BBWorkList.push_back(BB); // Add the block to the work list! -} - -/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB -/// work list if it is not already executable... -void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) { - if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second) - return; // This edge is already known to be executable! - - DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName() - << " -> " << Dest->getName() << "\n"); - - if (BBExecutable.count(Dest)) { - // The destination is already executable, but we just made an edge - // feasible that wasn't before. Revisit the PHI nodes in the block - // because they have potentially new operands. - for (BasicBlock::iterator I = Dest->begin(); isa(I); ++I) - visitPHINode(*cast(I)); - } else { - MarkBlockExecutable(Dest); - } -} - -/// getFeasibleSuccessors - Return a vector of booleans to indicate which -/// successors are reachable from a given terminator instruction. -void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, - SmallVectorImpl &Succs, - bool AggressiveUndef) { - Succs.resize(TI.getNumSuccessors()); - if (TI.getNumSuccessors() == 0) return; - - if (BranchInst *BI = dyn_cast(&TI)) { - if (BI->isUnconditional()) { - Succs[0] = true; - return; - } - - LatticeVal BCValue; - if (AggressiveUndef) - BCValue = getOrInitValueState(BI->getCondition()); - else - BCValue = getLatticeState(BI->getCondition()); - - if (BCValue == LatticeFunc->getOverdefinedVal() || - BCValue == LatticeFunc->getUntrackedVal()) { - // Overdefined condition variables can branch either way. - Succs[0] = Succs[1] = true; - return; - } - - // If undefined, neither is feasible yet. - if (BCValue == LatticeFunc->getUndefVal()) - return; - - Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this); - if (!C || !isa(C)) { - // Non-constant values can go either way. 
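
Condensed, the successor-feasibility rule the (now-deleted) solver applies here, and again in the switch handling that follows, is a small table; the enum and helper below are a toy sketch, not the class's real types:

#include <utility>

enum class CondState { Undefined, ConstantTrue, ConstantFalse, Overdefined };

// Feasibility of {true successor, false successor} for a conditional
// branch, given the lattice state of its condition:
//  - Overdefined/untracked conditions can branch either way.
//  - Undefined conditions make no edge feasible yet.
//  - Constant conditions select exactly one edge (the original's
//    Succs[C->isNullValue()] = true encodes the same selection).
static std::pair<bool, bool> feasibleSuccessors(CondState S) {
  switch (S) {
  case CondState::Overdefined:   return {true, true};
  case CondState::Undefined:     return {false, false};
  case CondState::ConstantTrue:  return {true, false};
  case CondState::ConstantFalse: return {false, true};
  }
  return {true, true}; // Unreachable with a valid CondState.
}
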
- Succs[0] = Succs[1] = true; - return; - } - - // Constant condition variables mean the branch can only go a single way - Succs[C->isNullValue()] = true; - return; - } - - if (isa(TI)) { - // Invoke instructions successors are always executable. - // TODO: Could ask the lattice function if the value can throw. - Succs[0] = Succs[1] = true; - return; - } - - if (isa(TI)) { - Succs.assign(Succs.size(), true); - return; - } - - SwitchInst &SI = cast(TI); - LatticeVal SCValue; - if (AggressiveUndef) - SCValue = getOrInitValueState(SI.getCondition()); - else - SCValue = getLatticeState(SI.getCondition()); - - if (SCValue == LatticeFunc->getOverdefinedVal() || - SCValue == LatticeFunc->getUntrackedVal()) { - // All destinations are executable! - Succs.assign(TI.getNumSuccessors(), true); - return; - } - - // If undefined, neither is feasible yet. - if (SCValue == LatticeFunc->getUndefVal()) - return; - - Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this); - if (!C || !isa(C)) { - // All destinations are executable! - Succs.assign(TI.getNumSuccessors(), true); - return; - } - SwitchInst::CaseHandle Case = *SI.findCaseValue(cast(C)); - Succs[Case.getSuccessorIndex()] = true; -} - -/// isEdgeFeasible - Return true if the control flow edge from the 'From' -/// basic block to the 'To' basic block is currently feasible... -bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To, - bool AggressiveUndef) { - SmallVector SuccFeasible; - TerminatorInst *TI = From->getTerminator(); - getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef); - - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - if (TI->getSuccessor(i) == To && SuccFeasible[i]) - return true; - - return false; -} - -void SparseSolver::visitTerminatorInst(TerminatorInst &TI) { - SmallVector SuccFeasible; - getFeasibleSuccessors(TI, SuccFeasible, true); - - BasicBlock *BB = TI.getParent(); - - // Mark all feasible successors executable... - for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i) - if (SuccFeasible[i]) - markEdgeExecutable(BB, TI.getSuccessor(i)); -} - -void SparseSolver::visitPHINode(PHINode &PN) { - // The lattice function may store more information on a PHINode than could be - // computed from its incoming values. For example, SSI form stores its sigma - // functions as PHINodes with a single incoming value. - if (LatticeFunc->IsSpecialCasedPHI(&PN)) { - LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this); - if (IV != LatticeFunc->getUntrackedVal()) - UpdateState(PN, IV); - return; - } - - LatticeVal PNIV = getOrInitValueState(&PN); - LatticeVal Overdefined = LatticeFunc->getOverdefinedVal(); - - // If this value is already overdefined (common) just return. - if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal()) - return; // Quick exit - - // Super-extra-high-degree PHI nodes are unlikely to ever be interesting, - // and slow us down a lot. Just mark them overdefined. - if (PN.getNumIncomingValues() > 64) { - UpdateState(PN, Overdefined); - return; - } - - // Look at all of the executable operands of the PHI node. If any of them - // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the - // transfer function to give us the merge of the incoming values. - for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { - // If the edge is not yet known to be feasible, it doesn't impact the PHI. - if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true)) - continue; - - // Merge in this value. 
- LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i)); - if (OpVal != PNIV) - PNIV = LatticeFunc->MergeValues(PNIV, OpVal); - - if (PNIV == Overdefined) - break; // Rest of input values don't matter. - } - - // Update the PHI with the compute value, which is the merge of the inputs. - UpdateState(PN, PNIV); -} - -void SparseSolver::visitInst(Instruction &I) { - // PHIs are handled by the propagation logic, they are never passed into the - // transfer functions. - if (PHINode *PN = dyn_cast(&I)) - return visitPHINode(*PN); - - // Otherwise, ask the transfer function what the result is. If this is - // something that we care about, remember it. - LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this); - if (IV != LatticeFunc->getUntrackedVal()) - UpdateState(I, IV); - - if (TerminatorInst *TI = dyn_cast(&I)) - visitTerminatorInst(*TI); -} - -void SparseSolver::Solve(Function &F) { - MarkBlockExecutable(&F.getEntryBlock()); - - // Process the work lists until they are empty! - while (!BBWorkList.empty() || !InstWorkList.empty()) { - // Process the instruction work list. - while (!InstWorkList.empty()) { - Instruction *I = InstWorkList.back(); - InstWorkList.pop_back(); - - DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n"); - - // "I" got into the work list because it made a transition. See if any - // users are both live and in need of updating. - for (User *U : I->users()) { - Instruction *UI = cast(U); - if (BBExecutable.count(UI->getParent())) // Inst is executable? - visitInst(*UI); - } - } - - // Process the basic block work list. - while (!BBWorkList.empty()) { - BasicBlock *BB = BBWorkList.back(); - BBWorkList.pop_back(); - - DEBUG(dbgs() << "\nPopped off BBWL: " << *BB); - - // Notify all instructions in this basic block that they are newly - // executable. - for (Instruction &I : *BB) - visitInst(I); - } - } -} - -void SparseSolver::Print(Function &F, raw_ostream &OS) const { - OS << "\nFUNCTION: " << F.getName() << "\n"; - for (auto &BB : F) { - if (!BBExecutable.count(&BB)) - OS << "INFEASIBLE: "; - OS << "\t"; - if (BB.hasName()) - OS << BB.getName() << ":\n"; - else - OS << "; anon bb\n"; - for (auto &I : BB) { - LatticeFunc->PrintValue(getLatticeState(&I), OS); - OS << I << "\n"; - } - - OS << "\n"; - } -} diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index fad918dabb510..c3185bf2bbde3 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -180,6 +180,11 @@ bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const { return TTIImpl->hasDivRemOp(DataType, IsSigned); } +bool TargetTransformInfo::hasVolatileVariant(Instruction *I, + unsigned AddrSpace) const { + return TTIImpl->hasVolatileVariant(I, AddrSpace); +} + bool TargetTransformInfo::prefersVectorizedAddressing() const { return TTIImpl->prefersVectorizedAddressing(); } diff --git a/lib/Analysis/ValueLatticeUtils.cpp b/lib/Analysis/ValueLatticeUtils.cpp new file mode 100644 index 0000000000000..22c9de4fe94d9 --- /dev/null +++ b/lib/Analysis/ValueLatticeUtils.cpp @@ -0,0 +1,44 @@ +//===-- ValueLatticeUtils.cpp - Utils for solving lattices ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements common functions useful for performing data-flow +// analyses that propagate values across function boundaries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ValueLatticeUtils.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +using namespace llvm; + +bool llvm::canTrackArgumentsInterprocedurally(Function *F) { + return F->hasLocalLinkage() && !F->hasAddressTaken(); +} + +bool llvm::canTrackReturnsInterprocedurally(Function *F) { + return F->hasExactDefinition() && !F->hasFnAttribute(Attribute::Naked); +} + +bool llvm::canTrackGlobalVariableInterprocedurally(GlobalVariable *GV) { + if (GV->isConstant() || !GV->hasLocalLinkage() || + !GV->hasDefinitiveInitializer()) + return false; + return !any_of(GV->users(), [&](User *U) { + if (auto *Store = dyn_cast<StoreInst>(U)) { + if (Store->getValueOperand() == GV || Store->isVolatile()) + return true; + } else if (auto *Load = dyn_cast<LoadInst>(U)) { + if (Load->isVolatile()) + return true; + } else { + return true; + } + return false; + }); +} diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index a49da3a861e02..182377d39c0c4 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -29,7 +29,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -83,12 +83,6 @@ const unsigned MaxDepth = 6; static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", cl::Hidden, cl::init(20)); -// This optimization is known to cause performance regressions in some cases, -// keep it under a temporary flag for now. -static cl::opt<bool> -DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits", - cl::Hidden, cl::init(true)); - /// Returns the bitwidth of the given scalar or pointer type. For vector types, /// returns the element type's bitwidth. static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { @@ -777,24 +771,26 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known, if (Known.Zero.intersects(Known.One)) { Known.resetAll(); - if (Q.ORE) { - auto *CxtI = const_cast<Instruction *>(Q.CxtI); - OptimizationRemarkAnalysis ORA("value-tracking", "BadAssumption", CxtI); - Q.ORE->emit(ORA << "Detected conflicting code assumptions. Program may " - "have undefined behavior, or compiler may have " - "internal error."); - } + if (Q.ORE) + Q.ORE->emit([&]() { + auto *CxtI = const_cast<Instruction *>(Q.CxtI); + return OptimizationRemarkAnalysis("value-tracking", "BadAssumption", + CxtI) + << "Detected conflicting code assumptions. Program may " + "have undefined behavior, or compiler may have " + "internal error."; + }); } } -// Compute known bits from a shift operator, including those with a -// non-constant shift amount. Known is the outputs of this function. Known2 is a -// pre-allocated temporary with the/ same bit width as Known. KZF and KOF are -// operator-specific functors that, given the known-zero or known-one bits -// respectively, and a shift amount, compute the implied known-zero or known-one -// bits of the shift operator's result respectively for that shift amount.
The -// results from calling KZF and KOF are conservatively combined for all -// permitted shift amounts. +/// Compute known bits from a shift operator, including those with a +/// non-constant shift amount. Known is the output of this function. Known2 is a +/// pre-allocated temporary with the same bit width as Known. KZF and KOF are +/// operator-specific functors that, given the known-zero or known-one bits +/// respectively, and a shift amount, compute the implied known-zero or +/// known-one bits of the shift operator's result respectively for that shift +/// amount. The results from calling KZF and KOF are conservatively combined for +/// all permitted shift amounts. static void computeKnownBitsFromShiftOperator( const Operator *I, KnownBits &Known, KnownBits &Known2, unsigned Depth, const Query &Q, @@ -808,19 +804,20 @@ static void computeKnownBitsFromShiftOperator( computeKnownBits(I->getOperand(0), Known, Depth + 1, Q); Known.Zero = KZF(Known.Zero, ShiftAmt); Known.One = KOF(Known.One, ShiftAmt); - // If there is conflict between Known.Zero and Known.One, this must be an - // overflowing left shift, so the shift result is undefined. Clear Known - // bits so that other code could propagate this undef. - if ((Known.Zero & Known.One) != 0) - Known.resetAll(); + // If the known bits conflict, this must be an overflowing left shift, so + // the shift result is poison. We can return anything we want. Choose 0 for + // the best folding opportunity. + if (Known.hasConflict()) + Known.setAllZero(); return; } computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); - // If the shift amount could be greater than or equal to the bit-width of the LHS, the - // value could be undef, so we don't know anything about it. + // If the shift amount could be greater than or equal to the bit-width of the + // LHS, the value could be poison, but bail out because the check below is + // expensive. TODO: Should we just carry on? if ((~Known.Zero).uge(BitWidth)) { Known.resetAll(); return; @@ -844,8 +841,7 @@ static void computeKnownBitsFromShiftOperator( // Early exit if we can't constrain any well-defined shift amount. if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) && !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) { - ShifterOperandIsNonZero = - isKnownNonZero(I->getOperand(1), Depth + 1, Q); + ShifterOperandIsNonZero = isKnownNonZero(I->getOperand(1), Depth + 1, Q); if (!*ShifterOperandIsNonZero) return; } @@ -876,13 +872,10 @@ static void computeKnownBitsFromShiftOperator( Known.One &= KOF(Known2.One, ShiftAmt); } - // If there are no compatible shift amounts, then we've proven that the shift - // amount must be >= the BitWidth, and the result is undefined. We could - // return anything we'd like, but we need to make sure the sets of known bits - // stay disjoint (it should be better for some other code to actually - // propagate the undef than to pick a value here using known bits). - if (Known.Zero.intersects(Known.One)) - Known.resetAll(); + // If the known bits conflict, the result is poison. Return a 0 and hope the + // caller can further optimize that. 
+ if (Known.hasConflict()) + Known.setAllZero(); } static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, @@ -1095,7 +1088,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, break; } case Instruction::LShr: { - // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + // (lshr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 auto KZF = [](const APInt &KnownZero, unsigned ShiftAmt) { APInt KZResult = KnownZero.lshr(ShiftAmt); // High bits known zero. @@ -1290,9 +1283,6 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(), Known3.countMinTrailingZeros())); - if (DontImproveNonNegativePhiBits) - break; - auto *OverflowOp = dyn_cast(LU); if (OverflowOp && OverflowOp->hasNoSignedWrap()) { // If initial value of recurrence is nonnegative, and we are adding @@ -1517,9 +1507,8 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, // We know that CDS must be a vector of integers. Take the intersection of // each element. Known.Zero.setAllBits(); Known.One.setAllBits(); - APInt Elt(BitWidth, 0); for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - Elt = CDS->getElementAsInteger(i); + APInt Elt = CDS->getElementAsAPInt(i); Known.Zero &= ~Elt; Known.One &= Elt; } @@ -1530,7 +1519,6 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, // We know that CV must be a vector of integers. Take the intersection of // each element. Known.Zero.setAllBits(); Known.One.setAllBits(); - APInt Elt(BitWidth, 0); for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) { Constant *Element = CV->getAggregateElement(i); auto *ElementCI = dyn_cast_or_null(Element); @@ -1538,7 +1526,7 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, Known.resetAll(); return; } - Elt = ElementCI->getValue(); + const APInt &Elt = ElementCI->getValue(); Known.Zero &= ~Elt; Known.One &= Elt; } @@ -2109,11 +2097,7 @@ static unsigned computeNumSignBitsVectorConstant(const Value *V, if (!Elt) return 0; - // If the sign bit is 1, flip the bits, so we always count leading zeros. - APInt EltVal = Elt->getValue(); - if (EltVal.isNegative()) - EltVal = ~EltVal; - MinSignBits = std::min(MinSignBits, EltVal.countLeadingZeros()); + MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits()); } return MinSignBits; @@ -3428,7 +3412,8 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { /// This is a wrapper around GetUnderlyingObjects and adds support for basic /// ptrtoint+arithmetic+inttoptr sequences. -void llvm::getUnderlyingObjectsForCodeGen(const Value *V, +/// It returns false if unidentified object is found in GetUnderlyingObjects. +bool llvm::getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl &Objects, const DataLayout &DL) { SmallPtrSet Visited; @@ -3454,11 +3439,12 @@ void llvm::getUnderlyingObjectsForCodeGen(const Value *V, // getUnderlyingObjectsForCodeGen also fails for safety. if (!isIdentifiedObject(V)) { Objects.clear(); - return; + return false; } Objects.push_back(const_cast(V)); } } while (!Working.empty()); + return true; } /// Return true if the only users of this pointer are lifetime markers. @@ -4298,6 +4284,20 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS); } +/// Helps to match a select pattern in case of a type mismatch. 
+/// +/// The function processes the case when the types of the true and false values +/// of a select instruction differ from the type of the cmp instruction operands +/// because of a cast instruction. The function checks if it is legal to move the +/// cast operation after the "select". If yes, it returns the new second value of +/// the "select" (with the assumption that the cast is moved): +/// 1. As the operand of the cast instruction when both values of the "select" +/// are the same cast instructions. +/// 2. As a restored constant (by applying the reverse cast operation) when the +/// first value of the "select" is a cast operation and the second value is a +/// constant. +/// NOTE: We return only the new second value because the first value can be +/// accessed as the operand of the cast instruction. static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, Instruction::CastOps *CastOp) { auto *Cast1 = dyn_cast<CastInst>(V1); @@ -4328,7 +4328,34 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, CastedTo = ConstantExpr::getTrunc(C, SrcTy, true); break; case Instruction::Trunc: - CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned()); + Constant *CmpConst; + if (match(CmpI->getOperand(1), m_Constant(CmpConst)) && + CmpConst->getType() == SrcTy) { + // Here we have the following case: + // + // %cond = cmp iN %x, CmpConst + // %tr = trunc iN %x to iK + // %narrowsel = select i1 %cond, iK %tr, iK C + // + // We can always move the trunc after the select operation: + // + // %cond = cmp iN %x, CmpConst + // %widesel = select i1 %cond, iN %x, iN CmpConst + // %tr = trunc iN %widesel to iK + // + // Note that C could be extended in any way because we don't care about + // the upper bits after truncation. It can't be an abs pattern, because + // that would look like: + // + // select i1 %cond, x, -x. + // + // So only a min/max pattern can be matched. Such a match requires the + // widened C == CmpConst. That is why we set the widened C = CmpConst; + // the condition trunc(CmpConst) == C is checked below.
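
Stated as a standalone predicate, the deferred check ("trunc(CmpConst) == C", performed by the caller after this returns) is just the following; the helper name is illustrative, not the function the caller actually uses:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// The widened constant CmpConst may stand in for the narrow constant C only
// if truncating it back to C's type reproduces C exactly. LLVM uniques
// constants, so pointer equality suffices for the comparison.
static bool truncPreservesConstant(Constant *CmpConst, Constant *C,
                                   Type *NarrowTy) {
  return ConstantExpr::getTrunc(CmpConst, NarrowTy) == C;
}
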
+ CastedTo = CmpConst; + } else { + CastedTo = ConstantExpr::getIntegerCast(C, SrcTy, CmpI->isSigned()); + } break; case Instruction::FPTrunc: CastedTo = ConstantExpr::getFPExtend(C, SrcTy, true); diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 5ce55f52276d1..52c02cc162ecb 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -601,7 +601,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(hhvm_ccc); KEYWORD(cxx_fast_tlscc); KEYWORD(amdgpu_vs); + KEYWORD(amdgpu_ls); KEYWORD(amdgpu_hs); + KEYWORD(amdgpu_es); KEYWORD(amdgpu_gs); KEYWORD(amdgpu_ps); KEYWORD(amdgpu_cs); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index f8f709a03bc64..565b1a27daf1f 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -237,9 +237,11 @@ bool LLParser::ValidateEndOfModule() { } } - UpgradeDebugInfo(*M); + if (UpgradeDebugInfo) + llvm::UpgradeDebugInfo(*M); UpgradeModuleFlags(*M); + UpgradeSectionAttributes(*M); if (!Slots) return false; @@ -1692,7 +1694,9 @@ void LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'hhvm_ccc' /// ::= 'cxx_fast_tlscc' /// ::= 'amdgpu_vs' +/// ::= 'amdgpu_ls' /// ::= 'amdgpu_hs' +/// ::= 'amdgpu_es' /// ::= 'amdgpu_gs' /// ::= 'amdgpu_ps' /// ::= 'amdgpu_cs' @@ -1734,7 +1738,9 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break; case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break; case lltok::kw_amdgpu_vs: CC = CallingConv::AMDGPU_VS; break; + case lltok::kw_amdgpu_ls: CC = CallingConv::AMDGPU_LS; break; case lltok::kw_amdgpu_hs: CC = CallingConv::AMDGPU_HS; break; + case lltok::kw_amdgpu_es: CC = CallingConv::AMDGPU_ES; break; case lltok::kw_amdgpu_gs: CC = CallingConv::AMDGPU_GS; break; case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break; case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break; @@ -4772,7 +4778,6 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { unsigned Alignment; std::string GC; GlobalValue::UnnamedAddr UnnamedAddr = GlobalValue::UnnamedAddr::None; - LocTy UnnamedAddrLoc; Constant *Prefix = nullptr; Constant *Prologue = nullptr; Constant *PersonalityFn = nullptr; @@ -5566,7 +5571,6 @@ bool LLParser::ParseCatchRet(Instruction *&Inst, PerFunctionState &PFS) { /// ::= 'catchswitch' within Parent bool LLParser::ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS) { Value *ParentPad; - LocTy BBLoc; if (ParseToken(lltok::kw_within, "expected 'within' after catchswitch")) return true; @@ -6070,7 +6074,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, /// ParseAlloc /// ::= 'alloca' 'inalloca'? 'swifterror'? Type (',' TypeAndValue)? -/// (',' 'align' i32)? +/// (',' 'align' i32)? (',', 'addrspace(n))? 
int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { Value *Size = nullptr; LocTy SizeLoc, TyLoc, ASLoc; @@ -6100,11 +6104,22 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { } else if (Lex.getKind() == lltok::MetadataVar) { AteExtraComma = true; } else { - if (ParseTypeAndValue(Size, SizeLoc, PFS) || - ParseOptionalCommaAlign(Alignment, AteExtraComma) || - (!AteExtraComma && - ParseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma))) + if (ParseTypeAndValue(Size, SizeLoc, PFS)) return true; + if (EatIfPresent(lltok::comma)) { + if (Lex.getKind() == lltok::kw_align) { + if (ParseOptionalAlignment(Alignment)) + return true; + if (ParseOptionalCommaAddrSpace(AddrSpace, ASLoc, AteExtraComma)) + return true; + } else if (Lex.getKind() == lltok::kw_addrspace) { + ASLoc = Lex.getLoc(); + if (ParseOptionalAddrSpace(AddrSpace)) + return true; + } else if (Lex.getKind() == lltok::MetadataVar) { + AteExtraComma = true; + } + } } } diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index d5b059355c423..5dadf521538cd 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -139,11 +139,16 @@ namespace llvm { std::map<Value *, std::vector<unsigned>> ForwardRefAttrGroups; std::map<unsigned, AttrBuilder> NumberedAttrBuilders; + /// Only the llvm-as tool may set this to false to bypass + /// UpgradeDebugInfo so it can generate broken bitcode. + bool UpgradeDebugInfo; + public: LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *M, - SlotMapping *Slots = nullptr) + SlotMapping *Slots = nullptr, bool UpgradeDebugInfo = true) : Context(M->getContext()), Lex(F, SM, Err, M->getContext()), M(M), - Slots(Slots), BlockAddressPFS(nullptr) {} + Slots(Slots), BlockAddressPFS(nullptr), + UpgradeDebugInfo(UpgradeDebugInfo) {} bool Run(); bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 09e502d7a354e..a729ce4c7012d 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -153,7 +153,9 @@ enum Kind { kw_hhvm_ccc, kw_cxx_fast_tlscc, kw_amdgpu_vs, + kw_amdgpu_ls, kw_amdgpu_hs, + kw_amdgpu_es, kw_amdgpu_gs, kw_amdgpu_ps, kw_amdgpu_cs, diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index bee07ad9e0a52..a43ae2b5577ab 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -23,22 +23,21 @@ using namespace llvm; bool llvm::parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err, - SlotMapping *Slots) { + SlotMapping *Slots, bool UpgradeDebugInfo) { SourceMgr SM; std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(F); SM.AddNewSourceBuffer(std::move(Buf), SMLoc()); - return LLParser(F.getBuffer(), SM, Err, &M, Slots).Run(); + return LLParser(F.getBuffer(), SM, Err, &M, Slots, UpgradeDebugInfo).Run(); } -std::unique_ptr<Module> llvm::parseAssembly(MemoryBufferRef F, - SMDiagnostic &Err, - LLVMContext &Context, - SlotMapping *Slots) { +std::unique_ptr<Module> +llvm::parseAssembly(MemoryBufferRef F, SMDiagnostic &Err, LLVMContext &Context, + SlotMapping *Slots, bool UpgradeDebugInfo) { std::unique_ptr<Module> M = make_unique<Module>(F.getBufferIdentifier(), Context); - if (parseAssemblyInto(F, *M, Err, Slots)) + if (parseAssemblyInto(F, *M, Err, Slots, UpgradeDebugInfo)) return nullptr; return M; @@ -47,7 +46,8 @@ std::unique_ptr<Module> llvm::parseAssembly(MemoryBufferRef F, std::unique_ptr<Module> llvm::parseAssemblyFile(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, - SlotMapping *Slots) { + SlotMapping *Slots, + bool UpgradeDebugInfo) { ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = FileOrErr.getError()) { @@ -56,15 +56,17 @@ std::unique_ptr llvm::parseAssemblyFile(StringRef Filename, return nullptr; } - return parseAssembly(FileOrErr.get()->getMemBufferRef(), Err, Context, Slots); + return parseAssembly(FileOrErr.get()->getMemBufferRef(), Err, Context, Slots, + UpgradeDebugInfo); } std::unique_ptr llvm::parseAssemblyString(StringRef AsmString, SMDiagnostic &Err, LLVMContext &Context, - SlotMapping *Slots) { + SlotMapping *Slots, + bool UpgradeDebugInfo) { MemoryBufferRef F(AsmString, ""); - return parseAssembly(F, Err, Context, Slots); + return parseAssembly(F, Err, Context, Slots, UpgradeDebugInfo); } Constant *llvm::parseConstantValue(StringRef Asm, SMDiagnostic &Err, diff --git a/lib/BinaryFormat/Dwarf.cpp b/lib/BinaryFormat/Dwarf.cpp index 37c4579ef0f89..86e3b02577fd8 100644 --- a/lib/BinaryFormat/Dwarf.cpp +++ b/lib/BinaryFormat/Dwarf.cpp @@ -575,3 +575,10 @@ bool llvm::dwarf::isValidFormForVersion(Form F, unsigned Version, } return ExtensionsOk; } + +uint32_t llvm::dwarf::djbHash(StringRef Buffer) { + uint32_t H = 5381; + for (char C : Buffer.bytes()) + H = ((H << 5) + H) + C; + return H; +} diff --git a/lib/BinaryFormat/Magic.cpp b/lib/BinaryFormat/Magic.cpp index e9b8df93b9028..42546eaa732b4 100644 --- a/lib/BinaryFormat/Magic.cpp +++ b/lib/BinaryFormat/Magic.cpp @@ -185,8 +185,8 @@ file_magic llvm::identify_magic(StringRef Magic) { if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) { uint32_t off = read32le(Magic.data() + 0x3c); // PE/COFF file, either EXE or DLL. - if (off < Magic.size() && - memcmp(Magic.data() + off, COFF::PEMagic, sizeof(COFF::PEMagic)) == 0) + if (Magic.substr(off).startswith( + StringRef(COFF::PEMagic, sizeof(COFF::PEMagic)))) return file_magic::pecoff_executable; } break; diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 560a71bbf76b0..68b36eef5ebe6 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -265,7 +265,7 @@ static Expected hasObjCCategoryInModule(BitstreamCursor &Stream) { if (convertToString(Record, 0, S)) return error("Invalid record"); // Check for the i386 and other (x86_64, ARM) conventions - if (S.find("__DATA, __objc_catlist") != std::string::npos || + if (S.find("__DATA,__objc_catlist") != std::string::npos || S.find("__OBJC,__category") != std::string::npos) return true; break; diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 5abf50e5bd10c..d7f91fc1ce3b4 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -1,4 +1,4 @@ -//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===// +//===- AggressiveAntiDepBreaker.cpp - Anti-dep breaker --------------------===// // // The LLVM Compiler Infrastructure // @@ -15,16 +15,33 @@ //===----------------------------------------------------------------------===// #include "AggressiveAntiDepBreaker.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include 
"llvm/CodeGen/ScheduleDAG.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "post-RA-sched" @@ -34,18 +51,17 @@ static cl::opt DebugDiv("agg-antidep-debugdiv", cl::desc("Debug control for aggressive anti-dep breaker"), cl::init(0), cl::Hidden); + static cl::opt DebugMod("agg-antidep-debugmod", cl::desc("Debug control for aggressive anti-dep breaker"), cl::init(0), cl::Hidden); AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, - MachineBasicBlock *BB) : - NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0), - GroupNodeIndices(TargetRegs, 0), - KillIndices(TargetRegs, 0), - DefIndices(TargetRegs, 0) -{ + MachineBasicBlock *BB) + : NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0), + GroupNodeIndices(TargetRegs, 0), KillIndices(TargetRegs, 0), + DefIndices(TargetRegs, 0) { const unsigned BBSize = BB->size(); for (unsigned i = 0; i < NumTargetRegs; ++i) { // Initialize all registers to be in their own group. Initially we @@ -76,8 +92,7 @@ void AggressiveAntiDepState::GetGroupRegs( } } -unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) -{ +unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) { assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!"); assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!"); @@ -92,8 +107,7 @@ unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) return Parent; } -unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) -{ +unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) { // Create a new GroupNode for Reg. Reg's existing GroupNode must // stay as is because there could be other GroupNodes referring to // it. @@ -103,8 +117,7 @@ unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) return idx; } -bool AggressiveAntiDepState::IsLive(unsigned Reg) -{ +bool AggressiveAntiDepState::IsLive(unsigned Reg) { // KillIndex must be defined and DefIndex not defined for a register // to be live. return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u)); @@ -115,8 +128,7 @@ AggressiveAntiDepBreaker::AggressiveAntiDepBreaker( TargetSubtargetInfo::RegClassVector &CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), TII(MF.getSubtarget().getInstrInfo()), - TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI), - State(nullptr) { + TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI) { /* Collect a bitset of all registers that are only broken if they are on the critical path. */ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { @@ -250,7 +262,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs( /// AntiDepEdges - Return in Edges the anti- and output- dependencies /// in SU that we want to consider for breaking. -static void AntiDepEdges(const SUnit *SU, std::vector& Edges) { +static void AntiDepEdges(const SUnit *SU, std::vector &Edges) { SmallSet RegSet; for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { @@ -544,8 +556,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // break the anti-dependence. 
std::vector<unsigned> Regs; State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs); - assert(Regs.size() > 0 && "Empty register group!"); - if (Regs.size() == 0) + assert(!Regs.empty() && "Empty register group!"); + if (Regs.empty()) return false; // Find the "superest" register in the group. At the same time, @@ -732,14 +744,12 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( /// BreakAntiDependencies - Identify anti-dependencies within the /// ScheduleDAG and break them by renaming registers. -/// unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( - const std::vector<SUnit>& SUnits, + const std::vector<SUnit> &SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, DbgValueVector &DbgValues) { - std::vector<unsigned> &KillIndices = State->GetKillIndices(); std::vector<unsigned> &DefIndices = State->GetDefIndices(); std::multimap& diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index f97e6666b2195..d3308db410a96 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -1,4 +1,4 @@ -//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=// +//==- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -19,29 +19,35 @@ #include "AntiDepBreaker.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Compiler.h" #include "llvm/Target/TargetSubtargetInfo.h" #include +#include +#include namespace llvm { + +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MachineOperand; +class MachineRegisterInfo; class RegisterClassInfo; +class TargetInstrInfo; +class TargetRegisterClass; +class TargetRegisterInfo; /// Contains all the state necessary for anti-dep breaking. class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { public: /// Information about a register reference within a liverange - typedef struct { + struct RegisterReference { /// The register's operand MachineOperand *Operand; + /// The register class const TargetRegisterClass *RC; - } RegisterReference; + }; private: /// Number of non-virtual target registers (i.e. TRI->getNumRegs()). @@ -110,7 +116,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepBreaker : public AntiDepBreaker { - MachineFunction& MF; + MachineFunction &MF; MachineRegisterInfo &MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -121,10 +127,10 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { BitVector CriticalPathSet; /// The state used to identify and rename anti-dependence registers. - AggressiveAntiDepState *State; + AggressiveAntiDepState *State = nullptr; public: - AggressiveAntiDepBreaker(MachineFunction& MFi, + AggressiveAntiDepBreaker(MachineFunction &MFi, const RegisterClassInfo &RCI, TargetSubtargetInfo::RegClassVector& CriticalPathRCs); ~AggressiveAntiDepBreaker() override; @@ -134,8 +140,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { /// Identify anti-dependencies along the critical path /// of the ScheduleDAG and break them by renaming registers.
- /// - unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, @@ -143,7 +148,6 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { /// Update liveness information to account for the current /// instruction, which will not be scheduled. - /// void Observe(MachineInstr &MI, unsigned Count, unsigned InsertPosIndex) override; @@ -152,7 +156,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { private: /// Keep track of a position in the allocation order for each regclass. - typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType; + using RenameOrderType = std::map<const TargetRegisterClass *, unsigned>; /// Return true if MO represents a register /// that is both implicitly used and defined in MI @@ -174,6 +178,7 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { RenameOrderType& RenameOrder, std::map<unsigned, unsigned> &RenameMap); }; -} -#endif +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index d14d93100adbf..181da83dc88b1 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -1,4 +1,4 @@ -//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=// +//===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,12 +15,14 @@ #ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H #define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Compiler.h" +#include +#include #include namespace llvm { @@ -29,17 +31,17 @@ namespace llvm { /// registers to break register anti-dependencies (WAR hazards). class LLVM_LIBRARY_VISIBILITY AntiDepBreaker { public: - typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > - DbgValueVector; + using DbgValueVector = + std::vector<std::pair<MachineInstr *, MachineInstr *>>; virtual ~AntiDepBreaker(); /// Initialize anti-dep breaking for a new basic block. - virtual void StartBlock(MachineBasicBlock *BB) =0; + virtual void StartBlock(MachineBasicBlock *BB) = 0; /// Identify anti-dependencies within a basic-block region and break them by /// renaming registers. Return the number of anti-dependencies broken. - virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + virtual unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, @@ -51,7 +53,7 @@ class LLVM_LIBRARY_VISIBILITY AntiDepBreaker { unsigned InsertPosIndex) = 0; /// Finish anti-dep breaking for a basic block. - virtual void FinishBlock() =0; + virtual void FinishBlock() = 0; /// Update DBG_VALUE if the dependency breaker is updating /// another machine instruction to use NewReg.
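
What that contract amounts to in practice: when the breaker renames a register in an instruction, any DBG_VALUE paired with that instruction must be rewritten as well, or the debug info would refer to a register that no longer carries the value. A sketch of such a pass over the DbgValueVector pairs (the helper name is illustrative; the real logic is the UpdateDbgValue hook documented above):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include <utility>
#include <vector>

using namespace llvm;

// Rewrite the register operands of every DBG_VALUE that was paired with MI
// so that debug info follows the value into its new register.
static void updateDbgValuesForRename(
    std::vector<std::pair<MachineInstr *, MachineInstr *>> &DbgValues,
    MachineInstr *MI, unsigned OldReg, unsigned NewReg) {
  for (auto &Entry : DbgValues) {
    if (Entry.second != MI)
      continue; // This DBG_VALUE tracks a different defining instruction.
    for (MachineOperand &MO : Entry.first->operands())
      if (MO.isReg() && MO.getReg() == OldReg)
        MO.setReg(NewReg);
  }
}
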
@@ -81,6 +83,6 @@ class LLVM_LIBRARY_VISIBILITY AntiDepBreaker { } }; -} +} // end namespace llvm -#endif +#endif // LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 26ca58a76e4b9..a35fcdaaf9aa5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1,4 +1,4 @@ -//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===// +//===- AsmPrinter.cpp - Common AsmPrinter code ----------------------------===// // // The LLVM Compiler Infrastructure // @@ -29,10 +29,11 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/ObjectUtils.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/GCStrategy.h" @@ -46,10 +47,12 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Comdat.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -62,14 +65,18 @@ #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodePadder.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSection.h" @@ -78,11 +85,13 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/SectionKind.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" @@ -96,12 +105,15 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include #include #include #include +#include #include #include #include @@ -130,7 +142,8 @@ static cl::opt char AsmPrinter::ID = 0; -typedef DenseMap> gcp_map_type; +using gcp_map_type = DenseMap>; + static gcp_map_type &getGCMap(void *&P) { if (!P) P = new gcp_map_type(); @@ -185,7 +198,6 @@ bool AsmPrinter::isPositionIndependent() const { } /// getFunctionNumber - Return a unique ID for the current function. 
-/// unsigned AsmPrinter::getFunctionNumber() const { return MF->getFunctionNumber(); } @@ -222,8 +234,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); - if (isVerbose()) - AU.addRequired(); + AU.addRequired(); } bool AsmPrinter::doInitialization(Module &M) { @@ -733,7 +744,7 @@ void AsmPrinter::EmitFunctionEntryLabel() { /// emitComments - Pretty-print comments for instructions. static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS, AsmPrinter *AP) { - const MachineFunction *MF = MI.getParent()->getParent(); + const MachineFunction *MF = MI.getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); // Check for spills and reloads @@ -983,7 +994,6 @@ void AsmPrinter::EmitFunctionBody() { // Print a label for the basic block. EmitBasicBlockStart(MBB); for (auto &MI : MBB) { - // Print the assembly for the instruction. if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && !MI.isDebugValue()) { @@ -1007,11 +1017,9 @@ void AsmPrinter::EmitFunctionBody() { case TargetOpcode::CFI_INSTRUCTION: emitCFIInstruction(MI); break; - case TargetOpcode::LOCAL_ESCAPE: emitFrameAlloc(MI); break; - case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol()); @@ -1432,8 +1440,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { } ORE = &getAnalysis().getORE(); - if (isVerbose()) - LI = &getAnalysis(); + LI = &getAnalysis(); const TargetSubtargetInfo &STI = MF.getSubtarget(); EnablePrintSchedInfo = PrintSchedule.getNumOccurrences() @@ -1458,7 +1465,6 @@ namespace { /// representations of the constants in the constant pool MCP. This is /// used to print out constants which have been "spilled to memory" by /// the code generator. -/// void AsmPrinter::EmitConstantPool() { const MachineConstantPool *MCP = MF->getConstantPool(); const std::vector &CP = MCP->getConstants(); @@ -1538,7 +1544,6 @@ void AsmPrinter::EmitConstantPool() { /// EmitJumpTableInfo - Print assembly representations of the jump tables used /// by the current function to the current output stream. -/// void AsmPrinter::EmitJumpTableInfo() { const DataLayout &DL = MF->getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); @@ -1735,7 +1740,7 @@ struct Structor { Structor() = default; }; -} // end anonymous namespace +} // end anonymous namespace /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. @@ -1830,13 +1835,11 @@ void AsmPrinter::EmitInt8(int Value) const { } /// EmitInt16 - Emit a short directive and value. -/// void AsmPrinter::EmitInt16(int Value) const { OutStreamer->EmitIntValue(Value, 2); } /// EmitInt32 - Emit a long directive and value. -/// void AsmPrinter::EmitInt32(int Value) const { OutStreamer->EmitIntValue(Value, 4); } @@ -1878,7 +1881,6 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // byte alignment. If a global value is specified, and if that global has // an explicit alignment requested, it will override the alignment request // if required for correctness. 
-// void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { if (GV) NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits); @@ -2329,7 +2331,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, // // cstexpr := - + gotpcrelcst, where // gotpcrelcst := + - // MCValue MV; if (!(*ME)->evaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute()) return; @@ -2360,7 +2361,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, // If gotpcrelcst is positive it means that we can safely fold the pc rel // displacement into the GOTPCREL. We can also can have an extra offset // if the target knows how to encode it. - // int64_t GOTPCRelCst = Offset + MV.getConstant(); if (GOTPCRelCst < 0) return; @@ -2382,7 +2382,6 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, // .long 42 // foo: // .long bar@GOTPCREL+ - // AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym]; const GlobalVariable *GV = Result.first; int NumUses = (int)Result.second; @@ -2562,7 +2561,6 @@ static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop, << " Depth=" << Loop->getLoopDepth() << '\n'; } - /// PrintChildLoopComment - Print comments about child loops within /// the loop for this basic block, with nesting. static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, @@ -2615,6 +2613,23 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, PrintChildLoopComment(OS, Loop, AP.getFunctionNumber()); } +void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB, + MCCodePaddingContext &Context) const { + assert(MF != nullptr && "Machine function must be valid"); + assert(LI != nullptr && "Loop info must be valid"); + Context.IsPaddingActive = !MF->hasInlineAsm() && + !MF->getFunction()->optForSize() && + TM.getOptLevel() != CodeGenOpt::None; + const MachineLoop *CurrentLoop = LI->getLoopFor(&MBB); + Context.IsBasicBlockInsideInnermostLoop = + CurrentLoop != nullptr && CurrentLoop->getSubLoops().empty(); + Context.IsBasicBlockReachableViaFallthrough = + std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) != + MBB.pred_end(); + Context.IsBasicBlockReachableViaBranch = + MBB.pred_size() > 0 && !isBlockOnlyReachableByFallthrough(&MBB); +} + /// EmitBasicBlockStart - This method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. @@ -2630,6 +2645,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { // Emit an alignment directive for this block, if needed. if (unsigned Align = MBB.getAlignment()) EmitAlignment(Align); + MCCodePaddingContext Context; + setupCodePaddingContext(MBB, Context); + OutStreamer->EmitCodePaddingBasicBlockStart(Context); // If the block has its address taken, emit any labels that were used to // reference the block. 
It is possible that there is more than one label @@ -2671,6 +2689,12 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { } } +void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) { + MCCodePaddingContext Context; + setupCodePaddingContext(MBB, Context); + OutStreamer->EmitCodePaddingBasicBlockEnd(Context); +} + void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition) const { MCSymbolAttr Attr = MCSA_Invalid; @@ -2847,7 +2871,7 @@ void AsmPrinter::emitXRayTable() { void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind, uint8_t Version) { - auto Fn = MI.getParent()->getParent()->getFunction(); + auto Fn = MI.getMF()->getFunction(); auto Attr = Fn->getFnAttribute("function-instrument"); bool LogArgs = Fn->hasFnAttribute("xray-log-args"); bool AlwaysInstrument = diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 021cee526d818..7d50d643c991d 100644 --- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -808,6 +808,10 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, if (FuncName.empty()) FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName()); + // Emit FPO data, but only on 32-bit x86. No other platforms use it. + if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86) + OS.EmitCVFPOData(Fn); + // Emit a symbol subsection, required by VS2012+ to find function boundaries. OS.AddComment("Symbol subsection for " + Twine(FuncName)); MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 92b3d50cd2f3a..f56199dc8e721 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -68,13 +68,6 @@ class AsmPrinter; class DwarfDebug; class DwarfAccelTable { - static uint32_t HashDJB(StringRef Str) { - uint32_t h = 5381; - for (unsigned i = 0, e = Str.size(); i != e; ++i) - h = ((h << 5) + h) + Str[i]; - return h; - } - // Helper function to compute the number of buckets needed based on // the number of unique hashes. void ComputeBucketCount(); @@ -199,7 +192,7 @@ class DwarfAccelTable { HashData(StringRef S, DwarfAccelTable::DataArray &Data) : Str(S), Data(Data) { - HashValue = DwarfAccelTable::HashDJB(S); + HashValue = dwarf::djbHash(S); } #ifndef NDEBUG diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 8b732765bf348..06b5b06c41bf3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -810,6 +810,12 @@ void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty, /// DbgVariable based on provided MachineLocation. void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, MachineLocation Location) { + // addBlockByrefAddress is obsolete and will be removed soon. + // The clang frontend always generates block byref variables with a + // complex expression that encodes exactly what addBlockByrefAddress + // would do. 
+ assert((!DV.isBlockByrefVariable() || DV.hasComplexAddress()) && + "block byref variable without a complex expression"); if (DV.hasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); else if (DV.isBlockByrefVariable()) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 499780a173b4b..9676191612763 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -220,9 +220,44 @@ ArrayRef DbgVariable::getFrameIndexExprs() const { return A.Expr->getFragmentInfo()->OffsetInBits < B.Expr->getFragmentInfo()->OffsetInBits; }); + return FrameIndexExprs; } +void DbgVariable::addMMIEntry(const DbgVariable &V) { + assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); + assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry"); + assert(V.Var == Var && "conflicting variable"); + assert(V.IA == IA && "conflicting inlined-at location"); + + assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); + assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); + + // FIXME: This logic should not be necessary anymore, as we now have proper + // deduplication. However, without it, we currently run into the assertion + // below, which means that we are likely dealing with broken input, i.e. two + // non-fragment entries for the same variable at different frame indices. + if (FrameIndexExprs.size()) { + auto *Expr = FrameIndexExprs.back().Expr; + if (!Expr || !Expr->isFragment()) + return; + } + + for (const auto &FIE : V.FrameIndexExprs) + // Ignore duplicate entries. + if (llvm::none_of(FrameIndexExprs, [&](const FrameIndexExpr &Other) { + return FIE.FI == Other.FI && FIE.Expr == Other.Expr; + })) + FrameIndexExprs.push_back(FIE); + + assert((FrameIndexExprs.size() == 1 || + llvm::all_of(FrameIndexExprs, + [](FrameIndexExpr &FIE) { + return FIE.Expr && FIE.Expr->isFragment(); + })) && + "conflicting locations for variable"); +} + static const DwarfAccelTable::Atom TypeAtoms[] = { DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), @@ -508,13 +543,18 @@ static SmallVectorImpl & sortGlobalExprs(SmallVectorImpl &GVEs) { std::sort(GVEs.begin(), GVEs.end(), [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { - if (A.Expr != B.Expr && A.Expr && B.Expr) { - auto FragmentA = A.Expr->getFragmentInfo(); - auto FragmentB = B.Expr->getFragmentInfo(); - if (FragmentA && FragmentB) - return FragmentA->OffsetInBits < FragmentB->OffsetInBits; - } - return false; + // Sort order: first null exprs, then exprs without fragment + // info, then sort by fragment offset in bits. + // FIXME: Come up with a more comprehensive comparator so + // the sorting isn't non-deterministic, and so the following + // std::unique call works correctly. 
+ if (!A.Expr || !B.Expr) + return !!B.Expr; + auto FragmentA = A.Expr->getFragmentInfo(); + auto FragmentB = B.Expr->getFragmentInfo(); + if (!FragmentA || !FragmentB) + return !!FragmentB; + return FragmentA->OffsetInBits < FragmentB->OffsetInBits; }); GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), [](DwarfCompileUnit::GlobalExpr A, @@ -1123,7 +1163,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); assert(CurMI); - const auto *SP = MI->getParent()->getParent()->getFunction()->getSubprogram(); + const auto *SP = MI->getMF()->getFunction()->getSubprogram(); if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 24a50c63b4978..7b8cb348e48b4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -138,30 +138,7 @@ class DbgVariable { /// Get the FI entries, sorted by fragment offset. ArrayRef getFrameIndexExprs() const; bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); } - - void addMMIEntry(const DbgVariable &V) { - assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); - assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry"); - assert(V.Var == Var && "conflicting variable"); - assert(V.IA == IA && "conflicting inlined-at location"); - - assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); - assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); - - if (FrameIndexExprs.size()) { - auto *Expr = FrameIndexExprs.back().Expr; - // Get rid of duplicate non-fragment entries. More than one non-fragment - // dbg.declare makes no sense so ignore all but the first. - if (!Expr || !Expr->isFragment()) - return; - } - FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end()); - assert(llvm::all_of(FrameIndexExprs, - [](FrameIndexExpr &FIE) { - return FIE.Expr && FIE.Expr->isFragment(); - }) && - "conflicting locations for variable"); - } + void addMMIEntry(const DbgVariable &V); // Translate tag to proper Dwarf tag. dwarf::Tag getTag() const { diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index 429269d36d886..131497b1b7f33 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -130,6 +130,8 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, unsigned Size = TRI.getSubRegIdxSize(Idx); unsigned Offset = TRI.getSubRegIdxOffset(Idx); Reg = TRI.getDwarfRegNum(*SR, false); + if (Reg < 0) + continue; // Intersection between the bits we already emitted and the bits // covered by this subregister. @@ -138,7 +140,7 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, // If this sub-register has a DWARF number and we haven't covered // its range, emit a DWARF piece for it. - if (Reg >= 0 && CurSubReg.test(Coverage)) { + if (CurSubReg.test(Coverage)) { // Emit a piece for any gap in the coverage. 
if (Offset > CurPos) DwarfRegs.push_back({-1, Offset - CurPos, nullptr}); diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 8767da76ff91b..0d7305b899785 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -1,4 +1,4 @@ -//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===// +//===- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer ---===// // // The LLVM Compiler Infrastructure // @@ -12,22 +12,34 @@ //===----------------------------------------------------------------------===// #include "EHStreamer.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/LEB128.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include +#include +#include +#include using namespace llvm; EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} -EHStreamer::~EHStreamer() {} +EHStreamer::~EHStreamer() = default; /// How many leading type ids two landing pads have in common. unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L, @@ -50,7 +62,6 @@ unsigned EHStreamer:: computeActionsTable(const SmallVectorImpl &LandingPads, SmallVectorImpl &Actions, SmallVectorImpl &FirstActions) { - // The action table follows the call-site table in the LSDA. The individual // records are of two types: // diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h index 080fdd14b4670..7962b761d8de7 100644 --- a/lib/CodeGen/AsmPrinter/EHStreamer.h +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -1,4 +1,4 @@ -//===-- EHStreamer.h - Exception Handling Directive Streamer ---*- C++ -*--===// +//===- EHStreamer.h - Exception Handling Directive Streamer -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,17 +16,16 @@ #include "AsmPrinterHandler.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Compiler.h" namespace llvm { + +class AsmPrinter; struct LandingPadInfo; -class MachineModuleInfo; class MachineInstr; -class MachineFunction; +class MachineModuleInfo; class MCSymbol; -class MCSymbolRefExpr; - -template -class SmallVectorImpl; +template class SmallVectorImpl; /// Emits exception handling directives. class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler { @@ -45,11 +44,12 @@ class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler { struct PadRange { // The index of the landing pad. unsigned PadIndex; + // The index of the begin and end labels in the landing pad's label lists. unsigned RangeIndex; }; - typedef DenseMap RangeMapType; + using RangeMapType = DenseMap; /// Structure describing an entry in the actions table. struct ActionEntry { @@ -66,6 +66,7 @@ class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler { // LPad contains the landing pad start labels. const LandingPadInfo *LPad; // Null indicates that there is no landing pad. 
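The EHStreamer.h hunk above is one instance of a modernization this patch applies across several headers: `typedef` declarations become `using` aliases. A minimal standalone sketch of the two spellings, with illustrative types rather than the ones in the header; the usual motivation for the switch is that `using` reads name-first and also extends to alias templates, which `typedef` cannot express:

    #include <map>
    #include <string>

    // Old spelling: the alias name is buried after the type.
    typedef std::multimap<unsigned, std::string> OldRangeMap;

    // New spelling: name first, then the type.
    using NewRangeMap = std::multimap<unsigned, std::string>;

    // Alias templates are only possible with 'using'.
    template <typename T> using ByName = std::map<std::string, T>;

    int main() {
      OldRangeMap A{{1, "begin"}};
      NewRangeMap B{{1, "begin"}};
      ByName<int> C{{"index", 0}};
      return static_cast<int>(A.size() + B.size() + C.size()) - 3; // exits 0
    }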
+ unsigned Action; }; @@ -131,7 +132,7 @@ class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler { /// `false' otherwise. static bool callToNoUnwindFunction(const MachineInstr *MI); }; -} -#endif +} // end namespace llvm +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index be93ff0dad29d..fb26be55a11a5 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -15,21 +15,20 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/TargetTransformInfoImpl.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" -#include +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; // This flag is used by the template base class for BasicTTIImpl, and here to // provide a definition. cl::opt - llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), - cl::desc("Threshold for partial unrolling"), - cl::Hidden); +llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), + cl::desc("Threshold for partial unrolling"), + cl::Hidden); BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index fdd282ce124ea..40cb0c0cdf192 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -19,12 +19,14 @@ #include "BranchFolding.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -41,6 +43,7 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" @@ -51,6 +54,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include @@ -82,8 +86,8 @@ TailMergeThreshold("tail-merge-threshold", // TODO: This should be replaced with a target query. 
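The `TailMergeSize` option that follows is a standard LLVM `cl::opt`, here only re-indented; the same pattern appears in BasicTargetTransformInfo.cpp above with `PartialUnrollingThreshold`. As a hedged illustration of the pattern (the flag name and program are made up, not part of this patch), a minimal tool-style use of the command-line API:

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // A hidden tuning knob with a default value, analogous to
    // -tail-merge-size: parsed as an unsigned, not shown in -help.
    static cl::opt<unsigned>
        ExampleThreshold("example-threshold",
                         cl::desc("Example threshold for a tuning heuristic"),
                         cl::init(3), cl::Hidden);

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      // cl::opt<unsigned> converts implicitly to unsigned at use sites.
      outs() << "threshold = " << ExampleThreshold << "\n";
      return 0;
    }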
static cl::opt TailMergeSize("tail-merge-size", - cl::desc("Min number of instructions to consider tail merging"), - cl::init(3), cl::Hidden); + cl::desc("Min number of instructions to consider tail merging"), + cl::init(3), cl::Hidden); namespace { @@ -107,6 +111,7 @@ namespace { } // end anonymous namespace char BranchFolderPass::ID = 0; + char &llvm::BranchFolderPassID = BranchFolderPass::ID; INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE, @@ -1865,7 +1870,6 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(*PI)) return MBB->end(); - // Find out what registers are live. Note this routine is ignoring other live // registers which are only used by instructions in successor blocks. for (const MachineOperand &MO : PI->operands()) { diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index f6efcb718c919..0f09525501370 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -1,4 +1,4 @@ -//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===// +//===- BranchFolding.h - Fold machine code branch instructions --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -10,20 +10,27 @@ #ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H #define LLVM_LIB_CODEGEN_BRANCHFOLDING_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/Compiler.h" +#include #include namespace llvm { - class MachineBlockFrequencyInfo; - class MachineBranchProbabilityInfo; - class MachineFunction; - class MachineModuleInfo; - class MachineLoopInfo; - class TargetInstrInfo; - class TargetRegisterInfo; + +class BasicBlock; +class MachineBlockFrequencyInfo; +class MachineBranchProbabilityInfo; +class MachineFunction; +class MachineLoopInfo; +class MachineModuleInfo; +class MachineRegisterInfo; +class raw_ostream; +class TargetInstrInfo; +class TargetRegisterInfo; class LLVM_LIBRARY_VISIBILITY BranchFolder { public: @@ -49,6 +56,7 @@ namespace llvm { class MergePotentialsElt { unsigned Hash; MachineBasicBlock *Block; + public: MergePotentialsElt(unsigned h, MachineBasicBlock *b) : Hash(h), Block(b) {} @@ -62,7 +70,9 @@ namespace llvm { bool operator<(const MergePotentialsElt &) const; }; - typedef std::vector::iterator MPIterator; + + using MPIterator = std::vector::iterator; + std::vector MergePotentials; SmallPtrSet TriedMerging; DenseMap FuncletMembership; @@ -70,6 +80,7 @@ namespace llvm { class SameTailElt { MPIterator MPIter; MachineBasicBlock::iterator TailStartPos; + public: SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp) : MPIter(mp), TailStartPos(tsp) {} @@ -77,18 +88,23 @@ namespace llvm { MPIterator getMPIter() const { return MPIter; } + MergePotentialsElt &getMergePotentialsElt() const { return *getMPIter(); } + MachineBasicBlock::iterator getTailStartPos() const { return TailStartPos; } + unsigned getHash() const { return getMergePotentialsElt().getHash(); } + MachineBasicBlock *getBlock() const { return getMergePotentialsElt().getBlock(); } + bool tailIsWholeBlock() const { return TailStartPos == getBlock()->begin(); } @@ -96,6 +112,7 @@ namespace llvm { void setBlock(MachineBasicBlock *MBB) { getMergePotentialsElt().setBlock(MBB); } + void setTailStartPos(MachineBasicBlock::iterator Pos) { TailStartPos = Pos; } @@ -120,6 +137,7 @@ namespace llvm { class MBFIWrapper { public: 
MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {} + BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); raw_ostream &printBlockFreq(raw_ostream &OS, @@ -203,6 +221,7 @@ namespace llvm { /// the function, move the instructions before MBB terminator if it's legal. bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB); }; -} -#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */ +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_BRANCHFOLDING_H diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index c2ced19458ed6..588f1791ce3c5 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -1,4 +1,4 @@ -//===------------------------ CalcSpillWeights.cpp ------------------------===// +//===- CalcSpillWeights.cpp -----------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,10 +8,13 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" @@ -19,6 +22,9 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include + using namespace llvm; #define DEBUG_TYPE "calcspillweights" @@ -127,8 +133,21 @@ static bool isRematerializable(const LiveInterval &LI, return true; } -void -VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { +void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { + float weight = weightCalcHelper(li); + // Check if unspillable. + if (weight < 0) + return; + li.weight = weight; +} + +float VirtRegAuxInfo::futureWeight(LiveInterval &li, SlotIndex start, + SlotIndex end) { + return weightCalcHelper(li, &start, &end); +} + +float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start, + SlotIndex *end) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo(); MachineBasicBlock *mbb = nullptr; @@ -148,10 +167,38 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // Don't recompute spill weight for an unspillable register. bool Spillable = li.isSpillable(); + bool localSplitArtifact = start && end; + + // Do not update future local split artifacts. + bool updateLI = !localSplitArtifact; + + if (localSplitArtifact) { + MachineBasicBlock *localMBB = LIS.getMBBFromIndex(*end); + assert(localMBB == LIS.getMBBFromIndex(*start) && + "start and end are expected to be in the same basic block"); + + // Local split artifact will have 2 additional copy instructions and they + // will be in the same BB. + // localLI = COPY other + // ... 
+ // other = COPY localLI + totalWeight += LiveIntervals::getSpillWeight(true, false, &MBFI, localMBB); + totalWeight += LiveIntervals::getSpillWeight(false, true, &MBFI, localMBB); + + numInstr += 2; + } + for (MachineRegisterInfo::reg_instr_iterator I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); I != E; ) { MachineInstr *mi = &*(I++); + + // For local split artifacts, we are interested only in instructions between + // the expected start and end of the range. + SlotIndex si = LIS.getInstructionIndex(*mi); + if (localSplitArtifact && ((si < *start) || (si > *end))) + continue; + numInstr++; if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) continue; @@ -206,23 +253,25 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { Hint.clear(); // Always prefer the physreg hint. - if (unsigned hint = hintPhys ? hintPhys : hintVirt) { - mri.setRegAllocationHint(li.reg, 0, hint); - // Weakly boost the spill weight of hinted registers. - totalWeight *= 1.01F; + if (updateLI) { + if (unsigned hint = hintPhys ? hintPhys : hintVirt) { + mri.setRegAllocationHint(li.reg, 0, hint); + // Weakly boost the spill weight of hinted registers. + totalWeight *= 1.01F; + } } // If the live interval was already unspillable, leave it that way. if (!Spillable) - return; + return -1.0; // Mark li as unspillable if all live ranges are tiny and the interval // is not live at any reg mask. If the interval is live at a reg mask // spilling may be required. - if (li.isZeroLength(LIS.getSlotIndexes()) && + if (updateLI && li.isZeroLength(LIS.getSlotIndexes()) && !li.isLiveAtIndexes(LIS.getRegMaskSlots())) { li.markNotSpillable(); - return; + return -1.0; } // If all of the definitions of the interval are re-materializable, @@ -232,5 +281,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo())) totalWeight *= 0.5F; - li.weight = normalize(totalWeight, li.getSize(), numInstr); + if (localSplitArtifact) + return normalize(totalWeight, start->distance(*end), numInstr); + return normalize(totalWeight, li.getSize(), numInstr); } diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index bbd1f59eb2f76..1e5f15397bb53 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -1710,43 +1710,69 @@ class MemCmpExpansion { ResultBlock() = default; }; - CallInst *CI; + CallInst *const CI; ResultBlock ResBlock; + const uint64_t Size; unsigned MaxLoadSize; - unsigned NumBlocks; - unsigned NumBlocksNonOneByte; - unsigned NumLoadsPerBlock; + uint64_t NumLoads; + uint64_t NumLoadsNonOneByte; + const uint64_t NumLoadsPerBlock; std::vector<BasicBlock *> LoadCmpBlocks; BasicBlock *EndBlock; PHINode *PhiRes; - bool IsUsedForZeroCmp; + const bool IsUsedForZeroCmp; const DataLayout &DL; IRBuilder<> Builder; + // Represents the decomposition in blocks of the expansion. For example, + // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and + // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}]. + // TODO(courbet): Involve the target more in this computation. On X86, 7 + // bytes can be done more efficiently with two overlapping 4-byte loads than + // covering the interval with [{4, 0}, {2, 4}, {1, 6}].
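To make the decomposition in the comment above concrete: `getDecomposition` further down walks load sizes from `MaxLoadSize` downward, greedily emitting as many loads of the current size as fit and halving the size for the remainder. A standalone sketch of that loop using the X86+SSE numbers from the comment; the struct and loop mirror the patch, while `main` and the printing scaffold are illustrative only:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct LoadEntry {
      unsigned LoadSize; // size of this load, in bytes
      uint64_t Offset;   // offset from the base pointer, in bytes
    };

    // Greedy decomposition: largest loads first, halving the load size
    // until the whole interval [0, Size) is covered.
    std::vector<LoadEntry> decompose(uint64_t Size, unsigned MaxLoadSize) {
      std::vector<LoadEntry> Seq;
      uint64_t Offset = 0;
      unsigned LoadSize = MaxLoadSize;
      while (Size) {
        assert(LoadSize > 0 && "zero load size");
        for (uint64_t I = 0, E = Size / LoadSize; I < E; ++I) {
          Seq.push_back({LoadSize, Offset});
          Offset += LoadSize;
        }
        Size %= LoadSize;
        LoadSize /= 2;
      }
      return Seq;
    }

    int main() {
      // 33 bytes with 16-byte max loads: 2x16-byte loads plus 1x1-byte load,
      // i.e. [{16, 0}, {16, 16}, {1, 32}] in {LoadSize, Offset} form.
      for (const LoadEntry &LE : decompose(33, 16))
        std::printf("{%u, %llu} ", LE.LoadSize, (unsigned long long)LE.Offset);
      std::printf("\n");
    }

For the zero-equality form, `getNumBlocks` then groups these loads `NumLoadsPerBlock` at a time with a ceiling division, so the three loads above become two blocks at two loads per block.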
+ struct LoadEntry { + LoadEntry(unsigned LoadSize, uint64_t Offset) + : LoadSize(LoadSize), Offset(Offset) { + assert(Offset % LoadSize == 0 && "invalid load entry"); + } + + uint64_t getGEPIndex() const { return Offset / LoadSize; } + + // The size of the load for this block, in bytes. + const unsigned LoadSize; + // The offset of this load WRT the base pointer, in bytes. + const uint64_t Offset; + }; + SmallVector<LoadEntry, 8> LoadSequence; + void computeLoadSequence(); - unsigned calculateNumBlocks(unsigned Size); void createLoadCmpBlocks(); void createResultBlock(); void setupResultBlockPHINodes(); void setupEndBlockPHINodes(); - void emitLoadCompareBlock(unsigned Index, unsigned LoadSize, - unsigned GEPIndex); - Value *getCompareLoadPairs(unsigned Index, unsigned Size, - unsigned &NumBytesProcessed); - void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size, - unsigned &NumBytesProcessed); - void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex); + Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex); + void emitLoadCompareBlock(unsigned BlockIndex); + void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, + unsigned &LoadIndex); + void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex); void emitMemCmpResultBlock(); - Value *getMemCmpExpansionZeroCase(unsigned Size); - Value *getMemCmpEqZeroOneBlock(unsigned Size); - Value *getMemCmpOneBlock(unsigned Size); - unsigned getLoadSize(unsigned Size); - unsigned getNumLoads(unsigned Size); + Value *getMemCmpExpansionZeroCase(); + Value *getMemCmpEqZeroOneBlock(); + Value *getMemCmpOneBlock(); -public: + // Computes the decomposition. This is the common code to compute the number + // of loads and the actual load sequence. `callback` is called with each load + // size and number of loads for the block size. + template <typename CallBackT> + void getDecomposition(CallBackT callback) const; + + public: MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize, unsigned NumLoadsPerBlock, const DataLayout &DL); - Value *getMemCmpExpansion(uint64_t Size); + unsigned getNumBlocks(); + uint64_t getNumLoads() const { return NumLoads; } + + Value *getMemCmpExpansion(); }; } // end anonymous namespace @@ -1759,43 +1785,74 @@ class MemCmpExpansion { // return from. // 3. ResultBlock, block to branch to for early exit when a // LoadCmpBlock finds a difference. -MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size, - unsigned MaxLoadSize, unsigned LoadsPerBlock, +MemCmpExpansion::MemCmpExpansion(CallInst *const CI, uint64_t Size, + const unsigned MaxLoadSize, + const unsigned LoadsPerBlock, const DataLayout &TheDataLayout) - : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock), - DL(TheDataLayout), Builder(CI) { - // A memcmp with zero-comparison with only one block of load and compare does - // not need to set up any extra blocks. This case could be handled in the DAG, - // but since we have all of the machinery to flexibly expand any memcpy here, - // we choose to handle this case too to avoid fragmented lowering. - IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI); - NumBlocks = calculateNumBlocks(Size); - if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || NumBlocks != 1) { - BasicBlock *StartBlock = CI->getParent(); - EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); - setupEndBlockPHINodes(); - createResultBlock(); - - // If return value of memcmp is not used in a zero equality, we need to - // calculate which source was larger.
The calculation requires the - // two loaded source values of each load compare block. - // These will be saved in the phi nodes created by setupResultBlockPHINodes. - if (!IsUsedForZeroCmp) - setupResultBlockPHINodes(); - - // Create the number of required load compare basic blocks. - createLoadCmpBlocks(); + : CI(CI), + Size(Size), + MaxLoadSize(MaxLoadSize), + NumLoads(0), + NumLoadsNonOneByte(0), + NumLoadsPerBlock(LoadsPerBlock), + IsUsedForZeroCmp(isOnlyUsedInZeroEqualityComparison(CI)), + DL(TheDataLayout), + Builder(CI) { + // Scale the max size down if the target can load more bytes than we need. + while (this->MaxLoadSize > Size) { + this->MaxLoadSize /= 2; + } + // Compute the number of loads. At that point we don't want to compute the + // actual decomposition because it might be too large to fit in memory. + getDecomposition([this](unsigned LoadSize, uint64_t NumLoadsForSize) { + NumLoads += NumLoadsForSize; + }); +} - // Update the terminator added by splitBasicBlock to branch to the first - // LoadCmpBlock. - StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); +template <typename CallBackT> +void MemCmpExpansion::getDecomposition(CallBackT callback) const { + unsigned LoadSize = this->MaxLoadSize; + assert(Size > 0 && "zero blocks"); + uint64_t CurSize = Size; + while (CurSize) { + assert(LoadSize > 0 && "zero load size"); + const uint64_t NumLoadsForThisSize = CurSize / LoadSize; + if (NumLoadsForThisSize > 0) { + callback(LoadSize, NumLoadsForThisSize); + CurSize = CurSize % LoadSize; + } + // FIXME: This can result in a non-native load size (e.g. X86-32+SSE can + // load 16 and 4 but not 8), which throws the load count off (e.g. in the + // aforementioned case, 16 bytes will count for 2 loads but will generate + // 4). + LoadSize /= 2; } +} - Builder.SetCurrentDebugLocation(CI->getDebugLoc()); +void MemCmpExpansion::computeLoadSequence() { + uint64_t Offset = 0; + getDecomposition( + [this, &Offset](unsigned LoadSize, uint64_t NumLoadsForSize) { + for (uint64_t I = 0; I < NumLoadsForSize; ++I) { + LoadSequence.push_back({LoadSize, Offset}); + Offset += LoadSize; + } + if (LoadSize > 1) { + ++NumLoadsNonOneByte; + } + }); + assert(LoadSequence.size() == getNumLoads() && "mismatch in number of loads"); +} + +unsigned MemCmpExpansion::getNumBlocks() { + if (IsUsedForZeroCmp) + return getNumLoads() / NumLoadsPerBlock + + (getNumLoads() % NumLoadsPerBlock != 0 ? 1 : 0); + return getNumLoads(); } void MemCmpExpansion::createLoadCmpBlocks() { - for (unsigned i = 0; i < NumBlocks; i++) { + for (unsigned i = 0; i < getNumBlocks(); i++) { BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", EndBlock->getParent(), EndBlock); LoadCmpBlocks.push_back(BB); @@ -1811,12 +1868,12 @@ void MemCmpExpansion::createResultBlock() { // It loads 1 byte from each source of the memcmp parameters with the given // GEPIndex. It then subtracts the two loaded values and adds this result to the // final phi node for selecting the memcmp result. -void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, +void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, unsigned GEPIndex) { Value *Source1 = CI->getArgOperand(0); Value *Source2 = CI->getArgOperand(1); - Builder.SetInsertPoint(LoadCmpBlocks[Index]); + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); // Cast source to LoadSizeType*.
if (Source1->getType() != LoadSizeType) @@ -1839,15 +1896,15 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); - PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]); + PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]); - if (Index < (LoadCmpBlocks.size() - 1)) { + if (BlockIndex < (LoadCmpBlocks.size() - 1)) { // Early exit branch if difference found to EndBlock. Otherwise, continue to // next LoadCmpBlock, Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, ConstantInt::get(Diff->getType(), 0)); BranchInst *CmpBr = - BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp); + BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp); Builder.Insert(CmpBr); } else { // The last block has an unconditional branch to EndBlock. @@ -1856,42 +1913,37 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index, } } -unsigned MemCmpExpansion::getNumLoads(unsigned Size) { - return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize); -} - -unsigned MemCmpExpansion::getLoadSize(unsigned Size) { - return MinAlign(PowerOf2Floor(Size), MaxLoadSize); -} - /// Generate an equality comparison for one or more pairs of loaded values. /// This is used in the case where the memcmp() call is compared equal or not /// equal to zero. -Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, - unsigned &NumBytesProcessed) { +Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex, + unsigned &LoadIndex) { + assert(LoadIndex < getNumLoads() && + "getCompareLoadPairs() called with no remaining loads"); std::vector XorList, OrList; Value *Diff; - unsigned RemainingBytes = Size - NumBytesProcessed; - unsigned NumLoadsRemaining = getNumLoads(RemainingBytes); - unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock); + const unsigned NumLoads = + std::min(getNumLoads() - LoadIndex, NumLoadsPerBlock); // For a single-block expansion, start inserting before the memcmp call. if (LoadCmpBlocks.empty()) Builder.SetInsertPoint(CI); else - Builder.SetInsertPoint(LoadCmpBlocks[Index]); + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); Value *Cmp = nullptr; - for (unsigned i = 0; i < NumLoads; ++i) { - unsigned LoadSize = getLoadSize(RemainingBytes); - unsigned GEPIndex = NumBytesProcessed / LoadSize; - NumBytesProcessed += LoadSize; - RemainingBytes -= LoadSize; - - Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); - Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); - assert(LoadSize <= MaxLoadSize && "Unexpected load type"); + // If we have multiple loads per block, we need to generate a composite + // comparison using xor+or. The type for the combinations is the largest load + // type. + IntegerType *const MaxLoadType = + NumLoads == 1 ? nullptr + : IntegerType::get(CI->getContext(), MaxLoadSize * 8); + for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { + const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; + + IntegerType *LoadSizeType = + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); Value *Source1 = CI->getArgOperand(0); Value *Source2 = CI->getArgOperand(1); @@ -1902,12 +1954,14 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, if (Source2->getType() != LoadSizeType) Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - // Get the base address using the GEPIndex. 
- if (GEPIndex != 0) { - Source1 = Builder.CreateGEP(LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, GEPIndex)); - Source2 = Builder.CreateGEP(LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, GEPIndex)); + // Get the base address using a GEP. + if (CurLoadEntry.Offset != 0) { + Source1 = Builder.CreateGEP( + LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + Source2 = Builder.CreateGEP( + LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); } // Get a constant or load a value for each source address. @@ -1964,13 +2018,13 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size, return Cmp; } -void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( - unsigned Index, unsigned Size, unsigned &NumBytesProcessed) { - Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed); +void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex, + unsigned &LoadIndex) { + Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex); - BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) + BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) ? EndBlock - : LoadCmpBlocks[Index + 1]; + : LoadCmpBlocks[BlockIndex + 1]; // Early exit branch if difference found to ResultBlock. Otherwise, // continue to next LoadCmpBlock or EndBlock. BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); @@ -1979,9 +2033,9 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 // since early exit to ResultBlock was not taken (no difference was found in // any of the bytes). - if (Index == LoadCmpBlocks.size() - 1) { + if (BlockIndex == LoadCmpBlocks.size() - 1) { Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); - PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); + PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); } } @@ -1994,33 +2048,39 @@ void MemCmpExpansion::emitLoadCompareBlockMultipleLoads( // the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with // a special case through emitLoadCompareByteBlock. The special handling can // simply subtract the loaded values and add it to the result phi node. -void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, - unsigned GEPIndex) { - if (LoadSize == 1) { - MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex); +void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) { + // There is one load per block in this case, BlockIndex == LoadIndex. + const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex]; + + if (CurLoadEntry.LoadSize == 1) { + MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, + CurLoadEntry.getGEPIndex()); return; } - Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8); + Type *LoadSizeType = + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); - assert(LoadSize <= MaxLoadSize && "Unexpected load type"); + assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type"); Value *Source1 = CI->getArgOperand(0); Value *Source2 = CI->getArgOperand(1); - Builder.SetInsertPoint(LoadCmpBlocks[Index]); + Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); // Cast source to LoadSizeType*. 
if (Source1->getType() != LoadSizeType) Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); if (Source2->getType() != LoadSizeType) Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - // Get the base address using the GEPIndex. - if (GEPIndex != 0) { - Source1 = Builder.CreateGEP(LoadSizeType, Source1, - ConstantInt::get(LoadSizeType, GEPIndex)); - Source2 = Builder.CreateGEP(LoadSizeType, Source2, - ConstantInt::get(LoadSizeType, GEPIndex)); + // Get the base address using a GEP. + if (CurLoadEntry.Offset != 0) { + Source1 = Builder.CreateGEP( + LoadSizeType, Source1, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); + Source2 = Builder.CreateGEP( + LoadSizeType, Source2, + ConstantInt::get(LoadSizeType, CurLoadEntry.getGEPIndex())); } // Load LoadSizeType from the base address. @@ -2042,14 +2102,14 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, // Add the loaded values to the phi nodes for calculating memcmp result only // if result is not used in a zero equality. if (!IsUsedForZeroCmp) { - ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]); - ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]); + ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]); + ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]); } Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); - BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) + BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) ? EndBlock - : LoadCmpBlocks[Index + 1]; + : LoadCmpBlocks[BlockIndex + 1]; // Early exit branch if difference found to ResultBlock. Otherwise, continue // to next LoadCmpBlock or EndBlock. BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); @@ -2058,9 +2118,9 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize, // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 // since early exit to ResultBlock was not taken (no difference was found in // any of the bytes). - if (Index == LoadCmpBlocks.size() - 1) { + if (BlockIndex == LoadCmpBlocks.size() - 1) { Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); - PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]); + PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); } } @@ -2094,34 +2154,14 @@ void MemCmpExpansion::emitMemCmpResultBlock() { PhiRes->addIncoming(Res, ResBlock.BB); } -unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) { - unsigned NumBlocks = 0; - bool HaveOneByteLoad = false; - unsigned RemainingSize = Size; - unsigned LoadSize = MaxLoadSize; - while (RemainingSize) { - if (LoadSize == 1) - HaveOneByteLoad = true; - NumBlocks += RemainingSize / LoadSize; - RemainingSize = RemainingSize % LoadSize; - LoadSize = LoadSize / 2; - } - NumBlocksNonOneByte = HaveOneByteLoad ? (NumBlocks - 1) : NumBlocks; - - if (IsUsedForZeroCmp) - NumBlocks = NumBlocks / NumLoadsPerBlock + - (NumBlocks % NumLoadsPerBlock != 0 ? 1 : 0); - - return NumBlocks; -} - void MemCmpExpansion::setupResultBlockPHINodes() { Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); Builder.SetInsertPoint(ResBlock.BB); + // Note: this assumes one load per block. 
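The GEP emission above never scales by raw bytes: because `LoadEntry` asserts `Offset % LoadSize == 0`, the byte offset converts exactly to an index in units of the load type, which is what `getGEPIndex` returns once the source pointer has been bitcast to `iN*`. A small arithmetic sketch of just that conversion (plain C++, no LLVM API):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Convert a byte offset into an element index for a pointer that has been
    // bitcast to a LoadSize-byte integer type. Mirrors LoadEntry::getGEPIndex().
    uint64_t getGEPIndex(uint64_t OffsetBytes, unsigned LoadSizeBytes) {
      assert(OffsetBytes % LoadSizeBytes == 0 && "invalid load entry");
      return OffsetBytes / LoadSizeBytes;
    }

    int main() {
      // A 16-byte (i128) load at byte offset 16 is element 1 of an i128 array;
      // a 4-byte (i32) load at byte offset 28 is element 7 of an i32 array.
      std::printf("%llu %llu\n",
                  (unsigned long long)getGEPIndex(16, 16),
                  (unsigned long long)getGEPIndex(28, 4));
    }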
ResBlock.PhiSrc1 = - Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1"); + Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1"); ResBlock.PhiSrc2 = - Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2"); + Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2"); } void MemCmpExpansion::setupEndBlockPHINodes() { @@ -2129,12 +2169,13 @@ void MemCmpExpansion::setupEndBlockPHINodes() { PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); } -Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) { - unsigned NumBytesProcessed = 0; +Value *MemCmpExpansion::getMemCmpExpansionZeroCase() { + unsigned LoadIndex = 0; // This loop populates each of the LoadCmpBlocks with the IR sequence to // handle multiple loads per block. - for (unsigned i = 0; i < NumBlocks; ++i) - emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed); + for (unsigned I = 0; I < getNumBlocks(); ++I) { + emitLoadCompareBlockMultipleLoads(I, LoadIndex); + } emitMemCmpResultBlock(); return PhiRes; @@ -2143,15 +2184,16 @@ Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) { /// A memcmp expansion that compares equality with 0 and only has one block of /// load and compare can bypass the compare, branch, and phi IR that is required /// in the general case. -Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) { - unsigned NumBytesProcessed = 0; - Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed); +Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() { + unsigned LoadIndex = 0; + Value *Cmp = getCompareLoadPairs(0, LoadIndex); + assert(LoadIndex == getNumLoads() && "some entries were not consumed"); return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); } /// A memcmp expansion that only has one block of load and compare can bypass /// the compare, branch, and phi IR that is required in the general case. -Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) { +Value *MemCmpExpansion::getMemCmpOneBlock() { assert(NumLoadsPerBlock == 1 && "Only handles one load pair per block"); Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); @@ -2198,37 +2240,43 @@ Value *MemCmpExpansion::getMemCmpOneBlock(unsigned Size) { // This function expands the memcmp call into an inline expansion and returns // the memcmp result. -Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) { +Value *MemCmpExpansion::getMemCmpExpansion() { + computeLoadSequence(); + // A memcmp with zero-comparison with only one block of load and compare does + // not need to set up any extra blocks. This case could be handled in the DAG, + // but since we have all of the machinery to flexibly expand any memcpy here, + // we choose to handle this case too to avoid fragmented lowering. + if ((!IsUsedForZeroCmp && NumLoadsPerBlock != 1) || getNumBlocks() != 1) { + BasicBlock *StartBlock = CI->getParent(); + EndBlock = StartBlock->splitBasicBlock(CI, "endblock"); + setupEndBlockPHINodes(); + createResultBlock(); + + // If return value of memcmp is not used in a zero equality, we need to + // calculate which source was larger. The calculation requires the + // two loaded source values of each load compare block. + // These will be saved in the phi nodes created by setupResultBlockPHINodes. + if (!IsUsedForZeroCmp) setupResultBlockPHINodes(); + + // Create the number of required load compare basic blocks. + createLoadCmpBlocks(); + + // Update the terminator added by splitBasicBlock to branch to the first + // LoadCmpBlock. 
+ StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); + } + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + if (IsUsedForZeroCmp) - return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) : - getMemCmpExpansionZeroCase(Size); + return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock() + : getMemCmpExpansionZeroCase(); // TODO: Handle more than one load pair per block in getMemCmpOneBlock(). - if (NumBlocks == 1 && NumLoadsPerBlock == 1) - return getMemCmpOneBlock(Size); - - // This loop calls emitLoadCompareBlock for comparing Size bytes of the two - // memcmp sources. It starts with loading using the maximum load size set by - // the target. It processes any remaining bytes using a load size which is the - // next smallest power of 2. - unsigned LoadSize = MaxLoadSize; - unsigned NumBytesToBeProcessed = Size; - unsigned Index = 0; - while (NumBytesToBeProcessed) { - // Calculate how many blocks we can create with the current load size. - unsigned NumBlocks = NumBytesToBeProcessed / LoadSize; - unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize; - NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize; - - // For each NumBlocks, populate the instruction sequence for loading and - // comparing LoadSize bytes. - while (NumBlocks--) { - emitLoadCompareBlock(Index, LoadSize, GEPIndex); - Index++; - GEPIndex++; - } - // Get the next LoadSize to use. - LoadSize = LoadSize / 2; + if (getNumBlocks() == 1 && NumLoadsPerBlock == 1) return getMemCmpOneBlock(); + + for (unsigned I = 0; I < getNumBlocks(); ++I) { + emitLoadCompareBlock(I); } emitMemCmpResultBlock(); @@ -2312,12 +2360,6 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, const TargetLowering *TLI, const DataLayout *DL) { NumMemCmpCalls++; - // TTI call to check if target would like to expand memcmp. Also, get the - // MaxLoadSize. - unsigned MaxLoadSize; - if (!TTI->enableMemCmpExpansion(MaxLoadSize)) - return false; - // Early exit from expansion if -Oz. if (CI->getFunction()->optForMinSize()) return false; @@ -2328,36 +2370,26 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, NumMemCmpNotConstant++; return false; } + const uint64_t SizeVal = SizeCast->getZExtValue(); - // Scale the max size down if the target can load more bytes than we need. - uint64_t SizeVal = SizeCast->getZExtValue(); - if (MaxLoadSize > SizeVal) - MaxLoadSize = 1 << SizeCast->getValue().logBase2(); + // TTI call to check if target would like to expand memcmp. Also, get the + // max LoadSize. + unsigned MaxLoadSize; + if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return false; - // Calculate how many load pairs are needed for the constant size. - unsigned NumLoads = 0; - unsigned RemainingSize = SizeVal; - unsigned LoadSize = MaxLoadSize; - while (RemainingSize) { - NumLoads += RemainingSize / LoadSize; - RemainingSize = RemainingSize % LoadSize; - LoadSize = LoadSize / 2; - } + MemCmpExpansion Expansion(CI, SizeVal, MaxLoadSize, MemCmpNumLoadsPerBlock, + *DL); // Don't expand if this will require more loads than desired by the target. - if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) { + if (Expansion.getNumLoads() > + TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) { NumMemCmpGreaterThanMax++; return false; } NumMemCmpInlined++; - // MemCmpHelper object creates and sets up basic blocks required for - // expanding memcmp with size SizeVal. 
- unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock; - MemCmpExpansion MemCmpHelper(CI, SizeVal, MaxLoadSize, NumLoadsPerBlock, *DL); - - Value *Res = MemCmpHelper.getMemCmpExpansion(SizeVal); + Value *Res = Expansion.getMemCmpExpansion(); // Replace call with result of expansion and erase call. CI->replaceAllUsesWith(Res); @@ -2676,16 +2708,68 @@ namespace { struct ExtAddrMode : public TargetLowering::AddrMode { Value *BaseReg = nullptr; Value *ScaledReg = nullptr; + Value *OriginalValue = nullptr; + + enum FieldName { + NoField = 0x00, + BaseRegField = 0x01, + BaseGVField = 0x02, + BaseOffsField = 0x04, + ScaledRegField = 0x08, + ScaleField = 0x10, + MultipleFields = 0xff + }; ExtAddrMode() = default; void print(raw_ostream &OS) const; void dump() const; - bool operator==(const ExtAddrMode& O) const { - return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && - (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && - (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); + FieldName compare(const ExtAddrMode &other) { + // First check that the types are the same on each field, as differing types + // is something we can't cope with later on. + if (BaseReg && other.BaseReg && + BaseReg->getType() != other.BaseReg->getType()) + return MultipleFields; + if (BaseGV && other.BaseGV && + BaseGV->getType() != other.BaseGV->getType()) + return MultipleFields; + if (ScaledReg && other.ScaledReg && + ScaledReg->getType() != other.ScaledReg->getType()) + return MultipleFields; + + // Check each field to see if it differs. + unsigned Result = NoField; + if (BaseReg != other.BaseReg) + Result |= BaseRegField; + if (BaseGV != other.BaseGV) + Result |= BaseGVField; + if (BaseOffs != other.BaseOffs) + Result |= BaseOffsField; + if (ScaledReg != other.ScaledReg) + Result |= ScaledRegField; + // Don't count 0 as being a different scale, because that actually means + // unscaled (which will already be counted by having no ScaledReg). + if (Scale && other.Scale && Scale != other.Scale) + Result |= ScaleField; + + if (countPopulation(Result) > 1) + return MultipleFields; + else + return static_cast(Result); + } + + // AddrModes with a base reg or gv where the reg/gv is just the original + // value are trivial. + bool isTrivial() { + bool Trivial = (BaseGV && BaseGV == OriginalValue) || + (BaseReg && BaseReg == OriginalValue); + // If the AddrMode is trivial it shouldn't have an offset or be scaled. + if (Trivial) { + assert(BaseOffs == 0); + assert(Scale == 0); + } + return Trivial; } }; @@ -3302,6 +3386,92 @@ class AddressingModeMatcher { Value *PromotedOperand) const; }; +/// \brief A helper class for combining addressing modes. +class AddressingModeCombiner { +private: + /// The addressing modes we've collected. + SmallVector AddrModes; + + /// The field in which the AddrModes differ, when we have more than one. + ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField; + + /// Are the AddrModes that we have all just equal to their original values? + bool AllAddrModesTrivial = true; + +public: + /// \brief Get the combined AddrMode + const ExtAddrMode &getAddrMode() const { + return AddrModes[0]; + } + + /// \brief Add a new AddrMode if it's compatible with the AddrModes we already + /// have. + /// \return True iff we succeeded in doing so. + bool addNewAddrMode(ExtAddrMode &NewAddrMode) { + // Take note of if we have any non-trivial AddrModes, as we need to detect + // when all AddrModes are trivial as then we would introduce a phi or select + // which just duplicates what's already there. 
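`ExtAddrMode::compare` above sets one bit per differing field and then uses a population count to collapse any multi-bit result into `MultipleFields`, so callers only ever see "same", "differs in exactly one field", or "too different to combine". A minimal standalone sketch of the idiom, with a made-up three-field mode rather than the real `ExtAddrMode`:

    #include <bitset>
    #include <cstdio>

    enum FieldName {
      NoField = 0x0,
      BaseField = 0x1,
      OffsetField = 0x2,
      ScaleField = 0x4,
      MultipleFields = 0xff,
    };

    struct Mode { int Base; int Offset; int Scale; };

    FieldName compare(const Mode &A, const Mode &B) {
      unsigned Result = NoField;
      if (A.Base != B.Base) Result |= BaseField;
      if (A.Offset != B.Offset) Result |= OffsetField;
      if (A.Scale != B.Scale) Result |= ScaleField;
      // More than one bit set means the modes differ in several dimensions.
      if (std::bitset<8>(Result).count() > 1)
        return MultipleFields;
      return static_cast<FieldName>(Result);
    }

    int main() {
      std::printf("%d\n", compare({1, 0, 0}, {2, 0, 0})); // BaseField (1)
      std::printf("%d\n", compare({1, 0, 0}, {2, 4, 0})); // MultipleFields (255)
    }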
+ AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial(); + + // If this is the first addrmode then everything is fine. + if (AddrModes.empty()) { + AddrModes.emplace_back(NewAddrMode); + return true; + } + + // Figure out how different this is from the other address modes, which we + // can do just by comparing against the first one given that we only care + // about the cumulative difference. + ExtAddrMode::FieldName ThisDifferentField = + AddrModes[0].compare(NewAddrMode); + if (DifferentField == ExtAddrMode::NoField) + DifferentField = ThisDifferentField; + else if (DifferentField != ThisDifferentField) + DifferentField = ExtAddrMode::MultipleFields; + + // If this AddrMode is the same as all the others then everything is fine + // (which should only happen when there is actually only one AddrMode). + if (DifferentField == ExtAddrMode::NoField) { + assert(AddrModes.size() == 1); + return true; + } + + // If NewAddrMode differs in only one dimension then we can handle it by + // inserting a phi/select later on. + if (DifferentField != ExtAddrMode::MultipleFields) { + AddrModes.emplace_back(NewAddrMode); + return true; + } + + // We couldn't combine NewAddrMode with the rest, so return failure. + AddrModes.clear(); + return false; + } + + /// \brief Combine the addressing modes we've collected into a single + /// addressing mode. + /// \return True iff we successfully combined them or we only had one so + /// didn't need to combine them anyway. + bool combineAddrModes() { + // If we have no AddrModes then they can't be combined. + if (AddrModes.size() == 0) + return false; + + // A single AddrMode can trivially be combined. + if (AddrModes.size() == 1) + return true; + + // If the AddrModes we collected are all just equal to the value they are + // derived from then combining them wouldn't do anything useful. + if (AllAddrModesTrivial) + return false; + + // TODO: Combine multiple AddrModes by inserting a select or phi for the + // field in which the AddrModes differ. + return false; + } +}; + } // end anonymous namespace /// Try adding ScaleReg*Scale to the current addressing mode. @@ -4389,13 +4559,12 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, SmallPtrSet Visited; worklist.push_back(Addr); - // Use a worklist to iteratively look through PHI nodes, and ensure that - // the addressing mode obtained from the non-PHI roots of the graph - // are equivalent. - bool AddrModeFound = false; - bool PhiSeen = false; + // Use a worklist to iteratively look through PHI and select nodes, and + // ensure that the addressing mode obtained from the non-PHI/select roots of + // the graph are compatible. + bool PhiOrSelectSeen = false; SmallVector AddrModeInsts; - ExtAddrMode AddrMode; + AddressingModeCombiner AddrModes; TypePromotionTransaction TPT(RemovedInsts); TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); @@ -4419,7 +4588,14 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (PHINode *P = dyn_cast(V)) { for (Value *IncValue : P->incoming_values()) worklist.push_back(IncValue); - PhiSeen = true; + PhiOrSelectSeen = true; + continue; + } + // Similar for select. 
+ if (SelectInst *SI = dyn_cast(V)) { + worklist.push_back(SI->getFalseValue()); + worklist.push_back(SI->getTrueValue()); + PhiOrSelectSeen = true; continue; } @@ -4430,30 +4606,29 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, InsertedInsts, PromotedInsts, TPT); + NewAddrMode.OriginalValue = V; - if (!AddrModeFound) { - AddrModeFound = true; - AddrMode = NewAddrMode; - continue; - } - if (NewAddrMode == AddrMode) - continue; - - AddrModeFound = false; - break; + if (!AddrModes.addNewAddrMode(NewAddrMode)) + break; } - // If the addressing mode couldn't be determined, or if multiple different - // ones were determined, bail out now. - if (!AddrModeFound) { + // Try to combine the AddrModes we've collected. If we couldn't collect any, + // or we have multiple but either couldn't combine them or combining them + // wouldn't do anything useful, bail out now. + if (!AddrModes.combineAddrModes()) { TPT.rollback(LastKnownGood); return false; } TPT.commit(); + // Get the combined AddrMode (or the only AddrMode, if we only had one). + ExtAddrMode AddrMode = AddrModes.getAddrMode(); + // If all the instructions matched are already in this BB, don't do anything. - // If we saw Phi node then it is not local definitely. - if (!PhiSeen && none_of(AddrModeInsts, [&](Value *V) { + // If we saw a Phi node then it is not local definitely, and if we saw a select + // then we want to push the address calculation past it even if it's already + // in this BB. + if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) { return IsNonLocalValue(V, MemoryInst->getParent()); })) { DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); diff --git a/lib/CodeGen/CountingFunctionInserter.cpp b/lib/CodeGen/CountingFunctionInserter.cpp index 7f7350f5fb5cd..15af09807ba62 100644 --- a/lib/CodeGen/CountingFunctionInserter.cpp +++ b/lib/CodeGen/CountingFunctionInserter.cpp @@ -27,13 +27,13 @@ namespace { CountingFunctionInserter() : FunctionPass(ID) { initializeCountingFunctionInserterPass(*PassRegistry::getPassRegistry()); } - + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved(); } bool runOnFunction(Function &F) override { - std::string CountingFunctionName = + StringRef CountingFunctionName = F.getFnAttribute("counting-function").getValueAsString(); if (CountingFunctionName.empty()) return false; @@ -46,17 +46,13 @@ namespace { return true; } }; - + char CountingFunctionInserter::ID = 0; } -INITIALIZE_PASS(CountingFunctionInserter, "cfinserter", +INITIALIZE_PASS(CountingFunctionInserter, "cfinserter", "Inserts calls to mcount-like functions", false, false) -//===----------------------------------------------------------------------===// -// -// CountingFunctionInserter - Give any unnamed non-void instructions "tmp" names. 
-// FunctionPass *llvm::createCountingFunctionInserterPass() { return new CountingFunctionInserter(); } diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index a3cf2846d2f5d..a791c01c48b35 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -1,4 +1,4 @@ -//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===// +//===- CriticalAntiDepBreaker.cpp - Anti-dep breaker ----------------------===// // // The LLVM Compiler Infrastructure // @@ -14,14 +14,29 @@ //===----------------------------------------------------------------------===// #include "CriticalAntiDepBreaker.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include using namespace llvm; @@ -35,8 +50,7 @@ CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi, Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0), DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {} -CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { -} +CriticalAntiDepBreaker::~CriticalAntiDepBreaker() = default; void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { const unsigned BBSize = BB->size(); @@ -333,8 +347,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) { bool CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, RegRefIter RegRefEnd, - unsigned NewReg) -{ + unsigned NewReg) { for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) { MachineOperand *RefOper = I->second; @@ -381,8 +394,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVectorImpl &Forbid) -{ + SmallVectorImpl &Forbid) { ArrayRef Order = RegClassInfo.getOrder(RC); for (unsigned i = 0; i != Order.size(); ++i) { unsigned NewReg = Order[i]; @@ -423,7 +435,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, } unsigned CriticalAntiDepBreaker:: -BreakAntiDependencies(const std::vector& SUnits, +BreakAntiDependencies(const std::vector &SUnits, MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned InsertPosIndex, @@ -436,7 +448,7 @@ BreakAntiDependencies(const std::vector& SUnits, // This is used for updating debug information. // // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap - DenseMap MISUnitMap; + DenseMap MISUnitMap; // Find the node at the bottom of the critical path. 
   const SUnit *Max = nullptr;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 678779fa1a267..09c4423a2f057 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -1,4 +1,4 @@
-//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//===- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -18,16 +18,21 @@
 #include "AntiDepBreaker.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Compiler.h"
+#include <map>
+#include <vector>

 namespace llvm {
+
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class MachineOperand;
+class MachineRegisterInfo;
 class RegisterClassInfo;
 class TargetInstrInfo;
+class TargetRegisterClass;
 class TargetRegisterInfo;
-class MachineFunction;

 class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
   MachineFunction& MF;
@@ -46,12 +51,13 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
   /// corresponding value is null. If the register is live but used in
   /// multiple register classes, the corresponding value is -1 casted to a
   /// pointer.
-  std::vector<const TargetRegisterClass*> Classes;
+  std::vector<const TargetRegisterClass *> Classes;

   /// Map registers to all their references within a live range.
   std::multimap<unsigned, MachineOperand *> RegRefs;
-  typedef std::multimap<unsigned, MachineOperand *>::const_iterator
-    RegRefIter;
+
+  using RegRefIter =
+      std::multimap<unsigned, MachineOperand *>::const_iterator;

   /// The index of the most recent kill (proceeding bottom-up),
   /// or ~0u if the register is not live.
@@ -66,7 +72,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
   BitVector KeepRegs;

 public:
-  CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
+  CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI);
   ~CriticalAntiDepBreaker() override;

   /// Initialize anti-dep breaking for a new basic block.
@@ -74,7 +80,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {

   /// Identify anti-dependencies along the critical path
   /// of the ScheduleDAG and break them by renaming registers.
-  unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+  unsigned BreakAntiDependencies(const std::vector<SUnit> &SUnits,
                                  MachineBasicBlock::iterator Begin,
                                  MachineBasicBlock::iterator End,
                                  unsigned InsertPosIndex,
@@ -101,6 +107,7 @@ class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker {
                                   const TargetRegisterClass *RC,
                                   SmallVectorImpl<unsigned> &Forbid);
 };
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 853b9afa1026c..cf21316ec22dd 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -336,6 +336,38 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
   VLIWScheduler->finishBlock();
 }

+bool VLIWPacketizerList::alias(const MachineMemOperand &Op1,
+                               const MachineMemOperand &Op2,
+                               bool UseTBAA) const {
+  if (!Op1.getValue() || !Op2.getValue())
+    return true;
+
+  int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset());
+  int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset;
+  int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset;
+
+  AliasResult AAResult =
+      AA->alias(MemoryLocation(Op1.getValue(), Overlapa,
+                               UseTBAA ? Op1.getAAInfo() : AAMDNodes()),
+                MemoryLocation(Op2.getValue(), Overlapb,
+                               UseTBAA ? Op2.getAAInfo() : AAMDNodes()));
+
+  return AAResult != NoAlias;
+}
+
+bool VLIWPacketizerList::alias(const MachineInstr &MI1,
+                               const MachineInstr &MI2,
+                               bool UseTBAA) const {
+  if (MI1.memoperands_empty() || MI2.memoperands_empty())
+    return true;
+
+  for (const MachineMemOperand *Op1 : MI1.memoperands())
+    for (const MachineMemOperand *Op2 : MI2.memoperands())
+      if (alias(*Op1, *Op2, UseTBAA))
+        return true;
+  return false;
+}
+
 // Add a DAG mutation object to the ordered list.
 void VLIWPacketizerList::addMutation(
     std::unique_ptr<ScheduleDAGMutation> Mutation) {
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index b5f84863b59ff..4133a26b96c25 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -172,7 +172,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
       BasicBlock *BB = RI->getParent();
       new UnreachableInst(Ctx, RI);
       RI->eraseFromParent();
-      SimplifyCFG(BB, TTI);
+      simplifyCFG(BB, TTI);
     }
   }
   Resumes.resize(ResumesLeft);
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index be0c5c2bb70e6..93db334f45453 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -160,10 +160,11 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
     // FIXME: bitconverting between vector types may or may not be a
     // nop in big-endian situations.
     return ValReg;
-  case CCValAssign::AExt:
+  case CCValAssign::AExt: {
     assert(!VA.getLocVT().isVector() && "unexpected vector extend");
-    // Otherwise, it's a nop.
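
Note on the VLIWPacketizerList::alias arithmetic added above: rebasing both
accesses to the smaller offset gives each MemoryLocation a width that reaches
the end of its own access, so the lower access keeps its width and the higher
one is widened by the gap between the offsets. A standalone model of just that
arithmetic (plain C++, not LLVM API; names are illustrative):

  #include <algorithm>
  #include <cstdint>
  #include <utility>

  struct MemAccess { int64_t Offset, Size; };

  // Returns the widths handed to the two alias queries.
  static std::pair<int64_t, int64_t> widenedWidths(MemAccess A, MemAccess B) {
    int64_t MinOffset = std::min(A.Offset, B.Offset);
    return {A.Size + A.Offset - MinOffset,   // Overlapa
            B.Size + B.Offset - MinOffset};  // Overlapb
  }

  // Example: widenedWidths({8, 8}, {12, 8}) == {8, 12} -- the access at
  // offset 8 keeps width 8, the one at offset 12 grows by the 4-byte gap.
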
- return ValReg; + auto MIB = MIRBuilder.buildAnyExt(LocTy, ValReg); + return MIB->getOperand(0).getReg(); + } case CCValAssign::SExt: { unsigned NewReg = MRI.createGenericVirtualRegister(LocTy); MIRBuilder.buildSExt(NewReg, ValReg); diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index aaa7b73572f21..8e31ed0a01539 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -15,7 +15,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/LowLevelType.h" diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index bf427225d6a96..2a563c9bf5c28 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -6,8 +6,10 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file implements the InstructionSelector class. +// //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" @@ -16,11 +18,8 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrDesc.h" -#include "llvm/IR/Constants.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -31,7 +30,7 @@ using namespace llvm; InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) - : Renderers(MaxRenderers, nullptr), MIs() {} + : Renderers(MaxRenderers), MIs() {} InstructionSelector::InstructionSelector() = default; @@ -100,6 +99,23 @@ bool InstructionSelector::isOperandImmEqual( return false; } +bool InstructionSelector::isBaseWithConstantOffset( + const MachineOperand &Root, const MachineRegisterInfo &MRI) const { + if (!Root.isReg()) + return false; + + MachineInstr *RootI = MRI.getVRegDef(Root.getReg()); + if (RootI->getOpcode() != TargetOpcode::G_GEP) + return false; + + MachineOperand &RHS = RootI->getOperand(2); + MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg()); + if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT) + return false; + + return true; +} + bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI) const { return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() && MI.implicit_operands().begin() == MI.implicit_operands().end(); diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index 1c474b9984601..fb954f3c3f166 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -70,6 +70,9 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // convergence for performance reasons. 
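
Note: the Legalizer change that follows applies a classic allocation-reuse
pattern: the WorkList/CombineList set-vectors move out of the per-instruction
loop and are clear()ed instead of reconstructed, so their storage is reused on
every iteration. In miniature (element count assumed, loop body elided):

  SmallSetVector<MachineInstr *, 8> WorkList;   // constructed once
  SmallSetVector<MachineInstr *, 8> CombineList;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      WorkList.clear();      // keeps the allocated capacity
      CombineList.clear();
      WorkList.insert(&MI);
      // ... legalize and combine, as in the hunk below ...
    }
  }
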
bool Changed = false; MachineBasicBlock::iterator NextMI; + using VecType = SmallSetVector; + VecType WorkList; + VecType CombineList; for (auto &MBB : MF) { for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { // Get the next Instruction before we try to legalize, because there's a @@ -81,9 +84,8 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { if (!isPreISelGenericOpcode(MI->getOpcode())) continue; unsigned NumNewInsns = 0; - using VecType = SetVector>; - VecType WorkList; - VecType CombineList; + WorkList.clear(); + CombineList.clear(); Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { // Only legalize pre-isel generic instructions. // Legalization process could generate Target specific pseudo @@ -95,7 +97,8 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { } }); WorkList.insert(&*MI); - LegalizerCombiner C(Helper.MIRBuilder, MF.getRegInfo()); + LegalizerCombiner C(Helper.MIRBuilder, MF.getRegInfo(), + Helper.getLegalizerInfo()); bool Changed = false; LegalizerHelper::LegalizeResult Res; do { @@ -156,7 +159,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); MachineIRBuilder MIRBuilder(MF); - LegalizerCombiner C(MIRBuilder, MRI); + LegalizerCombiner C(MIRBuilder, MRI, Helper.getLegalizerInfo()); for (auto &MBB : MF) { for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { // Get the next Instruction before we try to legalize, because there's a diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index a70e46e67df60..cd6684fbb4e2b 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -396,6 +396,50 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_OR: { + // Legalize bitwise operation: + // A = BinOp B, C + // into: + // B1, ..., BN = G_UNMERGE_VALUES B + // C1, ..., CN = G_UNMERGE_VALUES C + // A1 = BinOp B1, C2 + // ... + // AN = BinOp BN, CN + // A = G_MERGE_VALUES A1, ..., AN + unsigned NarrowSize = NarrowTy.getSizeInBits(); + int NumParts = + MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; + + // List the registers where the destination will be scattered. + SmallVector DstRegs; + // List the registers where the first argument will be split. + SmallVector SrcsReg1; + // List the registers where the second argument will be split. + SmallVector SrcsReg2; + // Create all the temporary registers. + for (int i = 0; i < NumParts; ++i) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); + unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy); + unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy); + + DstRegs.push_back(DstReg); + SrcsReg1.push_back(SrcReg1); + SrcsReg2.push_back(SrcReg2); + } + // Explode the big arguments into smaller chunks. + MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg()); + MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg()); + + // Do the operation on each small part. + for (int i = 0; i < NumParts; ++i) + MIRBuilder.buildOr(DstRegs[i], SrcsReg1[i], SrcsReg2[i]); + + // Gather the destination registers into the final destination. 
+ unsigned DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; + } } } diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 76917aa9660d4..e7a46eadb443f 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -48,6 +48,7 @@ LegalizerInfo::LegalizerInfo() { DefaultActions[TargetOpcode::G_ADD] = NarrowScalar; DefaultActions[TargetOpcode::G_LOAD] = NarrowScalar; DefaultActions[TargetOpcode::G_STORE] = NarrowScalar; + DefaultActions[TargetOpcode::G_OR] = NarrowScalar; DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar; DefaultActions[TargetOpcode::G_INSERT] = NarrowScalar; @@ -57,7 +58,7 @@ LegalizerInfo::LegalizerInfo() { void LegalizerInfo::computeTables() { for (unsigned Opcode = 0; Opcode <= LastOp - FirstOp; ++Opcode) { - for (unsigned Idx = 0; Idx != Actions[Opcode].size(); ++Idx) { + for (unsigned Idx = 0, End = Actions[Opcode].size(); Idx != End; ++Idx) { for (auto &Action : Actions[Opcode][Idx]) { LLT Ty = Action.first; if (!Ty.isVector()) @@ -144,8 +145,9 @@ std::tuple LegalizerInfo::getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const { SmallBitVector SeenTypes(8); - const MCOperandInfo *OpInfo = MI.getDesc().OpInfo; - for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) { + const MCInstrDesc &MCID = MI.getDesc(); + const MCOperandInfo *OpInfo = MCID.OpInfo; + for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) { if (!OpInfo[i].isGenericType()) continue; diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 677941dbbf6da..90d9f2d20bbe2 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -221,9 +221,8 @@ uint64_t RegBankSelect::getRepairCost( // into a new virtual register. // We would also need to propagate this information in the // repairing placement. - unsigned Cost = - RBI->copyCost(*DesiredRegBrank, *CurRegBank, - RegisterBankInfo::getSizeInBits(MO.getReg(), *MRI, *TRI)); + unsigned Cost = RBI->copyCost(*DesiredRegBrank, *CurRegBank, + RBI->getSizeInBits(MO.getReg(), *MRI, *TRI)); // TODO: use a dedicated constant for ImpossibleCost. 
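
Note: with the G_OR case added to narrowScalar and NarrowScalar registered as
a default action above, an over-wide OR is split into NarrowTy-sized pieces.
For a 128-bit OR narrowed to 64 bits, the emitted generic MIR looks roughly
like this (illustrative virtual register names):

  %b0:_(s64), %b1:_(s64) = G_UNMERGE_VALUES %b(s128)
  %c0:_(s64), %c1:_(s64) = G_UNMERGE_VALUES %c(s128)
  %a0:_(s64) = G_OR %b0, %c0
  %a1:_(s64) = G_OR %b1, %c1
  %a:_(s128) = G_MERGE_VALUES %a0(s64), %a1(s64)

A target can also opt in explicitly from its LegalizerInfo subclass
constructor; a sketch, assuming a target where 128-bit G_OR is unsupported:

  setAction({TargetOpcode::G_OR, LLT::scalar(128)}, NarrowScalar);
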
if (Cost != std::numeric_limits::max()) return Cost; diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index a841902feed11..f117c6094534b 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -84,7 +84,7 @@ const RegisterBank * RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return &getRegBankFromRegClass(*TRI.getMinimalPhysRegClass(Reg)); + return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI)); assert(Reg && "NoRegister does not have a register bank"); const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); @@ -95,6 +95,19 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI, return nullptr; } +const TargetRegisterClass & +RegisterBankInfo::getMinimalPhysRegClass(unsigned Reg, + const TargetRegisterInfo &TRI) const { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Reg must be a physreg"); + const auto &RegRCIt = PhysRegMinimalRCs.find(Reg); + if (RegRCIt != PhysRegMinimalRCs.end()) + return *RegRCIt->second; + const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg); + PhysRegMinimalRCs[Reg] = PhysRC; + return *PhysRC; +} + const RegisterBank *RegisterBankInfo::getRegBankFromConstraints( const MachineInstr &MI, unsigned OpIdx, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) const { @@ -151,7 +164,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const { // is important. The rest is not constrained. unsigned NumOperandsForMapping = IsCopyLike ? 1 : MI.getNumOperands(); - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const TargetSubtargetInfo &STI = MF.getSubtarget(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -441,13 +454,13 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { unsigned RegisterBankInfo::getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI) { + const TargetRegisterInfo &TRI) const { const TargetRegisterClass *RC = nullptr; if (TargetRegisterInfo::isPhysicalRegister(Reg)) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. - RC = TRI.getMinimalPhysRegClass(Reg); + RC = &getMinimalPhysRegClass(Reg, TRI); } else { LLT Ty = MRI.getType(Reg); unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0; @@ -543,10 +556,11 @@ bool RegisterBankInfo::InstructionMapping::verify( // For PHI, we only care about mapping the definition. assert(NumOperands == (isCopyLike(MI) ? 1 : MI.getNumOperands()) && "NumOperands must match, see constructor"); - assert(MI.getParent() && MI.getParent()->getParent() && + assert(MI.getParent() && MI.getMF() && "MI must be connected to a MachineFunction"); - const MachineFunction &MF = *MI.getParent()->getParent(); - (void)MF; + const MachineFunction &MF = *MI.getMF(); + const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo(); + (void)RBI; for (unsigned Idx = 0; Idx < NumOperands; ++Idx) { const MachineOperand &MO = MI.getOperand(Idx); @@ -564,7 +578,7 @@ bool RegisterBankInfo::InstructionMapping::verify( (void)MOMapping; // Register size in bits. // This size must match what the mapping expects. 
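
Note: TargetRegisterInfo::getMinimalPhysRegClass does a linear scan over all
register classes, so the PhysRegMinimalRCs cache introduced above turns
repeated physreg queries into hash-map hits. Its essence, reduced to a sketch
(helper name illustrative; this assumes the map is declared mutable so const
queries can populate it):

  mutable DenseMap<unsigned, const TargetRegisterClass *> PhysRegMinimalRCs;

  const TargetRegisterClass &
  getCachedMinimalRC(unsigned Reg, const TargetRegisterInfo &TRI) const {
    auto It = PhysRegMinimalRCs.find(Reg);
    if (It != PhysRegMinimalRCs.end())
      return *It->second;                       // cache hit
    const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
    PhysRegMinimalRCs[Reg] = RC;                // fill on first use
    return *RC;
  }
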
- assert(MOMapping.verify(getSizeInBits( + assert(MOMapping.verify(RBI->getSizeInBits( Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) && "Value mapping is invalid"); } @@ -725,8 +739,8 @@ void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS, // If we have a function, we can pretty print the name of the registers. // Otherwise we will print the raw numbers. const TargetRegisterInfo *TRI = - getMI().getParent() && getMI().getParent()->getParent() - ? getMI().getParent()->getParent()->getSubtarget().getRegisterInfo() + getMI().getParent() && getMI().getMF() + ? getMI().getMF()->getSubtarget().getRegisterInfo() : nullptr; bool IsFirst = true; for (unsigned Idx = 0; Idx != NumOpds; ++Idx) { diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index ccb992d92e54a..08720d1271f36 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -1361,8 +1361,7 @@ static void InsertUncondBranch(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB, /// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all /// values defined in MI which are also live/used by MI. static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { - const TargetRegisterInfo *TRI = MI.getParent()->getParent() - ->getSubtarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MI.getMF()->getSubtarget().getRegisterInfo(); // Before stepping forward past MI, remember which regs were live // before MI. This is needed to set the Undef flag only when reg is @@ -1382,7 +1381,7 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) { unsigned Reg = Clobber.first; MachineOperand &Op = const_cast(*Clobber.second); MachineInstr *OpMI = Op.getParent(); - MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI); + MachineInstrBuilder MIB(*OpMI->getMF(), OpMI); if (Op.isRegMask()) { // First handle regmasks. They clobber any entries in the mask which // means that we need a def for those registers. diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index 19c52d013749e..bf0f88d49a82c 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -369,7 +369,7 @@ ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg, // We want the mem access to be issued at a sane offset from PointerReg, // so that if PointerReg is null then the access reliably page faults. if (!((MI.mayLoad() || MI.mayStore()) && !MI.isPredicable() && - Offset < PageSize)) + -PageSize < Offset && Offset < PageSize)) return SR_Unsuitable; // Finally, check whether the current memory access aliases with previous one. 
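
Note on the ImplicitNullChecks change just above: the suitability test becomes
symmetric because a negative displacement off a null base also has to stay
within one page to be guaranteed to fault. Extracted as a predicate:

  // An access at null + Offset reliably page-faults only if it lands
  // strictly within one page of address zero, in either direction.
  static bool faultsOnNull(int64_t Offset, int64_t PageSize) {
    return -PageSize < Offset && Offset < PageSize;
  }

  // faultsOnNull(-8, 4096) == true; faultsOnNull(4096, 4096) == false
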
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 753db85226368..e829409f0974e 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -20,7 +20,9 @@ #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" @@ -163,7 +165,8 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM, Triple T(getTargetTriple().str()); AsmStreamer.reset(getTarget().createMCObjectStreamer( - T, Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + T, Context, std::unique_ptr(MAB), Out, + std::unique_ptr(MCE), STI, Options.MCOptions.MCRelaxAll, Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); break; @@ -238,7 +241,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, const Triple &T = getTargetTriple(); const MCSubtargetInfo &STI = *getMCSubtargetInfo(); std::unique_ptr AsmStreamer(getTarget().createMCObjectStreamer( - T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + T, *Ctx, std::unique_ptr(MAB), Out, + std::unique_ptr(MCE), STI, Options.MCOptions.MCRelaxAll, Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp index bca3361ad4cb9..a45b1e39feed0 100644 --- a/lib/CodeGen/LiveDebugValues.cpp +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -374,7 +374,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, void LiveDebugValues::transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges, const VarLocMap &VarLocIDs) { - MachineFunction *MF = MI.getParent()->getParent(); + MachineFunction *MF = MI.getMF(); const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); SparseBitVector<> KillSet; @@ -450,7 +450,7 @@ void LiveDebugValues::transferSpillInst(MachineInstr &MI, VarLocMap &VarLocIDs, SpillMap &Spills) { unsigned Reg; - MachineFunction *MF = MI.getParent()->getParent(); + MachineFunction *MF = MI.getMF(); if (!isSpillInstruction(MI, MF, Reg)) return; diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index d7345b0446aab..0c81306a9a501 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -91,8 +91,48 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); } +enum : unsigned { UndefLocNo = ~0U }; + +/// Describes a location by number along with some flags about the original +/// usage of the location. +class DbgValueLocation { +public: + DbgValueLocation(unsigned LocNo, bool WasIndirect) + : LocNo(LocNo), WasIndirect(WasIndirect) { + static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing"); + assert(locNo() == LocNo && "location truncation"); + } + + DbgValueLocation() : LocNo(0), WasIndirect(0) {} + + unsigned locNo() const { + // Fix up the undef location number, which gets truncated. + return LocNo == INT_MAX ? 
UndefLocNo : LocNo; + } + bool wasIndirect() const { return WasIndirect; } + bool isUndef() const { return locNo() == UndefLocNo; } + + DbgValueLocation changeLocNo(unsigned NewLocNo) const { + return DbgValueLocation(NewLocNo, WasIndirect); + } + + friend inline bool operator==(const DbgValueLocation &LHS, + const DbgValueLocation &RHS) { + return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect; + } + + friend inline bool operator!=(const DbgValueLocation &LHS, + const DbgValueLocation &RHS) { + return !(LHS == RHS); + } + +private: + unsigned LocNo : 31; + unsigned WasIndirect : 1; +}; + /// LocMap - Map of where a user value is live, and its location. -using LocMap = IntervalMap; +using LocMap = IntervalMap; namespace { @@ -110,7 +150,6 @@ class LDVImpl; class UserValue { const DILocalVariable *Variable; ///< The debug info variable we are part of. const DIExpression *Expression; ///< Any complex address expression. - bool IsIndirect; ///< true if this is a register-indirect+offset value. DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. UserValue *leader; ///< Equivalence class leader. @@ -127,9 +166,11 @@ class UserValue { SmallSet trimmedDefs; /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo. - void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, - unsigned LocNo, bool Spilled, LiveIntervals &LIS, - const TargetInstrInfo &TII); + void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, + SlotIndex StopIdx, + DbgValueLocation Loc, bool Spilled, LiveIntervals &LIS, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI); /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs /// is live. Returns true if any changes were made. @@ -138,10 +179,10 @@ class UserValue { public: /// UserValue - Create a new UserValue. - UserValue(const DILocalVariable *var, const DIExpression *expr, bool i, - DebugLoc L, LocMap::Allocator &alloc) - : Variable(var), Expression(expr), IsIndirect(i), dl(std::move(L)), - leader(this), locInts(alloc) {} + UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L, + LocMap::Allocator &alloc) + : Variable(var), Expression(expr), dl(std::move(L)), leader(this), + locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. UserValue *getLeader() { @@ -156,13 +197,12 @@ class UserValue { /// match - Does this UserValue match the parameters? bool match(const DILocalVariable *Var, const DIExpression *Expr, - const DILocation *IA, bool indirect) const { - return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA && - indirect == IsIndirect; + const DILocation *IA) const { + // FIXME: The fragment should be part of the equivalence class, but not + // other things in the expression like stack values. + return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA; } - enum : unsigned { UndefLocNo = ~0U }; - /// merge - Merge equivalence classes. static UserValue *merge(UserValue *L1, UserValue *L2) { L2 = L2->getLeader(); @@ -211,14 +251,15 @@ class UserValue { void mapVirtRegs(LDVImpl *LDV); /// addDef - Add a definition point to this value. - void addDef(SlotIndex Idx, const MachineOperand &LocMO) { + void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) { + DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect); // Add a singular (Idx,Idx) -> Loc mapping. 
LocMap::iterator I = locInts.find(Idx); if (!I.valid() || I.start() != Idx) - I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO)); + I.insert(Idx, Idx.getNextSlot(), Loc); else // A later DBG_VALUE at the same SlotIndex overrides the old location. - I.setValue(getLocationNo(LocMO)); + I.setValue(Loc); } /// extendDef - Extend the current definition as far as possible down. @@ -226,12 +267,12 @@ class UserValue { /// range of VNI. /// End points where VNI is no longer live are added to Kills. /// @param Idx Starting point for the definition. - /// @param LocNo Location number to propagate. + /// @param Loc Location number to propagate. /// @param LR Restrict liveness to where LR has the value VNI. May be null. /// @param VNI When LR is not null, this is the value to restrict to. /// @param Kills Append end points of VNI's live range to Kills. /// @param LIS Live intervals analysis. - void extendDef(SlotIndex Idx, unsigned LocNo, + void extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, const VNInfo *VNI, SmallVectorImpl *Kills, LiveIntervals &LIS); @@ -241,13 +282,14 @@ class UserValue { /// points, and add defs if possible. /// @param LI Scan for copies of the value in LI->reg. /// @param LocNo Location number of LI->reg. + /// @param WasIndirect Indicates if the original use of LI->reg was indirect /// @param Kills Points where the range of LocNo could be extended. /// @param NewDefs Append (Idx, LocNo) of inserted defs here. - void addDefsFromCopies(LiveInterval *LI, unsigned LocNo, - const SmallVectorImpl &Kills, - SmallVectorImpl> &NewDefs, - MachineRegisterInfo &MRI, - LiveIntervals &LIS); + void addDefsFromCopies( + LiveInterval *LI, unsigned LocNo, bool WasIndirect, + const SmallVectorImpl &Kills, + SmallVectorImpl> &NewDefs, + MachineRegisterInfo &MRI, LiveIntervals &LIS); /// computeIntervals - Compute the live intervals of all locations after /// collecting all their def points. @@ -266,7 +308,8 @@ class UserValue { /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, - const TargetInstrInfo &TRI, + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, const BitVector &SpilledLocations); /// getDebugLoc - Return DebugLoc of this UserValue. @@ -302,7 +345,7 @@ class LDVImpl { /// getUserValue - Find or create a UserValue. UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr, - bool IsIndirect, const DebugLoc &DL); + const DebugLoc &DL); /// lookupVirtReg - Find the EC leader for VirtReg or null. 
UserValue *lookupVirtReg(unsigned VirtReg); @@ -400,10 +443,13 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { OS << "\"\t"; for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) { OS << " [" << I.start() << ';' << I.stop() << "):"; - if (I.value() == UndefLocNo) + if (I.value().isUndef()) OS << "undef"; - else - OS << I.value(); + else { + OS << I.value().locNo(); + if (I.value().wasIndirect()) + OS << " ind"; + } } for (unsigned i = 0, e = locations.size(); i != e; ++i) { OS << " Loc" << i << '='; @@ -427,19 +473,18 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { } UserValue *LDVImpl::getUserValue(const DILocalVariable *Var, - const DIExpression *Expr, bool IsIndirect, - const DebugLoc &DL) { + const DIExpression *Expr, const DebugLoc &DL) { UserValue *&Leader = userVarMap[Var]; if (Leader) { UserValue *UV = Leader->getLeader(); Leader = UV; for (; UV; UV = UV->getNext()) - if (UV->match(Var, Expr, DL->getInlinedAt(), IsIndirect)) + if (UV->match(Var, Expr, DL->getInlinedAt())) return UV; } userValues.push_back( - llvm::make_unique(Var, Expr, IsIndirect, DL, allocator)); + llvm::make_unique(Var, Expr, DL, allocator)); UserValue *UV = userValues.back().get(); Leader = UserValue::merge(Leader, UV); return UV; @@ -466,15 +511,15 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) { return false; } - // Get or create the UserValue for (variable,offset). + // Get or create the UserValue for (variable,offset) here. bool IsIndirect = MI.getOperand(1).isImm(); if (IsIndirect) assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset"); const DILocalVariable *Var = MI.getDebugVariable(); const DIExpression *Expr = MI.getDebugExpression(); - //here. - UserValue *UV = getUserValue(Var, Expr, IsIndirect, MI.getDebugLoc()); - UV->addDef(Idx, MI.getOperand(0)); + UserValue *UV = + getUserValue(Var, Expr, MI.getDebugLoc()); + UV->addDef(Idx, MI.getOperand(0), IsIndirect); return true; } @@ -509,7 +554,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { /// We only propagate DBG_VALUES locally here. LiveDebugValues performs a /// data-flow analysis to propagate them beyond basic block boundaries. -void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, +void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR, const VNInfo *VNI, SmallVectorImpl *Kills, LiveIntervals &LIS) { SlotIndex Start = Idx; @@ -536,7 +581,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, if (I.valid() && I.start() <= Start) { // Stop when meeting a different location or an already extended interval. Start = Start.getNextSlot(); - if (I.value() != LocNo || I.stop() != Start) + if (I.value() != Loc || I.stop() != Start) return; // This is a one-slot placeholder. Just skip it. ++I; @@ -552,14 +597,14 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, Kills->push_back(Stop); if (Start < Stop) - I.insert(Start, Stop, LocNo); + I.insert(Start, Stop, Loc); } -void -UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, - const SmallVectorImpl &Kills, - SmallVectorImpl> &NewDefs, - MachineRegisterInfo &MRI, LiveIntervals &LIS) { +void UserValue::addDefsFromCopies( + LiveInterval *LI, unsigned LocNo, bool WasIndirect, + const SmallVectorImpl &Kills, + SmallVectorImpl> &NewDefs, + MachineRegisterInfo &MRI, LiveIntervals &LIS) { if (Kills.empty()) return; // Don't track copies from physregs, there are too many uses. 
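
Note: DbgValueLocation, defined earlier in this patch, packs the location
index and the was-indirect flag into a single word so it can be stored
directly as the IntervalMap value type; the ~0u undef sentinel truncates to
INT_MAX in the 31-bit field, which is exactly what locNo() maps back. The
layout in isolation:

  #include <climits>

  struct PackedLoc {
    unsigned LocNo : 31;       // location index; INT_MAX encodes "undef"
    unsigned WasIndirect : 1;  // original DBG_VALUE was indirect
  };
  static_assert(sizeof(PackedLoc) == sizeof(unsigned),
                "bad bitfield packing");
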
@@ -586,7 +631,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, // it, or we are looking at a wrong value of LI. SlotIndex Idx = LIS.getInstructionIndex(*MI); LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); - if (!I.valid() || I.value() != LocNo) + if (!I.valid() || I.value().locNo() != LocNo) continue; if (!LIS.hasInterval(DstReg)) @@ -619,8 +664,9 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); - I.insert(Idx, Idx.getNextSlot(), LocNo); - NewDefs.push_back(std::make_pair(Idx, LocNo)); + DbgValueLocation NewLoc(LocNo, WasIndirect); + I.insert(Idx, Idx.getNextSlot(), NewLoc); + NewDefs.push_back(std::make_pair(Idx, NewLoc)); break; } } @@ -629,36 +675,37 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, void UserValue::computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, LiveIntervals &LIS, LexicalScopes &LS) { - SmallVector, 16> Defs; + SmallVector, 16> Defs; // Collect all defs to be extended (Skipping undefs). for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) - if (I.value() != UndefLocNo) + if (!I.value().isUndef()) Defs.push_back(std::make_pair(I.start(), I.value())); // Extend all defs, and possibly add new ones along the way. for (unsigned i = 0; i != Defs.size(); ++i) { SlotIndex Idx = Defs[i].first; - unsigned LocNo = Defs[i].second; - const MachineOperand &Loc = locations[LocNo]; + DbgValueLocation Loc = Defs[i].second; + const MachineOperand &LocMO = locations[Loc.locNo()]; - if (!Loc.isReg()) { - extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS); + if (!LocMO.isReg()) { + extendDef(Idx, Loc, nullptr, nullptr, nullptr, LIS); continue; } // Register locations are constrained to where the register value is live. - if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { + if (TargetRegisterInfo::isVirtualRegister(LocMO.getReg())) { LiveInterval *LI = nullptr; const VNInfo *VNI = nullptr; - if (LIS.hasInterval(Loc.getReg())) { - LI = &LIS.getInterval(Loc.getReg()); + if (LIS.hasInterval(LocMO.getReg())) { + LI = &LIS.getInterval(LocMO.getReg()); VNI = LI->getVNInfoAt(Idx); } SmallVector Kills; - extendDef(Idx, LocNo, LI, VNI, &Kills, LIS); + extendDef(Idx, Loc, LI, VNI, &Kills, LIS); if (LI) - addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); + addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI, + LIS); continue; } @@ -672,7 +719,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // Erase all the undefs. for (LocMap::iterator I = locInts.begin(); I.valid();) - if (I.value() == UndefLocNo) + if (I.value().isUndef()) I.erase(); else ++I; @@ -702,7 +749,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // I.stop() >= PrevEnd. Check for overlap. if (PrevEnd && I.start() < PrevEnd) { SlotIndex IStop = I.stop(); - unsigned LocNo = I.value(); + DbgValueLocation Loc = I.value(); // Stop overlaps previous end - trim the end of the interval to // the scope range. @@ -713,7 +760,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI, // current) range create a new interval for the remainder (which // may be further trimmed). if (RStart < IStop) - I.insert(RStart, IStop, LocNo); + I.insert(RStart, IStop, Loc); } // Advance I so that I.stop() >= RStart, and check for overlap. 
@@ -840,7 +887,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef NewRegs, break; // Now LII->end > LocMapI.start(). Do we have an overlap? - if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) { + if (LocMapI.value().locNo() == OldLocNo && LII->start < LocMapI.stop()) { // Overlapping correct location. Allocate NewLocNo now. if (NewLocNo == UndefLocNo) { MachineOperand MO = MachineOperand::CreateReg(LI->reg, false); @@ -851,6 +898,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef NewRegs, SlotIndex LStart = LocMapI.start(); SlotIndex LStop = LocMapI.stop(); + DbgValueLocation OldLoc = LocMapI.value(); // Trim LocMapI down to the LII overlap. if (LStart < LII->start) @@ -859,17 +907,17 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef NewRegs, LocMapI.setStopUnchecked(LII->end); // Change the value in the overlap. This may trigger coalescing. - LocMapI.setValue(NewLocNo); + LocMapI.setValue(OldLoc.changeLocNo(NewLocNo)); // Re-insert any removed OldLocNo ranges. if (LStart < LocMapI.start()) { - LocMapI.insert(LStart, LocMapI.start(), OldLocNo); + LocMapI.insert(LStart, LocMapI.start(), OldLoc); ++LocMapI; assert(LocMapI.valid() && "Unexpected coalescing"); } if (LStop > LocMapI.stop()) { ++LocMapI; - LocMapI.insert(LII->end, LStop, OldLocNo); + LocMapI.insert(LII->end, LStop, OldLoc); --LocMapI; } } @@ -892,14 +940,14 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef NewRegs, locations.erase(locations.begin() + OldLocNo); LocMapI.goToBegin(); while (LocMapI.valid()) { - unsigned v = LocMapI.value(); - if (v == OldLocNo) { + DbgValueLocation v = LocMapI.value(); + if (v.locNo() == OldLocNo) { DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';' << LocMapI.stop() << ")\n"); LocMapI.erase(); } else { - if (v > OldLocNo) - LocMapI.setValueUnchecked(v-1); + if (v.locNo() > OldLocNo) + LocMapI.setValueUnchecked(v.changeLocNo(v.locNo() - 1)); ++LocMapI; } } @@ -1003,14 +1051,14 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI, // DBG_VALUE intervals with different vregs that were allocated to the same // physical register. for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { - unsigned NewLocNo = LocNoMap[I.value()]; - I.setValueUnchecked(NewLocNo); + DbgValueLocation Loc = I.value(); + unsigned NewLocNo = LocNoMap[Loc.locNo()]; + I.setValueUnchecked(Loc.changeLocNo(NewLocNo)); I.setStart(I.start()); } } -/// findInsertLocation - Find an iterator for inserting a DBG_VALUE -/// instruction. +/// Find an iterator for inserting a DBG_VALUE instruction. static MachineBasicBlock::iterator findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, LiveIntervals &LIS) { @@ -1033,12 +1081,42 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, std::next(MachineBasicBlock::iterator(MI)); } -void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, - unsigned LocNo, bool Spilled, +/// Find an iterator for inserting the next DBG_VALUE instruction +/// (or end if no more insert locations found). +static MachineBasicBlock::iterator +findNextInsertLocation(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + SlotIndex StopIdx, MachineOperand &LocMO, + LiveIntervals &LIS, + const TargetRegisterInfo &TRI) { + if (!LocMO.isReg()) + return MBB->instr_end(); + unsigned Reg = LocMO.getReg(); + + // Find the next instruction in the MBB that define the register Reg. 
+ while (I != MBB->end()) { + if (!LIS.isNotInMIMap(*I) && + SlotIndex::isEarlierEqualInstr(StopIdx, LIS.getInstructionIndex(*I))) + break; + if (I->definesRegister(Reg, &TRI)) + // The insert location is directly after the instruction/bundle. + return std::next(I); + ++I; + } + return MBB->end(); +} + +void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx, + SlotIndex StopIdx, + DbgValueLocation Loc, bool Spilled, LiveIntervals &LIS, - const TargetInstrInfo &TII) { - MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); - MachineOperand &Loc = locations[LocNo]; + const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI) { + SlotIndex MBBEndIdx = LIS.getMBBEndIdx(&*MBB); + // Only search within the current MBB. + StopIdx = (MBBEndIdx < StopIdx) ? MBBEndIdx : StopIdx; + MachineBasicBlock::iterator I = findInsertLocation(MBB, StartIdx, LIS); + MachineOperand &MO = locations[Loc.locNo()]; ++NumInsertedDebugValues; assert(cast(Variable) @@ -1048,34 +1126,43 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, // If the location was spilled, the new DBG_VALUE will be indirect. If the // original DBG_VALUE was indirect, we need to add DW_OP_deref to indicate // that the original virtual register was a pointer. - bool NewIndirect = IsIndirect || Spilled; const DIExpression *Expr = Expression; - if (Spilled && IsIndirect) - Expr = DIExpression::prepend(Expr, DIExpression::WithDeref); + bool IsIndirect = Loc.wasIndirect(); + if (Spilled) { + if (IsIndirect) + Expr = DIExpression::prepend(Expr, DIExpression::WithDeref); + IsIndirect = true; + } - assert((!Spilled || Loc.isFI()) && - "a spilled location must be a frame index"); + assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index"); - MachineInstrBuilder MIB = + do { + MachineInstrBuilder MIB = BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) - .add(Loc); - if (NewIndirect) - MIB.addImm(0U); - else - MIB.addReg(0U, RegState::Debug); - MIB.addMetadata(Variable).addMetadata(Expr); + .add(MO); + if (IsIndirect) + MIB.addImm(0U); + else + MIB.addReg(0U, RegState::Debug); + MIB.addMetadata(Variable).addMetadata(Expr); + + // Continue and insert DBG_VALUES after every redefinition of register + // associated with the debug value within the range + I = findNextInsertLocation(MBB, I, StopIdx, MO, LIS, TRI); + } while (I != MBB->end()); } void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI, const BitVector &SpilledLocations) { MachineFunction::iterator MFEnd = VRM->getMachineFunction().end(); for (LocMap::const_iterator I = locInts.begin(); I.valid();) { SlotIndex Start = I.start(); SlotIndex Stop = I.stop(); - unsigned LocNo = I.value(); - bool Spilled = LocNo != UndefLocNo ? SpilledLocations.test(LocNo) : false; + DbgValueLocation Loc = I.value(); + bool Spilled = !Loc.isUndef() ? 
SpilledLocations.test(Loc.locNo()) : false; // If the interval start was trimmed to the lexical scope insert the // DBG_VALUE at the previous index (otherwise it appears after the @@ -1083,22 +1170,22 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, if (trimmedDefs.count(Start)) Start = Start.getPrevIndex(); - DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo); + DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo()); MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); - insertDebugValue(&*MBB, Start, LocNo, Spilled, LIS, TII); + insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI); // This interval may span multiple basic blocks. // Insert a DBG_VALUE into each one. - while(Stop > MBBEnd) { + while (Stop > MBBEnd) { // Move to the next block. Start = MBBEnd; if (++MBB == MFEnd) break; MBBEnd = LIS.getMBBEndIdx(&*MBB); DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); - insertDebugValue(&*MBB, Start, LocNo, Spilled, LIS, TII); + insertDebugValue(&*MBB, Start, Stop, Loc, Spilled, LIS, TII, TRI); } DEBUG(dbgs() << '\n'); if (MBB == MFEnd) @@ -1117,7 +1204,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { for (unsigned i = 0, e = userValues.size(); i != e; ++i) { DEBUG(userValues[i]->print(dbgs(), TRI)); userValues[i]->rewriteLocations(*VRM, *TRI, SpilledLocations); - userValues[i]->emitDebugValues(VRM, *LIS, *TII, SpilledLocations); + userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpilledLocations); } EmitDone = true; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 0e240f482a19a..911d8f04433b8 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -824,7 +824,13 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { float LiveIntervals::getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, const MachineInstr &MI) { - BlockFrequency Freq = MBFI->getBlockFreq(MI.getParent()); + return getSpillWeight(isDef, isUse, MBFI, MI.getParent()); +} + +float LiveIntervals::getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineBasicBlock *MBB) { + BlockFrequency Freq = MBFI->getBlockFreq(MBB); const float Scale = 1.0f / MBFI->getEntryFreq(); return (isDef + isUse) * (Freq.getFrequency() * Scale); } diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index b109f1922a3ec..2eab0376da2fb 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -14,29 +14,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include 
"llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include using namespace llvm; @@ -47,6 +48,7 @@ STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated"); STATISTIC(NumReplacements, "Number of frame indices references replaced"); namespace { + class FrameRef { MachineBasicBlock::iterator MI; // Instr referencing the frame int64_t LocalOffset; // Local offset of the frame idx referenced @@ -72,9 +74,10 @@ namespace { }; class LocalStackSlotPass: public MachineFunctionPass { - SmallVector LocalOffsets; + SmallVector LocalOffsets; + /// StackObjSet - A set of stack object indexes - typedef SmallSetVector StackObjSet; + using StackObjSet = SmallSetVector; void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset, bool StackGrowsDown, unsigned &MaxAlign); @@ -84,11 +87,14 @@ namespace { int64_t &Offset, unsigned &MaxAlign); void calculateFrameObjectOffsets(MachineFunction &Fn); bool insertFrameReferenceRegisters(MachineFunction &Fn); + public: static char ID; // Pass identification, replacement for typeid + explicit LocalStackSlotPass() : MachineFunctionPass(ID) { initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry()); } + bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -96,20 +102,20 @@ namespace { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } - - private: }; + } // end anonymous namespace char LocalStackSlotPass::ID = 0; + char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; + INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE, "Local Stack Slot Allocation", false, false) INITIALIZE_PASS_DEPENDENCY(StackProtector) INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE, "Local Stack Slot Allocation", false, false) - bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); @@ -178,7 +184,6 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs MachineFrameInfo &MFI, bool StackGrowsDown, int64_t &Offset, unsigned &MaxAlign) { - for (StackObjSet::const_iterator I = UnassignedObjs.begin(), E = UnassignedObjs.end(); I != E; ++I) { int i = *I; @@ -189,7 +194,6 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. -/// void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { // Loop over all of the stack objects, assigning sequential addresses... 
MachineFrameInfo &MFI = Fn.getFrameInfo(); @@ -397,7 +401,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { continue; } - const MachineFunction *MF = MI.getParent()->getParent(); + const MachineFunction *MF = MI.getMF(); const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); BaseReg = Fn.getRegInfo().createVirtualRegister(RC); diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp index 5df8dbce36a4c..c91255f959283 100644 --- a/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/lib/CodeGen/MIRParser/MIRParser.cpp @@ -120,7 +120,7 @@ class MIRParserImpl { bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS, std::vector &CSIInfo, const yaml::StringValue &RegisterSource, - int FrameIdx); + bool IsRestored, int FrameIdx); bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS, const yaml::MachineStackObject &Object, @@ -214,6 +214,9 @@ void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) { case SourceMgr::DK_Note: Kind = DS_Note; break; + case SourceMgr::DK_Remark: + llvm_unreachable("remark unexpected"); + break; } Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag)); } @@ -595,7 +598,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, Twine("redefinition of fixed stack object '%fixed-stack.") + Twine(Object.ID.Value) + "'"); if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister, - ObjectIdx)) + Object.CalleeSavedRestored, ObjectIdx)) return true; } @@ -628,7 +631,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, Twine("redefinition of stack object '%stack.") + Twine(Object.ID.Value) + "'"); if (parseCalleeSavedRegister(PFS, CSIInfo, Object.CalleeSavedRegister, - ObjectIdx)) + Object.CalleeSavedRestored, ObjectIdx)) return true; if (Object.LocalOffset) MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue()); @@ -653,14 +656,16 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS, bool MIRParserImpl::parseCalleeSavedRegister(PerFunctionMIParsingState &PFS, std::vector &CSIInfo, - const yaml::StringValue &RegisterSource, int FrameIdx) { + const yaml::StringValue &RegisterSource, bool IsRestored, int FrameIdx) { if (RegisterSource.Value.empty()) return false; unsigned Reg = 0; SMDiagnostic Error; if (parseNamedRegisterReference(PFS, Reg, RegisterSource.Value, Error)) return error(Error, RegisterSource.SourceRange); - CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx)); + CalleeSavedInfo CSI(Reg, FrameIdx); + CSI.setRestored(IsRestored); + CSIInfo.push_back(CSI); return false; } diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index 7650d6346803d..f8da8d32d6acf 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -270,6 +270,28 @@ static void printCustomRegMask(const uint32_t *RegMask, raw_ostream &OS, OS << ')'; } +static void printRegClassOrBank(unsigned Reg, raw_ostream &OS, + const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI) { + if (RegInfo.getRegClassOrNull(Reg)) + OS << StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); + else if (RegInfo.getRegBankOrNull(Reg)) + OS << StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower(); + else { + OS << "_"; + assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) && + "Generic registers must have a valid type"); + } +} + +static void printRegClassOrBank(unsigned Reg, yaml::StringValue &Dest, + const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI) { + 
raw_string_ostream OS(Dest.Value); + printRegClassOrBank(Reg, OS, RegInfo, TRI); +} + + void MIRPrinter::convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo, const TargetRegisterInfo *TRI) { @@ -280,16 +302,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, unsigned Reg = TargetRegisterInfo::index2VirtReg(I); yaml::VirtualRegisterDefinition VReg; VReg.ID = I; - if (RegInfo.getRegClassOrNull(Reg)) - VReg.Class = - StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); - else if (RegInfo.getRegBankOrNull(Reg)) - VReg.Class = StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower(); - else { - VReg.Class = std::string("_"); - assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) && - "Generic registers must have a valid type"); - } + printRegClassOrBank(Reg, VReg.Class, RegInfo, TRI); unsigned PreferredReg = RegInfo.getSimpleHint(Reg); if (PreferredReg) printReg(PreferredReg, VReg.PreferredRegister, TRI); @@ -297,11 +310,11 @@ void MIRPrinter::convert(yaml::MachineFunction &MF, } // Print the live ins. - for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) { + for (std::pair LI : RegInfo.liveins()) { yaml::MachineFunctionLiveIn LiveIn; - printReg(I->first, LiveIn.Register, TRI); - if (I->second) - printReg(I->second, LiveIn.VirtualRegister, TRI); + printReg(LI.first, LiveIn.Register, TRI); + if (LI.second) + printReg(LI.second, LiveIn.VirtualRegister, TRI); MF.LiveIns.push_back(LiveIn); } @@ -407,10 +420,15 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF, assert(StackObjectInfo != StackObjectOperandMapping.end() && "Invalid stack object index"); const FrameIndexOperand &StackObject = StackObjectInfo->second; - if (StackObject.IsFixed) + if (StackObject.IsFixed) { YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg; - else + YMF.FixedStackObjects[StackObject.ID].CalleeSavedRestored = + CSInfo.isRestored(); + } else { YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg; + YMF.StackObjects[StackObject.ID].CalleeSavedRestored = + CSInfo.isRestored(); + } } for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) { auto LocalObject = MFI.getLocalFrameObjectMap(I); @@ -698,7 +716,7 @@ static LLT getTypeToPrint(const MachineInstr &MI, unsigned OpIdx, } void MIPrinter::print(const MachineInstr &MI) { - const auto *MF = MI.getParent()->getParent(); + const auto *MF = MI.getMF(); const auto &MRI = MF->getRegInfo(); const auto &SubTarget = MF->getSubtarget(); const auto *TRI = SubTarget.getRegisterInfo(); @@ -849,8 +867,7 @@ static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) { void MIPrinter::printTargetFlags(const MachineOperand &Op) { if (!Op.getTargetFlags()) return; - const auto *TII = - Op.getParent()->getParent()->getParent()->getSubtarget().getInstrInfo(); + const auto *TII = Op.getParent()->getMF()->getSubtarget().getInstrInfo(); assert(TII && "expected instruction info"); auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags()); OS << "target-flags("; @@ -911,7 +928,8 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, bool IsDef) { printTargetFlags(Op); switch (Op.getType()) { - case MachineOperand::MO_Register: + case MachineOperand::MO_Register: { + unsigned Reg = Op.getReg(); if (Op.isImplicit()) OS << (Op.isDef() ? 
"implicit-def " : "implicit "); else if (!IsDef && Op.isDef()) @@ -929,15 +947,23 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, OS << "early-clobber "; if (Op.isDebug()) OS << "debug-use "; - printReg(Op.getReg(), OS, TRI); + printReg(Reg, OS, TRI); // Print the sub register. if (Op.getSubReg() != 0) OS << '.' << TRI->getSubRegIndexName(Op.getSubReg()); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const MachineRegisterInfo &MRI = Op.getParent()->getMF()->getRegInfo(); + if (IsDef || MRI.def_empty(Reg)) { + OS << ':'; + printRegClassOrBank(Reg, OS, MRI, TRI); + } + } if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef()) OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")"; if (TypeToPrint.isValid()) OS << '(' << TypeToPrint << ')'; break; + } case MachineOperand::MO_Immediate: OS << Op.getImm(); break; @@ -959,8 +985,8 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, break; case MachineOperand::MO_TargetIndex: OS << "target-index("; - if (const auto *Name = getTargetIndexName( - *Op.getParent()->getParent()->getParent(), Op.getIndex())) + if (const auto *Name = + getTargetIndexName(*Op.getParent()->getMF(), Op.getIndex())) OS << Name; else OS << ""; @@ -1024,7 +1050,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, OS << ""; break; case MachineOperand::MO_CFIIndex: { - const MachineFunction &MF = *Op.getParent()->getParent()->getParent(); + const MachineFunction &MF = *Op.getParent()->getMF(); print(MF.getFrameInstructions()[Op.getCFIIndex()], TRI); break; } @@ -1033,7 +1059,7 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, if (ID < Intrinsic::num_intrinsics) OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')'; else { - const MachineFunction &MF = *Op.getParent()->getParent()->getParent(); + const MachineFunction &MF = *Op.getParent()->getMF(); const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo(); OS << "intrinsic(@" << TII->getName(ID) << ')'; } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 81597afe6b02b..d5758da0464c4 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -111,7 +111,7 @@ void ilist_traits::removeNodeFromList(MachineInstr *N) { assert(N->getParent() && "machine instruction not in a basic block"); // Remove from the use/def lists. - if (MachineFunction *MF = N->getParent()->getParent()) + if (MachineFunction *MF = N->getMF()) N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); N->setParent(nullptr); diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index f135cf715936b..c5991332f088b 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -2233,6 +2233,10 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) { // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. + // + // Loops are processed innermost to uttermost, make sure we clear + // PreferredLoopExit before processing a new loop. 
+ PreferredLoopExit = nullptr; if (!RotateLoopWithProfile && LoopTop == L.getHeader()) PreferredLoopExit = findBestLoopExit(L, LoopBlockSet); diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index d563370dd4fe2..3ffef68233434 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -415,7 +415,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { bool IncrementalUpdate = false; auto BlockIter = MBB->begin(); - auto LastUpdate = BlockIter; + decltype(BlockIter) LastUpdate; // Check if the block is in a loop. const MachineLoop *ML = MLI->getLoopFor(MBB); if (!MinInstr) @@ -503,9 +503,11 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { InstrIdxForVirtReg, P, !IncrementalUpdate) && preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs)) { - if (MBB->size() > inc_threshold) + if (MBB->size() > inc_threshold) { // Use incremental depth updates for basic blocks above the threshold IncrementalUpdate = true; + LastUpdate = BlockIter; + } insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, RegUnits, IncrementalUpdate); diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 4655b5ba7044d..61f56fffc8870 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -286,7 +286,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { // it's no longer available for copy propagation. RegList &DestList = SrcMap[Src]; if (!is_contained(DestList, Def)) - DestList.push_back(Def); + DestList.push_back(Def); continue; } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index efd4bd00a45a0..250a10c7d0768 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -1,4 +1,4 @@ -//===-- MachineFunction.cpp -----------------------------------------------===// +//===- MachineFunction.cpp ------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -14,45 +14,76 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" -#include "llvm/MC/MCAsmInfo.h" +#include
"llvm/IR/Value.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "codegen" static cl::opt - AlignAllFunctions("align-all-functions", - cl::desc("Force the alignment of all functions."), - cl::init(0), cl::Hidden); +AlignAllFunctions("align-all-functions", + cl::desc("Force the alignment of all functions."), + cl::init(0), cl::Hidden); static const char *getPropertyName(MachineFunctionProperties::Property Prop) { - typedef MachineFunctionProperties::Property P; + using P = MachineFunctionProperties::Property; + switch(Prop) { case P::FailedISel: return "FailedISel"; case P::IsSSA: return "IsSSA"; @@ -81,7 +112,7 @@ void MachineFunctionProperties::print(raw_ostream &OS) const { //===----------------------------------------------------------------------===// // Out-of-line virtual method. -MachineFunctionInfo::~MachineFunctionInfo() {} +MachineFunctionInfo::~MachineFunctionInfo() = default; void ilist_alloc_traits::deleteNode(MachineBasicBlock *MBB) { MBB->getParent()->DeleteMachineBasicBlock(MBB); @@ -277,7 +308,7 @@ MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) { MachineInstr *FirstClone = nullptr; MachineBasicBlock::const_instr_iterator I = Orig.getIterator(); - for (;;) { + while (true) { MachineInstr *Cloned = CloneMachineInstr(&*I); MBB.insert(InsertBefore, Cloned); if (FirstClone == nullptr) { @@ -499,10 +530,10 @@ void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const { } namespace llvm { + template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { - - DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} static std::string getGraphName(const MachineFunction *F) { return ("CFG for '" + F->getName() + "' function").str(); @@ -533,7 +564,8 @@ namespace llvm { return OutStr; } }; -} + +} // end namespace llvm void MachineFunction::viewCFG() const { @@ -886,12 +918,11 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const { LLVM_DUMP_METHOD void MachineJumpTableInfo::dump() const { print(dbgs()); } #endif - //===----------------------------------------------------------------------===// // MachineConstantPool implementation //===----------------------------------------------------------------------===// -void MachineConstantPoolValue::anchor() { } +void MachineConstantPoolValue::anchor() {} Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 66de99156b4e4..bb2dda980e418 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -311,7 +311,7 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { return true; // Calculate the size of the 
RegMask - const MachineFunction *MF = getParent()->getParent()->getParent(); + const MachineFunction *MF = getParent()->getMF(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32; @@ -1055,7 +1055,7 @@ MachineInstr::mergeMemRefsWith(const MachineInstr& Other) { if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs)) return std::make_pair(nullptr, 0); - MachineFunction *MF = getParent()->getParent(); + MachineFunction *MF = getMF(); mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs); mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(), MemBegin); @@ -1129,9 +1129,9 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other, if (Check == IgnoreDefs) continue; else if (Check == IgnoreVRegDefs) { - if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || - TargetRegisterInfo::isPhysicalRegister(OMO.getReg())) - if (MO.getReg() != OMO.getReg()) + if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()) || + !TargetRegisterInfo::isVirtualRegister(OMO.getReg())) + if (!MO.isIdenticalTo(OMO)) return false; } else { if (!MO.isIdenticalTo(OMO)) @@ -1154,6 +1154,10 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other, return true; } +const MachineFunction *MachineInstr::getMF() const { + return getParent()->getParent(); +} + MachineInstr *MachineInstr::removeFromParent() { assert(getParent() && "Not embedded in a basic block!"); return getParent()->remove(this); @@ -1303,8 +1307,8 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { assert(getParent() && "Can't have an MBB reference here!"); - assert(getParent()->getParent() && "Can't have an MF reference here!"); - const MachineFunction &MF = *getParent()->getParent(); + assert(getMF() && "Can't have an MF reference here!"); + const MachineFunction &MF = *getMF(); // Most opcodes have fixed constraints in their MCInstrDesc. if (!isInlineAsm()) @@ -1665,7 +1669,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other, bool UseTBAA) { - const MachineFunction *MF = getParent()->getParent(); + const MachineFunction *MF = getMF(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const MachineFrameInfo &MFI = MF->getFrameInfo(); diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index f83248d1ebf59..efb5c3371de2b 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -917,8 +917,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // However, if the physreg is known to always be caller saved/restored // then this use is safe to hoist. if (!MRI->isConstantPhysReg(Reg) && - !(TRI->isCallerPreservedPhysReg(Reg, *I.getParent()->getParent()))) - return false; + !(TRI->isCallerPreservedPhysReg(Reg, *I.getMF()))) + return false; // Otherwise it's safe to move. continue; } else if (!MO.isDead()) { @@ -1191,7 +1191,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { &LoadRegIndex); if (NewOpc == 0) return nullptr; const MCInstrDesc &MID = TII->get(NewOpc); - MachineFunction &MF = *MI->getParent()->getParent(); + MachineFunction &MF = *MI->getMF(); const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); // Ok, we're unfolding. Create a temporary register and do the unfold. 
unsigned Reg = MRI->createVirtualRegister(RC); diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp index 22d519e5d88fa..a29fbc2852860 100644 --- a/lib/CodeGen/MachineModuleInfoImpls.cpp +++ b/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===// +//===- llvm/CodeGen/MachineModuleInfoImpls.cpp ----------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,7 +13,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/MC/MCSymbol.h" +#include <cstdlib> +#include <vector> + using namespace llvm; //===----------------------------------------------------------------------===// @@ -25,7 +29,8 @@ void MachineModuleInfoMachO::anchor() {} void MachineModuleInfoELF::anchor() {} static int SortSymbolPair(const void *LHS, const void *RHS) { - typedef std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy> PairTy; + using PairTy = std::pair<MCSymbol *, MachineModuleInfoImpl::StubValueTy>; + const MCSymbol *LHSS = ((const PairTy *)LHS)->first; const MCSymbol *RHSS = ((const PairTy *)RHS)->first; return LHSS->getName().compare(RHSS->getName()); @@ -41,4 +46,3 @@ MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs( Map.clear(); return List; } - diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp index 73c3428a6e535..ecc569dab8357 100644 --- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp +++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp @@ -60,15 +60,7 @@ void MachineOptimizationRemarkEmitter::emit( return; } - yaml::Output *Out = Ctx.getDiagnosticsOutputFile(); - if (Out) { - auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiagCommon); - *Out << P; - } - // FIXME: now that IsVerbose is part of DI, filtering for this will be moved - // from here to clang. - if (!OptDiag.isVerbose() || shouldEmitVerbose()) - Ctx.diagnose(OptDiag); + Ctx.diagnose(OptDiag); } MachineOptimizationRemarkEmitterPass::MachineOptimizationRemarkEmitterPass() diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp index 38aea4fdc98a6..1bc869e02e646 100644 --- a/lib/CodeGen/MachineOutliner.cpp +++ b/lib/CodeGen/MachineOutliner.cpp @@ -92,23 +92,33 @@ namespace { /// \brief An individual sequence of instructions to be replaced with a call to /// an outlined function. struct Candidate { - - /// Set to false if the candidate overlapped with another candidate. - bool InCandidateList = true; - - /// The start index of this \p Candidate. +private: + /// The start index of this \p Candidate in the instruction list. unsigned StartIdx; /// The number of instructions in this \p Candidate. unsigned Len; - /// The index of this \p Candidate's \p OutlinedFunction in the list of +public: + /// Set to false if the candidate overlapped with another candidate. + bool InCandidateList = true; + + /// \brief The index of this \p Candidate's \p OutlinedFunction in the list of /// \p OutlinedFunctions. unsigned FunctionIdx; /// Contains all target-specific information for this \p Candidate. TargetInstrInfo::MachineOutlinerInfo MInfo; + /// Return the number of instructions in this Candidate. + unsigned getLength() const { return Len; } + + /// Return the start index of this candidate. + unsigned getStartIdx() const { return StartIdx; } + + /// Return the end index of this candidate.
+ unsigned getEndIdx() const { return StartIdx + Len - 1; } + /// \brief The number of instructions that would be saved by outlining every /// candidate of this type. /// @@ -125,13 +135,22 @@ struct Candidate { /// \brief Used to ensure that \p Candidates are outlined in an order that /// preserves the start and end indices of other \p Candidates. - bool operator<(const Candidate &RHS) const { return StartIdx > RHS.StartIdx; } + bool operator<(const Candidate &RHS) const { + return getStartIdx() > RHS.getStartIdx(); + } }; /// \brief The information necessary to create an outlined function for some /// class of candidate. struct OutlinedFunction { +private: + /// The number of candidates for this \p OutlinedFunction. + unsigned OccurrenceCount = 0; + +public: + std::vector<std::shared_ptr<Candidate>> Candidates; + /// The actual outlined function created. /// This is initialized after we go through and create the actual function. MachineFunction *MF = nullptr; @@ -139,24 +158,45 @@ struct OutlinedFunction { /// A number assigned to this function which appears at the end of its name. unsigned Name; - /// The number of candidates for this OutlinedFunction. - unsigned OccurrenceCount = 0; - /// \brief The sequence of integers corresponding to the instructions in this /// function. std::vector<unsigned> Sequence; - /// The number of instructions this function would save. - unsigned Benefit = 0; - /// Contains all target-specific information for this \p OutlinedFunction. TargetInstrInfo::MachineOutlinerInfo MInfo; + /// Return the number of candidates for this \p OutlinedFunction. + unsigned getOccurrenceCount() { return OccurrenceCount; } + + /// Decrement the occurrence count of this OutlinedFunction and return the + /// new count. + unsigned decrement() { + assert(OccurrenceCount > 0 && "Can't decrement an empty function!"); + OccurrenceCount--; + return getOccurrenceCount(); + } + + /// \brief Return the number of instructions it would take to outline this + /// function. + unsigned getOutliningCost() { + return (OccurrenceCount * MInfo.CallOverhead) + Sequence.size() + + MInfo.FrameOverhead; + } + + /// \brief Return the number of instructions that would be saved by outlining + /// this function. + unsigned getBenefit() { + unsigned NotOutlinedCost = OccurrenceCount * Sequence.size(); + unsigned OutlinedCost = getOutliningCost(); + return (NotOutlinedCost < OutlinedCost) ? 0 + : NotOutlinedCost - OutlinedCost; + } + OutlinedFunction(unsigned Name, unsigned OccurrenceCount, - const std::vector<unsigned> &Sequence, unsigned Benefit, + const std::vector<unsigned> &Sequence, TargetInstrInfo::MachineOutlinerInfo &MInfo) - : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence), - Benefit(Benefit), MInfo(MInfo) {} + : OccurrenceCount(OccurrenceCount), Name(Name), Sequence(Sequence), + MInfo(MInfo) {} }; /// Represents an undefined index in the suffix tree. @@ -733,6 +773,10 @@ struct MachineOutliner : public ModulePass { static char ID; + /// \brief Set to true if the outliner should consider functions with + /// linkonce_odr linkage.
+ bool OutlineFromLinkOnceODRs = false; + StringRef getPassName() const override { return "Machine Outliner"; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -742,7 +786,8 @@ struct MachineOutliner : public ModulePass { ModulePass::getAnalysisUsage(AU); } - MachineOutliner() : ModulePass(ID) { + MachineOutliner(bool OutlineFromLinkOnceODRs = false) + : ModulePass(ID), OutlineFromLinkOnceODRs(OutlineFromLinkOnceODRs) { initializeMachineOutlinerPass(*PassRegistry::getPassRegistry()); } @@ -764,10 +809,11 @@ struct MachineOutliner : public ModulePass { /// type of candidate. /// /// \returns The length of the longest candidate found. - unsigned findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, - InstructionMapper &Mapper, - std::vector<Candidate> &CandidateList, - std::vector<OutlinedFunction> &FunctionList); + unsigned + findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, + InstructionMapper &Mapper, + std::vector<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList); /// \brief Replace the sequences of instructions represented by the /// \p Candidates in \p CandidateList with calls to \p MachineFunctions /// @@ -777,7 +823,8 @@ /// \param CandidateList A list of candidates to be outlined. /// \param FunctionList A list of functions to be inserted into the module. /// \param Mapper Contains the instruction mappings for the module. - bool outline(Module &M, const ArrayRef<Candidate> &CandidateList, + bool outline(Module &M, + const ArrayRef<std::shared_ptr<Candidate>> &CandidateList, std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper); @@ -798,10 +845,15 @@ struct MachineOutliner : public ModulePass { /// \param TII TargetInstrInfo for the module. /// /// \returns The length of the longest candidate found. 0 if there are none. - unsigned buildCandidateList(std::vector<Candidate> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, - SuffixTree &ST, InstructionMapper &Mapper, - const TargetInstrInfo &TII); + unsigned + buildCandidateList(std::vector<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList, + SuffixTree &ST, InstructionMapper &Mapper, + const TargetInstrInfo &TII); + + /// Helper function for pruneOverlaps. + /// Removes \p C from the candidate list, and updates its \p OutlinedFunction. + void prune(Candidate &C, std::vector<OutlinedFunction> &FunctionList); /// \brief Remove any overlapping candidates that weren't handled by the /// suffix tree's pruning method. /// @@ -816,7 +868,7 @@ struct MachineOutliner : public ModulePass { /// \param Mapper Contains instruction mapping info for outlining. /// \param MaxCandidateLen The length of the longest candidate. /// \param TII TargetInstrInfo for the module.
- void pruneOverlaps(std::vector<Candidate> &CandidateList, + void pruneOverlaps(std::vector<std::shared_ptr<Candidate>> &CandidateList, std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper, unsigned MaxCandidateLen, const TargetInstrInfo &TII); @@ -831,20 +883,21 @@ struct MachineOutliner : public ModulePass { char MachineOutliner::ID = 0; namespace llvm { -ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); } +ModulePass *createMachineOutlinerPass(bool OutlineFromLinkOnceODRs) { + return new MachineOutliner(OutlineFromLinkOnceODRs); +} + } // namespace llvm INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) -unsigned -MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, - InstructionMapper &Mapper, - std::vector<Candidate> &CandidateList, - std::vector<OutlinedFunction> &FunctionList) { +unsigned MachineOutliner::findCandidates( + SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper, + std::vector<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList) { CandidateList.clear(); FunctionList.clear(); - unsigned FnIdx = 0; unsigned MaxLen = 0; // FIXME: Visit internal nodes instead of leaves. @@ -891,7 +944,8 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, MachineBasicBlock::iterator EndIt = Mapper.InstrList[M->SuffixIdx + StringLen - 1]; - CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen, FnIdx); + CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen, + FunctionList.size()); RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt)); // Never visit this leaf again. @@ -899,16 +953,20 @@ } } - unsigned SequenceOverhead = StringLen; + // We've found something we might want to outline. + // Create an OutlinedFunction to store it and check if it'd be beneficial + // to outline. TargetInstrInfo::MachineOutlinerInfo MInfo = TII.getOutlininingCandidateInfo(RepeatedSequenceLocs); - - unsigned OutliningCost = - (MInfo.CallOverhead * Parent.OccurrenceCount) + MInfo.FrameOverhead; - unsigned NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount; + std::vector<unsigned> Seq; + for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++) + Seq.push_back(ST.Str[i]); + OutlinedFunction OF(FunctionList.size(), Parent.OccurrenceCount, Seq, + MInfo); + unsigned Benefit = OF.getBenefit(); // Is it better to outline this candidate than not? - if (NotOutliningCost <= OutliningCost) { + if (Benefit < 1) { // Outlining this candidate would take more instructions than not // outlining. // Emit a remark explaining why we didn't outline this candidate. @@ -916,67 +974,101 @@ RepeatedSequenceLocs[0]; MachineOptimizationRemarkEmitter MORE( *(C.first->getParent()->getParent()), nullptr); - MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper", - C.first->getDebugLoc(), - C.first->getParent()); - R << "Did not outline " << NV("Length", StringLen) << " instructions" - << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size()) - << " locations." - << " Instructions from outlining all occurrences (" - << NV("OutliningCost", OutliningCost) << ")" - << " >= Unoutlined instruction count (" - << NV("NotOutliningCost", NotOutliningCost) << ")" - << " (Also found at: "; - - // Tell the user the other places the candidate was found.
- for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) { - R << NV((Twine("OtherStartLoc") + Twine(i)).str(), - RepeatedSequenceLocs[i].first->getDebugLoc()); - if (i != e - 1) - R << ", "; - } + MORE.emit([&]() { + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper", + C.first->getDebugLoc(), + C.first->getParent()); + R << "Did not outline " << NV("Length", StringLen) << " instructions" + << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size()) + << " locations." + << " Instructions from outlining all occurrences (" + << NV("OutliningCost", OF.getOutliningCost()) << ")" + << " >= Unoutlined instruction count (" + << NV("NotOutliningCost", StringLen * OF.getOccurrenceCount()) << ")" + << " (Also found at: "; + + // Tell the user the other places the candidate was found. + for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) { + R << NV((Twine("OtherStartLoc") + Twine(i)).str(), + RepeatedSequenceLocs[i].first->getDebugLoc()); + if (i != e - 1) + R << ", "; + } - R << ")"; - MORE.emit(R); + R << ")"; + return R; + }); // Move to the next candidate. continue; } - unsigned Benefit = NotOutliningCost - OutliningCost; - if (StringLen > MaxLen) MaxLen = StringLen; // At this point, the candidate class is seen as beneficial. Set their // benefit values and save them in the candidate list. + std::vector<std::shared_ptr<Candidate>> CandidatesForFn; for (Candidate &C : CandidatesForRepeatedSeq) { C.Benefit = Benefit; C.MInfo = MInfo; - CandidateList.push_back(C); + std::shared_ptr<Candidate> Cptr = std::make_shared<Candidate>(C); + CandidateList.push_back(Cptr); + CandidatesForFn.push_back(Cptr); } - // Save the function for the new candidate sequence. - std::vector<unsigned> CandidateSequence; - for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++) - CandidateSequence.push_back(ST.Str[i]); - - FunctionList.emplace_back(FnIdx, CandidatesForRepeatedSeq.size(), - CandidateSequence, Benefit, MInfo); + FunctionList.push_back(OF); + FunctionList.back().Candidates = CandidatesForFn; // Move to the next function. - FnIdx++; Parent.IsInTree = false; } return MaxLen; } -void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, - InstructionMapper &Mapper, - unsigned MaxCandidateLen, - const TargetInstrInfo &TII) { +// Remove C from the candidate space, and update its OutlinedFunction. +void MachineOutliner::prune(Candidate &C, + std::vector<OutlinedFunction> &FunctionList) { + // Get the OutlinedFunction associated with this Candidate. + OutlinedFunction &F = FunctionList[C.FunctionIdx]; + + // Update C's associated function's occurrence count. + F.decrement(); + + // Remove C from the CandidateList. + C.InCandidateList = false; + + DEBUG(dbgs() << "- Removed a Candidate \n"; + dbgs() << "--- Num fns left for candidate: " << F.getOccurrenceCount() + << "\n"; + dbgs() << "--- Candidate's function's benefit: " << F.getBenefit() + << "\n";); +} + +void MachineOutliner::pruneOverlaps( + std::vector<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper, + unsigned MaxCandidateLen, const TargetInstrInfo &TII) { + + // Return true if this candidate became unbeneficial for outlining in a + // previous step. + auto ShouldSkipCandidate = [&FunctionList, this](Candidate &C) { + + // Check if the candidate was removed in a previous step. + if (!C.InCandidateList) + return true; + + // C must be alive. Check if we should remove it.
+ if (FunctionList[C.FunctionIdx].getBenefit() < 1) { + prune(C, FunctionList); + return true; + } + + // C is in the list, and F is still beneficial. + return false; + }; + // TODO: Experiment with interval trees or other interval-checking structures // to lower the time complexity of this function. // TODO: Can we do better than the simple greedy choice? @@ -984,57 +1076,36 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList, // This is O(MaxCandidateLen * CandidateList.size()). for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et; It++) { - Candidate &C1 = *It; - OutlinedFunction &F1 = FunctionList[C1.FunctionIdx]; + Candidate &C1 = **It; - // If we removed this candidate, skip it. - if (!C1.InCandidateList) + // If C1 was already pruned, or its function is no longer beneficial for + // outlining, move to the next candidate. + if (ShouldSkipCandidate(C1)) continue; - // Is it still worth it to outline C1? - if (F1.Benefit < 1 || F1.OccurrenceCount < 2) { - assert(F1.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F1.OccurrenceCount--; - C1.InCandidateList = false; - continue; - } - // The minimum start index of any candidate that could overlap with this // one. unsigned FarthestPossibleIdx = 0; // Either the index is 0, or it's at most MaxCandidateLen indices away. - if (C1.StartIdx > MaxCandidateLen) - FarthestPossibleIdx = C1.StartIdx - MaxCandidateLen; + if (C1.getStartIdx() > MaxCandidateLen) + FarthestPossibleIdx = C1.getStartIdx() - MaxCandidateLen; // Compare against the candidates in the list that start at most // FarthestPossibleIdx indices away from C1. There are at most // MaxCandidateLen of these. for (auto Sit = It + 1; Sit != Et; Sit++) { - Candidate &C2 = *Sit; - OutlinedFunction &F2 = FunctionList[C2.FunctionIdx]; + Candidate &C2 = **Sit; // Is this candidate too far away to overlap? - if (C2.StartIdx < FarthestPossibleIdx) + if (C2.getStartIdx() < FarthestPossibleIdx) break; - // Did we already remove this candidate in a previous step? - if (!C2.InCandidateList) + // If C2 was already pruned, or its function is no longer beneficial for + // outlining, move to the next candidate. + if (ShouldSkipCandidate(C2)) continue; - // Is the function beneficial to outline? - if (F2.OccurrenceCount < 2 || F2.Benefit < 1) { - // If not, remove this candidate and move to the next one. - assert(F2.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F2.OccurrenceCount--; - C2.InCandidateList = false; - continue; - } - - unsigned C2End = C2.StartIdx + C2.Len - 1; - // Do C1 and C2 overlap? // // Not overlapping: @@ -1043,7 +1114,7 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList, // We sorted our candidate list so C2Start <= C1Start. We know that // C2End > C2Start since each candidate has length >= 2. Therefore, all we // have to check is C2End < C1Start to see if we overlap. - if (C2End < C1.StartIdx) + if (C2.getEndIdx() < C1.getStartIdx()) continue; // C1 and C2 overlap. // // Approximate this by picking the one which would have saved us the // most instructions before any pruning. - if (C1.Benefit >= C2.Benefit) { - - // C1 is better, so remove C2 and update C2's OutlinedFunction to - // reflect the removal. - assert(F2.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F2.OccurrenceCount--; - - // Remove the call overhead from the removed sequence.
- F2.Benefit += C2.MInfo.CallOverhead; - // Add back one instance of the sequence. - if (F2.Sequence.size() > F2.Benefit) - F2.Benefit = 0; - else - F2.Benefit -= F2.Sequence.size(); - - C2.InCandidateList = false; - - DEBUG(dbgs() << "- Removed C2. \n"; - dbgs() << "--- Num fns left for C2: " << F2.OccurrenceCount - << "\n"; - dbgs() << "--- C2's benefit: " << F2.Benefit << "\n";); - - } else { - // C2 is better, so remove C1 and update C1's OutlinedFunction to - // reflect the removal. - assert(F1.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F1.OccurrenceCount--; - - // Remove the call overhead from the removed sequence. - F1.Benefit += C1.MInfo.CallOverhead; - - // Add back one instance of the sequence. - if (F1.Sequence.size() > F1.Benefit) - F1.Benefit = 0; - else - F1.Benefit -= F1.Sequence.size(); - - C1.InCandidateList = false; - - DEBUG(dbgs() << "- Removed C1. \n"; - dbgs() << "--- Num fns left for C1: " << F1.OccurrenceCount - << "\n"; - dbgs() << "--- C1's benefit: " << F1.Benefit << "\n";); - - // C1 is out, so we don't have to compare it against anyone else. + // Is C2 a better candidate? + if (C2.Benefit > C1.Benefit) { + // Yes, so prune C1. Since C1 is dead, we don't have to compare it + // against anything anymore, so break. + prune(C1, FunctionList); break; } + + // Prune C2 and move on to the next candidate. + prune(C2, FunctionList); } } } -unsigned -MachineOutliner::buildCandidateList(std::vector<Candidate> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, - SuffixTree &ST, InstructionMapper &Mapper, - const TargetInstrInfo &TII) { +unsigned MachineOutliner::buildCandidateList( + std::vector<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList, SuffixTree &ST, + InstructionMapper &Mapper, const TargetInstrInfo &TII) { std::vector<unsigned> CandidateSequence; // Current outlining candidate. unsigned MaxCandidateLen = 0; // Length of the longest candidate. @@ -1120,7 +1151,10 @@ MachineOutliner::buildCandidateList(std::vector<Candidate> &CandidateList, // Sort the candidates in descending order. This will simplify the outlining // process when we have to remove the candidates from the mapping by // allowing us to cut them out without keeping track of an offset. - std::stable_sort(CandidateList.begin(), CandidateList.end()); + std::stable_sort( + CandidateList.begin(), CandidateList.end(), + [](const std::shared_ptr<Candidate> &LHS, + const std::shared_ptr<Candidate> &RHS) { return *LHS < *RHS; }); return MaxCandidateLen; } @@ -1179,15 +1213,14 @@ MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, return &MF; } -bool MachineOutliner::outline(Module &M, - const ArrayRef<Candidate> &CandidateList, - std::vector<OutlinedFunction> &FunctionList, - InstructionMapper &Mapper) { +bool MachineOutliner::outline( + Module &M, const ArrayRef<std::shared_ptr<Candidate>> &CandidateList, + std::vector<OutlinedFunction> &FunctionList, InstructionMapper &Mapper) { bool OutlinedSomething = false; // Replace the candidates with calls to their respective outlined functions. - for (const Candidate &C : CandidateList) { - + for (const std::shared_ptr<Candidate> &Cptr : CandidateList) { + Candidate &C = *Cptr; // Was the candidate removed during pruneOverlaps? if (!C.InCandidateList) continue; @@ -1196,14 +1229,15 @@ bool MachineOutliner::outline(Module &M, OutlinedFunction &OF = FunctionList[C.FunctionIdx]; // Was its OutlinedFunction made unbeneficial during pruneOverlaps? - if (OF.OccurrenceCount < 2 || OF.Benefit < 1) + if (OF.getBenefit() < 1) continue; // If not, then outline it.
- assert(C.StartIdx < Mapper.InstrList.size() && "Candidate out of bounds!"); - MachineBasicBlock *MBB = (*Mapper.InstrList[C.StartIdx]).getParent(); - MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.StartIdx]; - unsigned EndIdx = C.StartIdx + C.Len - 1; + assert(C.getStartIdx() < Mapper.InstrList.size() && + "Candidate out of bounds!"); + MachineBasicBlock *MBB = (*Mapper.InstrList[C.getStartIdx()]).getParent(); + MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.getStartIdx()]; + unsigned EndIdx = C.getEndIdx(); assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!"); MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; @@ -1214,6 +1248,37 @@ bool MachineOutliner::outline(Module &M, // Does this candidate have a function yet? if (!OF.MF) { OF.MF = createOutlinedFunction(M, OF, Mapper); + MachineBasicBlock *MBB = &*OF.MF->begin(); + + // Output a remark telling the user that an outlined function was created, + // and explaining where it came from. + MachineOptimizationRemarkEmitter MORE(*OF.MF, nullptr); + MachineOptimizationRemark R(DEBUG_TYPE, "OutlinedFunction", + MBB->findDebugLoc(MBB->begin()), MBB); + R << "Saved " << NV("OutliningBenefit", OF.getBenefit()) + << " instructions by " + << "outlining " << NV("Length", OF.Sequence.size()) << " instructions " + << "from " << NV("NumOccurrences", OF.getOccurrenceCount()) + << " locations. " + << "(Found at: "; + + // Tell the user the other places the candidate was found. + for (size_t i = 0, e = OF.Candidates.size(); i < e; i++) { + + // Skip over things that were pruned. + if (!OF.Candidates[i]->InCandidateList) + continue; + + R << NV( + (Twine("StartLoc") + Twine(i)).str(), + Mapper.InstrList[OF.Candidates[i]->getStartIdx()]->getDebugLoc()); + if (i != e - 1) + R << ", "; + } + + R << ")"; + + MORE.emit(R); FunctionsCreated++; } @@ -1223,7 +1288,7 @@ bool MachineOutliner::outline(Module &M, // Insert a call to the new function and erase the old sequence. TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.MInfo); - StartIt = Mapper.InstrList[C.StartIdx]; + StartIt = Mapper.InstrList[C.getStartIdx()]; MBB->erase(StartIt, EndIt); OutlinedSomething = true; @@ -1256,7 +1321,8 @@ bool MachineOutliner::runOnModule(Module &M) { MachineFunction &MF = MMI.getOrCreateMachineFunction(F); // Is the function empty? Safe to outline from? - if (F.empty() || !TII->isFunctionSafeToOutlineFrom(MF)) + if (F.empty() || + !TII->isFunctionSafeToOutlineFrom(MF, OutlineFromLinkOnceODRs)) continue; // If it is, look at each MachineBasicBlock in the function. @@ -1273,7 +1339,7 @@ bool MachineOutliner::runOnModule(Module &M) { // Construct a suffix tree, use it to find candidates, and then outline them. SuffixTree ST(Mapper.UnsignedVec); - std::vector<Candidate> CandidateList; + std::vector<std::shared_ptr<Candidate>> CandidateList; std::vector<OutlinedFunction> FunctionList; // Find all of the outlining candidates. diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp index 91e1257ba4dd8..c852c2e1564f9 100644 --- a/lib/CodeGen/MachinePipeliner.cpp +++ b/lib/CodeGen/MachinePipeliner.cpp @@ -369,8 +369,9 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs { /// Set the Minimum Initiation Interval for this schedule attempt.
void setMII(unsigned mii) { MII = mii; } - MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule, - bool UpdateDAG = false); + void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule); + + void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs); /// Return the new base register that was stored away for the changed /// instruction. @@ -3353,7 +3354,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI, unsigned BaseReg = MI->getOperand(BasePosLd).getReg(); // Look for the Phi instruction. - MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); MachineInstr *Phi = MRI.getVRegDef(BaseReg); if (!Phi || !Phi->isPHI()) return false; @@ -3390,9 +3391,8 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI, /// Apply changes to the instruction if needed. The changes are needed /// to improve the scheduling and depend upon the final schedule. -MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, - SMSchedule &Schedule, - bool UpdateDAG) { +void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, + SMSchedule &Schedule) { SUnit *SU = getSUnit(MI); DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = InstrChanges.find(SU); @@ -3400,7 +3400,7 @@ MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI, std::pair<unsigned, int64_t> RegAndOffset = It->second; unsigned BasePos, OffsetPos; if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) - return nullptr; + return; unsigned BaseReg = MI->getOperand(BasePos).getReg(); MachineInstr *LoopDef = findDefInLoop(BaseReg); int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef)); @@ -3418,15 +3418,11 @@ int64_t NewOffset = MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff; NewMI->getOperand(OffsetPos).setImm(NewOffset); - if (UpdateDAG) { - SU->setInstr(NewMI); - MISUnitMap[NewMI] = SU; - } + SU->setInstr(NewMI); + MISUnitMap[NewMI] = SU; NewMIs.insert(NewMI); - return NewMI; } } - return nullptr; } /// Return true for an order dependence that is loop carried potentially. @@ -3872,6 +3868,58 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) { return true; } +/// Attempt to fix the degenerate cases when the instruction serialization +/// causes the register lifetimes to overlap. For example, +/// p' = store_pi(p, b) +/// = load p, offset +/// In this case p and p' overlap, which means that two registers are needed. +/// Instead, this function changes the load to use p' and updates the offset. +void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) { + unsigned OverlapReg = 0; + unsigned NewBaseReg = 0; + for (SUnit *SU : Instrs) { + MachineInstr *MI = SU->getInstr(); + for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + // Look for an instruction that uses p. The instruction occurs in the + // same cycle but occurs later in the serialized order. + if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) { + // Check that the instruction appears in the InstrChanges structure, + // which contains instructions that can have the offset updated. + DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = + InstrChanges.find(SU); + if (It != InstrChanges.end()) { + unsigned BasePos, OffsetPos; + // Update the base register and adjust the offset.
+ if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) { + MachineInstr *NewMI = MF.CloneMachineInstr(MI); + NewMI->getOperand(BasePos).setReg(NewBaseReg); + int64_t NewOffset = + MI->getOperand(OffsetPos).getImm() - It->second.second; + NewMI->getOperand(OffsetPos).setImm(NewOffset); + SU->setInstr(NewMI); + MISUnitMap[NewMI] = SU; + NewMIs.insert(NewMI); + } + } + OverlapReg = 0; + NewBaseReg = 0; + break; + } + // Look for an instruction of the form p' = op(p), which uses and defines + // two virtual registers that get allocated to the same physical register. + unsigned TiedUseIdx = 0; + if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) { + // OverlapReg is p in the example above. + OverlapReg = MI->getOperand(TiedUseIdx).getReg(); + // NewBaseReg is p' in the example above. + NewBaseReg = MI->getOperand(i).getReg(); + break; + } + } + } +} + /// After the schedule has been formed, call this function to combine /// the instructions from the different stages/cycles. That is, this /// function creates a schedule that represents a single iteration. @@ -3932,7 +3980,7 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { // map. We need to use the new registers to create the correct order. for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) { SUnit *SU = &SSD->SUnits[i]; - SSD->applyInstrChange(SU->getInstr(), *this, true); + SSD->applyInstrChange(SU->getInstr(), *this); } // Reorder the instructions in each cycle to fix and improve the @@ -3956,6 +4004,7 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) { // Replace the old order with the new order. cycleInstrs.swap(newOrderZC); cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end()); + SSD->fixupRegisterOverlaps(cycleInstrs); } DEBUG(dump();); diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 6780d76e876db..3e12bdcd689e1 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -1831,6 +1831,13 @@ static const unsigned InvalidCycle = ~0U; SchedBoundary::~SchedBoundary() { delete HazardRec; } +/// Given a Count of resource usage and a Latency value, return true if a +/// SchedBoundary becomes resource limited. +static bool checkResourceLimit(unsigned LFactor, unsigned Count, + unsigned Latency) { + return (int)(Count - (Latency * LFactor)) > (int)LFactor; +} + void SchedBoundary::reset() { // A new HazardRec is created for each DAG and owned by SchedBoundary. // Destroying and reconstructing it is very expensive though. 
So keep @@ -1962,16 +1969,18 @@ bool SchedBoundary::checkHazard(SUnit *SU) { if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { const MCSchedClassDesc *SC = DAG->getSchedClass(SU); - for (TargetSchedModel::ProcResIter - PI = SchedModel->getWriteProcResBegin(SC), - PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles); + for (const MCWriteProcResEntry &PE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) { + unsigned ResIdx = PE.ProcResourceIdx; + unsigned Cycles = PE.Cycles; + unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles); if (NRCycle > CurrCycle) { #ifndef NDEBUG - MaxObservedStall = std::max(PI->Cycles, MaxObservedStall); + MaxObservedStall = std::max(Cycles, MaxObservedStall); #endif DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " - << SchedModel->getResourceName(PI->ProcResourceIdx) + << SchedModel->getResourceName(ResIdx) << "=" << NRCycle << "c\n"); return true; } @@ -2083,10 +2092,9 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) { } } CheckPending = true; - unsigned LFactor = SchedModel->getLatencyFactor(); IsResourceLimited = - (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) - > (int)LFactor; + checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), + getScheduledLatency()); DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); } @@ -2239,16 +2247,15 @@ void SchedBoundary::bumpNode(SUnit *SU) { << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); } // If we stall for any reason, bump the cycle. - if (NextCycle > CurrCycle) { + if (NextCycle > CurrCycle) bumpCycle(NextCycle); - } else { + else // After updating ZoneCritResIdx and ExpectedLatency, check if we're // resource limited. If a stall occurred, bumpCycle does this. - unsigned LFactor = SchedModel->getLatencyFactor(); IsResourceLimited = - (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) - > (int)LFactor; - } + checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(), + getScheduledLatency()); + // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle // resets CurrMOps. Loop to handle instructions with more MOps than issue in // one cycle. Since we commonly reach the max MOps here, opportunistically @@ -2433,10 +2440,10 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA, OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0; bool OtherResLimited = false; - if (SchedModel->hasInstrSchedModel()) { - unsigned LFactor = SchedModel->getLatencyFactor(); - OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; - } + if (SchedModel->hasInstrSchedModel()) + OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(), + OtherCount, RemLatency); + // Schedule aggressively for latency in PostRA mode. We don't check for // acyclic latency during PostRA, and highly out-of-order processors will // skip PostRA scheduling. 
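For reference, the predicate that the new checkResourceLimit helper centralizes can be exercised on its own. The sketch below copies the helper verbatim from the hunks above and drives it with invented sample numbers; everything outside the helper (the main function and the values) is illustrative only and not part of the patch.

#include <iostream>

// Copied from the patch: a zone counts as resource limited when the
// critical resource count exceeds the scheduled latency, scaled to
// resource units, by more than one latency factor. The signed casts keep
// the comparison well-defined when Latency * LFactor exceeds Count.
static bool checkResourceLimit(unsigned LFactor, unsigned Count,
                               unsigned Latency) {
  return (int)(Count - (Latency * LFactor)) > (int)LFactor;
}

int main() {
  // Latency factor 2: 10 resource units against 3 cycles of latency
  // (6 units) leaves an excess of 4 > 2, so the zone is resource limited.
  std::cout << checkResourceLimit(2, 10, 3) << '\n'; // prints 1
  // 7 units against the same latency leaves an excess of 1 <= 2: not limited.
  std::cout << checkResourceLimit(2, 7, 3) << '\n'; // prints 0
  return 0;
}

This is the same arithmetic that bumpCycle, bumpNode, and setPolicy previously spelled out inline; factoring it out keeps the three call sites in agreement.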
@@ -2651,7 +2658,7 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) { - const MachineFunction &MF = *Begin->getParent()->getParent(); + const MachineFunction &MF = *Begin->getMF(); const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); // Avoid setting up the register pressure tracker for small regions to save diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 032abb441ddd7..d9e9b3360a053 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -1,4 +1,4 @@ -//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===// +//===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===// // // The LLVM Compiler Infrastructure // @@ -16,79 +16,89 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include <climits> +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <limits> +#include <utility> +#include <vector> using namespace llvm; #define DEBUG_TYPE "prologepilog" -typedef SmallVector<MachineBasicBlock *, 4> MBBVector; -static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, - unsigned &MinCSFrameIndex, - unsigned &MaxCSFrameIndex, - const MBBVector &SaveBlocks, - const MBBVector &RestoreBlocks); +using MBBVector = SmallVector<MachineBasicBlock *, 4>; + +static void spillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, + unsigned &MinCSFrameIndex, + unsigned &MaxCSFrameIndex, + const MBBVector &SaveBlocks, + const MBBVector &RestoreBlocks); namespace { + class PEI : public MachineFunctionPass { public: static char ID; + PEI() : MachineFunctionPass(ID) {
initializePEIPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override; - MachineFunctionProperties getRequiredProperties() const override { - MachineFunctionProperties MFP; - if (UsesCalleeSaves) - MFP.set(MachineFunctionProperties::Property::NoVRegs); - return MFP; - } - /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. - /// bool runOnMachineFunction(MachineFunction &Fn) override; private: - std::function<void(MachineFunction &, RegScavenger *, unsigned &, unsigned &, const MBBVector &, const MBBVector &)> - SpillCalleeSavedRegisters; - std::function<void(MachineFunction &, RegScavenger &)> - ScavengeFrameVirtualRegs; - - bool UsesCalleeSaves = false; - RegScavenger *RS; // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved @@ -122,9 +132,11 @@ class PEI : public MachineFunctionPass { int &SPAdj); void insertPrologEpilogCode(MachineFunction &Fn); }; -} // namespace + +} // end anonymous namespace char PEI::ID = 0; + char &llvm::PrologEpilogCodeInserterID = PEI::ID; static cl::opt<unsigned> @@ -158,28 +170,12 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } - /// StackObjSet - A set of stack object indexes -typedef SmallSetVector<int, 8> StackObjSet; +using StackObjSet = SmallSetVector<int, 8>; /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. -/// bool PEI::runOnMachineFunction(MachineFunction &Fn) { - if (!SpillCalleeSavedRegisters) { - const TargetMachine &TM = Fn.getTarget(); - if (!TM.usesPhysRegsForPEI()) { - SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *, - unsigned &, unsigned &, const MBBVector &, - const MBBVector &) {}; - ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {}; - } else { - SpillCalleeSavedRegisters = doSpillCalleeSavedRegs; - ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs; - UsesCalleeSaves = true; - } - } - const Function* F = Fn.getFunction(); const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); @@ -200,8 +196,9 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { calculateSaveRestoreBlocks(Fn); // Handle CSR spilling and restoring, for targets that need it. - SpillCalleeSavedRegisters(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex, - SaveBlocks, RestoreBlocks); + if (Fn.getTarget().usesPhysRegsForPEI()) + spillCalleeSavedRegs(Fn, RS, MinCSFrameIndex, MaxCSFrameIndex, SaveBlocks, + RestoreBlocks); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. @@ -226,12 +223,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // If register scavenging is needed, as we've enabled doing it as a // post-pass, scavenge the virtual registers that frame index elimination // inserted. - if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) { - ScavengeFrameVirtualRegs(Fn, *RS); - - // Clear any vregs created by virtual scavenging. - Fn.getRegInfo().clearVirtRegs(); - } + if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) + scavengeFrameVirtualRegs(Fn, *RS); // Warn on stack size when we exceed the given limit.
MachineFrameInfo &MFI = Fn.getFrameInfo(); @@ -512,11 +505,19 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, } } -static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS, - unsigned &MinCSFrameIndex, - unsigned &MaxCSFrameIndex, - const MBBVector &SaveBlocks, - const MBBVector &RestoreBlocks) { +static void spillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS, + unsigned &MinCSFrameIndex, + unsigned &MaxCSFrameIndex, + const MBBVector &SaveBlocks, + const MBBVector &RestoreBlocks) { + // We can't list this requirement in getRequiredProperties because some + // targets (WebAssembly) use virtual registers past this point, and the pass + // pipeline is set up without giving the passes a chance to look at the + // TargetMachine. + // FIXME: Find a way to express this in getRequiredProperties. + assert(Fn.getProperties().hasProperty( + MachineFunctionProperties::Property::NoVRegs)); + const Function *F = Fn.getFunction(); const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); MachineFrameInfo &MFI = Fn.getFrameInfo(); @@ -578,7 +579,6 @@ AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, /// Compute which bytes of fixed and callee-save stack area are unused and keep /// track of them in StackBytesFree. -/// static inline void computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown, unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex, @@ -619,7 +619,6 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown, /// Assign frame object to an unused portion of the stack in the fixed stack /// object range. Return true if the allocation was successful. -/// static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx, bool StackGrowsDown, unsigned MaxAlign, BitVector &StackBytesFree) { @@ -696,7 +695,6 @@ AssignProtectedObjSet(const StackObjSet &UnassignedObjs, /// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the /// abstract stack objects. -/// void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); StackProtector *SP = &getAnalysis<StackProtector>(); @@ -818,7 +816,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } // Retrieve the Exception Handler registration node. - int EHRegNodeFrameIndex = INT_MAX; + int EHRegNodeFrameIndex = std::numeric_limits<int>::max(); if (const WinEHFuncInfo *FuncInfo = Fn.getWinEHFuncInfo()) EHRegNodeFrameIndex = FuncInfo->EHRegNodeFrameIndex; @@ -896,7 +894,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } // Allocate the EH registration node first if one is present.
- if (EHRegNodeFrameIndex != INT_MAX) + if (EHRegNodeFrameIndex != std::numeric_limits<int>::max()) AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset, MaxAlign, Skew); @@ -962,17 +960,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { MFI.setStackSize(StackSize); NumBytesStackSpace += StackSize; - MachineOptimizationRemarkAnalysis R( - DEBUG_TYPE, "StackSize", Fn.getFunction()->getSubprogram(), &Fn.front()); - R << ore::NV("NumStackBytes", StackSize) - << " stack bytes in function"; - ORE->emit(R); + ORE->emit([&]() { + return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "StackSize", + Fn.getFunction()->getSubprogram(), + &Fn.front()) + << ore::NV("NumStackBytes", StackSize) << " stack bytes in function"; + }); } /// insertPrologEpilogCode - Scan the function for modified callee saved /// registers, insert spill code for these callee saved registers, then add /// prolog and epilog code to the function. -/// void PEI::insertPrologEpilogCode(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); @@ -1012,7 +1010,6 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. -/// void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); if (!TFI.needsFrameIndexResolution(Fn)) return; @@ -1062,7 +1059,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, bool InsideCallSequence = false; for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (TII.isFrameInstr(*I)) { InsideCallSequence = TII.isFrameSetup(*I); SPAdj += TII.getSPAdjust(*I); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 5bef24780bfe2..e74ac79f0010e 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -30,7 +31,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveInterval.h" @@ -129,6 +130,12 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost", cl::desc("Cost for first time use of callee-saved register."), cl::init(0), cl::Hidden); +static cl::opt<bool> ConsiderLocalIntervalCost( + "consider-local-interval-cost", cl::Hidden, + cl::desc("Consider the cost of local intervals created by a split " + "candidate when choosing the best split candidate."), + cl::init(false)); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -277,6 +284,57 @@ class RAGreedy : public MachineFunctionPass, } }; + /// EvictionTrack - Keeps track of past evictions in order to optimize region + /// split decision. + class EvictionTrack { + + public: + using EvictorInfo = + std::pair<unsigned /* evictor */, unsigned /* physreg */>; + using EvicteeInfo = llvm::MapVector<unsigned /* evictee */, EvictorInfo>; + + private: + /// Each Vreg that has been evicted in the last stage of selectOrSplit will + /// be mapped to the evictor Vreg and the PhysReg it was evicted from.
+ EvicteeInfo Evictees; + + public: + /// \brief Clear all eviction information. + void clear() { Evictees.clear(); } + + /// \brief Clear eviction information for the given evictee Vreg. + /// E.g. when Vreg gets a new allocation, the old eviction info is no + /// longer relevant. + /// \param Evictee The evictee Vreg for whom we want to clear collected + /// eviction info. + void clearEvicteeInfo(unsigned Evictee) { Evictees.erase(Evictee); } + + /// \brief Track new eviction. + /// The Evictor vreg has evicted the Evictee vreg from PhysReg. + /// \param PhysReg The physical register Evictee was evicted from. + /// \param Evictor The evictor Vreg that evicted Evictee. + /// \param Evictee The evictee Vreg. + void addEviction(unsigned PhysReg, unsigned Evictor, unsigned Evictee) { + Evictees[Evictee].first = Evictor; + Evictees[Evictee].second = PhysReg; + } + + /// Return the Evictor Vreg which evicted Evictee Vreg from PhysReg. + /// \param Evictee The evictee vreg. + /// \return The Evictor vreg which evicted Evictee vreg from PhysReg. 0 if + /// nobody has evicted Evictee from PhysReg. + EvictorInfo getEvictor(unsigned Evictee) { + if (Evictees.count(Evictee)) { + return Evictees[Evictee]; + } + + return EvictorInfo(0, 0); + } + }; + + // Keeps track of past evictions in order to optimize region split decision. + EvictionTrack LastEvicted; + // splitting state. std::unique_ptr<SplitAnalysis> SA; std::unique_ptr<SplitEditor> SE; @@ -340,6 +398,10 @@ class RAGreedy : public MachineFunctionPass, /// obtained from the TargetSubtargetInfo. bool EnableLocalReassign; + /// Enable or not the consideration of the cost of local intervals created + /// by a split candidate when choosing the best split candidate. + bool EnableAdvancedRASplitCost; + /// Set of broken hints that may be reconciled later because of eviction. SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; @@ -382,13 +444,24 @@ class RAGreedy : public MachineFunctionPass, bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); void growRegion(GlobalSplitCandidate &Cand); - BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); + bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand, + unsigned BBNumber, + const AllocationOrder &Order); + BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate &, + const AllocationOrder &Order, + bool *CanCauseEvictionChain); bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); void calcGapWeights(unsigned, SmallVectorImpl<float>&); unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); + bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg, + SlotIndex Start, SlotIndex End, + EvictionCost &MaxCost); + unsigned getCheapestEvicteeWeight(const AllocationOrder &Order, + LiveInterval &VirtReg, SlotIndex Start, + SlotIndex End, float *BestEvictWeight); void evictInterference(LiveInterval&, unsigned, SmallVectorImpl<unsigned>&); bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, @@ -405,7 +478,8 @@ class RAGreedy : public MachineFunctionPass, unsigned calculateRegionSplitCost(LiveInterval &VirtReg, AllocationOrder &Order, BlockFrequency &BestCost, - unsigned &NumCands, bool IgnoreCSR); + unsigned &NumCands, bool IgnoreCSR, + bool *CanCauseEvictionChain = nullptr); /// Perform region splitting.
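// As an aside, the EvictionTrack bookkeeping declared above is a plain
// last-writer-wins map. A minimal standalone sketch of its contract (not part
// of the patch; it substitutes std::map for llvm::MapVector and uses made-up
// vreg/physreg numbers):

#include <cassert>
#include <map>
#include <utility>

// Simplified stand-in for EvictionTrack: maps an evictee vreg to the
// (evictor vreg, physreg it was evicted from) pair.
using EvictorInfo = std::pair<unsigned, unsigned>;

int main() {
  std::map<unsigned, EvictorInfo> Evictees;

  // addEviction(PhysReg=7, Evictor=101, Evictee=100):
  // vreg 101 evicted vreg 100 from physreg 7.
  Evictees[100] = {101, 7};
  assert(Evictees.count(100) && Evictees[100] == EvictorInfo(101, 7));

  // A later eviction of the same vreg simply overwrites the record.
  Evictees[100] = {102, 3};
  assert(Evictees[100] == EvictorInfo(102, 3));

  // clearEvicteeInfo(100): once vreg 100 is re-assigned, split, or spilled,
  // the stale record is dropped; getEvictor then reports the (0, 0)
  // "no evictor" sentinel.
  Evictees.erase(100);
  assert(!Evictees.count(100));
  return 0;
}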
unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, bool HasCompact, @@ -859,6 +933,92 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, return true; } +/// \brief Return true if all interferences between VirtReg and PhysReg between +/// Start and End can be evicted. +/// +/// \param VirtReg Live range that is about to be assigned. +/// \param PhysReg Desired register for assignment. +/// \param Start Start of range to look for interferences. +/// \param End End of range to look for interferences. +/// \param MaxCost Only look for cheaper candidates and update with new cost +/// when returning true. +/// \return True when interference can be evicted cheaper than MaxCost. +bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg, + unsigned PhysReg, SlotIndex Start, + SlotIndex End, + EvictionCost &MaxCost) { + EvictionCost Cost; + + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + + // Check if any interfering live range is heavier than MaxWeight. + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + + // Check if the interference overlaps the segment of interest. + if (!Intf->overlaps(Start, End)) + continue; + + // Cannot evict non-virtual reg interference. + if (!TargetRegisterInfo::isVirtualRegister(Intf->reg)) + return false; + // Never evict spill products. They cannot be split or spilled. + if (getStage(*Intf) == RS_Done) + return false; + + // Would this break a satisfied hint? + bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); + // Update eviction cost. + Cost.BrokenHints += BreaksHint; + Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); + // Abort if this would be too expensive. + if (!(Cost < MaxCost)) + return false; + } + } + + if (Cost.MaxWeight == 0) + return false; + + MaxCost = Cost; + return true; +} + +/// \brief Return the physical register that is the best +/// candidate for eviction by a local split interval that will be created +/// between Start and End. +/// +/// \param Order The allocation order +/// \param VirtReg Live range that is about to be assigned. +/// \param Start Start of range to look for interferences +/// \param End End of range to look for interferences +/// \param BestEvictWeight The eviction cost of that eviction +/// \return The PhysReg which is the best candidate for eviction and the +/// eviction cost in BestEvictWeight +unsigned RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order, + LiveInterval &VirtReg, + SlotIndex Start, SlotIndex End, + float *BestEvictWeight) { + EvictionCost BestEvictCost; + BestEvictCost.setMax(); + BestEvictCost.MaxWeight = VirtReg.weight; + unsigned BestEvicteePhys = 0; + + // Go over all physical registers and find the best candidate for eviction. + for (auto PhysReg : Order.getOrder()) { + + if (!canEvictInterferenceInRange(VirtReg, PhysReg, Start, End, + BestEvictCost)) + continue; + + // Best so far. + BestEvicteePhys = PhysReg; + } + *BestEvictWeight = BestEvictCost.MaxWeight; + return BestEvicteePhys; +} + /// evictInterference - Evict any interfering registers that prevent VirtReg /// from being assigned to PhysReg. This assumes that canEvictInterference /// returned true. @@ -893,6 +1053,9 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
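// Note on the check below (an observation, not part of the patch): a single
// interfering vreg can appear in the queries of several register units of
// PhysReg; after the first unit's pass unassigns it, VRM->hasPhys() is false,
// so the later occurrences are skipped.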
if (!VRM->hasPhys(Intf->reg)) continue; + + LastEvicted.addEviction(PhysReg, VirtReg.reg, Intf->reg); + Matrix->unassign(*Intf); assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || VirtReg.isSpillable() < Intf->isSpillable()) && @@ -1214,13 +1377,117 @@ BlockFrequency RAGreedy::calcSpillCost() { return Cost; } +/// \brief Check if splitting Evictee will create a local split interval in +/// basic block number BBNumber that may cause a bad eviction chain. This is +/// intended to prevent bad eviction sequences like: +/// movl %ebp, 8(%esp) # 4-byte Spill +/// movl %ecx, %ebp +/// movl %ebx, %ecx +/// movl %edi, %ebx +/// movl %edx, %edi +/// cltd +/// idivl %esi +/// movl %edi, %edx +/// movl %ebx, %edi +/// movl %ecx, %ebx +/// movl %ebp, %ecx +/// movl 16(%esp), %ebp # 4-byte Reload +/// +/// Such sequences are created in 2 scenarios: +/// +/// Scenario #1: +/// vreg0 is evicted from physreg0 by vreg1. +/// Evictee vreg0 is intended for region splitting with split candidate +/// physreg0 (the reg vreg0 was evicted from). +/// Region splitting creates a local interval because of interference with the +/// evictor vreg1 (normally region splitting creates 2 intervals, the "by reg" +/// and "by stack" intervals; a local interval is created when interference +/// occurs). +/// One of the split intervals ends up evicting vreg2 from physreg1. +/// Evictee vreg2 is intended for region splitting with split candidate +/// physreg1. +/// One of the split intervals ends up evicting vreg3 from physreg2, etc. +/// +/// Scenario #2: +/// vreg0 is evicted from physreg0 by vreg1. +/// vreg2 is evicted from physreg2 by vreg3 etc. +/// Evictee vreg0 is intended for region splitting with split candidate +/// physreg1. +/// Region splitting creates a local interval because of interference with the +/// evictor vreg1. +/// One of the split intervals ends up evicting back original evictor vreg1 +/// from physreg0 (the reg vreg0 was evicted from). +/// Another evictee vreg2 is intended for region splitting with split candidate +/// physreg1. +/// One of the split intervals ends up evicting vreg3 from physreg2, etc. +/// +/// \param Evictee The register considered to be split. +/// \param Cand The split candidate that determines the physical register +/// we are splitting for and the interferences. +/// \param BBNumber The number of a BB for which the region split process will +/// create a local split interval. +/// \param Order The physical registers that may get evicted by a split +/// artifact of Evictee. +/// \return True if splitting Evictee may cause a bad eviction chain, false +/// otherwise. +bool RAGreedy::splitCanCauseEvictionChain(unsigned Evictee, + GlobalSplitCandidate &Cand, + unsigned BBNumber, + const AllocationOrder &Order) { + EvictionTrack::EvictorInfo VregEvictorInfo = LastEvicted.getEvictor(Evictee); + unsigned Evictor = VregEvictorInfo.first; + unsigned PhysReg = VregEvictorInfo.second; + + // No actual evictor. + if (!Evictor || !PhysReg) + return false; + + float MaxWeight = 0; + unsigned FutureEvictedPhysReg = + getCheapestEvicteeWeight(Order, LIS->getInterval(Evictee), + Cand.Intf.first(), Cand.Intf.last(), &MaxWeight); + + // The bad eviction chain occurs when either the split candidate is the + // register the evictee was evicted from, or one of the split artifacts will + // evict the evicting reg. + if ((PhysReg != Cand.PhysReg) && (PhysReg != FutureEvictedPhysReg)) + return false; + + Cand.Intf.moveToBlock(BBNumber); + + // Check to see if the Evictor contains interference (with Evictee) in the + // given BB.
If so, this interference caused the eviction of Evictee from + // PhysReg. This suggests that we will create a local interval during the + // region split to avoid this interference. This local interval may cause a + // bad eviction chain. + if (!LIS->hasInterval(Evictor)) + return false; + LiveInterval &EvictorLI = LIS->getInterval(Evictor); + if (EvictorLI.FindSegmentContaining(Cand.Intf.first()) == EvictorLI.end()) + return false; + + // Now, check to see if the local interval we will create is going to be + // expensive enough to evict somebody. If so, this may cause a bad eviction + // chain. + VirtRegAuxInfo VRAI(*MF, *LIS, VRM, getAnalysis<MachineLoopInfo>(), *MBFI); + float splitArtifactWeight = + VRAI.futureWeight(LIS->getInterval(Evictee), + Cand.Intf.first().getPrevIndex(), Cand.Intf.last()); + if (splitArtifactWeight >= 0 && splitArtifactWeight < MaxWeight) + return false; + + return true; +} + /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints. /// -BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { +BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, + const AllocationOrder &Order, + bool *CanCauseEvictionChain) { BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; + unsigned VirtRegToSplit = SA->getParent().reg; ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -1229,6 +1496,24 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)]; unsigned Ins = 0; + Cand.Intf.moveToBlock(BC.Number); + // Check whether a local interval is going to be created during the region + // split. + if (EnableAdvancedRASplitCost && CanCauseEvictionChain && + Cand.Intf.hasInterference() && BI.LiveIn && BI.LiveOut && RegIn && + RegOut) { + + if (splitCanCauseEvictionChain(VirtRegToSplit, Cand, BC.Number, Order)) { + // This interference causes our eviction from this assignment; we might + // evict somebody else, so add that cost. + // See splitCanCauseEvictionChain for detailed description of scenarios. + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); + + *CanCauseEvictionChain = true; + } + } + if (BI.LiveIn) Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); if (BI.LiveOut) @@ -1249,6 +1534,20 @@ BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { if (Cand.Intf.hasInterference()) { GlobalCost += SpillPlacer->getBlockFrequency(Number); GlobalCost += SpillPlacer->getBlockFrequency(Number); + + // Check whether a local interval is going to be created during the + // region split. + if (EnableAdvancedRASplitCost && CanCauseEvictionChain && + splitCanCauseEvictionChain(VirtRegToSplit, Cand, Number, Order)) { + // This interference causes our eviction from this assignment; we might + // evict somebody else, so add that cost. + // See splitCanCauseEvictionChain for detailed description of + // scenarios.
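// Note on the additions below (an observation, not part of the patch):
// charging SpillPlacer->getBlockFrequency(Number) twice mirrors the doubled
// per-block interference cost already applied just above for plain
// interference in this function.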
+ GlobalCost += SpillPlacer->getBlockFrequency(Number); + GlobalCost += SpillPlacer->getBlockFrequency(Number); + + *CanCauseEvictionChain = true; + } } continue; } @@ -1413,6 +1712,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl<unsigned> &NewVRegs) { unsigned NumCands = 0; + BlockFrequency SpillCost = calcSpillCost(); BlockFrequency BestCost; // Check if we can split this live range around a compact region. @@ -1424,14 +1724,24 @@ } else { // No benefit from the compact region, our fallback will be per-block // splitting. Make sure we find a solution that is cheaper than spilling. - BestCost = calcSpillCost(); + BestCost = SpillCost; DEBUG(dbgs() << "Cost of isolating all blocks = "; MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); } + bool CanCauseEvictionChain = false; unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, - false/*IgnoreCSR*/); + false /*IgnoreCSR*/, &CanCauseEvictionChain); + + // Split candidates with compact regions can cause a bad eviction sequence. + // See splitCanCauseEvictionChain for detailed description of scenarios. + // To avoid it, we need to compare the cost with the spill cost and not the + // current max frequency. + if (HasCompact && (BestCost > SpillCost) && (BestCand != NoCand) && + CanCauseEvictionChain) { + return 0; + } // No solutions found, fall back to single block splitting. if (!HasCompact && BestCand == NoCand) @@ -1443,8 +1753,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, AllocationOrder &Order, BlockFrequency &BestCost, - unsigned &NumCands, - bool IgnoreCSR) { + unsigned &NumCands, bool IgnoreCSR, + bool *CanCauseEvictionChain) { unsigned BestCand = NoCand; Order.rewind(); while (unsigned PhysReg = Order.next()) { @@ -1504,7 +1814,8 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, continue; } - Cost += calcGlobalSplitCost(Cand); + bool HasEvictionChain = false; + Cost += calcGlobalSplitCost(Cand, Order, &HasEvictionChain); DEBUG({ dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; @@ -1515,9 +1826,24 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, if (Cost < BestCost) { BestCand = NumCands; BestCost = Cost; + // See splitCanCauseEvictionChain for detailed description of bad + // eviction chain scenarios. + if (CanCauseEvictionChain) + *CanCauseEvictionChain = HasEvictionChain; } ++NumCands; } + + if (CanCauseEvictionChain && BestCand != NoCand) { + // See splitCanCauseEvictionChain for detailed description of bad + // eviction chain scenarios. + DEBUG(dbgs() << "Best split candidate of vreg " + << PrintReg(VirtReg.reg, TRI) << " may "); + if (!(*CanCauseEvictionChain)) + DEBUG(dbgs() << "not "); + DEBUG(dbgs() << "cause bad eviction chain\n"); + } + return BestCand; } @@ -2580,6 +2906,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { + // If VirtReg got an assignment, the eviction info is no longer relevant.
+ LastEvicted.clearEvicteeInfo(VirtReg.reg); // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. @@ -2613,6 +2941,9 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // copy-related live-ranges. if (Hint && Hint != PhysReg) SetOfBrokenHints.insert(&VirtReg); + // If VirtReg evicted someone, the eviction info for it as an evictee is + // no longer relevant. + LastEvicted.clearEvicteeInfo(VirtReg.reg); return PhysReg; } @@ -2632,8 +2963,11 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // Try splitting VirtReg or interferences. unsigned NewVRegSizeBefore = NewVRegs.size(); unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); - if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) + if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) { + // If VirtReg got split, the eviction info is no longer relevant. + LastEvicted.clearEvicteeInfo(VirtReg.reg); return PhysReg; + } } // If we couldn't allocate a register from spilling, there is probably some @@ -2717,17 +3051,20 @@ void RAGreedy::reportNumberOfSplillsReloads(MachineLoop *L, unsigned &Reloads, if (Reloads || FoldedReloads || Spills || FoldedSpills) { using namespace ore; - MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload", - L->getStartLoc(), L->getHeader()); - if (Spills) - R << NV("NumSpills", Spills) << " spills "; - if (FoldedSpills) - R << NV("NumFoldedSpills", FoldedSpills) << " folded spills "; - if (Reloads) - R << NV("NumReloads", Reloads) << " reloads "; - if (FoldedReloads) - R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads "; - ORE->emit(R << "generated in loop"); + ORE->emit([&]() { + MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReload", + L->getStartLoc(), L->getHeader()); + if (Spills) + R << NV("NumSpills", Spills) << " spills "; + if (FoldedSpills) + R << NV("NumFoldedSpills", FoldedSpills) << " folded spills "; + if (Reloads) + R << NV("NumReloads", Reloads) << " reloads "; + if (FoldedReloads) + R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads "; + R << "generated in loop"; + return R; + }); } } @@ -2744,6 +3081,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { MF->getSubtarget().enableRALocalReassignment( MF->getTarget().getOptLevel()); + EnableAdvancedRASplitCost = ConsiderLocalIntervalCost || + MF->getSubtarget().enableAdvancedRASplitCost(); + if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); @@ -2775,6 +3115,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. SetOfBrokenHints.clear(); + LastEvicted.clear(); allocatePhysRegs(); tryHintsRecoloring(); diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 255d17078a1c4..1ef7e41b8ae32 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -363,7 +363,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { Flipped = true; } - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); if (TargetRegisterInfo::isPhysicalRegister(Dst)) { // Eliminate DstSub on a physreg.
@@ -1583,7 +1583,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { std::swap(SrcRC, DstRC); } if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx, - CP.getNewRC())) { + CP.getNewRC(), *LIS)) { DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); return false; } @@ -2685,8 +2685,8 @@ void JoinVals::pruneValues(JoinVals &Other, for (MachineOperand &MO : Indexes->getInstructionFromIndex(Def)->operands()) { if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { - if (MO.getSubReg() != 0) - MO.setIsUndef(EraseImpDef); + if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef) + MO.setIsUndef(false); MO.setIsDead(false); } } diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index fdd10edf07f00..844ddb9ed3ffa 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -463,7 +463,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj, MachineBasicBlock::iterator &UseMI) { // Find an available scavenging slot with size and alignment matching // the requirements of the class RC. - const MachineFunction &MF = *Before->getParent()->getParent(); + const MachineFunction &MF = *Before->getMF(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned NeedSize = TRI->getSpillSize(RC); unsigned NeedAlign = TRI->getSpillAlignment(RC); @@ -536,7 +536,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { MachineInstr &MI = *I; - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); // Consider all allocatable registers in the register class initially BitVector Candidates = TRI->getAllocatableSet(MF, RC); diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp index 21f2fa497233a..072e6e090e1ea 100644 --- a/lib/CodeGen/SafeStackColoring.cpp +++ b/lib/CodeGen/SafeStackColoring.cpp @@ -1,4 +1,4 @@ -//===-- SafeStackColoring.cpp - SafeStack frame coloring -------*- C++ -*--===// +//===- SafeStackColoring.cpp - SafeStack frame coloring -------------------===// // // The LLVM Compiler Infrastructure // @@ -8,12 +8,25 @@ //===----------------------------------------------------------------------===// #include "SafeStackColoring.h" - +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/User.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include using namespace llvm; using namespace llvm::safestack; diff --git a/lib/CodeGen/SafeStackColoring.h b/lib/CodeGen/SafeStackColoring.h index 08b179ccb7f1f..902e63ebeb7e1 100644 --- a/lib/CodeGen/SafeStackColoring.h +++ b/lib/CodeGen/SafeStackColoring.h @@ -1,4 +1,4 @@ -//===-- SafeStackColoring.h - SafeStack frame coloring ---------*- C++ -*--===// +//===- SafeStackColoring.h - SafeStack frame coloring ----------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -10,16 +10,23 @@ #ifndef LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H #define LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include 
"llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/raw_os_ostream.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/raw_ostream.h" +#include +#include namespace llvm { -class AllocaInst; + +class BasicBlock; +class Function; +class Instruction; namespace safestack { + /// Compute live ranges of allocas. /// Live ranges are represented as sets of "interesting" instructions, which are /// defined as instructions that may start or end an alloca's lifetime. These @@ -35,10 +42,13 @@ class StackColoring { struct BlockLifetimeInfo { /// Which slots BEGINs in each basic block. BitVector Begin; + /// Which slots ENDs in each basic block. BitVector End; + /// Which slots are marked as LIVE_IN, coming into each basic block. BitVector LiveIn; + /// Which slots are marked as LIVE_OUT, coming out of each basic block. BitVector LiveOut; }; @@ -48,11 +58,14 @@ class StackColoring { /// live. struct LiveRange { BitVector bv; + void SetMaximum(int size) { bv.resize(size); } void AddRange(unsigned start, unsigned end) { bv.set(start, end); } + bool Overlaps(const LiveRange &Other) const { return bv.anyCommon(Other.bv); } + void Join(const LiveRange &Other) { bv |= Other.bv; } }; @@ -60,13 +73,15 @@ class StackColoring { Function &F; /// Maps active slots (per bit) for each basic block. - typedef DenseMap LivenessMap; + using LivenessMap = DenseMap; LivenessMap BlockLiveness; /// Number of interesting instructions. - int NumInst; + int NumInst = -1; + /// Numeric ids for interesting instructions. DenseMap InstructionNumbering; + /// A range [Start, End) of instruction ids for each basic block. /// Instructions inside each BB have monotonic and consecutive ids. DenseMap> BlockInstRange; @@ -74,6 +89,7 @@ class StackColoring { ArrayRef Allocas; unsigned NumAllocas; DenseMap AllocaNumbering; + /// LiveRange for allocas. 
SmallVector LiveRanges; @@ -101,7 +117,7 @@ class StackColoring { public: StackColoring(Function &F, ArrayRef<AllocaInst *> Allocas) - : F(F), NumInst(-1), Allocas(Allocas), NumAllocas(Allocas.size()) {} + : F(F), Allocas(Allocas), NumAllocas(Allocas.size()) {} void run(); void removeAllMarkers(); @@ -143,7 +159,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS, return OS << R.bv; } -} // namespace safestack -} // namespace llvm +} // end namespace safestack + +} // end namespace llvm #endif // LLVM_LIB_CODEGEN_SAFESTACKCOLORING_H diff --git a/lib/CodeGen/SafeStackLayout.cpp b/lib/CodeGen/SafeStackLayout.cpp index 7d4dbd13abf44..b1759359e46f9 100644 --- a/lib/CodeGen/SafeStackLayout.cpp +++ b/lib/CodeGen/SafeStackLayout.cpp @@ -1,4 +1,4 @@ -//===-- SafeStackLayout.cpp - SafeStack frame layout -----------*- C++ -*--===// +//===- SafeStackLayout.cpp - SafeStack frame layout -----------------------===// // // The LLVM Compiler Infrastructure // @@ -8,9 +8,15 @@ //===----------------------------------------------------------------------===// #include "SafeStackLayout.h" - -#include "llvm/IR/Instructions.h" +#include "SafeStackColoring.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include +#include using namespace llvm; using namespace llvm::safestack; diff --git a/lib/CodeGen/SafeStackLayout.h b/lib/CodeGen/SafeStackLayout.h index 313ed21c88698..7c1292f251f75 100644 --- a/lib/CodeGen/SafeStackLayout.h +++ b/lib/CodeGen/SafeStackLayout.h @@ -1,4 +1,4 @@ -//===-- SafeStackLayout.h - SafeStack frame layout -------------*- C++ -*--===// +//===- SafeStackLayout.h - SafeStack frame layout --------------*- C++ -*--===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,14 @@ #define LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H #include "SafeStackColoring.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" namespace llvm { + +class raw_ostream; +class Value; + namespace safestack { /// Compute the layout of an unsafe stack frame. @@ -23,10 +29,12 @@ class StackLayout { unsigned Start; unsigned End; StackColoring::LiveRange Range; + StackRegion(unsigned Start, unsigned End, const StackColoring::LiveRange &Range) : Start(Start), End(End), Range(Range) {} }; + /// The list of current stack regions, sorted by StackRegion::Start. SmallVector Regions; @@ -35,6 +43,7 @@ class StackLayout { unsigned Size, Alignment; StackColoring::LiveRange Range; }; + SmallVector StackObjects; DenseMap<const Value *, unsigned> ObjectOffsets; @@ -43,6 +52,7 @@ class StackLayout { public: StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {} + /// Add an object to the stack frame. Value pointer is opaque and used as a /// handle to retrieve the object's offset in the frame later. void addObject(const Value *V, unsigned Size, unsigned Alignment, @@ -59,10 +69,12 @@ /// Returns the alignment of the frame.
unsigned getFrameAlignment() { return MaxAlignment; } + void print(raw_ostream &OS); }; -} // namespace safestack -} // namespace llvm +} // end namespace safestack + +} // end namespace llvm #endif // LLVM_LIB_CODEGEN_SAFESTACKLAYOUT_H diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 08b785d742ad4..be129b8766a34 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -121,9 +121,11 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, SchedModel.init(ST.getSchedModel(), &ST, TII); } -/// If this machine instr has memory reference information and it can be tracked -/// to a normal reference to a known object, return the Value for that object. -static void getUnderlyingObjectsForInstr(const MachineInstr *MI, +/// If this machine instr has memory reference information and it can be +/// tracked to a normal reference to a known object, return the Value +/// for that object. This function returns false if the memory location is +/// unknown or may alias anything. +static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL) { @@ -151,7 +153,8 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); } else if (const Value *V = MMO->getValue()) { SmallVector<Value *, 4> Objs; - getUnderlyingObjectsForCodeGen(V, Objs, DL); + if (!getUnderlyingObjectsForCodeGen(V, Objs, DL)) + return false; for (Value *V : Objs) { assert(isIdentifiedObject(V)); @@ -163,8 +166,12 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, return true; }; - if (!allMMOsOkay()) + if (!allMMOsOkay()) { Objects.clear(); + return false; + } + + return true; } void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { @@ -860,13 +867,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Find the underlying objects for MI. The Objs vector is either // empty, or filled with the Values of memory locations which this - // SU depends on. An empty vector means the memory location is - // unknown, and may alias anything. + // SU depends on. UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(&MI, MFI, Objs, MF.getDataLayout()); + bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs, + MF.getDataLayout()); if (MI.mayStore()) { - if (Objs.empty()) { + if (!ObjsFound) { // An unknown store depends on all stores and loads. addChainDependencies(SU, Stores); addChainDependencies(SU, NonAliasStores); @@ -901,7 +908,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addChainDependencies(SU, Stores, UnknownValue); } } else { // SU is a load. - if (Objs.empty()) { + if (!ObjsFound) { // An unknown load depends on all stores.
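// Note on the case below (an observation, not part of the patch): unlike the
// unknown-store case above, an unknown load only needs chain edges to the
// store sets; loads never conflict with other loads, so no load-load
// dependencies are added here.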
addChainDependencies(SU, Stores); addChainDependencies(SU, NonAliasStores); diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index ae9c5adb03979..fd1e5e2cfc567 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -24,7 +24,7 @@ add_llvm_library(LLVMSelectionDAG SelectionDAGTargetInfo.cpp StatepointLowering.cpp TargetLowering.cpp - + DEPENDS intrinsics_gen ) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 35d7ccb78c455..b79ff7f146d1f 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -328,7 +328,7 @@ namespace { SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); - SDValue visitAssertZext(SDNode *N); + SDValue visitAssertExt(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); @@ -415,6 +415,7 @@ namespace { SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue CombineExtLoad(SDNode *N); SDValue combineRepeatedFPDivisors(SDNode *N); + SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); @@ -444,7 +445,6 @@ namespace { SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); - SDValue reduceBuildVecToTrunc(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx); @@ -1553,7 +1553,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); - case ISD::AssertZext: return visitAssertZext(N); + case ISD::AssertSext: + case ISD::AssertZext: return visitAssertExt(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); @@ -7978,20 +7979,19 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SDValue(); } -// TODO: These transforms should work with AssertSext too. -// Change the function name, comments, opcode references, and caller. -SDValue DAGCombiner::visitAssertZext(SDNode *N) { +SDValue DAGCombiner::visitAssertExt(SDNode *N) { + unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT AssertVT = cast<VTSDNode>(N1)->getVT(); - // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt) - if (N0.getOpcode() == ISD::AssertZext && + // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt) + if (N0.getOpcode() == Opcode && AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT()) return N0; if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && - N0.getOperand(0).getOpcode() == ISD::AssertZext) { + N0.getOperand(0).getOpcode() == Opcode) { // We have an assert, truncate, assert sandwich. Make one stronger assert // by asserting on the smallest asserted type to the larger source type.
// This eliminates the later assert: @@ -8000,13 +8000,13 @@ SDValue DAGCombiner::visitAssertZext(SDNode *N) { SDValue BigA = N0.getOperand(0); EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); assert(BigA_AssertVT.bitsLE(N0.getValueType()) && - "Asserting zero/sign-extended bits from a type larger than the " + "Asserting zero/sign-extended bits to a type larger than the " "truncated destination does not provide information"); SDLoc DL(N); EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT; SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT); - SDValue NewAssert = DAG.getNode(ISD::AssertZext, DL, BigA.getValueType(), + SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), BigA.getOperand(0), MinAssertVTVal); return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); } @@ -9095,7 +9095,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); - bool LookThroughFPExt = TLI.isFPExtFree(VT); // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. @@ -9125,28 +9124,31 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. - if (LookThroughFPExt) { - // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (isContractableFMUL(N00)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), N1); + + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (isContractableFMUL(N00) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), N1); } + } - // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (isContractableFMUL(N10)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(1)), N0); + // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands.
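// Note on the folds below (an observation, not part of the patch): the
// function-wide LookThroughFPExt flag (backed by TLI.isFPExtFree) is replaced
// throughout by a per-fold TLI.isFPExtFoldable(PreferredFusedOpcode, VT,
// SrcVT) query, so a target can accept or reject each combine based on the
// source type of the extension being folded.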
+ if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (isContractableFMUL(N10) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), N0); } } @@ -9182,80 +9184,87 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N0)); } - if (LookThroughFPExt) { - // fold (fadd (fma x, y, (fpext (fmul u, v))), z) - // -> (fma x, y, (fma (fpext u), (fpext v), z)) - auto FoldFAddFMAFPExtFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, U), - DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z)); - }; - if (N0.getOpcode() == PreferredFusedOpcode) { - SDValue N02 = N0.getOperand(2); - if (N02.getOpcode() == ISD::FP_EXTEND) { - SDValue N020 = N02.getOperand(0); - if (isContractableFMUL(N020)) - return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), - N020.getOperand(0), N020.getOperand(1), - N1); + + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + auto FoldFAddFMAFPExtFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (isContractableFMUL(N020) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { + return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), + N020.getOperand(0), N020.getOperand(1), + N1); } } + } - // fold (fadd (fpext (fma x, y, (fmul u, v))), z) - // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) - // FIXME: This turns two single-precision and one double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. - auto FoldFAddFPExtFMAFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, X), - DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, U), - DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z)); - }; - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { - SDValue N002 = N00.getOperand(2); - if (isContractableFMUL(N002)) - return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), - N002.getOperand(0), N002.getOperand(1), - N1); + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ auto FoldFAddFPExtFMAFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, X), + DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (isContractableFMUL(N002) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), + N002.getOperand(0), N002.getOperand(1), + N1); } } + } - // fold (fadd x, (fma y, z, (fpext (fmul u, v))) - // -> (fma y, z, (fma (fpext u), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode) { - SDValue N12 = N1.getOperand(2); - if (N12.getOpcode() == ISD::FP_EXTEND) { - SDValue N120 = N12.getOperand(0); - if (isContractableFMUL(N120)) - return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), - N120.getOperand(0), N120.getOperand(1), - N0); + // fold (fadd x, (fma y, z, (fpext (fmul u, v))) + // -> (fma y, z, (fma (fpext u), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode) { + SDValue N12 = N1.getOperand(2); + if (N12.getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N12.getOperand(0); + if (isContractableFMUL(N120) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), + N120.getOperand(0), N120.getOperand(1), + N0); } } + } - // fold (fadd x, (fpext (fma y, z, (fmul u, v))) - // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) - // FIXME: This turns two single-precision and one double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == PreferredFusedOpcode) { - SDValue N102 = N10.getOperand(2); - if (isContractableFMUL(N102)) - return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), - N102.getOperand(0), N102.getOperand(1), - N0); + // fold (fadd x, (fpext (fma y, z, (fmul u, v))) + // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == PreferredFusedOpcode) { + SDValue N102 = N10.getOperand(2); + if (isContractableFMUL(N102) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), + N102.getOperand(0), N102.getOperand(1), + N0); } } } @@ -9297,7 +9306,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); - bool LookThroughFPExt = TLI.isFPExtFree(VT); // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. @@ -9333,79 +9341,83 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. 
- if (LookThroughFPExt) { - // fold (fsub (fpext (fmul x, y)), z) - // -> (fma (fpext x), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (isContractableFMUL(N00)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, N1)); + + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (isContractableFMUL(N00) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1)); } + } - // fold (fsub x, (fpext (fmul y, z))) - // -> (fma (fneg (fpext y)), (fpext z), x) - // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (isContractableFMUL(N10)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (isContractableFMUL(N10) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), + N0); + } + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fneg (fma (fpext x), (fpext y), z)) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (isContractableFMUL(N000) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(1)), - N0); - } - - // fold (fsub (fpext (fneg (fmul, x, y))), z) - // -> (fneg (fma (fpext x), (fpext y), z)) - // Note: This could be removed with appropriate canonicalization of the - // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the - // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent - // from implementing the canonicalization in visitFSUB. 
- if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FNEG) { - SDValue N000 = N00.getOperand(0); - if (isContractableFMUL(N000)) { - return DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(1)), - N1)); - } + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); } } + } - // fold (fsub (fneg (fpext (fmul, x, y))), z) - // -> (fneg (fma (fpext x)), (fpext y), z) - // Note: This could be removed with appropriate canonicalization of the - // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the - // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent - // from implementing the canonicalization in visitFSUB. - if (N0.getOpcode() == ISD::FNEG) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FP_EXTEND) { - SDValue N000 = N00.getOperand(0); - if (isContractableFMUL(N000)) { - return DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(1)), - N1)); - } + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fneg (fma (fpext x)), (fpext y), z) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (isContractableFMUL(N000) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); } } - } // More folding opportunities when target permits. @@ -9444,102 +9456,108 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N21, N0)); } - if (LookThroughFPExt) { - // fold (fsub (fma x, y, (fpext (fmul u, v))), z) - // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode) { - SDValue N02 = N0.getOperand(2); - if (N02.getOpcode() == ISD::FP_EXTEND) { - SDValue N020 = N02.getOperand(0); - if (isContractableFMUL(N020)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N020.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N020.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, - N1))); - } - } - // fold (fsub (fpext (fma x, y, (fmul u, v))), z) - // -> (fma (fpext x), (fpext y), - // (fma (fpext u), (fpext v), (fneg z))) - // FIXME: This turns two single-precision and one double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. 
- if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { - SDValue N002 = N00.getOperand(2); - if (isContractableFMUL(N002)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N002.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N002.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, - N1))); - } - } - - // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) - // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { - SDValue N120 = N1.getOperand(2).getOperand(0); - if (isContractableFMUL(N120)) { - SDValue N1200 = N120.getOperand(0); - SDValue N1201 = N120.getOperand(1); + // fold (fsub (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (isContractableFMUL(N020) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), - N1.getOperand(1), + N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, - VT, N1200)), DAG.getNode(ISD::FP_EXTEND, SL, VT, - N1201), - N0)); + N020.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); } } + } - // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) - // -> (fma (fneg (fpext y)), (fpext z), - // (fma (fneg (fpext u)), (fpext v), x)) - // FIXME: This turns two single-precision and one double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. - if (N1.getOpcode() == ISD::FP_EXTEND && - N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { - SDValue N100 = N1.getOperand(0).getOperand(0); - SDValue N101 = N1.getOperand(0).getOperand(1); - SDValue N102 = N1.getOperand(0).getOperand(2); - if (isContractableFMUL(N102)) { - SDValue N1020 = N102.getOperand(0); - SDValue N1021 = N102.getOperand(1); + // fold (fsub (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), + // (fma (fpext u), (fpext v), (fneg z))) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (isContractableFMUL(N002) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N100)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, - VT, N1020)), DAG.getNode(ISD::FP_EXTEND, SL, VT, - N1021), - N0)); + N002.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); } } } + + // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) + // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N1.getOperand(2).getOperand(0); + if (isContractableFMUL(N120) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + SDValue N1200 = N120.getOperand(0); + SDValue N1201 = N120.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1200)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1201), + N0)); + } + } + + // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) + // -> (fma (fneg (fpext y)), (fpext z), + // (fma (fneg (fpext u)), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + SDValue CvtSrc = N1.getOperand(0); + SDValue N100 = CvtSrc.getOperand(0); + SDValue N101 = CvtSrc.getOperand(1); + SDValue N102 = CvtSrc.getOperand(2); + if (isContractableFMUL(N102) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) { + SDValue N1020 = N102.getOperand(0); + SDValue N1021 = N102.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N100)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1020)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1021), + N0)); + } + } } return SDValue(); @@ -10702,6 +10720,19 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); + // fold ftrunc (known rounded int x) -> x + // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is + // likely to be generated to extract integer from a rounded floating value. 
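// Note on the fold below (an observation, not part of the patch): a concrete
// instance is (ftrunc (ffloor x)) -> (ffloor x); ffloor already produces an
// integral value, so a further truncation toward zero is a no-op. The switch
// matches every node known to yield a rounded-integer result.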
+ switch (N0.getOpcode()) { + default: break; + case ISD::FRINT: + case ISD::FTRUNC: + case ISD::FNEARBYINT: + case ISD::FFLOOR: + case ISD::FCEIL: + return N0; + } + return SDValue(); } @@ -13734,6 +13765,60 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { return St1; } +/// Convert a disguised subvector insertion into a shuffle: +/// insert_vector_elt V, (bitcast X from vector type), IdxC --> +/// bitcast(shuffle (bitcast V), (extended X), Mask) +/// Note: We do not use an insert_subvector node because that requires a legal +/// subvector type. +SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { + SDValue InsertVal = N->getOperand(1); + if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || + !InsertVal.getOperand(0).getValueType().isVector()) + return SDValue(); + + SDValue SubVec = InsertVal.getOperand(0); + SDValue DestVec = N->getOperand(0); + EVT SubVecVT = SubVec.getValueType(); + EVT VT = DestVec.getValueType(); + unsigned NumSrcElts = SubVecVT.getVectorNumElements(); + unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits(); + unsigned NumMaskVals = ExtendRatio * NumSrcElts; + + // Step 1: Create a shuffle mask that implements this insert operation. The + // vector that we are inserting into will be operand 0 of the shuffle, so + // those elements are just 'i'. The inserted subvector is in the first + // positions of operand 1 of the shuffle. Example: + // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7} + SmallVector Mask(NumMaskVals); + for (unsigned i = 0; i != NumMaskVals; ++i) { + if (i / NumSrcElts == InsIndex) + Mask[i] = (i % NumSrcElts) + NumMaskVals; + else + Mask[i] = i; + } + + // Bail out if the target cannot handle the shuffle we want to create. + EVT SubVecEltVT = SubVecVT.getVectorElementType(); + EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals); + if (!TLI.isShuffleMaskLegal(Mask, ShufVT)) + return SDValue(); + + // Step 2: Create a wide vector from the inserted source vector by appending + // undefined elements. This is the same size as our destination vector. + SDLoc DL(N); + SmallVector ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT)); + ConcatOps[0] = SubVec; + SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps); + + // Step 3: Shuffle in the padded subvector. + SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec); + SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask); + AddToWorklist(PaddedSubV.getNode()); + AddToWorklist(DestVecBC.getNode()); + AddToWorklist(Shuf.getNode()); + return DAG.getBitcast(VT, Shuf); +} + SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); @@ -13752,10 +13837,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1)) return InVec; - // Check that we know which element is being inserted - if (!isa<ConstantSDNode>(EltNo)) + // We must know which element is being inserted for folds below here. + auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); + if (!IndexC) return SDValue(); - unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + unsigned Elt = IndexC->getZExtValue(); + + if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) + return Shuf; // Canonicalize insert_vector_elt dag nodes.
// Example: @@ -14615,93 +14704,6 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { return Shuffles[0]; } -// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT -// operations which can be matched to a truncate or to a shuffle-truncate. -SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) { - // TODO: Add support for big-endian. - if (DAG.getDataLayout().isBigEndian()) - return SDValue(); - if (N->getNumOperands() < 2) - return SDValue(); - SDLoc DL(N); - EVT VT = N->getValueType(0); - unsigned NumElems = N->getNumOperands(); - - if (!isTypeLegal(VT)) - return SDValue(); - - // If the input is something other than an EXTRACT_VECTOR_ELT with a constant - // index, bail out. - // TODO: Allow undef elements in some cases? - if (llvm::any_of(N->ops(), [VT](SDValue Op) { - return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa(Op.getOperand(1)) || - Op.getValueType() != VT.getVectorElementType(); - })) - return SDValue(); - - // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index - auto GetExtractIdx = [](SDValue Extract) { - return cast(Extract.getOperand(1))->getSExtValue(); - }; - - // The offset is defined to be the BUILD_VECTOR's first operand (assuming no - // undef and little-endian). - int Offset = GetExtractIdx(N->getOperand(0)); - - // Compute the stride from the next operand. - int Stride = GetExtractIdx(N->getOperand(1)) - Offset; - SDValue ExtractedFromVec = N->getOperand(0).getOperand(0); - - // Proceed only if the stride and the types can be matched to a truncate. - if ((Stride == 1 || !isPowerOf2_32(Stride)) || - (ExtractedFromVec.getValueType().getVectorNumElements() != - Stride * NumElems) || - (VT.getScalarSizeInBits() * Stride > 64)) - return SDValue(); - - // Check remaining operands are consistent with the computed stride. - for (unsigned i = 1; i != NumElems; ++i) { - SDValue Op = N->getOperand(i); - - if ((Op.getOperand(0) != ExtractedFromVec) || - (GetExtractIdx(Op) != Stride * i + Offset)) - return SDValue(); - } - - SDValue Res = ExtractedFromVec; - EVT TruncVT = - VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT; - if (Offset) { - // If the first index is non-zero, need to shuffle elements of interest to - // lower parts of the vector's elements the truncate will act upon. - // TODO: Generalize to compute the permute-shuffle that will prepare any - // element permutation for the truncate, and let the target decide if - // profitable. - EVT ExtractedVT = ExtractedFromVec.getValueType(); - SmallVector Mask; - for (unsigned i = 0; i != NumElems; ++i) { - Mask.push_back(Offset + i * Stride); - // Pad the elements that will be lost after the truncate with undefs. - Mask.append(Stride - 1, -1); - } - if (!TLI.isShuffleMaskLegal(Mask, ExtractedVT) || - !TLI.isDesirableToCombineBuildVectorToShuffleTruncate(Mask, ExtractedVT, - TruncVT)) - return SDValue(); - Res = DAG.getVectorShuffle(ExtractedVT, SDLoc(N), Res, - DAG.getUNDEF(ExtractedVT), Mask); - } - // Construct the truncate. 
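  // Context for the construction removed below (illustrative, assuming the
  // little-endian case this combine was limited to): with NumElems = 4 and
  // Stride = 2 over a v8i16 source, the extracted elements {0, 2, 4, 6} are
  // exactly the low halves of the lanes of a v4i32 bitcast, so
  //   build_vector (extract v,0), (extract v,2), (extract v,4), (extract v,6)
  // becomes truncate (v4i32 (bitcast v)) -> v4i16.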
- LLVMContext &Ctx = *DAG.getContext(); - EVT NewVT = VT.getVectorVT( - Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems); - - Res = DAG.getBitcast(NewVT, Res); - Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res); - return DAG.getBitcast(VT, Res); -} - SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); @@ -14744,10 +14746,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; - if (TLI.isDesirableToCombineBuildVectorToTruncate()) - if (SDValue V = reduceBuildVecToTrunc(N)) - return V; - if (SDValue V = reduceBuildVecToShuffle(N)) return V; @@ -15352,6 +15350,8 @@ static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0, // TODO - handle more cases as required. if (V.getOpcode() == ISD::BUILD_VECTOR) return V.getOperand(Idx).isUndef(); + if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) + return (Idx != 0) || V.getOperand(0).isUndef(); return false; }; @@ -15453,7 +15453,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { // // To deal with this, we currently use a bunch of mostly arbitrary heuristics. // We don't fold shuffles where one side is a non-zero constant, and we don't -// fold shuffles if the resulting BUILD_VECTOR would have duplicate +// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate // non-constant operands. This seems to work out reasonably well in practice. static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, @@ -15465,6 +15465,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, if (!N0->hasOneUse() || !N1->hasOneUse()) return SDValue(); + // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as // discussed above. if (!N1.isUndef()) { @@ -15476,6 +15477,15 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, return SDValue(); } + // If both inputs are splats of the same value then we can safely merge this + // to a single BUILD_VECTOR with undef elements based on the shuffle mask. + bool IsSplat = false; + auto *BV0 = dyn_cast(N0); + auto *BV1 = dyn_cast(N1); + if (BV0 && BV1) + if (SDValue Splat0 = BV0->getSplatValue()) + IsSplat = (Splat0 == BV1->getSplatValue()); + SmallVector Ops; SmallSet DuplicateOps; for (int M : SVN->getMask()) { @@ -15486,23 +15496,25 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, if (S.getOpcode() == ISD::BUILD_VECTOR) { Op = S.getOperand(Idx); } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) { - if (Idx == 0) - Op = S.getOperand(0); + assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index."); + Op = S.getOperand(0); } else { // Operand can't be combined - bail out. return SDValue(); } } - // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is - // fine, but it's likely to generate low-quality code if the target can't - // reconstruct an appropriate shuffle. + // Don't duplicate a non-constant BUILD_VECTOR operand unless we're + // generating a splat; semantically, this is fine, but it's likely to + // generate low-quality code if the target can't reconstruct an appropriate + // shuffle. if (!Op.isUndef() && !isa(Op) && !isa(Op)) - if (!DuplicateOps.insert(Op).second) + if (!IsSplat && !DuplicateOps.insert(Op).second) return SDValue(); Ops.push_back(Op); } + // BUILD_VECTOR requires all inputs to be of the same type, find the // maximum type and extend them all. 
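  // Illustrative note (sketch of the extension step elided from this hunk):
  // mixing i32 and i64 scalars here would widen the i32 operands, e.g.
  //   (i32 a, i64 b) -> BUILD_VECTOR (extended a to i64), b
  // using a zero- or any-extend depending on what is free for the target.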
EVT SVT = VT.getScalarType(); @@ -15553,6 +15565,9 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for // power-of-2 extensions as they are the most likely. for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { + // Check for non power of 2 vector sizes + if (NumElts % Scale != 0) + continue; if (!isAnyExtend(Scale)) continue; diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 959735d66c4ac..491c56a7314d7 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -168,8 +168,7 @@ bool FastISel::hasTrivialKill(const Value *V) { // No-op casts are trivially coalesced by fast-isel. if (const auto *Cast = dyn_cast(I)) - if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) && - !hasTrivialKill(Cast->getOperand(0))) + if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0))) return false; // Even the value might have only one use in the LLVM IR, it is possible that diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index ea207c71fe398..ff49134f7b997 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -408,6 +408,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, } SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + DEBUG(dbgs() << "Optimizing float store operations\n"); // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // FIXME: We shouldn't do this for TargetConstantFP's. // FIXME: move this to the DAG Combiner! Note that we can't regress due @@ -466,172 +467,174 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { } void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { - StoreSDNode *ST = cast(Node); - SDValue Chain = ST->getChain(); - SDValue Ptr = ST->getBasePtr(); - SDLoc dl(Node); - - unsigned Alignment = ST->getAlignment(); - MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); - AAMDNodes AAInfo = ST->getAAInfo(); - - if (!ST->isTruncatingStore()) { - if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - ReplaceNode(ST, OptStore); - return; - } + StoreSDNode *ST = cast(Node); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDLoc dl(Node); - { - SDValue Value = ST->getValue(); - MVT VT = Value.getSimpleValueType(); - switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: { - // If this is an unaligned store and the target doesn't support it, - // expand it. 
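        // Hedged note: expandUnalignedStore typically rewrites such a store
        // either as several smaller naturally aligned stores or through a
        // stack temporary, depending on the target's legal memory types.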
- EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); - const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { - SDValue Result = TLI.expandUnalignedStore(ST, DAG); - ReplaceNode(SDValue(ST, 0), Result); - } - break; - } - case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res && Res != SDValue(Node, 0)) - ReplaceNode(SDValue(Node, 0), Res); - return; - } - case TargetLowering::Promote: { - MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); - assert(NVT.getSizeInBits() == VT.getSizeInBits() && - "Can only promote stores to same size type"); - Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); - SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); - ReplaceNode(SDValue(Node, 0), Result); - break; - } - } - return; - } + unsigned Alignment = ST->getAlignment(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); + AAMDNodes AAInfo = ST->getAAInfo(); + + if (!ST->isTruncatingStore()) { + DEBUG(dbgs() << "Legalizing store operation\n"); + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + ReplaceNode(ST, OptStore); + return; + } + + SDValue Value = ST->getValue(); + MVT VT = Value.getSimpleValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + // If this is an unaligned store and the target doesn't support it, + // expand it. + EVT MemVT = ST->getMemoryVT(); + unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); + } else + DEBUG(dbgs() << "Legal store\n"); + break; + } + case TargetLowering::Custom: { + DEBUG(dbgs() << "Trying custom lowering\n"); + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res && Res != SDValue(Node, 0)) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Promote: { + MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote stores to same size type"); + Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + return; + } + + DEBUG(dbgs() << "Legalizing truncating store operations\n"); + SDValue Value = ST->getValue(); + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + auto &DL = DAG.getDataLayout(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); + Value = DAG.getZeroExtendInReg(Value, dl, StVT); + SDValue Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, + Alignment, MMOFlags, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. 
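    // Worked example (illustrative): StWidth = 24 gives RoundWidth = 16 and
    // ExtraWidth = 8, so an i24 truncating store becomes an i16 store of the
    // low bits plus an i8 store of (X >> 16) at byte offset 2 on
    // little-endian targets; both pieces stay naturally aligned.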
+ assert(!StVT.isVector() && "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (DL.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + RoundVT, Alignment, MMOFlags, AAInfo); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(RoundWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); + Hi = DAG.getTruncStore( + Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); } else { - SDValue Value = ST->getValue(); - - EVT StVT = ST->getMemoryVT(); - unsigned StWidth = StVT.getSizeInBits(); - auto &DL = DAG.getDataLayout(); - - if (StWidth != StVT.getStoreSizeInBits()) { - // Promote to a byte-sized store with upper bits zero if not - // storing an integral number of bytes. For example, promote - // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), - StVT.getStoreSizeInBits()); - Value = DAG.getZeroExtendInReg(Value, dl, StVT); - SDValue Result = - DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, - Alignment, MMOFlags, AAInfo); - ReplaceNode(SDValue(Node, 0), Result); - } else if (StWidth & (StWidth - 1)) { - // If not storing a power-of-2 number of bits, expand as two stores. - assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned RoundWidth = 1 << Log2_32(StWidth); - assert(RoundWidth < StWidth); - unsigned ExtraWidth = StWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Store size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi; - unsigned IncrementSize; - - if (DL.isLittleEndian()) { - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) - // Store the bottom RoundWidth bits. - Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); - - // Store the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); - Hi = DAG.getNode( - ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(RoundWidth, dl, - TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore( - Chain, dl, Hi, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); - } else { - // Big endian - avoid unaligned stores. - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X - // Store the top RoundWidth bits. 
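      // Illustrative note: big-endian keeps the high 16 bits of the i24 value
      // at offset 0 and the low 8 bits at offset 2, the mirror image of the
      // little-endian split above, so both stores again remain naturally
      // aligned.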
- Hi = DAG.getNode( - ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(ExtraWidth, dl, - TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); - - // Store the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); - Lo = DAG.getTruncStore( - Chain, dl, Value, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); - } + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(ExtraWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), + RoundVT, Alignment, MMOFlags, AAInfo); - // The order of the stores doesn't matter. - SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - ReplaceNode(SDValue(Node, 0), Result); - } else { - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: { - EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); - // If this is an unaligned store and the target doesn't support it, - // expand it. - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { - SDValue Result = TLI.expandUnalignedStore(ST, DAG); - ReplaceNode(SDValue(ST, 0), Result); - } - break; - } - case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res && Res != SDValue(Node, 0)) - ReplaceNode(SDValue(Node, 0), Res); - return; - } - case TargetLowering::Expand: - assert(!StVT.isVector() && - "Vector Stores are handled in LegalizeVectorOps"); - - // TRUNCSTORE:i16 i32 -> STORE i16 - assert(TLI.isTypeLegal(StVT) && - "Do not know how to expand this store!"); - Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); - SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); - ReplaceNode(SDValue(Node, 0), Result); - break; - } + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Lo = DAG.getTruncStore( + Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + } + + // The order of the stores doesn't matter. + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); + } else { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + EVT MemVT = ST->getMemoryVT(); + unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); + // If this is an unaligned store and the target doesn't support it, + // expand it. 
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); } + break; + } + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res && Res != SDValue(Node, 0)) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Expand: + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); + + // TRUNCSTORE:i16 i32 -> STORE i16 + assert(TLI.isTypeLegal(StVT) && + "Do not know how to expand this store!"); + Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + break; } + } } void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { @@ -643,6 +646,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { + DEBUG(dbgs() << "Legalizing non-extending load operation\n"); MVT VT = Node->getSimpleValueType(0); SDValue RVal = SDValue(Node, 0); SDValue RChain = SDValue(Node, 1); @@ -692,6 +696,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { return; } + DEBUG(dbgs() << "Legalizing extending load operation\n"); EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); unsigned Alignment = LD->getAlignment(); @@ -966,7 +971,9 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) { void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); - if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + // Allow illegal target nodes and illegal registers. + if (Node->getOpcode() == ISD::TargetConstant || + Node->getOpcode() == ISD::Register) return; #ifndef NDEBUG @@ -980,7 +987,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || TLI.isTypeLegal(Op.getValueType()) || - Op.getOpcode() == ISD::TargetConstant) && + Op.getOpcode() == ISD::TargetConstant || + Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"); #endif @@ -1184,8 +1192,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } switch (Action) { case TargetLowering::Legal: + DEBUG(dbgs() << "Legal node: nothing to do\n"); return; case TargetLowering::Custom: + DEBUG(dbgs() << "Trying custom legalization\n"); // FIXME: The handling for custom lowering with multiple results is // a complete mess. if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { @@ -1193,6 +1203,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; if (Node->getNumValues() == 1) { + DEBUG(dbgs() << "Successfully custom legalized node\n"); // We can just directly replace this node with the lowered value. 
ReplaceNode(SDValue(Node, 0), Res); return; @@ -1201,9 +1212,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SmallVector ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) ResultVals.push_back(Res.getValue(i)); + DEBUG(dbgs() << "Successfully custom legalized node\n"); ReplaceNode(Node, ResultVals.data()); return; } + DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; case TargetLowering::Expand: if (ExpandNode(Node)) @@ -2010,10 +2023,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::pair CallInfo = TLI.LowerCallTo(CLI); - if (!CallInfo.second.getNode()) + if (!CallInfo.second.getNode()) { + DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); + } + DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); return CallInfo.first; } @@ -2299,9 +2315,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, const SDLoc &dl) { // TODO: Should any fast-math-flags be set for the created nodes? - + DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { - // simple 32-bit [signed|unsigned] integer to float/double expansion + DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " + "expansion\n"); // Get the stack frame index of a 8 byte buffer. SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); @@ -2366,6 +2383,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, // and in all alternate rounding modes. // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { + DEBUG(dbgs() << "Converting unsigned i64 to f64\n"); SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); SDValue TwoP84PlusTwoP52 = @@ -2386,9 +2404,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } - // Implementation of unsigned i64 to f32. // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { + DEBUG(dbgs() << "Converting unsigned i64 to f32\n"); // For unsigned conversions, convert them to signed conversions using the // algorithm from the x86_64 __floatundidf in compiler_rt. 
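    // A minimal C sketch of that trick (illustrative names, assuming only
    // that signed conversions are available):
    //   float u64_to_f32(uint64_t x) {
    //     if ((int64_t)x >= 0)             // value fits in the signed range
    //       return (float)(int64_t)x;      // fast path
    //     uint64_t h = (x >> 1) | (x & 1); // halve, keep a sticky bit
    //     float f = (float)(int64_t)h;     // now within the signed range
    //     return f + f;                    // scale back up by two
    //   }
    // ORing in the shifted-out bit preserves correct rounding; the DAG built
    // below computes both paths and selects on the sign bit instead of
    // branching.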
 if (!isSigned) {
@@ -2758,7 +2776,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
     return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
   case ISD::CTLZ: {
     EVT VT = Op.getValueType();
-    unsigned len = VT.getSizeInBits();
+    unsigned Len = VT.getSizeInBits();

     if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
       EVT SetCCVT = getSetCCResultType(VT);
@@ -2766,7 +2784,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
       SDValue Zero = DAG.getConstant(0, dl, VT);
       SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
       return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
-                         DAG.getConstant(len, dl, VT), CTLZ);
+                         DAG.getConstant(Len, dl, VT), CTLZ);
     }

     // for now, we do this:
@@ -2779,7 +2797,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
     //
     // Ref: "Hacker's Delight" by Henry Warren
     EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
-    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+    for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) {
       SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
       Op = DAG.getNode(ISD::OR, dl, VT, Op,
                        DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
@@ -2791,11 +2809,22 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
     // This trivially expands to CTTZ.
     return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
   case ISD::CTTZ: {
+    EVT VT = Op.getValueType();
+    unsigned Len = VT.getSizeInBits();
+
+    if (TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
+      EVT SetCCVT = getSetCCResultType(VT);
+      SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
+      SDValue Zero = DAG.getConstant(0, dl, VT);
+      SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+      return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+                         DAG.getConstant(Len, dl, VT), CTTZ);
+    }
+
     // for now, we use: { return popcount(~x & (x - 1)); }
     // unless the target has ctlz but not ctpop, in which case we use:
     // { return 32 - nlz(~x & (x-1)); }
     // Ref: "Hacker's Delight" by Henry Warren
-    EVT VT = Op.getValueType();
     SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
                                DAG.getNOT(dl, Op, VT),
                                DAG.getNode(ISD::SUB, dl, VT, Op,
@@ -2812,6 +2841,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
 }

 bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+  DEBUG(dbgs() << "Trying to expand node\n");
   SmallVector<SDValue, 8> Results;
   SDLoc dl(Node);
   SDValue Tmp1, Tmp2, Tmp3, Tmp4;
@@ -3269,6 +3299,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     }
     break;
   case ISD::FP_TO_FP16:
+    DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
     if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
       SDValue Op = Node->getOperand(0);
       MVT SVT = Op.getSimpleValueType();
@@ -3877,17 +3908,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
   }

   // Replace the original node with the legalized result.
-  if (Results.empty())
+  if (Results.empty()) {
+    DEBUG(dbgs() << "Cannot expand node\n");
     return false;
+  }

+  DEBUG(dbgs() << "Successfully expanded node\n");
   ReplaceNode(Node, Results.data());
   return true;
 }

 void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
+  DEBUG(dbgs() << "Trying to convert node to libcall\n");
   SmallVector<SDValue, 8> Results;
   SDLoc dl(Node);
-  SDValue Tmp1, Tmp2, Tmp3, Tmp4;
   unsigned Opc = Node->getOpcode();
   switch (Opc) {
   case ISD::ATOMIC_FENCE: {
@@ -4139,8 +4173,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
   }

   // Replace the original node with the legalized result.
- if (!Results.empty()) + if (!Results.empty()) { + DEBUG(dbgs() << "Successfully converted node to libcall\n"); ReplaceNode(Node, Results.data()); + } else + DEBUG(dbgs() << "Could not convert node to libcall\n"); } // Determine the vector type to use in place of an original scalar element when @@ -4154,6 +4191,7 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI, } void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + DEBUG(dbgs() << "Trying to promote node\n"); SmallVector Results; MVT OVT = Node->getSimpleValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || @@ -4589,8 +4627,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) + if (!Results.empty()) { + DEBUG(dbgs() << "Successfully promoted node\n"); ReplaceNode(Node, Results.data()); + } else + DEBUG(dbgs() << "Could not promote node\n"); } /// This is the entry point for the file. diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 75fec7bd1d485..68cac22a99e66 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -40,8 +40,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { SDValue Res = SDValue(); // See if the target wants to custom expand this node. - if (CustomLowerNode(N, N->getValueType(ResNo), true)) + if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + DEBUG(dbgs() << "Node has been custom expanded, done\n"); return; + } switch (N->getOpcode()) { default: @@ -885,8 +887,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); - if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { + DEBUG(dbgs() << "Node has been custom lowered, done\n"); return false; + } switch (N->getOpcode()) { default: diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index f76363adb99dd..b42edf8e751a5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -226,15 +226,21 @@ bool DAGTypeLegalizer::run() { assert(N->getNodeId() == ReadyToProcess && "Node should be ready if on worklist!"); - if (IgnoreNodeResults(N)) + DEBUG(dbgs() << "Legalizing node: "; N->dump()); + if (IgnoreNodeResults(N)) { + DEBUG(dbgs() << "Ignoring node results\n"); goto ScanOperands; + } // Scan the values produced by the node, checking to see if any result // types are illegal. 
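      // Illustrative note: "illegal" is judged by getTypeAction; e.g. with
      // only 64-bit integer registers an i64 result is TypeLegal, an i128
      // result is expanded into two i64 halves, and anything narrower than
      // the smallest legal type is promoted.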
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); + DEBUG(dbgs() << "Analyzing result type: " << + ResultVT.getEVTString() << "\n"); switch (getTypeAction(ResultVT)) { case TargetLowering::TypeLegal: + DEBUG(dbgs() << "Legal result type\n"); break; // The following calls must take care of *all* of the node's results, // not just the illegal result they were passed (this includes results @@ -291,9 +297,12 @@ bool DAGTypeLegalizer::run() { if (IgnoreNodeResults(N->getOperand(i).getNode())) continue; - EVT OpVT = N->getOperand(i).getValueType(); + const auto Op = N->getOperand(i); + DEBUG(dbgs() << "Analyzing operand: "; Op.dump()); + EVT OpVT = Op.getValueType(); switch (getTypeAction(OpVT)) { case TargetLowering::TypeLegal: + DEBUG(dbgs() << "Legal operand\n"); continue; // The following calls must either replace all of the node's results // using ReplaceValueWith, and return "false"; or update the node's @@ -864,8 +873,13 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, AnalyzeNewValue(Hi); // Transfer debug values. - transferDbgValues(DAG, Op, Lo, 0); - transferDbgValues(DAG, Op, Hi, Lo.getValueSizeInBits()); + if (DAG.getDataLayout().isBigEndian()) { + transferDbgValues(DAG, Op, Hi, 0); + transferDbgValues(DAG, Op, Lo, Hi.getValueSizeInBits()); + } else { + transferDbgValues(DAG, Op, Lo, 0); + transferDbgValues(DAG, Op, Hi, Lo.getValueSizeInBits()); + } // Remember that this is the result of the node. std::pair &Entry = ExpandedIntegers[Op]; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c46d1b04804c9..094afe2830b8e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -89,7 +89,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { /// Pretend all of this node's results are legal. bool IgnoreNodeResults(SDNode *N) const { - return N->getOpcode() == ISD::TargetConstant; + return N->getOpcode() == ISD::TargetConstant || + N->getOpcode() == ISD::Register; } /// For integer nodes that are below legal width, this map indicates what @@ -400,18 +401,22 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { /// Given an operand Op of Float type, returns the integer if the Op is not /// supported in target HW and converted to the integer. /// The integer contains exactly the same bits as Op - only the type changed. - /// For example, if Op is an f32 which was softened to an i32, then this method - /// returns an i32, the bits of which coincide with those of Op. + /// For example, if Op is an f32 which was softened to an i32, then this + /// method returns an i32, the bits of which coincide with those of Op. /// If the Op can be efficiently supported in target HW or the operand must /// stay in a register, the Op is not converted to an integer. /// In that case, the given op is returned. 
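  /// (Example, assuming a typical soft-float target: an f32 add becomes a
  /// libcall such as __addsf3 whose arguments travel as i32, and
  /// GetSoftenedFloat returns the i32 whose bits coincide with the f32 Op.)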
SDValue GetSoftenedFloat(SDValue Op) { - SDValue &SoftenedOp = SoftenedFloats[Op]; - if (!SoftenedOp.getNode() && - isSimpleLegalType(Op.getValueType())) + auto Iter = SoftenedFloats.find(Op); + if (Iter == SoftenedFloats.end()) { + assert(isSimpleLegalType(Op.getValueType()) && + "Operand wasn't converted to integer?"); return Op; + } + + SDValue &SoftenedOp = Iter->second; + assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?"); RemapValue(SoftenedOp); - assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f826fa0510860..5d6c4998ecd5c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1955,7 +1955,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, else std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); - SDValue Lo, Hi; + SDValue Lo; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), @@ -2941,7 +2941,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), ExtType, - N->isExpandingLoad()); + N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index cd5b4c12f1dc6..cf92907a8b5f9 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -25,24 +25,23 @@ class DIExpression; class SDNode; class Value; -/// SDDbgValue - Holds the information from a dbg_value node through SDISel. +/// Holds the information from a dbg_value node through SDISel. /// We do not use SDValue here to avoid including its header. - class SDDbgValue { public: enum DbgValueKind { - SDNODE = 0, // value is the result of an expression - CONST = 1, // value is a constant - FRAMEIX = 2 // value is contents of a stack location + SDNODE = 0, ///< Value is the result of an expression. + CONST = 1, ///< Value is a constant. + FRAMEIX = 2 ///< Value is contents of a stack location. }; private: union { struct { - SDNode *Node; // valid for expressions - unsigned ResNo; // valid for expressions + SDNode *Node; ///< Valid for expressions. + unsigned ResNo; ///< Valid for expressions. } s; - const Value *Const; // valid for constants - unsigned FrameIx; // valid for stack objects + const Value *Const; ///< Valid for constants. + unsigned FrameIx; ///< Valid for stack objects. } u; DIVariable *Var; DIExpression *Expr; @@ -53,7 +52,7 @@ class SDDbgValue { bool Invalid = false; public: - // Constructor for non-constants. + /// Constructor for non-constants. SDDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, unsigned R, bool indir, DebugLoc dl, unsigned O) : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(indir) { @@ -62,7 +61,7 @@ class SDDbgValue { u.s.ResNo = R; } - // Constructor for constants. + /// Constructor for constants. 
SDDbgValue(DIVariable *Var, DIExpression *Expr, const Value *C, DebugLoc dl, unsigned O) : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) { @@ -70,7 +69,7 @@ class SDDbgValue { u.Const = C; } - // Constructor for frame indices. + /// Constructor for frame indices. SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, DebugLoc dl, unsigned O) : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) { @@ -78,40 +77,40 @@ class SDDbgValue { u.FrameIx = FI; } - // Returns the kind. + /// Returns the kind. DbgValueKind getKind() const { return kind; } - // Returns the DIVariable pointer for the variable. + /// Returns the DIVariable pointer for the variable. DIVariable *getVariable() const { return Var; } - // Returns the DIExpression pointer for the expression. + /// Returns the DIExpression pointer for the expression. DIExpression *getExpression() const { return Expr; } - // Returns the SDNode* for a register ref + /// Returns the SDNode* for a register ref SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; } - // Returns the ResNo for a register ref + /// Returns the ResNo for a register ref unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; } - // Returns the Value* for a constant + /// Returns the Value* for a constant const Value *getConst() const { assert (kind==CONST); return u.Const; } - // Returns the FrameIx for a stack object + /// Returns the FrameIx for a stack object unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; } - // Returns whether this is an indirect value. + /// Returns whether this is an indirect value. bool isIndirect() const { return IsIndirect; } - // Returns the DebugLoc. + /// Returns the DebugLoc. DebugLoc getDebugLoc() const { return DL; } - // Returns the SDNodeOrder. This is the order of the preceding node in the - // input. + /// Returns the SDNodeOrder. This is the order of the preceding node in the + /// input. unsigned getOrder() const { return Order; } - // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" - // property. A SDDbgValue is invalid if the SDNode that produces the value is - // deleted. + /// setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" + /// property. A SDDbgValue is invalid if the SDNode that produces the value is + /// deleted. 
void setIsInvalidated() { Invalid = true; } bool isInvalidated() const { return Invalid; } }; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 53ef28e473ae0..9820292562936 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1,4 +1,4 @@ -//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// +//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===// // // The LLVM Compiler Infrastructure // @@ -16,23 +16,47 @@ //===----------------------------------------------------------------------===// #include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "pre-RA-sched" @@ -46,6 +70,7 @@ static RegisterScheduler burrListDAGScheduler("list-burr", "Bottom-up register reduction list scheduling", createBURRListDAGScheduler); + static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in source " @@ -105,6 +130,7 @@ static cl::opt AvgIPC( cl::desc("Average inst/cycle whan no target itinerary exists.")); namespace { + //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler /// implementation. This supports both top-down and bottom-up scheduling. @@ -112,7 +138,6 @@ namespace { class ScheduleDAGRRList : public ScheduleDAGSDNodes { private: /// NeedLatency - True if the scheduler will make use of latency information. - /// bool NeedLatency; /// AvailableQueue - The priority queue to use for the available SUnits. @@ -122,13 +147,13 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes { /// been issued, but their results are not ready yet (due to the latency of /// the operation). Once the operands becomes available, the instruction is /// added to the AvailableQueue. - std::vector PendingQueue; + std::vector PendingQueue; /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; /// CurCycle - The current scheduler state corresponds to this cycle. 
- unsigned CurCycle; + unsigned CurCycle = 0; /// MinAvailableCycle - Cycle of the soonest available instruction. unsigned MinAvailableCycle; @@ -147,7 +172,9 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes { // Collect interferences between physical register use/defs. // Each interference is an SUnit and set of physical registers. SmallVector Interferences; - typedef DenseMap > LRegsMapT; + + using LRegsMapT = DenseMap>; + LRegsMapT LRegsMap; /// Topo - A topological ordering for SUnits which permits fast IsReachable @@ -163,9 +190,8 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes { SchedulingPriorityQueue *availqueue, CodeGenOpt::Level OptLevel) : ScheduleDAGSDNodes(mf), - NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), + NeedLatency(needlatency), AvailableQueue(availqueue), Topo(SUnits, nullptr) { - const TargetSubtargetInfo &STI = mf.getSubtarget(); if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); @@ -267,6 +293,7 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes { return !NeedLatency; } }; + } // end anonymous namespace /// GetCostForDef - Looks up the register class and cost for a given definition. @@ -325,7 +352,8 @@ void ScheduleDAGRRList::Schedule() { CurCycle = 0; IssueCount = 0; - MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; + MinAvailableCycle = + DisableSchedCycles ? 0 : std::numeric_limits::max(); NumLiveRegs = 0; // Allocate slots for each physical register, plus one for a special register // to track the virtual resource of a calling sequence. @@ -409,7 +437,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner, unsigned NestLevel, const TargetInstrInfo *TII) { SDNode *N = Outer; - for (;;) { + while (true) { if (N == Inner) return true; // For a TokenFactor, examine each operand. There may be multiple ways @@ -456,7 +484,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner, static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII) { - for (;;) { + while (true) { // For a TokenFactor, examine each operand. There may be multiple ways // to get to the CALLSEQ_BEGIN, but we need to find the path with the // most nesting in order to ensure that we find the corresponding match. @@ -572,7 +600,7 @@ void ScheduleDAGRRList::ReleasePending() { // If the available queue is empty, it is safe to reset MinAvailableCycle. if (AvailableQueue->empty()) - MinAvailableCycle = UINT_MAX; + MinAvailableCycle = std::numeric_limits::max(); // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. @@ -792,7 +820,8 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { AvailableQueue->remove(PredSU); } - assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); + assert(PredSU->NumSuccsLeft < std::numeric_limits::max() && + "NumSuccsLeft will overflow!"); ++PredSU->NumSuccsLeft; } @@ -898,7 +927,7 @@ void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { if (LookAhead == 0) return; - std::vector::const_iterator I = (Sequence.end() - LookAhead); + std::vector::const_iterator I = (Sequence.end() - LookAhead); unsigned HazardCycle = (*I)->getHeight(); for (auto E = Sequence.end(); I != E; ++I) { SUnit *SU = *I; @@ -1432,7 +1461,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // Try unscheduling up to the point where it's safe to schedule // this node. 
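      // Illustrative note: the loop below scans the SUnits currently
      // generating the interfering physical registers and picks the one with
      // the smallest height, i.e. the nearest point in the schedule at which
      // the conflict disappears, as the backtracking target.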
SUnit *BtSU = nullptr; - unsigned LiveCycle = UINT_MAX; + unsigned LiveCycle = std::numeric_limits::max(); for (unsigned Reg : LRegs) { if (LiveRegGens[Reg]->getHeight() < LiveCycle) { BtSU = LiveRegGens[Reg]; @@ -1552,7 +1581,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { while (AvailableQueue->empty() && !PendingQueue.empty()) { // Advance the cycle to free resources. Skip ahead to the next ready SU. - assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); + assert(MinAvailableCycle < std::numeric_limits::max() && + "MinAvailableCycle uninitialized"); AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); } } @@ -1565,14 +1595,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { #endif } -//===----------------------------------------------------------------------===// -// RegReductionPriorityQueue Definition -//===----------------------------------------------------------------------===// -// -// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers -// to reduce register pressure. -// namespace { + class RegReductionPQBase; struct queue_sort { @@ -1583,6 +1607,7 @@ struct queue_sort { template struct reverse_sort : public queue_sort { SF &SortFunc; + reverse_sort(SF &sf) : SortFunc(sf) {} bool operator()(SUnit* left, SUnit* right) const { @@ -1602,6 +1627,7 @@ struct bu_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; + bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool operator()(SUnit* left, SUnit* right) const; @@ -1615,8 +1641,8 @@ struct src_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; - src_ls_rr_sort(RegReductionPQBase *spq) - : SPQ(spq) {} + + src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool operator()(SUnit* left, SUnit* right) const; }; @@ -1629,8 +1655,8 @@ struct hybrid_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; - hybrid_ls_rr_sort(RegReductionPQBase *spq) - : SPQ(spq) {} + + hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1646,8 +1672,8 @@ struct ilp_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; - ilp_ls_rr_sort(RegReductionPQBase *spq) - : SPQ(spq) {} + + ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1656,8 +1682,8 @@ struct ilp_ls_rr_sort : public queue_sort { class RegReductionPQBase : public SchedulingPriorityQueue { protected: - std::vector Queue; - unsigned CurQueueId; + std::vector Queue; + unsigned CurQueueId = 0; bool TracksRegPressure; bool SrcOrder; @@ -1668,13 +1694,12 @@ class RegReductionPQBase : public SchedulingPriorityQueue { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const TargetLowering *TLI; - ScheduleDAGRRList *scheduleDAG; + ScheduleDAGRRList *scheduleDAG = nullptr; // SethiUllmanNumbers - The SethiUllman number for each node. std::vector SethiUllmanNumbers; /// RegPressure - Tracking current reg pressure per register class. 
- /// std::vector RegPressure; /// RegLimit - Tracking the number of allocatable registers per register @@ -1689,9 +1714,8 @@ class RegReductionPQBase : public SchedulingPriorityQueue { const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) - : SchedulingPriorityQueue(hasReadyFilter), - CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), - MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) { + : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp), + SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); RegLimit.resize(NumRC); @@ -1742,7 +1766,7 @@ class RegReductionPQBase : public SchedulingPriorityQueue { void remove(SUnit *SU) override { assert(!Queue.empty() && "Queue is empty!"); assert(SU->NodeQueueId != 0 && "Not in queue!"); - std::vector::iterator I = find(Queue, SU); + std::vector::iterator I = llvm::find(Queue, SU); if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); @@ -1771,7 +1795,7 @@ class RegReductionPQBase : public SchedulingPriorityQueue { }; template -static SUnit *popFromQueueImpl(std::vector &Q, SF &Picker) { +static SUnit *popFromQueueImpl(std::vector &Q, SF &Picker) { std::vector::iterator Best = Q.begin(); for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I) if (Picker(*Best, *I)) @@ -1784,7 +1808,7 @@ static SUnit *popFromQueueImpl(std::vector &Q, SF &Picker) { } template -SUnit *popFromQueue(std::vector &Q, SF &Picker, ScheduleDAG *DAG) { +SUnit *popFromQueue(std::vector &Q, SF &Picker, ScheduleDAG *DAG) { #ifndef NDEBUG if (DAG->StressSched) { reverse_sort RPicker(Picker); @@ -1795,6 +1819,13 @@ SUnit *popFromQueue(std::vector &Q, SF &Picker, ScheduleDAG *DAG) { return popFromQueueImpl(Q, Picker); } +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Definition +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// template class RegReductionPriorityQueue : public RegReductionPQBase { SF Picker; @@ -1827,7 +1858,7 @@ class RegReductionPriorityQueue : public RegReductionPQBase { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override { // Emulate pop() without clobbering NodeQueueIds. 
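    // Illustrative note: a real pop() also clears each SUnit's NodeQueueId,
    // so this debugging helper pops from a copy of the queue to leave the
    // live scheduler state untouched.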
- std::vector DumpQueue = Queue; + std::vector DumpQueue = Queue; SF DumpPicker = Picker; while (!DumpQueue.empty()) { SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); @@ -1838,17 +1869,11 @@ class RegReductionPriorityQueue : public RegReductionPQBase { #endif }; -typedef RegReductionPriorityQueue -BURegReductionPriorityQueue; - -typedef RegReductionPriorityQueue -SrcRegReductionPriorityQueue; +using BURegReductionPriorityQueue = RegReductionPriorityQueue; +using SrcRegReductionPriorityQueue = RegReductionPriorityQueue; +using HybridBURRPriorityQueue = RegReductionPriorityQueue; +using ILPBURRPriorityQueue = RegReductionPriorityQueue; -typedef RegReductionPriorityQueue -HybridBURRPriorityQueue; - -typedef RegReductionPriorityQueue -ILPBURRPriorityQueue; } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -2867,7 +2892,6 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, /// This results in the store being scheduled immediately /// after N, which shortens the U->N live range, reducing /// register pressure. -/// void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Visit all the nodes in topological order, working top-down. for (SUnit &SU : *SUnits) { @@ -3034,7 +3058,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { // Public Constructor Functions //===----------------------------------------------------------------------===// -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); @@ -3048,7 +3072,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, return SD; } -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); @@ -3062,7 +3086,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, return SD; } -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); @@ -3078,7 +3102,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, return SD; } -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 6eebba19e5c3a..7ddb0dc07fd5e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -709,18 +709,17 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // source order number as N. 
MachineBasicBlock *BB = Emitter.getBlock(); MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); - ArrayRef DVs = DAG->GetDbgValues(N); - for (unsigned i = 0, e = DVs.size(); i != e; ++i) { - if (DVs[i]->isInvalidated()) + for (auto DV : DAG->GetDbgValues(N)) { + if (DV->isInvalidated()) continue; - unsigned DVOrder = DVs[i]->getOrder(); + unsigned DVOrder = DV->getOrder(); if (!Order || DVOrder == Order) { - MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap); + MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap); if (DbgMI) { Orders.push_back({DVOrder, DbgMI}); BB->insert(InsertPos, DbgMI); } - DVs[i]->setIsInvalidated(); + DV->setIsInvalidated(); } } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0e1bff80b10dd..dd5e1e5a3ee52 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1027,7 +1027,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { assert(!VT.isVector() && "getZeroExtendInReg should use the vector element type instead of " "the vector type!"); - if (Op.getValueType() == VT) return Op; + if (Op.getValueType().getScalarType() == VT) return Op; unsigned BitWidth = Op.getScalarValueSizeInBits(); APInt Imm = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); @@ -1486,7 +1486,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, // Validate that all indices in Mask are within the range of the elements // input to the shuffle. int NElts = Mask.size(); - assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) && + assert(llvm::all_of(Mask, + [&](int M) { return M < (NElts * 2) && M >= -1; }) && "Index out of range"); // Copy the mask so we can do any needed cleanup. @@ -2088,6 +2089,14 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, unsigned BitWidth = Op.getScalarValueSizeInBits(); Known = KnownBits(BitWidth); // Don't know anything. + + if (auto *C = dyn_cast(Op)) { + // We know all of the bits for a constant! + Known.One = C->getAPIntValue(); + Known.Zero = ~Known.One; + return; + } + if (Depth == 6) return; // Limit search depth. @@ -2099,11 +2108,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, unsigned Opcode = Op.getOpcode(); switch (Opcode) { - case ISD::Constant: - // We know all of the bits for a constant! - Known.One = cast(Op)->getAPIntValue(); - Known.Zero = ~Known.One; - break; case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every demanded vector element. assert(NumElts == Op.getValueType().getVectorNumElements() && @@ -2128,7 +2132,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero &= Known2.Zero; // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; } break; @@ -2166,7 +2170,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; if (!!DemandedRHS) { SDValue RHS = Op.getOperand(1); @@ -2192,7 +2196,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. 
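      // Illustrative note: KnownBits::isUnknown(), adopted throughout this
      // hunk, is exactly the test being replaced: both masks all-clear. For
      // contrast, a fully known 4-bit constant 0b1010 yields One = 0b1010
      // and Zero = 0b0101.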
- if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; } break; @@ -2276,7 +2280,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; } } @@ -2349,7 +2353,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, case ISD::SELECT: computeKnownBits(Op.getOperand(2), Known, Depth+1); // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; computeKnownBits(Op.getOperand(1), Known2, Depth+1); @@ -2360,7 +2364,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, case ISD::SELECT_CC: computeKnownBits(Op.getOperand(3), Known, Depth+1); // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; computeKnownBits(Op.getOperand(2), Known2, Depth+1); @@ -2838,7 +2842,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); Known.Zero &= Known2.Zero; @@ -2866,7 +2870,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); } SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, @@ -2962,6 +2966,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; + if (auto *C = dyn_cast(Op)) { + const APInt &Val = C->getAPIntValue(); + return Val.getNumSignBits(); + } + if (Depth == 6) return 1; // Limit search depth. @@ -2977,11 +2986,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = cast(Op.getOperand(1))->getVT().getSizeInBits(); return VTBits-Tmp; - case ISD::Constant: { - const APInt &Val = cast(Op)->getAPIntValue(); - return Val.getNumSignBits(); - } - case ISD::BUILD_VECTOR: Tmp = VTBits; for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) { @@ -3105,6 +3109,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; case ISD::SELECT: + case ISD::VSELECT: Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1); if (Tmp == 1) return 1; // Early out. Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); @@ -6973,6 +6978,40 @@ SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O); } +void SelectionDAG::salvageDebugInfo(SDNode &N) { + if (!N.getHasDebugValue()) + return; + for (auto DV : GetDbgValues(&N)) { + if (DV->isInvalidated()) + continue; + switch (N.getOpcode()) { + default: + break; + case ISD::ADD: + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + if (!isConstantIntBuildVectorOrConstantInt(N0) && + isConstantIntBuildVectorOrConstantInt(N1)) { + uint64_t Offset = N.getConstantOperandVal(1); + // Rewrite an ADD constant node into a DIExpression. Since we are + // performing arithmetic to compute the variable's *value* in the + // DIExpression, we need to mark the expression with a + // DW_OP_stack_value. 
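// Editor's example (an illustration, not part of the patch): for a dying
// (add %x, 16) node carrying a debug value, the rewrite below turns
//
//   DBG_VALUE (add %x, 16), !DIExpression()
//
// into a debug value on the surviving operand with the offset folded in:
//
//   DBG_VALUE %x, !DIExpression(DW_OP_plus_uconst, 16, DW_OP_stack_value)
//
// The exact opcode spelling (DW_OP_plus vs. DW_OP_plus_uconst) depends on
// what DIExpression::prepend emitted at the time of this patch.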
+ auto *DIExpr = DV->getExpression(); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, + DIExpression::WithStackValue); + SDDbgValue *Clone = + getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(), + DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); + DV->setIsInvalidated(); + AddDbgValue(Clone, N0.getNode(), false); + DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); + dbgs() << " into " << *DIExpr << '\n'); + } + } + } +} + namespace { /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node @@ -7387,17 +7426,14 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { DbgInfo->add(DB, SD, isParameter); } -/// TransferDbgValues - Transfer SDDbgValues. Called in replace nodes. +/// Transfer SDDbgValues. Called in replace nodes. void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { if (From == To || !From.getNode()->getHasDebugValue()) return; SDNode *FromNode = From.getNode(); SDNode *ToNode = To.getNode(); - ArrayRef DVs = GetDbgValues(FromNode); SmallVector ClonedDVs; - for (ArrayRef::iterator I = DVs.begin(), E = DVs.end(); - I != E; ++I) { - SDDbgValue *Dbg = *I; + for (auto *Dbg : GetDbgValues(FromNode)) { // Only add Dbgvalues attached to same ResNo. if (Dbg->getKind() == SDDbgValue::SDNODE && Dbg->getSDNode() == From.getNode() && diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index df49b0474f3ce..ef713186d62bb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -935,7 +935,24 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); - unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + if (Code == InlineAsm::Kind_Clobber) { + // Clobbers should always have a 1:1 mapping with registers, and may + // reference registers that have illegal (e.g. vector) types. Hence, we + // shouldn't try to apply any sort of splitting logic to them. + assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() && + "No 1:1 mapping from clobbers to regs?"); + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + (void)SP; + for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) { + Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I])); + assert( + (Regs[I] != SP || + DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) && + "If we clobbered the stack pointer, MFI should know about it."); + } + return; + } + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; @@ -943,11 +960,6 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, assert(Reg < Regs.size() && "Mismatch in # registers expected"); unsigned TheReg = Regs[Reg++]; Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); - - if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { - // If we clobbered the stack pointer, MFI should know about it. 
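// Editor's note on the Kind_Clobber fast path above: a clobber names
// physical registers directly, e.g.
//
//   asm volatile("" ::: "q0");   // an ARM NEON register, shown here purely
//                                // as an illustrative assumption
//
// and such a register's vector type may be illegal at this point, so pushing
// it through the getNumRegisters()/RegVTs value-splitting loop could ask the
// target to split a type it cannot legalize. Clobbers are therefore emitted
// strictly one register operand per clobbered register.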
- assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()); - } } } } @@ -8077,6 +8089,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsSwiftError = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.NumFixedArgs += 1; CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); // sret demotion isn't compatible with tail-calls, since the sret argument diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 3dd58975b1f10..1550347f0063b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===// +//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===// // // The LLVM Compiler Infrastructure // @@ -11,24 +11,42 @@ // //===----------------------------------------------------------------------===// -#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include + using namespace llvm; static cl::opt @@ -385,6 +403,7 @@ static Printable PrintNodeId(const SDNode &Node) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); } + LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); dbgs() << '\n'; @@ -402,6 +421,36 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { } void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (getFlags().hasNoUnsignedWrap()) + OS << " nuw"; + + if (getFlags().hasNoSignedWrap()) + OS << " nsw"; + + if (getFlags().hasExact()) + OS << " exact"; + + if (getFlags().hasUnsafeAlgebra()) + OS << " unsafe"; + + if (getFlags().hasNoNaNs()) + OS << " nnan"; + + if (getFlags().hasNoInfs()) + OS << " ninf"; + + if (getFlags().hasNoSignedZeros()) + OS << " nsz"; + + if (getFlags().hasAllowReciprocal()) + OS << " arcp"; + + if (getFlags().hasAllowContract()) + OS << " contract"; + + if (getFlags().hasVectorReduction()) + OS << " vector-reduction"; + if (const MachineSDNode 
*MN = dyn_cast<MachineSDNode>(this)) { if (!MN->memoperands_empty()) { OS << "<"; @@ -429,9 +478,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { OS << '<' << CSDN->getAPIntValue() << '>'; } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { - if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle()) + if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEsingle()) OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; - else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble()) + else if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEdouble()) OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; else { OS << " VisitedSDNodeSet; +using VisitedSDNodeSet = SmallPtrSet<const SDNode *, 32>; + static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { if (!once.insert(N).second) // If we've been here before, return now. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 15d06871e70d9..4c4d196427e2f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -26,7 +26,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -494,10 +494,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DenseMap<unsigned, unsigned> LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) - for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), - E = RegInfo->livein_end(); LI != E; ++LI) - if (LI->second) - LiveInMap.insert(std::make_pair(LI->first, LI->second)); + for (std::pair<unsigned, unsigned> LI : RegInfo->liveins()) + if (LI.second) + LiveInMap.insert(LI); // Insert DBG_VALUE instructions for function arguments to the entry block. for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { @@ -3551,6 +3550,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, "NodeToMatch was removed partway through selection"); SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N, SDNode *E) { + CurDAG->salvageDebugInfo(*N); auto &Chain = ChainNodesMatched; assert((!E || !is_contained(Chain, N)) && "Chain node replaced during MorphNode"); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f6d14a8546c05..fe553bc986ae9 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -458,7 +458,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, // If Old has more than one use then it must be Op, because the // AssumeSingleUse flag is not propagated to recursive calls of // SimplifyDemandedBits, so the only node with multiple use that - // it will attempt to combine will be opt. + // it will attempt to combine will be Op. assert(TLO.Old == Op); SmallVector NewOps; @@ -469,7 +469,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, } NewOps.push_back(User->getOperand(i)); } - TLO.DAG.UpdateNodeOperands(User, NewOps); + User = TLO.DAG.UpdateNodeOperands(User, NewOps); // Op has fewer users now, so we may be able to perform additional combines // with it.
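// Editor's note: the `User = TLO.DAG.UpdateNodeOperands(User, NewOps);` fix
// above matters because UpdateNodeOperands may CSE the updated node into an
// already-existing identical node and return that node instead; the old
// pointer is then stale. Sketch of the corrected usage:
//
//   SDNode *User = ...;
//   User = DAG.UpdateNodeOperands(User, NewOps); // may return a different,
//                                                // CSE'd node
//   // ... only the returned pointer may be used from here on.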
DCI.AddToWorklist(Op.getNode()); @@ -479,7 +479,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, return true; } -bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask, +bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -516,6 +516,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Don't know anything. Known = KnownBits(BitWidth); + if (Op.getOpcode() == ISD::Constant) { + // We know all of the bits for a constant! + Known.One = cast(Op)->getAPIntValue(); + Known.Zero = ~Known.One; + return false; + } + // Other users may use these bits. if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { @@ -538,11 +545,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownBits Known2, KnownOut; switch (Op.getOpcode()) { - case ISD::Constant: - // We know all of the bits for a constant! - Known.One = cast(Op)->getAPIntValue(); - Known.Zero = ~Known.One; - return false; // Don't fall through, will infinitely loop. case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every constant vector element. Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -986,15 +988,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; case ISD::SIGN_EXTEND_INREG: { EVT ExVT = cast(Op.getOperand(1))->getVT(); + unsigned ExVTBits = ExVT.getScalarSizeInBits(); - APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); // If we only care about the highest bit, don't bother shifting right. - if (MsbMask == NewMask) { - unsigned ShAmt = ExVT.getScalarSizeInBits(); + if (NewMask.isSignMask()) { SDValue InOp = Op.getOperand(0); - unsigned VTBits = Op->getValueType(0).getScalarSizeInBits(); bool AlreadySignExtended = - TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; + TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. if (!AlreadySignExtended) { @@ -1004,7 +1004,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); - SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl, + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, Op.getValueType(), InOp, @@ -1012,26 +1012,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - // Sign extension. Compute the demanded bits in the result that are not - // present in the input. - APInt NewBits = - APInt::getHighBitsSet(BitWidth, - BitWidth - ExVT.getScalarSizeInBits()); - // If none of the extended bits are demanded, eliminate the sextinreg. - if ((NewBits & NewMask) == 0) + if (NewMask.getActiveBits() <= ExVTBits) return TLO.CombineTo(Op, Op.getOperand(0)); - APInt InSignBit = - APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth); - APInt InputDemandedBits = - APInt::getLowBitsSet(BitWidth, - ExVT.getScalarSizeInBits()) & - NewMask; + APInt InputDemandedBits = NewMask.getLoBits(ExVTBits); // Since the sign extended bits are demanded, we know that the sign // bit is demanded. - InputDemandedBits |= InSignBit; + InputDemandedBits.setBit(ExVTBits - 1); if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, Known, TLO, Depth+1)) @@ -1042,16 +1031,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // top bits of the result. 
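// Editor's worked example for the SIGN_EXTEND_INREG hunks above, assuming a
// 32-bit value with ExVT = i8 (so ExVTBits = 8):
//
//   * NewMask.isSignMask() -- only bit 31 is demanded -- allows
//       (sext_inreg x, i8)  ->  (shl x, 24)
//     since the bits shifted out of the top are never observed;
//   * NewMask.getActiveBits() <= 8 means no bit above the narrow width is
//     demanded, so the sext_inreg can be dropped entirely;
//   * otherwise bit 7 (the replicated sign bit) is forced into the demanded
//     set before recursing, and the code just below propagates whatever is
//     known about it into the high bits.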
// If the input sign bit is known zero, convert this into a zero extension. - if (Known.Zero.intersects(InSignBit)) + if (Known.Zero[ExVTBits - 1]) return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg( Op.getOperand(0), dl, ExVT.getScalarType())); - if (Known.One.intersects(InSignBit)) { // Input sign bit known set - Known.One |= NewBits; - Known.Zero &= ~NewBits; + APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits); + if (Known.One[ExVTBits - 1]) { // Input sign bit known set + Known.One.setBitsFrom(ExVTBits); + Known.Zero &= Mask; } else { // Input sign bit unknown - Known.Zero &= ~NewBits; - Known.One &= ~NewBits; + Known.Zero &= Mask; + Known.One &= Mask; } break; } @@ -1079,61 +1069,47 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } case ISD::ZERO_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); - APInt InMask = NewMask.trunc(OperandBitWidth); // If none of the top bits are demanded, convert this into an any_extend. - APInt NewBits = - APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; - if (!NewBits.intersects(NewMask)) + if (NewMask.getActiveBits() <= OperandBitWidth) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); + APInt InMask = NewMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known = Known.zext(BitWidth); - Known.Zero |= NewBits; + Known.Zero.setBitsFrom(OperandBitWidth); break; } case ISD::SIGN_EXTEND: { - EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); - APInt InSignBit = APInt::getOneBitSet(BitWidth, InBits - 1); - APInt NewBits = ~InMask & NewMask; + unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits(); // If none of the top bits are demanded, convert this into an any_extend. - if (NewBits == 0) + if (NewMask.getActiveBits() <= InBits) return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); // Since some of the sign extended bits are demanded, we know that the sign // bit is demanded. - APInt InDemandedBits = InMask & NewMask; - InDemandedBits |= InSignBit; - InDemandedBits = InDemandedBits.trunc(InBits); + APInt InDemandedBits = NewMask.trunc(InBits); + InDemandedBits.setBit(InBits - 1); if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO, Depth+1)) return true; - Known = Known.zext(BitWidth); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + // If the sign bit is known one, the top bits match. + Known = Known.sext(BitWidth); // If the sign bit is known zero, convert this to a zero extend. - if (Known.Zero.intersects(InSignBit)) + if (Known.isNonNegative()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); - - // If the sign bit is known one, the top bits match. - if (Known.One.intersects(InSignBit)) { - Known.One |= NewBits; - assert((Known.Zero & NewBits) == 0); - } else { // Otherwise, top bits aren't known. 
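// Editor's worked example for the SIGN_EXTEND case above, for
// (sign_extend i8 x) producing i32:
//
//   * NewMask.getActiveBits() <= 8: no extended bit is demanded, so the
//     node relaxes to any_extend;
//   * otherwise bit 7 is added to InDemandedBits, Known.sext(32) replicates
//     whatever is known about the sign bit into bits 8..31, and
//     Known.isNonNegative() (sign bit known zero) relaxes the node to
//     zero_extend.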
- assert((Known.One & NewBits) == 0); - assert((Known.Zero & NewBits) == 0); - } break; } case ISD::ANY_EXTEND: { @@ -3481,6 +3457,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { EVT VT = LD->getValueType(0); EVT LoadedVT = LD->getMemoryVT(); SDLoc dl(LD); + auto &MF = DAG.getMachineFunction(); if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) { @@ -3511,7 +3488,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { // Make sure the stack slot is also aligned for the register type. SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - + auto FrameIndex = cast(StackBase.getNode())->getIndex(); SmallVector Stores; SDValue StackPtr = StackBase; unsigned Offset = 0; @@ -3530,8 +3507,9 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(), LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo())); + Stores.push_back(DAG.getStore( + Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset))); // Increment the pointers. Offset += RegBytes; Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); @@ -3550,15 +3528,17 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo(), MemVT)); + Stores.push_back(DAG.getTruncStore( + Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT)); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - MachinePointerInfo(), LoadedVT); + MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), + LoadedVT); // Callers expect a MERGE_VALUES node. return std::make_pair(Load, TF); @@ -3628,6 +3608,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); + auto &MF = DAG.getMachineFunction(); SDLoc dl(ST); if (ST->getMemoryVT().isFloatingPoint() || @@ -3662,10 +3643,12 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, // Make sure the stack slot is also aligned for the register type. SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + auto FrameIndex = cast(StackPtr.getNode())->getIndex(); // Perform the original store, only redirected to the stack slot. - SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, - MachinePointerInfo(), StoredVT); + SDValue Store = DAG.getTruncStore( + Chain, dl, Val, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT); EVT StackPtrVT = StackPtr.getValueType(); @@ -3677,8 +3660,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, // Do all but one copies using the full register width. 
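// Editor's note on the MachinePointerInfo changes in these hunks: a default
// MachinePointerInfo() describes an unknown location, which forces
// conservative ordering against all other memory operations. Tagging each
// stack-slot access with its frame index and offset, e.g.
//
//   SDValue Load = DAG.getLoad(
//       RegVT, dl, Store, StackPtr,
//       MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
//
// tells alias analysis exactly which slot (and which part of it) is touched,
// so independent pieces of the expansion can be scheduled freely.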
for (unsigned i = 1; i < NumRegs; i++) { // Load one integer register's worth from the stack slot. - SDValue Load = - DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo()); + SDValue Load = DAG.getLoad( + RegVT, dl, Store, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), @@ -3698,8 +3682,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, 8 * (StoredBytes - Offset)); // Load from the stack slot. - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - MachinePointerInfo(), MemVT); + SDValue Load = DAG.getExtLoad( + ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT); Stores.push_back( DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index aa75f5e2caa23..5fb6afee88a84 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -1,4 +1,4 @@ -//===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===// +//===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===// // // The LLVM Compiler Infrastructure // @@ -45,50 +45,58 @@ // // If this pass found points matching all these properties, then // MachineFrameInfo is updated with this information. +// //===----------------------------------------------------------------------===// + #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -// To check for profitability. +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -// For property #1 for Save. #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -// To record the result of the analysis. #include "llvm/CodeGen/MachineFrameInfo.h" -// For property #2. +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" -// For property #1 for Restore. +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" -#include "llvm/CodeGen/Passes.h" -// To know about callee-saved. #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -// To query the target about frame lowering. +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" -// To know about frame setup operation. #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -// To access TargetInstrInfo. 
+#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" - -#define DEBUG_TYPE "shrink-wrap" +#include +#include +#include using namespace llvm; +#define DEBUG_TYPE "shrink-wrap" + STATISTIC(NumFunc, "Number of functions"); STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); STATISTIC(NumCandidatesDropped, "Number of shrink-wrapping candidates dropped because of frequency"); static cl::opt - EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, - cl::desc("enable the shrink-wrapping pass")); +EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, + cl::desc("enable the shrink-wrapping pass")); namespace { + /// \brief Class to determine where the safe point to insert the /// prologue and epilogue are. /// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the @@ -101,31 +109,42 @@ class ShrinkWrap : public MachineFunctionPass { RegisterClassInfo RCI; MachineDominatorTree *MDT; MachinePostDominatorTree *MPDT; + /// Current safe point found for the prologue. /// The prologue will be inserted before the first instruction /// in this basic block. MachineBasicBlock *Save; + /// Current safe point found for the epilogue. /// The epilogue will be inserted before the first terminator instruction /// in this basic block. MachineBasicBlock *Restore; + /// Hold the information of the basic block frequency. /// Use to check the profitability of the new points. MachineBlockFrequencyInfo *MBFI; + /// Hold the loop information. Used to determine if Save and Restore /// are in the same loop. MachineLoopInfo *MLI; + /// Frequency of the Entry block. uint64_t EntryFreq; + /// Current opcode for frame setup. unsigned FrameSetupOpcode; + /// Current opcode for frame destroy. unsigned FrameDestroyOpcode; + /// Entry block. const MachineBasicBlock *Entry; - typedef SmallSetVector SetOfRegs; + + using SetOfRegs = SmallSetVector; + /// Registers that need to be saved for the current function. mutable SetOfRegs CurrentCSRs; + /// Current MachineFunction. MachineFunction *MachineFunc; @@ -205,9 +224,11 @@ class ShrinkWrap : public MachineFunctionPass { /// the MachineFrameInfo attached to \p MF with the results. bool runOnMachineFunction(MachineFunction &MF) override; }; -} // End anonymous namespace. 
+ +} // end anonymous namespace char ShrinkWrap::ID = 0; + char &llvm::ShrinkWrapID = ShrinkWrap::ID; INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index e5fc5402cb41b..b9ddd96d4046b 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -1,4 +1,4 @@ -//===-- StackColoring.cpp -------------------------------------------------===// +//===- StackColoring.cpp --------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -22,35 +22,44 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/WinEHFuncInfo.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetOpcodes.h" +#include +#include +#include +#include +#include using namespace llvm; @@ -366,6 +375,7 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // namespace { + /// StackColoring - A machine pass for merging disjoint stack allocations, /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. class StackColoring : public MachineFunctionPass { @@ -378,32 +388,40 @@ class StackColoring : public MachineFunctionPass { struct BlockLifetimeInfo { /// Which slots BEGINs in each basic block. BitVector Begin; + /// Which slots ENDs in each basic block. BitVector End; + /// Which slots are marked as LIVE_IN, coming into each basic block. BitVector LiveIn; + /// Which slots are marked as LIVE_OUT, coming out of each basic block. BitVector LiveOut; }; /// Maps active slots (per bit) for each basic block. - typedef DenseMap LivenessMap; + using LivenessMap = DenseMap; LivenessMap BlockLiveness; /// Maps serial numbers to basic blocks. - DenseMap BasicBlocks; + DenseMap BasicBlocks; + /// Maps basic blocks to a serial number. - SmallVector BasicBlockNumbering; + SmallVector BasicBlockNumbering; /// Maps slots to their use interval. 
Outside of this interval, slots /// values are either dead or `undef` and they will not be written to. SmallVector, 16> Intervals; + /// Maps slots to the points where they can become in-use. SmallVector, 16> LiveStarts; + /// VNInfo is used for the construction of LiveIntervals. VNInfo::Allocator VNInfoAllocator; + /// SlotIndex analysis object. SlotIndexes *Indexes; + /// The stack protector object. StackProtector *SP; @@ -424,13 +442,18 @@ class StackColoring : public MachineFunctionPass { public: static char ID; + StackColoring() : MachineFunctionPass(ID) { initializeStackColoringPass(*PassRegistry::getPassRegistry()); } + void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnMachineFunction(MachineFunction &MF) override; private: + /// Used in collectMarkers + using BlockBitVecMap = DenseMap; + /// Debug. void dump() const; void dumpIntervals() const; @@ -489,13 +512,12 @@ class StackColoring : public MachineFunctionPass { /// Map entries which point to other entries to their destination. /// A->B->C becomes A->C. void expungeSlotMap(DenseMap &SlotRemap, unsigned NumSlots); - - /// Used in collectMarkers - typedef DenseMap BlockBitVecMap; }; + } // end anonymous namespace char StackColoring::ID = 0; + char &llvm::StackColoringID = StackColoring::ID; INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, @@ -559,16 +581,13 @@ static inline int getStartOrEndSlot(const MachineInstr &MI) return -1; } -// // At the moment the only way to end a variable lifetime is with // a VARIABLE_LIFETIME op (which can't contain a start). If things // change and the IR allows for a single inst that both begins // and ends lifetime(s), this interface will need to be reworked. -// bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, SmallVector &slots, - bool &isStart) -{ + bool &isStart) { if (MI.getOpcode() == TargetOpcode::LIFETIME_START || MI.getOpcode() == TargetOpcode::LIFETIME_END) { int Slot = getStartOrEndSlot(MI); @@ -608,8 +627,7 @@ bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI, return false; } -unsigned StackColoring::collectMarkers(unsigned NumSlot) -{ +unsigned StackColoring::collectMarkers(unsigned NumSlot) { unsigned MarkersFound = 0; BlockBitVecMap SeenStartMap; InterestingSlots.clear(); @@ -624,7 +642,6 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) // Step 1: collect markers and populate the "InterestingSlots" // and "ConservativeSlots" sets. for (MachineBasicBlock *MBB : depth_first(MF)) { - // Compute the set of slots for which we've seen a START marker but have // not yet seen an END marker at this point in the walk (e.g. on entry // to this bb). @@ -697,7 +714,6 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) // NOTE: We use a depth-first iteration to ensure that we obtain a // deterministic numbering. for (MachineBasicBlock *MBB : depth_first(MF)) { - // Assign a serial number to this basic block. BasicBlocks[MBB] = BasicBlockNumbering.size(); BasicBlockNumbering.push_back(MBB); @@ -745,8 +761,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) return MarkersFound; } -void StackColoring::calculateLocalLiveness() -{ +void StackColoring::calculateLocalLiveness() { unsigned NumIters = 0; bool changed = true; while (changed) { @@ -754,7 +769,6 @@ void StackColoring::calculateLocalLiveness() ++NumIters; for (const MachineBasicBlock *BB : BasicBlockNumbering) { - // Use an iterator to avoid repeated lookups. 
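// Editor's sketch of the dataflow this loop implements (a standard forward
// fixed point over the lifetime markers, in terms of the BitVector fields
// declared on BlockLifetimeInfo above):
//
//   LocalLiveIn  = union of LiveOut over all predecessors
//   LocalLiveOut = (LocalLiveIn & ~End) | Begin
//
// iterated until no block's LiveIn/LiveOut sets change.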
LivenessMap::iterator BI = BlockLiveness.find(BB); assert(BI != BlockLiveness.end() && "Block not found"); @@ -792,7 +806,7 @@ void StackColoring::calculateLocalLiveness() BlockInfo.LiveOut |= LocalLiveOut; } } - }// while changed. + } // while changed. NumIterations = NumIters; } @@ -818,7 +832,6 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { // Create the interval for the basic blocks containing lifetime begin/end. for (const MachineInstr &MI : MBB) { - SmallVector slots; bool IsStart = false; if (!isLifetimeStartOrEnd(MI, slots, IsStart)) @@ -1047,7 +1060,7 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo()) for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap) for (WinEHHandlerType &H : TBME.HandlerArray) - if (H.CatchObj.FrameIndex != INT_MAX && + if (H.CatchObj.FrameIndex != std::numeric_limits::max() && SlotRemap.count(H.CatchObj.FrameIndex)) H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex]; @@ -1231,7 +1244,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { LiveInterval *Second = &*Intervals[SecondSlot]; auto &FirstS = LiveStarts[FirstSlot]; auto &SecondS = LiveStarts[SecondSlot]; - assert (!First->empty() && !Second->empty() && "Found an empty range"); + assert(!First->empty() && !Second->empty() && "Found an empty range"); // Merge disjoint slots. This is a little bit tricky - see the // Implementation Notes section for an explanation. diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index d8e7840a25763..ae3d49c5e23ac 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -18,7 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/EHPersonalities.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -247,10 +247,12 @@ bool StackProtector::RequiresStackProtector() { OptimizationRemarkEmitter ORE(F); if (F->hasFnAttribute(Attribute::StackProtectReq)) { - ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F) + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F) << "Stack protection applied to function " << ore::NV("Function", F) - << " due to a function attribute or command-line switch"); + << " due to a function attribute or command-line switch"; + }); NeedsProtector = true; Strong = true; // Use the same heuristic as strong to determine SSPLayout } else if (F->hasFnAttribute(Attribute::StackProtectStrong)) @@ -264,29 +266,31 @@ bool StackProtector::RequiresStackProtector() { for (const Instruction &I : BB) { if (const AllocaInst *AI = dyn_cast(&I)) { if (AI->isArrayAllocation()) { - OptimizationRemark Remark(DEBUG_TYPE, "StackProtectorAllocaOrArray", - &I); - Remark - << "Stack protection applied to function " - << ore::NV("Function", F) - << " due to a call to alloca or use of a variable length array"; + auto RemarkBuilder = [&]() { + return OptimizationRemark(DEBUG_TYPE, "StackProtectorAllocaOrArray", + &I) + << "Stack protection applied to function " + << ore::NV("Function", F) + << " due to a call to alloca or use of a variable length " + "array"; + }; if (const auto *CI = dyn_cast(AI->getArraySize())) { if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) { // A call to alloca with size >= SSPBufferSize requires // stack 
protectors. Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); - ORE.emit(Remark); + ORE.emit(RemarkBuilder); NeedsProtector = true; } else if (Strong) { // Require protectors for all alloca calls in strong mode. Layout.insert(std::make_pair(AI, SSPLK_SmallArray)); - ORE.emit(Remark); + ORE.emit(RemarkBuilder); NeedsProtector = true; } } else { // A call to alloca with a variable size requires protectors. Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); - ORE.emit(Remark); + ORE.emit(RemarkBuilder); NeedsProtector = true; } continue; @@ -296,11 +300,13 @@ bool StackProtector::RequiresStackProtector() { if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) { Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray : SSPLK_SmallArray)); - ORE.emit(OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I) + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I) << "Stack protection applied to function " << ore::NV("Function", F) << " due to a stack allocated buffer or struct containing a " - "buffer"); + "buffer"; + }); NeedsProtector = true; continue; } @@ -308,11 +314,13 @@ bool StackProtector::RequiresStackProtector() { if (Strong && HasAddressTaken(AI)) { ++NumAddrTaken; Layout.insert(std::make_pair(AI, SSPLK_AddrOf)); - ORE.emit( - OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken", &I) - << "Stack protection applied to function " - << ore::NV("Function", F) - << " due to the address of a local variable being taken"); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken", + &I) + << "Stack protection applied to function " + << ore::NV("Function", F) + << " due to the address of a local variable being taken"; + }); NeedsProtector = true; } } diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index b1edf02302214..bac12efd6395d 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -191,7 +191,7 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI, MachineInstr *CommutedMI = nullptr; if (NewMI) { // Create a new instruction. - MachineFunction &MF = *MI.getParent()->getParent(); + MachineFunction &MF = *MI.getMF(); CommutedMI = MF.CloneMachineInstr(&MI); } else { CommutedMI = &MI; @@ -438,7 +438,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI, assert(TargetRegisterInfo::isVirtualRegister(FoldReg) && "Cannot fold physregs"); - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) @@ -518,21 +518,13 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI, return NewMI; } -/// foldMemoryOperand - Attempt to fold a load or store of the specified stack -/// slot into the specified machine instruction for the specified operand(s). -/// If this is possible, a new instruction is returned with the specified -/// operand folded, otherwise NULL is returned. The client is responsible for -/// removing the old instruction and adding the new one in the instruction -/// stream. 
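// Editor's note on the ORE.emit changes in the StackProtector hunks above:
// passing a callable defers building the remark until the emitter has
// checked that remarks are actually enabled, so the string-heavy
// OptimizationRemark is never constructed on the common path. The pattern,
// as used by the patch:
//
//   ORE.emit([&]() {
//     return OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
//            << "Stack protection applied to function "
//            << ore::NV("Function", F);
//   });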
MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, LiveIntervals *LIS) const { auto Flags = MachineMemOperand::MONone; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (MI.getOperand(Ops[i]).isDef()) - Flags |= MachineMemOperand::MOStore; - else - Flags |= MachineMemOperand::MOLoad; + for (unsigned OpIdx : Ops) + Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore + : MachineMemOperand::MOLoad; MachineBasicBlock *MBB = MI.getParent(); assert(MBB && "foldMemoryOperand needs an inserted instruction"); @@ -548,10 +540,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, if (Flags & MachineMemOperand::MOStore) { MemSize = MFI.getObjectSize(FI); } else { - for (unsigned Idx : Ops) { + for (unsigned OpIdx : Ops) { int64_t OpSize = MFI.getObjectSize(FI); - if (auto SubReg = MI.getOperand(Idx).getSubReg()) { + if (auto SubReg = MI.getOperand(OpIdx).getSubReg()) { unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg); if (SubRegSize > 0 && !(SubRegSize % 8)) OpSize = SubRegSize / 8; @@ -613,6 +605,54 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, return &*--Pos; } +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, + ArrayRef Ops, + MachineInstr &LoadMI, + LiveIntervals *LIS) const { + assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!"); +#ifndef NDEBUG + for (unsigned OpIdx : Ops) + assert(MI.getOperand(OpIdx).isUse() && "Folding load into def!"); +#endif + + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + + // Ask the target to do the actual folding. + MachineInstr *NewMI = nullptr; + int FrameIndex = 0; + + if ((MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::PATCHPOINT || + MI.getOpcode() == TargetOpcode::STATEPOINT) && + isLoadFromStackSlot(LoadMI, FrameIndex)) { + // Fold stackmap/patchpoint. + NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); + if (NewMI) + NewMI = &*MBB.insert(MI, NewMI); + } else { + // Ask the target to do the actual folding. + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS); + } + + if (!NewMI) + return nullptr; + + // Copy the memoperands from the load to the folded instruction. + if (MI.memoperands_empty()) { + NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end()); + } else { + // Handle the rare case of folding multiple loads. + NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(), + E = LoadMI.memoperands_end(); + I != E; ++I) { + NewMI->addMemOperand(MF, *I); + } + } + return NewMI; +} + bool TargetInstrInfo::hasReassociableOperands( const MachineInstr &Inst, const MachineBasicBlock *MBB) const { const MachineOperand &Op1 = Inst.getOperand(1); @@ -708,11 +748,13 @@ bool TargetInstrInfo::getMachineCombinerPatterns( return false; } + /// Return true when a code sequence can improve loop throughput. bool TargetInstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { return false; } + /// Attempt the reassociation transformation to reduce critical path length. /// See the above comments before getMachineCombinerPatterns(). 
void TargetInstrInfo::reassociateOps( @@ -721,7 +763,7 @@ void TargetInstrInfo::reassociateOps( SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const { - MachineFunction *MF = Root.getParent()->getParent(); + MachineFunction *MF = Root.getMF(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -804,7 +846,7 @@ void TargetInstrInfo::genAlternativeCodeSequence( SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstIdxForVirtReg) const { - MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); + MachineRegisterInfo &MRI = Root.getMF()->getRegInfo(); // Select the previous instruction in the sequence based on the input pattern. MachineInstr *Prev = nullptr; @@ -826,59 +868,9 @@ void TargetInstrInfo::genAlternativeCodeSequence( reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); } -/// foldMemoryOperand - Same as the previous version except it allows folding -/// of any load and store from / to any address, not just from a specific -/// stack slot. -MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, - ArrayRef Ops, - MachineInstr &LoadMI, - LiveIntervals *LIS) const { - assert(LoadMI.canFoldAsLoad() && "LoadMI isn't foldable!"); -#ifndef NDEBUG - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - assert(MI.getOperand(Ops[i]).isUse() && "Folding load into def!"); -#endif - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - - // Ask the target to do the actual folding. - MachineInstr *NewMI = nullptr; - int FrameIndex = 0; - - if ((MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT || - MI.getOpcode() == TargetOpcode::STATEPOINT) && - isLoadFromStackSlot(LoadMI, FrameIndex)) { - // Fold stackmap/patchpoint. - NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); - if (NewMI) - NewMI = &*MBB.insert(MI, NewMI); - } else { - // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS); - } - - if (!NewMI) return nullptr; - - // Copy the memoperands from the load to the folded instruction. - if (MI.memoperands_empty()) { - NewMI->setMemRefs(LoadMI.memoperands_begin(), LoadMI.memoperands_end()); - } - else { - // Handle the rare case of folding multiple loads. - NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - for (MachineInstr::mmo_iterator I = LoadMI.memoperands_begin(), - E = LoadMI.memoperands_end(); - I != E; ++I) { - NewMI->addMemOperand(MF, *I); - } - } - return NewMI; -} - bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( const MachineInstr &MI, AliasAnalysis *AA) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); // Remat clients assume operand 0 is the defined register. 
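// Editor's note: the recurring mechanical change in these hunks replaces the
// two-hop parent walk with the MachineInstr::getMF() accessor:
//
//   MachineFunction &MF = *MI.getMF(); // == *MI.getParent()->getParent()
//
// Same behavior, one name for the idiom.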
@@ -956,7 +948,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( } int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const { - const MachineFunction *MF = MI.getParent()->getParent(); + const MachineFunction *MF = MI.getMF(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); bool StackGrowsDown = TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index ea655e1faacf6..ec971e147ebd7 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -866,7 +866,7 @@ MachineBasicBlock * TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, MachineBasicBlock *MBB) const { MachineInstr *MI = &InitialMI; - MachineFunction &MF = *MI->getParent()->getParent(); + MachineFunction &MF = *MI->getMF(); MachineFrameInfo &MFI = MF.getFrameInfo(); // We're handling multiple types of operands here: diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 0149c82a00e8f..e45cdee43680a 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -168,8 +168,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( MMI, Streamer); } -static SectionKind -getELFKindForNamedSection(StringRef Name, SectionKind K) { +static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { // N.B.: The defaults used in here are not the same ones used in MC. // We follow gcc, MC follows gas. For example, given ".section .eh_frame", // both gas and MC will produce a section with no flags. Given @@ -1249,7 +1248,7 @@ static const Comdat *getWasmComdat(const GlobalValue *GV) { MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { StringRef Name = GO->getSection(); - return getContext().getWasmSection(Name, wasm::WASM_SEC_DATA); + return getContext().getWasmSection(Name, SectionKind::getData()); } static MCSectionWasm *selectWasmSectionForGlobal( @@ -1262,12 +1261,10 @@ static MCSectionWasm *selectWasmSectionForGlobal( bool UniqueSectionNames = TM.getUniqueSectionNames(); SmallString<128> Name = getSectionPrefixForGlobal(Kind); - uint32_t Type = wasm::WASM_SEC_DATA; if (const auto *F = dyn_cast<Function>(GO)) { const auto &OptionalPrefix = F->getSectionPrefix(); if (OptionalPrefix) Name += *OptionalPrefix; - Type = wasm::WASM_SEC_CODE; } if (EmitUniqueSection && UniqueSectionNames) { @@ -1279,7 +1276,7 @@ static MCSectionWasm *selectWasmSectionForGlobal( UniqueID = *NextUniqueID; (*NextUniqueID)++; } - return Ctx.getWasmSection(Name, Type, Group, UniqueID); + return Ctx.getWasmSection(Name, Kind, Group, UniqueID); } MCSection *TargetLoweringObjectFileWasm::SelectSectionForGlobal( @@ -1328,7 +1325,9 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference( MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext()); } -void -TargetLoweringObjectFileWasm::InitializeWasm() { - // TODO: Initialize StaticCtorSection and StaticDtorSection.
+void TargetLoweringObjectFileWasm::InitializeWasm() { + StaticCtorSection = + getContext().getWasmSection(".init_array", SectionKind::getData()); + StaticDtorSection = + getContext().getWasmSection(".fini_array", SectionKind::getData()); } diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp index 4584f65619cc5..c5101b1ecfc22 100644 --- a/lib/CodeGen/TargetPassConfig.cpp +++ b/lib/CodeGen/TargetPassConfig.cpp @@ -111,6 +111,11 @@ static cl::opt VerifyMachineCode("verify-machineinstrs", cl::Hidden, static cl::opt EnableMachineOutliner("enable-machine-outliner", cl::Hidden, cl::desc("Enable machine outliner")); +static cl::opt EnableLinkOnceODROutlining( + "enable-linkonceodr-outlining", + cl::Hidden, + cl::desc("Enable the machine outliner on linkonceodr functions"), + cl::init(false)); // Enable or disable FastISel. Both options are needed, because // FastISel is enabled by default with -fast, and we wish to be // able to enable or disable fast-isel independently from -O0. @@ -891,7 +896,7 @@ void TargetPassConfig::addMachinePasses() { addPass(&PatchableFunctionID, false); if (EnableMachineOutliner) - PM->add(createMachineOutlinerPass()); + PM->add(createMachineOutlinerPass(EnableLinkOnceODROutlining)); AddingMachinePasses = false; } diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index e1f7edc627b26..e1db9157f9016 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -316,7 +316,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, // correctly append imp-use operands, and readsReg() strangely returns false // for predicated defs. unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); - const MachineFunction &MF = *DefMI->getParent()->getParent(); + const MachineFunction &MF = *DefMI->getMF(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI)) return computeInstrLatency(DefMI); diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp index 859fac3cd63aa..29cfd9fb1786d 100644 --- a/lib/CodeGen/TargetSubtargetInfo.cpp +++ b/lib/CodeGen/TargetSubtargetInfo.cpp @@ -51,6 +51,10 @@ bool TargetSubtargetInfo::enableRALocalReassignment( return true; } +bool TargetSubtargetInfo::enableAdvancedRASplitCost() const { + return false; +} + bool TargetSubtargetInfo::enablePostRAScheduler() const { return getSchedModel().PostRAScheduler; } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 83c00e24d14fc..efd40b209e9f7 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -1,4 +1,4 @@ -//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===// +//===- TwoAddressInstructionPass.cpp - Two-Address instruction pass -------===// // // The LLVM Compiler Infrastructure // @@ -28,27 +28,40 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include 
"llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Pass.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include using namespace llvm; @@ -76,6 +89,7 @@ static cl::opt MaxDataFlowEdge( "the benefit of commuting operands")); namespace { + class TwoAddressInstructionPass : public MachineFunctionPass { MachineFunction *MF; const TargetInstrInfo *TII; @@ -148,14 +162,16 @@ class TwoAddressInstructionPass : public MachineFunctionPass { void processCopy(MachineInstr *MI); - typedef SmallVector, 4> TiedPairList; - typedef SmallDenseMap TiedOperandMap; + using TiedPairList = SmallVector, 4>; + using TiedOperandMap = SmallDenseMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); void eliminateRegSequence(MachineBasicBlock::iterator&); public: static char ID; // Pass identification, replacement for typeid + TwoAddressInstructionPass() : MachineFunctionPass(ID) { initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); } @@ -175,17 +191,19 @@ class TwoAddressInstructionPass : public MachineFunctionPass { /// Pass entry point. bool runOnMachineFunction(MachineFunction&) override; }; + } // end anonymous namespace char TwoAddressInstructionPass::ID = 0; + +char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; + INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) -char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; - static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); /// A two-address instruction has been converted to a three-address instruction @@ -267,7 +285,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, ++KillPos; unsigned NumVisited = 0; - for (MachineInstr &OtherMI : llvm::make_range(std::next(OldPos), KillPos)) { + for (MachineInstr &OtherMI : make_range(std::next(OldPos), KillPos)) { // DBG_VALUE cannot be counted against the limit. if (OtherMI.isDebugValue()) continue; @@ -452,7 +470,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, LiveIntervals *LIS, bool allowFalsePositives) { MachineInstr *DefMI = &MI; - for (;;) { + while (true) { // All uses of physical registers are likely to be kills. if (TargetRegisterInfo::isPhysicalRegister(Reg) && (allowFalsePositives || MRI->hasOneUse(Reg))) @@ -904,7 +922,6 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // Move the copies connected to MI down as well. 
@@ -267,7 +285,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, ++KillPos; unsigned NumVisited = 0; - for (MachineInstr &OtherMI : llvm::make_range(std::next(OldPos), KillPos)) { + for (MachineInstr &OtherMI : make_range(std::next(OldPos), KillPos)) { // DBG_VALUE cannot be counted against the limit. if (OtherMI.isDebugValue()) continue; @@ -452,7 +470,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, LiveIntervals *LIS, bool allowFalsePositives) { MachineInstr *DefMI = &MI; - for (;;) { + while (true) { // All uses of physical registers are likely to be kills. if (TargetRegisterInfo::isPhysicalRegister(Reg) && (allowFalsePositives || MRI->hasOneUse(Reg))) @@ -904,7 +922,6 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // Move the copies connected to MI down as well. MachineBasicBlock::iterator Begin = MI; MachineBasicBlock::iterator AfterMI = std::next(Begin); - MachineBasicBlock::iterator End = AfterMI; while (End->isCopy() && regOverlapsSet(Defs, End->getOperand(1).getReg(), TRI)) { @@ -916,7 +933,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, unsigned NumVisited = 0; MachineBasicBlock::iterator KillPos = KillMI; ++KillPos; - for (MachineInstr &OtherMI : llvm::make_range(End, KillPos)) { + for (MachineInstr &OtherMI : make_range(End, KillPos)) { // DBG_VALUE cannot be counted against the limit. if (OtherMI.isDebugValue()) continue; @@ -1090,7 +1107,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Check if the reschedule will not break dependencies. unsigned NumVisited = 0; for (MachineInstr &OtherMI : - llvm::make_range(mi, MachineBasicBlock::iterator(KillMI))) { + make_range(mi, MachineBasicBlock::iterator(KillMI))) { // DBG_VALUE cannot be counted against the limit. if (OtherMI.isDebugValue()) continue; @@ -1609,7 +1626,6 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, if (I->end == UseIdx) LI.removeSegment(LastCopyIdx, UseIdx); } - } else if (RemovedKillFlag) { // Some tied uses of regB matched their destination registers, so // regB is still used in this instruction, but a kill flag was @@ -1690,7 +1706,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { // transformations that may either eliminate the tied operands or // improve the opportunities for coalescing away the register copy. if (TiedOperands.size() == 1) { - SmallVectorImpl<std::pair<unsigned, unsigned> > &TiedPairs + SmallVectorImpl<std::pair<unsigned, unsigned>> &TiedPairs = TiedOperands.begin()->second; if (TiedPairs.size() == 1) { unsigned SrcIdx = TiedPairs[0].first; @@ -1751,7 +1767,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { /// /// %dst:ssub0 = COPY %v1 /// %dst:ssub1 = COPY %v2 -/// void TwoAddressInstructionPass:: eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 407fd9b162e97..bdd25f29aea41 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -207,11 +207,12 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { MachineRegisterInfo &MRI = F.getRegInfo(); unsigned InputSub = Input.getSubReg(); if (InputSub == 0 && - MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg))) { + MRI.constrainRegClass(InputReg, MRI.getRegClass(OutputReg)) && + !Input.isUndef()) { MRI.replaceRegWith(OutputReg, InputReg); } else { // The input register to the PHI has a subregister or it can't be - // constrained to the proper register class: + // constrained to the proper register class or it is undef: // insert a COPY instead of simply replacing the output // with the input. const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
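The extra !Input.isUndef() condition above is load-bearing: MRI.replaceRegWith rewrites every use of the PHI's result, so an undef input would silently poison unrelated uses, while the COPY fallback confines the undef to this one edge. The decision reduced to its inputs (a simplified sketch, not the pass's real interface):

    // Returns true when the PHI result may simply be renamed to the input
    // vreg. Otherwise the pass must insert a COPY so that only this PHI
    // observes the (possibly undef) input value.
    static bool canReplaceOutputWithInput(bool InputHasSubReg,
                                          bool ClassConstrainOK,
                                          bool InputIsUndef) {
      return !InputHasSubReg && ClassConstrainOK && !InputIsUndef;
    }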
diff --git a/lib/DebugInfo/CodeView/EnumTables.cpp b/lib/DebugInfo/CodeView/EnumTables.cpp index e58d2f8a1d3be..d8301cab1657c 100644 --- a/lib/DebugInfo/CodeView/EnumTables.cpp +++ b/lib/DebugInfo/CodeView/EnumTables.cpp @@ -33,55 +33,9 @@ static const EnumEntry<TypeLeafKind> TypeLeafNames[] = { }; static const EnumEntry<uint16_t> RegisterNames[] = { - CV_ENUM_CLASS_ENT(RegisterId, Unknown), - CV_ENUM_CLASS_ENT(RegisterId, VFrame), - CV_ENUM_CLASS_ENT(RegisterId, AL), - CV_ENUM_CLASS_ENT(RegisterId, CL), - CV_ENUM_CLASS_ENT(RegisterId, DL), - CV_ENUM_CLASS_ENT(RegisterId, BL), - CV_ENUM_CLASS_ENT(RegisterId, AH), - CV_ENUM_CLASS_ENT(RegisterId, CH), - CV_ENUM_CLASS_ENT(RegisterId, DH), - CV_ENUM_CLASS_ENT(RegisterId, BH), - CV_ENUM_CLASS_ENT(RegisterId, AX), - CV_ENUM_CLASS_ENT(RegisterId, CX), - CV_ENUM_CLASS_ENT(RegisterId, DX), - CV_ENUM_CLASS_ENT(RegisterId, BX), - CV_ENUM_CLASS_ENT(RegisterId, SP), - CV_ENUM_CLASS_ENT(RegisterId, BP), - CV_ENUM_CLASS_ENT(RegisterId, SI), - CV_ENUM_CLASS_ENT(RegisterId, DI), - CV_ENUM_CLASS_ENT(RegisterId, EAX), - CV_ENUM_CLASS_ENT(RegisterId, ECX), - CV_ENUM_CLASS_ENT(RegisterId, EDX), - CV_ENUM_CLASS_ENT(RegisterId, EBX), - CV_ENUM_CLASS_ENT(RegisterId, ESP), - CV_ENUM_CLASS_ENT(RegisterId, EBP), - CV_ENUM_CLASS_ENT(RegisterId, ESI), - CV_ENUM_CLASS_ENT(RegisterId, EDI), - CV_ENUM_CLASS_ENT(RegisterId, ES), - CV_ENUM_CLASS_ENT(RegisterId, CS), - CV_ENUM_CLASS_ENT(RegisterId, SS), - CV_ENUM_CLASS_ENT(RegisterId, DS), - CV_ENUM_CLASS_ENT(RegisterId, FS), - CV_ENUM_CLASS_ENT(RegisterId, GS), - CV_ENUM_CLASS_ENT(RegisterId, IP), - CV_ENUM_CLASS_ENT(RegisterId, RAX), - CV_ENUM_CLASS_ENT(RegisterId, RBX), - CV_ENUM_CLASS_ENT(RegisterId, RCX), - CV_ENUM_CLASS_ENT(RegisterId, RDX), - CV_ENUM_CLASS_ENT(RegisterId, RSI), - CV_ENUM_CLASS_ENT(RegisterId, RDI), - CV_ENUM_CLASS_ENT(RegisterId, RBP), - CV_ENUM_CLASS_ENT(RegisterId, RSP), - CV_ENUM_CLASS_ENT(RegisterId, R8), - CV_ENUM_CLASS_ENT(RegisterId, R9), - CV_ENUM_CLASS_ENT(RegisterId, R10), - CV_ENUM_CLASS_ENT(RegisterId, R11), - CV_ENUM_CLASS_ENT(RegisterId, R12), - CV_ENUM_CLASS_ENT(RegisterId, R13), - CV_ENUM_CLASS_ENT(RegisterId, R14), - CV_ENUM_CLASS_ENT(RegisterId, R15), +#define CV_REGISTER(name, val) CV_ENUM_CLASS_ENT(RegisterId, name), +#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def" +#undef CV_REGISTER }; static const EnumEntry<uint16_t> PublicSymFlagNames[] = {
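Replacing the hand-written register table with a CodeViewRegisters.def include is the classic X-macro technique: one authoritative list, re-expanded under different definitions of the macro so the enum and its string table can never drift apart. A self-contained sketch with an inline list standing in for the .def file:

    #include <cstdio>

    // In LLVM this list lives in CodeViewRegisters.def; inlined for brevity.
    #define MY_REGISTERS(X) X(EAX, 17) X(ECX, 18) X(EDX, 19)

    enum class RegId : unsigned short {
    #define CV_REGISTER(name, val) name = val,
      MY_REGISTERS(CV_REGISTER)
    #undef CV_REGISTER
    };

    static const char *regName(RegId R) {
      switch (R) {
    #define CV_REGISTER(name, val)                                             \
      case RegId::name:                                                        \
        return #name;
      MY_REGISTERS(CV_REGISTER)
    #undef CV_REGISTER
      }
      return "Unknown";
    }

    int main() { std::printf("%s\n", regName(RegId::ECX)); } // prints ECX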
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index 62e73acc72d6d..e64404be6dc0d 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -317,7 +317,8 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeRegisterRelSym &DefRangeRegisterRel) { - W.printNumber("BaseRegister", DefRangeRegisterRel.Hdr.Register); + W.printEnum("BaseRegister", uint16_t(DefRangeRegisterRel.Hdr.Register), + getRegisterNames()); W.printBoolean("HasSpilledUDTMember", DefRangeRegisterRel.hasSpilledUDTMember()); W.printNumber("OffsetInParent", DefRangeRegisterRel.offsetInParent()); @@ -330,7 +331,8 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeRegisterSym &DefRangeRegister) { - W.printNumber("Register", DefRangeRegister.Hdr.Register); + W.printEnum("Register", uint16_t(DefRangeRegister.Hdr.Register), + getRegisterNames()); W.printNumber("MayHaveNoName", DefRangeRegister.Hdr.MayHaveNoName); printLocalVariableAddrRange(DefRangeRegister.Range, DefRangeRegister.getRelocationOffset()); @@ -340,7 +342,8 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) { - W.printNumber("Register", DefRangeSubfieldRegister.Hdr.Register); + W.printEnum("Register", uint16_t(DefRangeSubfieldRegister.Hdr.Register), + getRegisterNames()); W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Hdr.MayHaveNoName); W.printNumber("OffsetInParent", DefRangeSubfieldRegister.Hdr.OffsetInParent); printLocalVariableAddrRange(DefRangeSubfieldRegister.Range, @@ -393,7 +396,7 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FrameCookie.getRelocationOffset(), FrameCookie.CodeOffset, &LinkageName); } - W.printHex("Register", FrameCookie.Register); + W.printEnum("Register", uint16_t(FrameCookie.Register), getRegisterNames()); W.printEnum("CookieKind", uint16_t(FrameCookie.CookieKind), getFrameCookieKindNames()); W.printHex("Flags", FrameCookie.Flags); diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp index 650f1942b94e7..c23fadc230482 100644 --- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp +++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -404,6 +404,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind, break; case SymbolKind::S_CALLERS: case SymbolKind::S_CALLEES: + case SymbolKind::S_INLINEES: // The record is a count followed by an array of type indices. Count = *reinterpret_cast<const ulittle32_t *>(Content.data()); Refs.push_back({TiRefKind::IndexRef, 4, Count}); // Callees @@ -412,8 +413,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind, Refs.push_back({TiRefKind::IndexRef, 8, 1}); // ID of inlinee break; case SymbolKind::S_HEAPALLOCSITE: - // FIXME: It's not clear if this is a type or item reference. - Refs.push_back({TiRefKind::IndexRef, 8, 1}); // signature + Refs.push_back({TiRefKind::TypeRef, 8, 1}); // UDT allocated break; // Defranges don't have types, just registers and code offsets. @@ -434,6 +434,8 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind, case SymbolKind::S_ENVBLOCK: case SymbolKind::S_BLOCK32: case SymbolKind::S_FRAMEPROC: + case SymbolKind::S_THUNK32: + case SymbolKind::S_FRAMECOOKIE: break; // Scope ending symbols. case SymbolKind::S_END:
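The S_CALLERS/S_CALLEES/S_INLINEES case above describes a record that is a 32-bit little-endian count followed by that many 32-bit type indices; the {Kind, 4, Count} ref it pushes is exactly that layout. A standalone reader for the same shape (a sketch that assumes a little-endian host, since the memcpy does no byte-swapping):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    static std::vector<uint32_t> readCountedIndexArray(const uint8_t *Data) {
      uint32_t Count;
      std::memcpy(&Count, Data, 4); // the count lives at offset 0
      std::vector<uint32_t> Indices(Count);
      std::memcpy(Indices.data(), Data + 4, Count * 4u); // indices at offset 4
      return Indices;
    }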
OS << "Magic = " << format("0x%08x", Hdr.Magic) << '\n' << "Version = " << format("0x%04x", Hdr.Version) << '\n' @@ -190,3 +193,67 @@ LLVM_DUMP_METHOD void DWARFAcceleratorTable::dump(raw_ostream &OS) const { } } } + +DWARFAcceleratorTable::ValueIterator::ValueIterator( + const DWARFAcceleratorTable &AccelTable, unsigned Offset) + : AccelTable(&AccelTable), DataOffset(Offset) { + if (!AccelTable.AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) + return; + + for (const auto &Atom : AccelTable.HdrData.Atoms) + AtomForms.push_back(DWARFFormValue(Atom.second)); + + // Read the first entry. + NumData = AccelTable.AccelSection.getU32(&DataOffset); + Next(); +} + +void DWARFAcceleratorTable::ValueIterator::Next() { + assert(NumData > 0 && "attempted to increment iterator past the end"); + auto &AccelSection = AccelTable->AccelSection; + if (Data >= NumData || + !AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) { + NumData = 0; + return; + } + for (auto &Atom : AtomForms) + Atom.extractValue(AccelSection, &DataOffset, nullptr); + ++Data; +} + +iterator_range +DWARFAcceleratorTable::equal_range(StringRef Key) const { + if (!IsValid) + return make_range(ValueIterator(), ValueIterator()); + + // Find the bucket. + unsigned HashValue = dwarf::djbHash(Key); + unsigned Bucket = HashValue % Hdr.NumBuckets; + unsigned BucketBase = sizeof(Hdr) + Hdr.HeaderDataLength; + unsigned HashesBase = BucketBase + Hdr.NumBuckets * 4; + unsigned OffsetsBase = HashesBase + Hdr.NumHashes * 4; + + unsigned BucketOffset = BucketBase + Bucket * 4; + unsigned Index = AccelSection.getU32(&BucketOffset); + + // Search through all hashes in the bucket. + for (unsigned HashIdx = Index; HashIdx < Hdr.NumHashes; ++HashIdx) { + unsigned HashOffset = HashesBase + HashIdx * 4; + unsigned OffsetsOffset = OffsetsBase + HashIdx * 4; + uint32_t Hash = AccelSection.getU32(&HashOffset); + + if (Hash % Hdr.NumBuckets != Bucket) + // We are already in the next bucket. + break; + + unsigned DataOffset = AccelSection.getU32(&OffsetsOffset); + unsigned StringOffset = AccelSection.getRelocatedValue(4, &DataOffset); + if (!StringOffset) + break; + + // Finally, compare the key. + if (Key == StringSection.getCStr(&StringOffset)) + return make_range({*this, DataOffset}, ValueIterator()); + } + return make_range(ValueIterator(), ValueIterator()); +} diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index d82a03e4fed47..24aa666fb81f1 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -48,7 +48,6 @@ #include #include #include -#include #include #include @@ -68,17 +67,6 @@ DWARFContext::DWARFContext(std::unique_ptr DObj, DWARFContext::~DWARFContext() = default; -static void dumpAccelSection(raw_ostream &OS, const DWARFObject &Obj, - const DWARFSection &Section, - StringRef StringSection, bool LittleEndian) { - DWARFDataExtractor AccelSection(Obj, Section, LittleEndian, 0); - DataExtractor StrData(StringSection, LittleEndian, 0); - DWARFAcceleratorTable Accel(AccelSection, StrData); - if (!Accel.extract()) - return; - Accel.dump(OS); -} - /// Dump the UUID load command. 
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index d82a03e4fed47..24aa666fb81f1 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -48,7 +48,6 @@ #include #include #include -#include #include #include @@ -68,17 +67,6 @@ DWARFContext::DWARFContext(std::unique_ptr<const DWARFObject> DObj, DWARFContext::~DWARFContext() = default; -static void dumpAccelSection(raw_ostream &OS, const DWARFObject &Obj, - const DWARFSection &Section, - StringRef StringSection, bool LittleEndian) { - DWARFDataExtractor AccelSection(Obj, Section, LittleEndian, 0); - DataExtractor StrData(StringSection, LittleEndian, 0); - DWARFAcceleratorTable Accel(AccelSection, StrData); - if (!Accel.extract()) - return; - Accel.dump(OS); -} - /// Dump the UUID load command. static void dumpUUID(raw_ostream &OS, const ObjectFile &Obj) { auto *MachO = dyn_cast<MachOObjectFile>(&Obj); @@ -453,23 +441,19 @@ void DWARFContext::dump( if (shouldDump(Explicit, ".apple_names", DIDT_ID_AppleNames, DObj->getAppleNamesSection().Data)) - dumpAccelSection(OS, *DObj, DObj->getAppleNamesSection(), - DObj->getStringSection(), isLittleEndian()); + getAppleNames().dump(OS); if (shouldDump(Explicit, ".apple_types", DIDT_ID_AppleTypes, DObj->getAppleTypesSection().Data)) - dumpAccelSection(OS, *DObj, DObj->getAppleTypesSection(), - DObj->getStringSection(), isLittleEndian()); + getAppleTypes().dump(OS); if (shouldDump(Explicit, ".apple_namespaces", DIDT_ID_AppleNamespaces, DObj->getAppleNamespacesSection().Data)) - dumpAccelSection(OS, *DObj, DObj->getAppleNamespacesSection(), - DObj->getStringSection(), isLittleEndian()); + getAppleNamespaces().dump(OS); if (shouldDump(Explicit, ".apple_objc", DIDT_ID_AppleObjC, DObj->getAppleObjCSection().Data)) - dumpAccelSection(OS, *DObj, DObj->getAppleObjCSection(), - DObj->getStringSection(), isLittleEndian()); + getAppleObjC().dump(OS); } DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) { @@ -638,6 +622,40 @@ const DWARFDebugMacro *DWARFContext::getDebugMacro() { return Macro.get(); } +static DWARFAcceleratorTable & +getAccelTable(std::unique_ptr<DWARFAcceleratorTable> &Cache, + const DWARFObject &Obj, const DWARFSection &Section, + StringRef StringSection, bool IsLittleEndian) { + if (Cache) + return *Cache; + DWARFDataExtractor AccelSection(Obj, Section, IsLittleEndian, 0); + DataExtractor StrData(StringSection, IsLittleEndian, 0); + Cache.reset(new DWARFAcceleratorTable(AccelSection, StrData)); + Cache->extract(); + return *Cache; +} + +const DWARFAcceleratorTable &DWARFContext::getAppleNames() { + return getAccelTable(AppleNames, *DObj, DObj->getAppleNamesSection(), + DObj->getStringSection(), isLittleEndian()); +} + +const DWARFAcceleratorTable &DWARFContext::getAppleTypes() { + return getAccelTable(AppleTypes, *DObj, DObj->getAppleTypesSection(), + DObj->getStringSection(), isLittleEndian()); +} + +const DWARFAcceleratorTable &DWARFContext::getAppleNamespaces() { + return getAccelTable(AppleNamespaces, *DObj, + DObj->getAppleNamespacesSection(), + DObj->getStringSection(), isLittleEndian()); +} + +const DWARFAcceleratorTable &DWARFContext::getAppleObjC() { + return getAccelTable(AppleObjC, *DObj, DObj->getAppleObjCSection(), + DObj->getStringSection(), isLittleEndian()); +} + const DWARFLineTable * DWARFContext::getLineTableForUnit(DWARFUnit *U) { if (!Line) @@ -704,6 +722,35 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { return getCompileUnitForOffset(CUOffset); } +DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) { + DIEsForAddress Result; + + DWARFCompileUnit *CU = getCompileUnitForAddress(Address); + if (!CU) + return Result; + + Result.CompileUnit = CU; + Result.FunctionDIE = CU->getSubroutineForAddress(Address); + + std::vector<DWARFDie> Worklist; + Worklist.push_back(Result.FunctionDIE); + while (!Worklist.empty()) { + DWARFDie DIE = Worklist.back(); + Worklist.pop_back(); + + if (DIE.getTag() == DW_TAG_lexical_block && + DIE.addressRangeContainsAddress(Address)) { + Result.BlockDIE = DIE; + break; + } + + for (auto Child : DIE) + Worklist.push_back(Child); + } + + return Result; +} + static bool getFunctionNameAndStartLineForAddress(DWARFCompileUnit *CU, uint64_t Address, FunctionNameKind Kind, diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index bceb0162b3515..3312da67804b5 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -163,6 +163,7 @@ void FrameEntry::parseInstructions(DataExtractor Data, uint32_t *Offset, case DW_CFA_same_value: case DW_CFA_def_cfa_register: case DW_CFA_def_cfa_offset: + case DW_CFA_GNU_args_size: // Operands: ULEB128 addInstruction(Opcode, Data.getULEB128(Offset)); break; diff --git a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp index 62bd5af4e6499..f0b7ec2751de0 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp @@ -17,6 +17,11 @@ using namespace llvm; +raw_ostream &llvm::operator<<(raw_ostream &OS, const DWARFAddressRange &R) { + return OS << format("[0x%16.16" PRIx64 ", 0x%16.16" PRIx64 ")", R.LowPC, + R.HighPC); +} + void DWARFDebugRangeList::clear() { Offset = -1U; AddressSize = 0; diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp index 31074a81a989b..d20eabff7f042 100644 --- a/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -124,6 +124,64 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue, } } +/// Dump the name encoded in the type tag. +static void dumpTypeTagName(raw_ostream &OS, dwarf::Tag T) { + StringRef TagStr = TagString(T); + if (!TagStr.startswith("DW_TAG_") || !TagStr.endswith("_type")) + return; + OS << TagStr.substr(7, TagStr.size() - 12) << " "; +} + +/// Recursively dump the DIE type name when applicable. +static void dumpTypeName(raw_ostream &OS, const DWARFDie &Die) { + DWARFDie D = Die.getAttributeValueAsReferencedDie(DW_AT_type); + + if (!D.isValid()) + return; + + if (const char *Name = D.getName(DINameKind::LinkageName)) { + OS << Name; + return; + } + + // FIXME: We should have pretty printers per language. Currently we print + // everything as if it was C++ and fall back to the TAG type name. + const dwarf::Tag T = D.getTag(); + switch (T) { + case DW_TAG_array_type: + case DW_TAG_pointer_type: + case DW_TAG_ptr_to_member_type: + case DW_TAG_reference_type: + case DW_TAG_rvalue_reference_type: + break; + default: + dumpTypeTagName(OS, T); + } + + // Follow the DW_AT_type if possible. 
+ dumpTypeName(OS, D); + + switch (T) { + case DW_TAG_array_type: + OS << "[]"; + break; + case DW_TAG_pointer_type: + OS << '*'; + break; + case DW_TAG_ptr_to_member_type: + OS << '*'; + break; + case DW_TAG_reference_type: + OS << '&'; + break; + case DW_TAG_rvalue_reference_type: + OS << "&&"; + break; + default: + break; + } +} + static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, uint32_t *OffsetPtr, dwarf::Attribute Attr, dwarf::Form Form, unsigned Indent, @@ -132,14 +190,14 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, return; const char BaseIndent[] = " "; OS << BaseIndent; - OS.indent(Indent+2); + OS.indent(Indent + 2); auto attrString = AttributeString(Attr); if (!attrString.empty()) WithColor(OS, syntax::Attribute) << attrString; else WithColor(OS, syntax::Attribute).get() << format("DW_AT_Unknown_%x", Attr); - if (DumpOpts.Verbose) { + if (DumpOpts.Verbose || DumpOpts.ShowForm) { auto formString = FormEncodingString(Form); if (!formString.empty()) OS << " [" << formString << ']'; @@ -161,7 +219,10 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, if (Attr == DW_AT_decl_file || Attr == DW_AT_call_file) { Color = syntax::String; if (const auto *LT = U->getContext().getLineTableForUnit(U)) - if (LT->getFileNameByIndex(formValue.getAsUnsignedConstant().getValue(), U->getCompilationDir(), DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) { + if (LT->getFileNameByIndex( + formValue.getAsUnsignedConstant().getValue(), + U->getCompilationDir(), + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) { File = '"' + File + '"'; Name = File; } @@ -173,7 +234,8 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line) OS << *formValue.getAsUnsignedConstant(); else if (Attr == DW_AT_location || Attr == DW_AT_frame_base || - Attr == DW_AT_data_member_location) + Attr == DW_AT_data_member_location || + Attr == DW_AT_GNU_call_site_value) dumpLocation(OS, formValue, U, sizeof(BaseIndent) + Indent + 4, DumpOpts); else formValue.dump(OS, DumpOpts); @@ -182,8 +244,13 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, // having both the raw value and the pretty-printed value is // interesting. These attributes are handled below. 
if (Attr == DW_AT_specification || Attr == DW_AT_abstract_origin) { - if (const char *Name = Die.getAttributeValueAsReferencedDie(Attr).getName(DINameKind::LinkageName)) - OS << " \"" << Name << '\"'; + if (const char *Name = Die.getAttributeValueAsReferencedDie(Attr).getName( + DINameKind::LinkageName)) + OS << " \"" << Name << '\"'; + } else if (Attr == DW_AT_type) { + OS << " \""; + dumpTypeName(OS, Die); + OS << '"'; } else if (Attr == DW_AT_APPLE_property_attribute) { if (Optional<uint64_t> OptVal = formValue.getAsUnsignedConstant()) dumpApplePropertyAttribute(OS, *OptVal); @@ -196,17 +263,14 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, OS << ")\n"; } -bool DWARFDie::isSubprogramDIE() const { - return getTag() == DW_TAG_subprogram; -} +bool DWARFDie::isSubprogramDIE() const { return getTag() == DW_TAG_subprogram; } bool DWARFDie::isSubroutineDIE() const { auto Tag = getTag(); return Tag == DW_TAG_subprogram || Tag == DW_TAG_inlined_subroutine; } -Optional<DWARFFormValue> -DWARFDie::find(dwarf::Attribute Attr) const { +Optional<DWARFFormValue> DWARFDie::find(dwarf::Attribute Attr) const { if (!isValid()) return None; auto AbbrevDecl = getAbbreviationDeclarationPtr(); @@ -249,17 +313,14 @@ DWARFDie::findRecursively(ArrayRef<dwarf::Attribute> Attrs) const { DWARFDie DWARFDie::getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const { - auto SpecRef = toReference(find(Attr)); - if (SpecRef) { - auto SpecUnit = U->getUnitSection().getUnitForOffset(*SpecRef); - if (SpecUnit) + if (auto SpecRef = toReference(find(Attr))) { + if (auto SpecUnit = U->getUnitSection().getUnitForOffset(*SpecRef)) return SpecUnit->getDIEForOffset(*SpecRef); } return DWARFDie(); } -Optional<uint64_t> -DWARFDie::getRangesBaseAttribute() const { +Optional<uint64_t> DWARFDie::getRangesBaseAttribute() const { return toSectionOffset(find({DW_AT_rnglists_base, DW_AT_GNU_ranges_base})); } @@ -292,8 +353,7 @@ bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC, return false; } -DWARFAddressRangesVector -DWARFDie::getAddressRanges() const { +DWARFAddressRangesVector DWARFDie::getAddressRanges() const { if (isNULL()) return DWARFAddressRangesVector(); // Single range specified by low/high PC. @@ -311,8 +371,8 @@ DWARFDie::getAddressRanges() const { return DWARFAddressRangesVector(); } -void -DWARFDie::collectChildrenAddressRanges(DWARFAddressRangesVector& Ranges) const { +void DWARFDie::collectChildrenAddressRanges( + DWARFAddressRangesVector &Ranges) const { if (isNULL()) return; if (isSubprogramDIE()) { @@ -320,33 +380,32 @@ DWARFDie::collectChildrenAddressRanges(DWARFAddressRangesVector& Ranges) const { Ranges.insert(Ranges.end(), DIERanges.begin(), DIERanges.end()); } - for (auto Child: children()) + for (auto Child : children()) Child.collectChildrenAddressRanges(Ranges); } bool DWARFDie::addressRangeContainsAddress(const uint64_t Address) const { - for (const auto& R : getAddressRanges()) { + for (const auto &R : getAddressRanges()) { if (R.LowPC <= Address && Address < R.HighPC) return true; } return false; } -const char * -DWARFDie::getSubroutineName(DINameKind Kind) const { +const char *DWARFDie::getSubroutineName(DINameKind Kind) const { if (!isSubroutineDIE()) return nullptr; return getName(Kind); }
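The dumpTypeName printer added earlier in this patch (and called for DW_AT_type above) is a two-phase recursion: named base types print on the way down, and type qualifiers ('*', '&', '[]') append on the way back up, which is what yields C-style strings like "int*&". The same scheme over a toy type graph:

    #include <iostream>

    struct TypeNode {
      const char *Name;      // non-null for a named leaf type
      const TypeNode *Inner; // pointee/referent, if any
      const char *Suffix;    // "*", "&", "[]", ...
    };

    static void printTypeName(std::ostream &OS, const TypeNode *N) {
      if (!N)
        return;
      if (N->Name) { // leaf: emit the name and stop
        OS << N->Name;
        return;
      }
      printTypeName(OS, N->Inner); // recurse to the base type first...
      OS << N->Suffix;             // ...then append this level's qualifier
    }

    int main() {
      TypeNode IntT{"int", nullptr, ""};
      TypeNode Ptr{nullptr, &IntT, "*"};
      TypeNode Ref{nullptr, &Ptr, "&"};
      printTypeName(std::cout, &Ref); // prints int*&
      std::cout << '\n';
    }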
-const char * -DWARFDie::getName(DINameKind Kind) const { +const char *DWARFDie::getName(DINameKind Kind) const { if (!isValid() || Kind == DINameKind::None) return nullptr; // Try to get mangled name only if it was asked for. if (Kind == DINameKind::LinkageName) { - if (auto Name = dwarf::toString(findRecursively({DW_AT_MIPS_linkage_name, - DW_AT_linkage_name}), nullptr)) + if (auto Name = dwarf::toString( + findRecursively({DW_AT_MIPS_linkage_name, DW_AT_linkage_name}), + nullptr)) return Name; } if (auto Name = dwarf::toString(findRecursively(DW_AT_name), nullptr)) @@ -384,8 +443,6 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent, DWARFDataExtractor debug_info_data = U->getDebugInfoExtractor(); const uint32_t Offset = getOffset(); uint32_t offset = Offset; - // if (DumpOpts.ShowChildren && DumpOpts.RecurseDepth) - // DumpOpts.RecurseDepth++; if (DumpOpts.ShowParents) { DumpOpts.ShowParents = false; Indent = dumpParentChain(getParent(), OS, Indent, DumpOpts); @@ -403,7 +460,7 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent, WithColor(OS, syntax::Tag).get().indent(Indent) << tagString; else WithColor(OS, syntax::Tag).get().indent(Indent) - << format("DW_TAG_Unknown_%x", getTag()); + << format("DW_TAG_Unknown_%x", getTag()); if (DumpOpts.Verbose) OS << format(" [%u] %c", abbrCode, @@ -426,13 +483,13 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent, if (DumpOpts.RecurseDepth > 0 && child) { DumpOpts.RecurseDepth--; while (child) { - child.dump(OS, Indent+2, DumpOpts); + child.dump(OS, Indent + 2, DumpOpts); child = child.getSibling(); } } } else { OS << "Abbreviation code not found in 'debug_abbrev' class for code: " - << abbrCode << '\n'; + << abbrCode << '\n'; } } else { OS.indent(Indent) << "NULL\n"; @@ -454,14 +511,19 @@ DWARFDie DWARFDie::getSibling() const { return DWARFDie(); } -iterator_range<DWARFDie::attribute_iterator> -DWARFDie::attributes() const { +DWARFDie DWARFDie::getFirstChild() const { + if (isValid()) + return U->getFirstChild(Die); + return DWARFDie(); +} + +iterator_range<DWARFDie::attribute_iterator> DWARFDie::attributes() const { return make_range(attribute_iterator(*this, false), attribute_iterator(*this, true)); } -DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End) : - Die(D), AttrValue(0), Index(0) { +DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End) + : Die(D), AttrValue(0), Index(0) { auto AbbrDecl = Die.getAbbreviationDeclarationPtr(); assert(AbbrDecl && "Must have abbreviation declaration"); if (End) { diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp index 86451faa79deb..c3d8ff2cbc294 100644 --- a/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -440,7 +440,7 @@ DWARFDie DWARFUnit::getSibling(const DWARFDebugInfoEntry *Die) { // NULL DIEs don't have siblings. if (Die->getAbbreviationDeclarationPtr() == nullptr) return DWARFDie(); - + // Find the next DIE whose depth is the same as the Die's depth. for (size_t I = getDIEIndex(Die) + 1, EndIdx = DieArray.size(); I < EndIdx; ++I) { @@ -450,6 +450,17 @@ DWARFDie DWARFUnit::getSibling(const DWARFDebugInfoEntry *Die) { return DWARFDie(); } +DWARFDie DWARFUnit::getFirstChild(const DWARFDebugInfoEntry *Die) { + if (!Die->hasChildren()) + return DWARFDie(); + + // We do not want access out of bounds when parsing corrupted debug data.
+ size_t I = getDIEIndex(Die) + 1; + if (I >= DieArray.size()) + return DWARFDie(); + return DWARFDie(this, &DieArray[I]); +} + const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const { if (!Abbrevs) Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset); diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 096d5ff8c33f4..b10697c9a31f8 100644 --- a/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "SyntaxHighlighting.h" #include "llvm/DebugInfo/DWARF/DWARFVerifier.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" @@ -23,6 +24,7 @@ using namespace llvm; using namespace dwarf; using namespace object; +using namespace syntax; DWARFVerifier::DieRangeInfo::address_range_iterator DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) { @@ -132,7 +134,7 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, UnitType = DebugInfoData.getU8(Offset); AddrSize = DebugInfoData.getU8(Offset); AbbrOffset = DebugInfoData.getU32(Offset); - ValidType = DWARFUnit::isValidUnitType(UnitType); + ValidType = dwarf::isUnitType(UnitType); } else { UnitType = 0; AbbrOffset = DebugInfoData.getU32(Offset); @@ -148,25 +150,26 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, if (!ValidLength || !ValidVersion || !ValidAddrSize || !ValidAbbrevOffset || !ValidType) { Success = false; - OS << format("Units[%d] - start offset: 0x%08x \n", UnitIndex, OffsetStart); + error() << format("Units[%d] - start offset: 0x%08x \n", UnitIndex, + OffsetStart); if (!ValidLength) - OS << "\tError: The length for this unit is too " + note() << "The length for this unit is too " "large for the .debug_info provided.\n"; if (!ValidVersion) - OS << "\tError: The 16 bit unit header version is not valid.\n"; + note() << "The 16 bit unit header version is not valid.\n"; if (!ValidType) - OS << "\tError: The unit type encoding is not valid.\n"; + note() << "The unit type encoding is not valid.\n"; if (!ValidAbbrevOffset) - OS << "\tError: The offset into the .debug_abbrev section is " + note() << "The offset into the .debug_abbrev section is " "not valid.\n"; if (!ValidAddrSize) - OS << "\tError: The address size is unsupported.\n"; + note() << "The address size is unsupported.\n"; } *Offset = OffsetStart + Length + 4; return Success; } -bool DWARFVerifier::verifyUnitContents(DWARFUnit Unit) { +bool DWARFVerifier::verifyUnitContents(DWARFUnit Unit, uint8_t UnitType) { uint32_t NumUnitErrors = 0; unsigned NumDies = Unit.getNumDIEs(); for (unsigned I = 0; I < NumDies; ++I) { @@ -179,9 +182,30 @@ bool DWARFVerifier::verifyUnitContents(DWARFUnit Unit) { } } - DieRangeInfo RI; DWARFDie Die = Unit.getUnitDIE(/* ExtractUnitDIEOnly = */ false); + if (!Die) { + error() << "Compilation unit without DIE.\n"; + NumUnitErrors++; + return NumUnitErrors == 0; + } + + if (!dwarf::isUnitType(Die.getTag())) { + error() << "Compilation unit root DIE is not a unit DIE: " + << dwarf::TagString(Die.getTag()) << ".\n"; + NumUnitErrors++; + } + + if (UnitType != 0 && + !DWARFUnit::isMatchingUnitTypeAndTag(UnitType, Die.getTag())) { + error() << "Compilation unit type (" << dwarf::UnitTypeString(UnitType) + << ") and root DIE (" << dwarf::TagString(Die.getTag()) + << ") do not match.\n"; + NumUnitErrors++; + } + + DieRangeInfo RI; NumUnitErrors += 
verifyDieRanges(Die, RI); + return NumUnitErrors == 0; } @@ -195,8 +219,8 @@ unsigned DWARFVerifier::verifyAbbrevSection(const DWARFDebugAbbrev *Abbrev) { for (auto Attribute : AbbrDecl.attributes()) { auto Result = AttributeSet.insert(Attribute.Attr); if (!Result.second) { - OS << "Error: Abbreviation declaration contains multiple " - << AttributeString(Attribute.Attr) << " attributes.\n"; + error() << "Abbreviation declaration contains multiple " + << AttributeString(Attribute.Attr) << " attributes.\n"; AbbrDecl.dump(OS); ++NumErrors; } @@ -238,6 +262,8 @@ bool DWARFVerifier::handleDebugInfo() { bool isUnitDWARF64 = false; bool isHeaderChainValid = true; bool hasDIE = DebugInfoData.isValidOffset(Offset); + DWARFUnitSection<DWARFTypeUnit> TUSection{}; + DWARFUnitSection<DWARFCompileUnit> CUSection{}; while (hasDIE) { OffsetStart = Offset; if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType, isUnitDWARF64)) { isHeaderChainValid = false; if (isUnitDWARF64) break; @@ -250,7 +276,6 @@ bool DWARFVerifier::handleDebugInfo() { switch (UnitType) { case dwarf::DW_UT_type: case dwarf::DW_UT_split_type: { - DWARFUnitSection<DWARFTypeUnit> TUSection{}; Unit.reset(new DWARFTypeUnit( DCtx, DObj.getInfoSection(), DCtx.getDebugAbbrev(), &DObj.getRangeSection(), DObj.getStringSection(), @@ -266,7 +291,6 @@ bool DWARFVerifier::handleDebugInfo() { // UnitType = 0 means that we are // verifying a compile unit in DWARF v4. case 0: { - DWARFUnitSection<DWARFCompileUnit> CUSection{}; Unit.reset(new DWARFCompileUnit( DCtx, DObj.getInfoSection(), DCtx.getDebugAbbrev(), &DObj.getRangeSection(), DObj.getStringSection(), @@ -278,14 +302,14 @@ bool DWARFVerifier::handleDebugInfo() { default: { llvm_unreachable("Invalid UnitType."); } } Unit->extract(DebugInfoData, &OffsetStart); - if (!verifyUnitContents(*Unit)) + if (!verifyUnitContents(*Unit, UnitType)) ++NumDebugInfoErrors; } hasDIE = DebugInfoData.isValidOffset(Offset); ++UnitIdx; } if (UnitIdx == 0 && !hasDIE) { - OS << "Warning: .debug_info is empty.\n"; + warn() << ".debug_info is empty.\n"; isHeaderChainValid = true; } NumDebugInfoErrors += verifyDebugInfoReferences(); @@ -307,9 +331,7 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die, for (auto Range : Ranges) { if (!Range.valid()) { ++NumErrors; - OS << format("error: Invalid address range [0x%08" PRIx64 - " - 0x%08" PRIx64 "].\n", - Range.LowPC, Range.HighPC); + error() << "Invalid address range " << Range << "\n"; continue; } @@ -317,11 +339,8 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die, const auto IntersectingRange = RI.insert(Range); if (IntersectingRange != RI.Ranges.end()) { ++NumErrors; - OS << format("error: DIE has overlapping address ranges: [0x%08" PRIx64 - " - 0x%08" PRIx64 "] and [0x%08" PRIx64 " - 0x%08" PRIx64 - "].\n", - Range.LowPC, Range.HighPC, IntersectingRange->LowPC, - IntersectingRange->HighPC); + error() << "DIE has overlapping address ranges: " << Range << " and " + << *IntersectingRange << "\n"; break; } } @@ -330,7 +349,7 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die, const auto IntersectingChild = ParentRI.insert(RI); if (IntersectingChild != ParentRI.Children.end()) { ++NumErrors; - OS << "error: DIEs have overlapping address ranges:"; + error() << "DIEs have overlapping address ranges:"; Die.dump(OS, 0); IntersectingChild->Die.dump(OS, 0); OS << "\n"; @@ -342,8 +361,8 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die, ParentRI.Die.getTag() == DW_TAG_subprogram); if (ShouldBeContained && !ParentRI.contains(RI)) { ++NumErrors; - OS << "error: DIE address ranges are not " - "contained in its parent's ranges:"; + error() << "DIE address ranges 
are not " + "contained in its parent's ranges:"; Die.dump(OS, 0); ParentRI.Die.dump(OS, 0); OS << "\n"; @@ -367,14 +386,14 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { if (*SectionOffset >= DObj.getRangeSection().Data.size()) { ++NumErrors; - OS << "error: DW_AT_ranges offset is beyond .debug_ranges " - "bounds:\n"; + error() << "DW_AT_ranges offset is beyond .debug_ranges " + "bounds:\n"; Die.dump(OS, 0, DumpOpts); OS << "\n"; } } else { ++NumErrors; - OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; + error() << "DIE has invalid DW_AT_ranges encoding:\n"; Die.dump(OS, 0, DumpOpts); OS << "\n"; } @@ -384,15 +403,15 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { if (*SectionOffset >= DObj.getLineSection().Data.size()) { ++NumErrors; - OS << "error: DW_AT_stmt_list offset is beyond .debug_line " - "bounds: " - << format("0x%08" PRIx64, *SectionOffset) << "\n"; + error() << "DW_AT_stmt_list offset is beyond .debug_line " + "bounds: " + << format("0x%08" PRIx64, *SectionOffset) << "\n"; Die.dump(OS, 0, DumpOpts); OS << "\n"; } } else { ++NumErrors; - OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; + error() << "DIE has invalid DW_AT_stmt_list encoding:\n"; Die.dump(OS, 0, DumpOpts); OS << "\n"; } @@ -424,10 +443,10 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, auto CUOffset = AttrValue.Value.getRawUValue(); if (CUOffset >= CUSize) { ++NumErrors; - OS << "error: " << FormEncodingString(Form) << " CU offset " - << format("0x%08" PRIx64, CUOffset) - << " is invalid (must be less than CU size of " - << format("0x%08" PRIx32, CUSize) << "):\n"; + error() << FormEncodingString(Form) << " CU offset " + << format("0x%08" PRIx64, CUOffset) + << " is invalid (must be less than CU size of " + << format("0x%08" PRIx32, CUSize) << "):\n"; Die.dump(OS, 0, DumpOpts); OS << "\n"; } else { @@ -446,8 +465,8 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, if (RefVal) { if (*RefVal >= DObj.getInfoSection().Data.size()) { ++NumErrors; - OS << "error: DW_FORM_ref_addr offset beyond .debug_info " - "bounds:\n"; + error() << "DW_FORM_ref_addr offset beyond .debug_info " + "bounds:\n"; Die.dump(OS, 0, DumpOpts); OS << "\n"; } else { @@ -463,7 +482,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die, assert(SecOffset); // DW_FORM_strp is a section offset. if (SecOffset && *SecOffset >= DObj.getStringSection().size()) { ++NumErrors; - OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; + error() << "DW_FORM_strp offset beyond .debug_str bounds:\n"; Die.dump(OS, 0, DumpOpts); OS << "\n"; } @@ -485,8 +504,8 @@ unsigned DWARFVerifier::verifyDebugInfoReferences() { if (Die) continue; ++NumErrors; - OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first) - << ". Offset is in between DIEs:\n"; + error() << "invalid DIE reference " << format("0x%08" PRIx64, Pair.first) + << ". 
Offset is in between DIEs:\n"; for (auto Offset : Pair.second) { auto ReferencingDie = DCtx.getDIEForOffset(Offset); ReferencingDie.dump(OS, 0, DumpOpts); @@ -512,8 +531,8 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() { if (LineTableOffset < DCtx.getDWARFObj().getLineSection().Data.size()) { if (!LineTable) { ++NumDebugLineErrors; - OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) - << "] was not able to be parsed for CU:\n"; + error() << ".debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "] was not able to be parsed for CU:\n"; Die.dump(OS, 0, DumpOpts); OS << '\n'; continue; @@ -528,10 +547,10 @@ void DWARFVerifier::verifyDebugLineStmtOffsets() { auto Iter = StmtListToDie.find(LineTableOffset); if (Iter != StmtListToDie.end()) { ++NumDebugLineErrors; - OS << "error: two compile unit DIEs, " - << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " - << format("0x%08" PRIx32, Die.getOffset()) - << ", have the same DW_AT_stmt_list section offset:\n"; + error() << "two compile unit DIEs, " + << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " + << format("0x%08" PRIx32, Die.getOffset()) + << ", have the same DW_AT_stmt_list section offset:\n"; Iter->second.dump(OS, 0, DumpOpts); Die.dump(OS, 0, DumpOpts); OS << '\n'; @@ -560,12 +579,12 @@ void DWARFVerifier::verifyDebugLineRows() { // Verify directory index. if (FileName.DirIdx > MaxDirIndex) { ++NumDebugLineErrors; - OS << "error: .debug_line[" - << format("0x%08" PRIx64, - *toSectionOffset(Die.find(DW_AT_stmt_list))) - << "].prologue.file_names[" << FileIndex - << "].dir_idx contains an invalid index: " << FileName.DirIdx - << "\n"; + error() << ".debug_line[" + << format("0x%08" PRIx64, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "].prologue.file_names[" << FileIndex + << "].dir_idx contains an invalid index: " << FileName.DirIdx + << "\n"; } // Check file paths for duplicates. @@ -579,11 +598,11 @@ void DWARFVerifier::verifyDebugLineRows() { if (It == FullPathMap.end()) FullPathMap[FullPath] = FileIndex; else if (It->second != FileIndex) { - OS << "warning: .debug_line[" - << format("0x%08" PRIx64, - *toSectionOffset(Die.find(DW_AT_stmt_list))) - << "].prologue.file_names[" << FileIndex - << "] is a duplicate of file_names[" << It->second << "]\n"; + warn() << ".debug_line[" + << format("0x%08" PRIx64, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "].prologue.file_names[" << FileIndex + << "] is a duplicate of file_names[" << It->second << "]\n"; } FileIndex++; @@ -596,11 +615,11 @@ void DWARFVerifier::verifyDebugLineRows() { // Verify row address. if (Row.Address < PrevAddress) { ++NumDebugLineErrors; - OS << "error: .debug_line[" - << format("0x%08" PRIx64, - *toSectionOffset(Die.find(DW_AT_stmt_list))) - << "] row[" << RowIndex - << "] decreases in address from previous row:\n"; + error() << ".debug_line[" + << format("0x%08" PRIx64, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "] row[" << RowIndex + << "] decreases in address from previous row:\n"; DWARFDebugLine::Row::dumpTableHeader(OS); if (RowIndex > 0) @@ -612,11 +631,11 @@ void DWARFVerifier::verifyDebugLineRows() { // Verify file index. 
if (Row.File > MaxFileIndex) { ++NumDebugLineErrors; - OS << "error: .debug_line[" - << format("0x%08" PRIx64, - *toSectionOffset(Die.find(DW_AT_stmt_list))) - << "][" << RowIndex << "] has invalid file index " << Row.File - << " (valid values are [1," << MaxFileIndex << "]):\n"; + error() << ".debug_line[" + << format("0x%08" PRIx64, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "][" << RowIndex << "] has invalid file index " << Row.File + << " (valid values are [1," << MaxFileIndex << "]):\n"; DWARFDebugLine::Row::dumpTableHeader(OS); Row.dump(OS); OS << '\n'; @@ -647,17 +666,19 @@ unsigned DWARFVerifier::verifyAccelTable(const DWARFSection *AccelSection, DWARFAcceleratorTable AccelTable(AccelSectionData, *StrData); OS << "Verifying " << SectionName << "...\n"; - // Verify that the fixed part of the header is not too short. + // Verify that the fixed part of the header is not too short. if (!AccelSectionData.isValidOffset(AccelTable.getSizeHdr())) { - OS << "\terror: Section is too small to fit a section header.\n"; + error() << "Section is too small to fit a section header.\n"; return 1; } + // Verify that the section is not too short. if (!AccelTable.extract()) { - OS << "\terror: Section is smaller than size described in section header.\n"; + error() << "Section is smaller than size described in section header.\n"; return 1; } + // Verify that all buckets have a valid hash index or are empty. uint32_t NumBuckets = AccelTable.getNumBuckets(); uint32_t NumHashes = AccelTable.getNumHashes(); @@ -669,18 +690,18 @@ unsigned DWARFVerifier::verifyAccelTable(const DWARFSection *AccelSection, for (uint32_t BucketIdx = 0; BucketIdx < NumBuckets; ++BucketIdx) { uint32_t HashIdx = AccelSectionData.getU32(&BucketsOffset); if (HashIdx >= NumHashes && HashIdx != UINT32_MAX) { - OS << format("\terror: Bucket[%d] has invalid hash index: %u.\n", BucketIdx, - HashIdx); + error() << format("Bucket[%d] has invalid hash index: %u.\n", BucketIdx, + HashIdx); ++NumErrors; } } uint32_t NumAtoms = AccelTable.getAtomsDesc().size(); if (NumAtoms == 0) { - OS << "\terror: no atoms; failed to read HashData.\n"; + error() << "No atoms: failed to read HashData.\n"; return 1; } if (!AccelTable.validateForms()) { - OS << "\terror: unsupported form; failed to read HashData.\n"; + error() << "Unsupported form: failed to read HashData.\n"; return 1; } @@ -691,8 +712,8 @@ unsigned DWARFVerifier::verifyAccelTable(const DWARFSection *AccelSection, uint32_t HashDataOffset = AccelSectionData.getU32(&DataOffset); if (!AccelSectionData.isValidOffsetForDataOfSize(HashDataOffset, sizeof(uint64_t))) { - OS << format("\terror: Hash[%d] has invalid HashData offset: 0x%08x.\n", - HashIdx, HashDataOffset); + error() << format("Hash[%d] has invalid HashData offset: 0x%08x.\n", + HashIdx, HashDataOffset); ++NumErrors; } @@ -716,8 +737,8 @@ unsigned DWARFVerifier::verifyAccelTable(const DWARFSection *AccelSection, if (!Name) Name = ""; - OS << format( - "\terror: %s Bucket[%d] Hash[%d] = 0x%08x " + error() << format( + "%s Bucket[%d] Hash[%d] = 0x%08x " "Str[%u] = 0x%08x " "DIE[%d] = 0x%08x is not a valid DIE offset for \"%s\".\n", SectionName, BucketIdx, HashIdx, Hash, StringCount, StrpOffset, @@ -727,10 +748,10 @@ unsigned DWARFVerifier::verifyAccelTable(const DWARFSection *AccelSection, continue; } if ((Tag != dwarf::DW_TAG_null) && (Die.getTag() != Tag)) { - OS << "\terror: Tag " << dwarf::TagString(Tag) - << " in accelerator table does not match Tag " - << dwarf::TagString(Die.getTag()) << " of DIE[" << HashDataIdx - << 
"].\n"; + error() << "Tag " << dwarf::TagString(Tag) + << " in accelerator table does not match Tag " + << dwarf::TagString(Die.getTag()) << " of DIE[" << HashDataIdx + << "].\n"; ++NumErrors; } } @@ -758,3 +779,15 @@ bool DWARFVerifier::handleAccelTables() { verifyAccelTable(&D.getAppleObjCSection(), &StrData, ".apple_objc"); return NumErrors == 0; } + +raw_ostream &DWARFVerifier::error() const { + return WithColor(OS, syntax::Error).get() << "error: "; +} + +raw_ostream &DWARFVerifier::warn() const { + return WithColor(OS, syntax::Warning).get() << "warning: "; +} + +raw_ostream &DWARFVerifier::note() const { + return WithColor(OS, syntax::Note).get() << "note: "; +} diff --git a/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp b/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp index d4f44e446954f..65d66fc8f5148 100644 --- a/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp +++ b/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp @@ -24,12 +24,15 @@ WithColor::WithColor(raw_ostream &OS, enum HighlightColor Type) : OS(OS) { // Detect color from terminal type unless the user passed the --color option. if (UseColor == cl::BOU_UNSET ? OS.has_colors() : UseColor == cl::BOU_TRUE) { switch (Type) { - case Address: OS.changeColor(raw_ostream::YELLOW); break; - case String: OS.changeColor(raw_ostream::GREEN); break; - case Tag: OS.changeColor(raw_ostream::BLUE); break; - case Attribute: OS.changeColor(raw_ostream::CYAN); break; - case Enumerator: OS.changeColor(raw_ostream::MAGENTA); break; - case Macro: OS.changeColor(raw_ostream::RED); break; + case Address: OS.changeColor(raw_ostream::YELLOW); break; + case String: OS.changeColor(raw_ostream::GREEN); break; + case Tag: OS.changeColor(raw_ostream::BLUE); break; + case Attribute: OS.changeColor(raw_ostream::CYAN); break; + case Enumerator: OS.changeColor(raw_ostream::MAGENTA); break; + case Macro: OS.changeColor(raw_ostream::RED); break; + case Error: OS.changeColor(raw_ostream::RED, true); break; + case Warning: OS.changeColor(raw_ostream::MAGENTA, true); break; + case Note: OS.changeColor(raw_ostream::BLACK, true); break; } } } diff --git a/lib/DebugInfo/DWARF/SyntaxHighlighting.h b/lib/DebugInfo/DWARF/SyntaxHighlighting.h index 277de973dbf0e..686cf2c77608d 100644 --- a/lib/DebugInfo/DWARF/SyntaxHighlighting.h +++ b/lib/DebugInfo/DWARF/SyntaxHighlighting.h @@ -18,7 +18,17 @@ namespace dwarf { namespace syntax { // Symbolic names for various syntax elements. -enum HighlightColor { Address, String, Tag, Attribute, Enumerator, Macro }; +enum HighlightColor { + Address, + String, + Tag, + Attribute, + Enumerator, + Macro, + Error, + Warning, + Note +}; /// An RAII object that temporarily switches an output stream to a /// specific color. 
@@ -30,8 +40,8 @@ class WithColor { WithColor(raw_ostream &OS, enum HighlightColor Type); ~WithColor(); - raw_ostream& get() { return OS; } - operator raw_ostream& () { return OS; } + raw_ostream &get() { return OS; } + operator raw_ostream &() { return OS; } }; } // end namespace syntax diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp index d7be2d576c2dd..7be4c762b02ec 100644 --- a/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -68,15 +68,9 @@ NativeSession::NativeSession(std::unique_ptr<PDBFile> PdbFile, NativeSession::~NativeSession() = default; -Error NativeSession::createFromPdb(StringRef Path, +Error NativeSession::createFromPdb(std::unique_ptr<MemoryBuffer> Buffer, std::unique_ptr<IPDBSession> &Session) { - ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer = - MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1, - /*RequiresNullTerminator=*/false); - if (!ErrorOrBuffer) - return make_error<GenericError>(generic_error_code::invalid_path); - - std::unique_ptr<MemoryBuffer> Buffer = std::move(*ErrorOrBuffer); + StringRef Path = Buffer->getBufferIdentifier(); auto Stream = llvm::make_unique<MemoryBufferByteStream>( std::move(Buffer), llvm::support::little); diff --git a/lib/DebugInfo/PDB/PDB.cpp b/lib/DebugInfo/PDB/PDB.cpp index 501d4f5985b7d..c1b21c1203621 100644 --- a/lib/DebugInfo/PDB/PDB.cpp +++ b/lib/DebugInfo/PDB/PDB.cpp @@ -23,8 +23,15 @@ using namespace llvm::pdb; Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path, std::unique_ptr<IPDBSession> &Session) { // Create the correct concrete instance type based on the value of Type. - if (Type == PDB_ReaderType::Native) - return NativeSession::createFromPdb(Path, Session); + if (Type == PDB_ReaderType::Native) { + ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer = + MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1, + /*RequiresNullTerminator=*/false); + if (!ErrorOrBuffer) + return make_error<GenericError>(generic_error_code::invalid_path, Path); + + return NativeSession::createFromPdb(std::move(*ErrorOrBuffer), Session); + } #if LLVM_ENABLE_DIA_SDK return DIASession::createFromPdb(Path, Session);
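Hoisting the file I/O out of NativeSession::createFromPdb and into loadDataForPDB (above) is a small dependency inversion: the session now consumes whatever MemoryBuffer it is handed, so callers can feed it an in-memory PDB just as easily as a file. The shape of the split, with hypothetical stand-in types:

    #include <memory>
    #include <string>

    struct Buffer { std::string Id, Bytes; };

    // I/O stays at the outer boundary...
    static std::unique_ptr<Buffer> readFile(const std::string &Path) {
      return std::unique_ptr<Buffer>(new Buffer{Path, "<file bytes>"});
    }

    // ...while the parser only ever sees a buffer, origin unknown.
    static bool createSession(std::unique_ptr<Buffer> B) {
      return B && !B->Bytes.empty();
    }

    int main() {
      bool FromDisk = createSession(readFile("foo.pdb"));
      bool FromMemory = createSession(
          std::unique_ptr<Buffer>(new Buffer{"<test>", "fake pdb"}));
      return FromDisk && FromMemory ? 0 : 1;
    }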
diff --git a/lib/DebugInfo/PDB/PDBExtras.cpp b/lib/DebugInfo/PDB/PDBExtras.cpp index 504cfdf37a831..147e2f7abfe78 100644 --- a/lib/DebugInfo/PDB/PDBExtras.cpp +++ b/lib/DebugInfo/PDB/PDBExtras.cpp @@ -94,56 +94,11 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_DataKind &Data) { raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const codeview::RegisterId &Reg) { switch (Reg) { - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, AL, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, CL, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, DL, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, BL, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, AH, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, CH, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, DH, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, BH, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, AX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, CX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, DX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, BX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, SP, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, BP, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, SI, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, DI, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, EAX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, ECX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, EDX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, EBX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, ESP, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, EBP, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, ESI, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, EDI, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, ES, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, CS, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, SS, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, DS, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, FS, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, GS, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, IP, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, RAX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, RBX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, RCX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, RDX, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, RSI, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, RDI, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, RBP, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, RSP, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, R8, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, R9, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, R10, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, R11, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, R12, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, R13, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, R14, OS) - CASE_OUTPUT_ENUM_CLASS_NAME(codeview::RegisterId, R15, OS) - default: - OS << static_cast<int>(Reg); +#define CV_REGISTER(name, val) case codeview::RegisterId::name: OS << #name; return OS; +#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def" +#undef CV_REGISTER } + OS << static_cast<int>(Reg); return OS; } diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index cb48d6cb230d0..1dc8d4ac7bc5f 100644 --- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -341,7 +341,7 @@ class OrcMCJITReplacement : public ExecutionEngine { void operator()(RTDyldObjectLinkingLayerBase::ObjHandleT H, const RTDyldObjectLinkingLayer::ObjectPtr &Obj, - const LoadedObjectInfo &Info) const { + const RuntimeDyld::LoadedObjectInfo &Info) const { M.UnfinalizedSections[H] = std::move(M.SectionsAllocatedSinceLastLoad); M.SectionsAllocatedSinceLastLoad = SectionAddrSet(); M.MemMgr->notifyObjectLoaded(&M, *Obj->getBinary()); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 4d1d74cf34a75..c5e4dfa1e536d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -878,7 +878,7 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, // and stubs for branches Thumb - ARM and ARM - Thumb. writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc, [pc, #-4] return Addr + 4; - } else if (IsMipsO32ABI) { + } else if (IsMipsO32ABI || IsMipsN32ABI) { // 0: 3c190000 lui t9,%hi(addr). // 4: 27390000 addiu t9,t9,%lo(addr). // 8: 03200008 jr t9.
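The lui/addiu pair in the stub above carries a 32-bit address in two 16-bit halves; because addiu sign-extends its immediate, the %hi half must pre-add 0x8000 to absorb the borrow. A standalone check of that arithmetic (the relocation resolver applies the same rounding):

    #include <cassert>
    #include <cstdint>

    static uint32_t hi16(uint32_t Addr) {
      return ((Addr + 0x8000) >> 16) & 0xffff; // rounded %hi(addr)
    }
    static uint32_t lo16(uint32_t Addr) { return Addr & 0xffff; } // %lo(addr)

    int main() {
      uint32_t Addr = 0x12348765; // low half has its sign bit set
      // lui t9, %hi(addr); addiu t9, t9, %lo(addr)  recomputes Addr:
      uint32_t T9 = (hi16(Addr) << 16) + static_cast<int16_t>(lo16(Addr));
      assert(T9 == Addr);
    }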
@@ -886,13 +886,39 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, const unsigned LuiT9Instr = 0x3c190000, AdduiT9Instr = 0x27390000; const unsigned NopInstr = 0x0; unsigned JrT9Instr = 0x03200008; - if ((AbiVariant & ELF::EF_MIPS_ARCH) == ELF::EF_MIPS_ARCH_32R6) - JrT9Instr = 0x03200009; + if ((AbiVariant & ELF::EF_MIPS_ARCH) == ELF::EF_MIPS_ARCH_32R6 || + (AbiVariant & ELF::EF_MIPS_ARCH) == ELF::EF_MIPS_ARCH_64R6) + JrT9Instr = 0x03200009; writeBytesUnaligned(LuiT9Instr, Addr, 4); - writeBytesUnaligned(AdduiT9Instr, Addr+4, 4); - writeBytesUnaligned(JrT9Instr, Addr+8, 4); - writeBytesUnaligned(NopInstr, Addr+12, 4); + writeBytesUnaligned(AdduiT9Instr, Addr + 4, 4); + writeBytesUnaligned(JrT9Instr, Addr + 8, 4); + writeBytesUnaligned(NopInstr, Addr + 12, 4); + return Addr; + } else if (IsMipsN64ABI) { + // 0: 3c190000 lui t9,%highest(addr). + // 4: 67390000 daddiu t9,t9,%higher(addr). + // 8: 0019CC38 dsll t9,t9,16. + // c: 67390000 daddiu t9,t9,%hi(addr). + // 10: 0019CC38 dsll t9,t9,16. + // 14: 67390000 daddiu t9,t9,%lo(addr). + // 18: 03200008 jr t9. + // 1c: 00000000 nop. + const unsigned LuiT9Instr = 0x3c190000, DaddiuT9Instr = 0x67390000, + DsllT9Instr = 0x19CC38; + const unsigned NopInstr = 0x0; + unsigned JrT9Instr = 0x03200008; + if ((AbiVariant & ELF::EF_MIPS_ARCH) == ELF::EF_MIPS_ARCH_64R6) + JrT9Instr = 0x03200009; + + writeBytesUnaligned(LuiT9Instr, Addr, 4); + writeBytesUnaligned(DaddiuT9Instr, Addr + 4, 4); + writeBytesUnaligned(DsllT9Instr, Addr + 8, 4); + writeBytesUnaligned(DaddiuT9Instr, Addr + 12, 4); + writeBytesUnaligned(DsllT9Instr, Addr + 16, 4); + writeBytesUnaligned(DaddiuT9Instr, Addr + 20, 4); + writeBytesUnaligned(JrT9Instr, Addr + 24, 4); + writeBytesUnaligned(NopInstr, Addr + 28, 4); return Addr; } else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) { // Depending on which version of the ELF ABI is in use, we need to
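The DyldELFObject change that follows swaps a constructor taking std::error_code& for a private always-succeeds constructor plus a static create() factory returning Expected<std::unique_ptr<...>>, so a half-constructed object can never escape. The idiom in isolation (a sketch, not LLVM's class):

    #include "llvm/Support/Error.h"
    #include <memory>

    class Parsed {
      int Value;
      explicit Parsed(int Value) : Value(Value) {} // private: runs only on success

    public:
      static llvm::Expected<std::unique_ptr<Parsed>> create(int Raw) {
        if (Raw < 0)
          return llvm::make_error<llvm::StringError>(
              "negative input", llvm::inconvertibleErrorCode());
        return std::unique_ptr<Parsed>(new Parsed(Raw));
      }
      int value() const { return Value; }
    };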
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index a079d95a50f07..4f53bc7dc5a4c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -69,8 +69,11 @@ template <class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> { typedef typename ELFDataTypeTypedefHelper<ELFT>::value_type addr_type; + DyldELFObject(ELFObjectFile<ELFT> &&Obj); + public: - DyldELFObject(MemoryBufferRef Wrapper, std::error_code &ec); + static Expected<std::unique_ptr<DyldELFObject>> + create(MemoryBufferRef Wrapper); void updateSectionAddress(const SectionRef &Sec, uint64_t Addr); @@ -92,11 +95,22 @@ template <class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> { // actual memory. Ultimately, the Binary parent class will take ownership of // this MemoryBuffer object but not the underlying memory. template <class ELFT> -DyldELFObject<ELFT>::DyldELFObject(MemoryBufferRef Wrapper, std::error_code &EC) - : ELFObjectFile<ELFT>(Wrapper, EC) { +DyldELFObject<ELFT>::DyldELFObject(ELFObjectFile<ELFT> &&Obj) + : ELFObjectFile<ELFT>(std::move(Obj)) { this->isDyldELFObject = true; } +template <class ELFT> +Expected<std::unique_ptr<DyldELFObject<ELFT>>> +DyldELFObject<ELFT>::create(MemoryBufferRef Wrapper) { + auto Obj = ELFObjectFile<ELFT>::create(Wrapper); + if (auto E = Obj.takeError()) + return std::move(E); + std::unique_ptr<DyldELFObject<ELFT>> Ret( + new DyldELFObject<ELFT>(std::move(*Obj))); + return std::move(Ret); +} + template <class ELFT> void DyldELFObject<ELFT>::updateSectionAddress(const SectionRef &Sec, uint64_t Addr) { @@ -133,16 +147,18 @@ class LoadedELFObjectInfo final }; template <typename ELFT> -std::unique_ptr<DyldELFObject<ELFT>> -createRTDyldELFObject(MemoryBufferRef Buffer, - const ObjectFile &SourceObject, - const LoadedELFObjectInfo &L, - std::error_code &ec) { +static Expected<std::unique_ptr<DyldELFObject<ELFT>>> +createRTDyldELFObject(MemoryBufferRef Buffer, const ObjectFile &SourceObject, + const LoadedELFObjectInfo &L) { typedef typename ELFFile<ELFT>::Elf_Shdr Elf_Shdr; typedef typename ELFDataTypeTypedefHelper<ELFT>::value_type addr_type; - std::unique_ptr<DyldELFObject<ELFT>> Obj = - llvm::make_unique<DyldELFObject<ELFT>>(Buffer, ec); + Expected<std::unique_ptr<DyldELFObject<ELFT>>> ObjOrErr = + DyldELFObject<ELFT>::create(Buffer); + if (Error E = ObjOrErr.takeError()) + return std::move(E); + + std::unique_ptr<DyldELFObject<ELFT>> Obj = std::move(*ObjOrErr); // Iterate over all sections in the object. auto SI = SourceObject.section_begin(); @@ -163,41 +179,35 @@ createRTDyldELFObject(MemoryBufferRef Buffer, ++SI; } - return Obj; + return std::move(Obj); } -OwningBinary<ObjectFile> createELFDebugObject(const ObjectFile &Obj, - const LoadedELFObjectInfo &L) { +static OwningBinary<ObjectFile> +createELFDebugObject(const ObjectFile &Obj, const LoadedELFObjectInfo &L) { assert(Obj.isELF() && "Not an ELF object file."); std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBufferCopy(Obj.getData(), Obj.getFileName()); - std::error_code ec; - - std::unique_ptr<ObjectFile> DebugObj; - if (Obj.getBytesInAddress() == 4 && Obj.isLittleEndian()) { - typedef ELFType<support::little, false> ELF32LE; - DebugObj = createRTDyldELFObject<ELF32LE>(Buffer->getMemBufferRef(), Obj, L, - ec); - } else if (Obj.getBytesInAddress() == 4 && !Obj.isLittleEndian()) { - typedef ELFType<support::big, false> ELF32BE; - DebugObj = createRTDyldELFObject<ELF32BE>(Buffer->getMemBufferRef(), Obj, L, - ec); - } else if (Obj.getBytesInAddress() == 8 && !Obj.isLittleEndian()) { - typedef ELFType<support::big, true> ELF64BE; - DebugObj = createRTDyldELFObject<ELF64BE>(Buffer->getMemBufferRef(), Obj, L, - ec); - } else if (Obj.getBytesInAddress() == 8 && Obj.isLittleEndian()) { - typedef ELFType<support::little, true> ELF64LE; - DebugObj = createRTDyldELFObject<ELF64LE>(Buffer->getMemBufferRef(), Obj, L, - ec); - } else + Expected<std::unique_ptr<ObjectFile>> DebugObj(nullptr); + handleAllErrors(DebugObj.takeError()); + if (Obj.getBytesInAddress() == 4 && Obj.isLittleEndian()) + DebugObj = + createRTDyldELFObject<ELF32LE>(Buffer->getMemBufferRef(), Obj, L); + else if (Obj.getBytesInAddress() == 4 && !Obj.isLittleEndian()) + DebugObj = + createRTDyldELFObject<ELF32BE>(Buffer->getMemBufferRef(), Obj, L); + else if (Obj.getBytesInAddress() == 8 && !Obj.isLittleEndian()) + DebugObj = + createRTDyldELFObject<ELF64BE>(Buffer->getMemBufferRef(), Obj, L); + else if (Obj.getBytesInAddress() == 8 && Obj.isLittleEndian()) + DebugObj = + createRTDyldELFObject<ELF64LE>(Buffer->getMemBufferRef(), Obj, L); + else llvm_unreachable("Unexpected ELF format"); - assert(!ec && "Could not construct copy ELF object file"); - - return OwningBinary<ObjectFile>(std::move(DebugObj), std::move(Buffer)); + handleAllErrors(DebugObj.takeError()); + return OwningBinary<ObjectFile>(std::move(*DebugObj), std::move(Buffer)); } OwningBinary<ObjectFile> @@ -601,7 +611,7 @@ Error RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj, if (auto AddendOrErr = i->getAddend()) Addend = *AddendOrErr; else - return errorCodeToError(AddendOrErr.getError()); + return AddendOrErr.takeError(); ++i; if (i == e) @@ -1069,8 +1079,11 @@ RuntimeDyldELF::processRelocationRef( ObjSectionToIDMap &ObjSectionToID, StubMap &Stubs) { const auto &Obj = cast<ELFObjectFileBase>(O); uint64_t RelType = RelI->getType(); - ErrorOr<int64_t> AddendOrErr = ELFRelocationRef(*RelI).getAddend(); - int64_t Addend = AddendOrErr ? *AddendOrErr : 0; + int64_t Addend = 0; + if (Expected<int64_t> AddendOrErr = ELFRelocationRef(*RelI).getAddend()) + Addend = *AddendOrErr; + else + consumeError(AddendOrErr.takeError());
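The Addend handling above shows the contract of Expected<T>: test it, then either use the value or explicitly take and consume the error, because an Expected destroyed with an unchecked failure aborts the process. The same pattern, stand-alone:

    #include "llvm/Support/Error.h"

    using namespace llvm;

    static Expected<int64_t> getAddend(bool HasAddend) {
      if (!HasAddend)
        return make_error<StringError>("relocation has no addend",
                                       inconvertibleErrorCode());
      return 42;
    }

    static int64_t addendOrZero(bool HasAddend) {
      int64_t Addend = 0;
      if (Expected<int64_t> AddendOrErr = getAddend(HasAddend))
        Addend = *AddendOrErr;                 // success: use the value
      else
        consumeError(AddendOrErr.takeError()); // failure: deliberately drop it
      return Addend;
    }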
ELFObjectFileBase &Obj, if (auto AddendOrErr = i->getAddend()) Addend = *AddendOrErr; else - return errorCodeToError(AddendOrErr.getError()); + return AddendOrErr.takeError(); ++i; if (i == e) @@ -1069,8 +1079,11 @@ RuntimeDyldELF::processRelocationRef( ObjSectionToIDMap &ObjSectionToID, StubMap &Stubs) { const auto &Obj = cast(O); uint64_t RelType = RelI->getType(); - ErrorOr AddendOrErr = ELFRelocationRef(*RelI).getAddend(); - int64_t Addend = AddendOrErr ? *AddendOrErr : 0; + int64_t Addend = 0; + if (Expected AddendOrErr = ELFRelocationRef(*RelI).getAddend()) + Addend = *AddendOrErr; + else + consumeError(AddendOrErr.takeError()); elf_symbol_iterator Symbol = RelI->getSymbol(); // Obtain the symbol name which is referenced in the relocation @@ -1263,8 +1276,7 @@ RuntimeDyldELF::processRelocationRef( if (Value.SymbolName) { addRelocationForSymbol(REHi, Value.SymbolName); addRelocationForSymbol(RELo, Value.SymbolName); - } - else { + } else { addRelocationForSection(REHi, Value.SectionID); addRelocationForSection(RELo, Value.SectionID); } @@ -1324,11 +1336,81 @@ RuntimeDyldELF::processRelocationRef( RE.SymOffset = allocateGOTEntries(1); GOTSymbolOffsets[TargetName] = RE.SymOffset; } + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + } else if (RelType == ELF::R_MIPS_26) { + // This is an Mips branch relocation, need to use a stub function. + DEBUG(dbgs() << "\t\tThis is a Mips branch relocation."); + SectionEntry &Section = Sections[SectionID]; + + // Look up for existing stub. + StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) { + RelocationEntry RE(SectionID, Offset, RelType, i->second); + addRelocationForSection(RE, SectionID); + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function. + DEBUG(dbgs() << " Create a new stub function\n"); + Stubs[Value] = Section.getStubOffset(); + + unsigned AbiVariant; + O.getPlatformFlags(AbiVariant); + + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset()), AbiVariant); + + if (IsMipsN32ABI) { + // Creating Hi and Lo relocations for the filled stub instructions. + RelocationEntry REHi(SectionID, StubTargetAddr - Section.getAddress(), + ELF::R_MIPS_HI16, Value.Addend); + RelocationEntry RELo(SectionID, + StubTargetAddr - Section.getAddress() + 4, + ELF::R_MIPS_LO16, Value.Addend); + if (Value.SymbolName) { + addRelocationForSymbol(REHi, Value.SymbolName); + addRelocationForSymbol(RELo, Value.SymbolName); + } else { + addRelocationForSection(REHi, Value.SectionID); + addRelocationForSection(RELo, Value.SectionID); + } + } else { + // Creating Highest, Higher, Hi and Lo relocations for the filled stub + // instructions. 
+ RelocationEntry REHighest(SectionID, + StubTargetAddr - Section.getAddress(), + ELF::R_MIPS_HIGHEST, Value.Addend); + RelocationEntry REHigher(SectionID, + StubTargetAddr - Section.getAddress() + 4, + ELF::R_MIPS_HIGHER, Value.Addend); + RelocationEntry REHi(SectionID, + StubTargetAddr - Section.getAddress() + 12, + ELF::R_MIPS_HI16, Value.Addend); + RelocationEntry RELo(SectionID, + StubTargetAddr - Section.getAddress() + 20, + ELF::R_MIPS_LO16, Value.Addend); + if (Value.SymbolName) { + addRelocationForSymbol(REHighest, Value.SymbolName); + addRelocationForSymbol(REHigher, Value.SymbolName); + addRelocationForSymbol(REHi, Value.SymbolName); + addRelocationForSymbol(RELo, Value.SymbolName); + } else { + addRelocationForSection(REHighest, Value.SectionID); + addRelocationForSection(REHigher, Value.SectionID); + addRelocationForSection(REHi, Value.SectionID); + addRelocationForSection(RELo, Value.SectionID); + } + } + RelocationEntry RE(SectionID, Offset, RelType, Section.getStubOffset()); + addRelocationForSection(RE, SectionID); + Section.advanceStubOffset(getMaxStubSize()); + } + } else { + processSimpleRelocation(SectionID, Offset, RelType, Value); } - if (Value.SymbolName) - addRelocationForSymbol(RE, Value.SymbolName); - else - addRelocationForSection(RE, Value.SectionID); + } else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) { if (RelType == ELF::R_PPC64_REL24) { // Determine ABI variant in use for this object. diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index fb5da6dd8bbb7..f37bd0bbaea65 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -66,8 +66,10 @@ class RuntimeDyldELF : public RuntimeDyldImpl { return 20; // movz; movk; movk; movk; br if (Arch == Triple::arm || Arch == Triple::thumb) return 8; // 32-bit instruction and 32-bit address - else if (IsMipsO32ABI) + else if (IsMipsO32ABI || IsMipsN32ABI) return 16; + else if (IsMipsN64ABI) + return 32; else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) return 44; else if (Arch == Triple::x86_64) diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h index 901f77865ba18..04678f2244660 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h @@ -144,10 +144,7 @@ class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF { ? Value : Sections[RE.Sections.SectionA].getLoadAddressWithOffset( RE.Addend); - assert(static_cast(Result) <= INT32_MAX && - "relocation overflow"); - assert(static_cast(Result) >= INT32_MIN && - "relocation underflow"); + assert(Result <= UINT32_MAX && "relocation overflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_I386_DIR32" << " TargetSection: " << RE.Sections.SectionA @@ -161,10 +158,7 @@ class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF { uint64_t Result = Sections[RE.Sections.SectionA].getLoadAddressWithOffset(RE.Addend) - Sections[0].getLoadAddress(); - assert(static_cast(Result) <= INT32_MAX && - "relocation overflow"); - assert(static_cast(Result) >= INT32_MIN && - "relocation underflow"); + assert(Result <= UINT32_MAX && "relocation overflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_I386_DIR32NB" << " TargetSection: " << RE.Sections.SectionA @@ -178,9 +172,9 @@ class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF { ? 
Value : Sections[RE.Sections.SectionA].getLoadAddress(); Result = Result - Section.getLoadAddress() + RE.Addend - 4 - RE.Offset; - assert(static_cast(Result) <= INT32_MAX && + assert(static_cast(Result) <= INT32_MAX && "relocation overflow"); - assert(static_cast(Result) >= INT32_MIN && + assert(static_cast(Result) >= INT32_MIN && "relocation underflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_I386_REL32" @@ -191,10 +185,8 @@ class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF { } case COFF::IMAGE_REL_I386_SECTION: // 16-bit section index of the section that contains the target. - assert(static_cast(RE.SectionID) <= INT16_MAX && + assert(static_cast(RE.SectionID) <= UINT16_MAX && "relocation overflow"); - assert(static_cast(RE.SectionID) >= INT16_MIN && - "relocation underflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_I386_SECTION Value: " << RE.SectionID << '\n'); @@ -202,14 +194,12 @@ class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF { break; case COFF::IMAGE_REL_I386_SECREL: // 32-bit offset of the target from the beginning of its section. - assert(static_cast(RE.Addend) <= INT32_MAX && + assert(static_cast(RE.Addend) <= UINT32_MAX && "relocation overflow"); - assert(static_cast(RE.Addend) >= INT32_MIN && - "relocation underflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_I386_SECREL Value: " << RE.Addend << '\n'); - writeBytesUnaligned(RE.Addend, Target, 2); + writeBytesUnaligned(RE.Addend, Target, 4); break; default: llvm_unreachable("unsupported relocation type"); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h index 3e4b0c8f75bb4..9000435764dfa 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h @@ -186,10 +186,7 @@ class RuntimeDyldCOFFThumb : public RuntimeDyldCOFF { ? Value : Sections[RE.Sections.SectionA].getLoadAddressWithOffset(RE.Addend); Result |= ISASelectionBit; - assert(static_cast(Result) <= INT32_MAX && - "relocation overflow"); - assert(static_cast(Result) >= INT32_MIN && - "relocation underflow"); + assert(Result <= UINT32_MAX && "relocation overflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_ARM_ADDR32" << " TargetSection: " << RE.Sections.SectionA @@ -202,10 +199,7 @@ class RuntimeDyldCOFFThumb : public RuntimeDyldCOFF { // NOTE: use Section[0].getLoadAddress() as an approximation of ImageBase uint64_t Result = Sections[RE.Sections.SectionA].getLoadAddress() - Sections[0].getLoadAddress() + RE.Addend; - assert(static_cast(Result) <= INT32_MAX && - "relocation overflow"); - assert(static_cast(Result) >= INT32_MIN && - "relocation underflow"); + assert(Result <= UINT32_MAX && "relocation overflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_ARM_ADDR32NB" << " TargetSection: " << RE.Sections.SectionA @@ -216,10 +210,8 @@ class RuntimeDyldCOFFThumb : public RuntimeDyldCOFF { } case COFF::IMAGE_REL_ARM_SECTION: // 16-bit section index of the section that contains the target. 
- assert(static_cast(RE.SectionID) <= INT16_MAX && + assert(static_cast(RE.SectionID) <= UINT16_MAX && "relocation overflow"); - assert(static_cast(RE.SectionID) >= INT16_MIN && - "relocation underflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_ARM_SECTION Value: " << RE.SectionID << '\n'); @@ -227,10 +219,8 @@ class RuntimeDyldCOFFThumb : public RuntimeDyldCOFF { break; case COFF::IMAGE_REL_ARM_SECREL: // 32-bit offset of the target from the beginning of its section. - assert(static_cast(RE.Addend) <= INT32_MAX && + assert(static_cast(RE.Addend) <= UINT32_MAX && "relocation overflow"); - assert(static_cast(RE.Addend) >= INT32_MIN && - "relocation underflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_ARM_SECREL Value: " << RE.Addend << '\n'); @@ -240,10 +230,7 @@ class RuntimeDyldCOFFThumb : public RuntimeDyldCOFF { // 32-bit VA of the target applied to a contiguous MOVW+MOVT pair. uint64_t Result = Sections[RE.Sections.SectionA].getLoadAddressWithOffset(RE.Addend); - assert(static_cast(Result) <= INT32_MAX && - "relocation overflow"); - assert(static_cast(Result) >= INT32_MIN && - "relocation underflow"); + assert(Result <= UINT32_MAX && "relocation overflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_ARM_MOV32T" << " TargetSection: " << RE.Sections.SectionA @@ -271,9 +258,9 @@ class RuntimeDyldCOFFThumb : public RuntimeDyldCOFF { // The most significant 20-bits of the signed 21-bit relative displacement uint64_t Value = RE.Addend - (Sections[RE.SectionID].getLoadAddress() + RE.Offset) - 4; - assert(static_cast(RE.Addend) <= INT32_MAX && + assert(static_cast(RE.Addend) <= INT32_MAX && "relocation overflow"); - assert(static_cast(RE.Addend) >= INT32_MIN && + assert(static_cast(RE.Addend) >= INT32_MIN && "relocation underflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_ARM_BRANCH20T" @@ -286,9 +273,9 @@ class RuntimeDyldCOFFThumb : public RuntimeDyldCOFF { // The most significant 24-bits of the signed 25-bit relative displacement uint64_t Value = RE.Addend - (Sections[RE.SectionID].getLoadAddress() + RE.Offset) - 4; - assert(static_cast(RE.Addend) <= INT32_MAX && + assert(static_cast(RE.Addend) <= INT32_MAX && "relocation overflow"); - assert(static_cast(RE.Addend) >= INT32_MIN && + assert(static_cast(RE.Addend) >= INT32_MIN && "relocation underflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_ARM_BRANCH24T" @@ -301,9 +288,9 @@ class RuntimeDyldCOFFThumb : public RuntimeDyldCOFF { // The most significant 24-bits of the signed 25-bit relative displacement uint64_t Value = RE.Addend - (Sections[RE.SectionID].getLoadAddress() + RE.Offset) - 4; - assert(static_cast(RE.Addend) <= INT32_MAX && + assert(static_cast(RE.Addend) <= INT32_MAX && "relocation overflow"); - assert(static_cast(RE.Addend) >= INT32_MIN && + assert(static_cast(RE.Addend) >= INT32_MIN && "relocation underflow"); DEBUG(dbgs() << "\t\tOffset: " << RE.Offset << " RelType: IMAGE_REL_ARM_BLX23T" @@ -321,4 +308,3 @@ class RuntimeDyldCOFFThumb : public RuntimeDyldCOFF { } #endif - diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp index 926996d6f7b3c..fe0f48e66a81d 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp @@ -116,6 +116,8 @@ int64_t RuntimeDyldELFMips::evaluateMIPS64Relocation( << format("%llx", 
Section.getLoadAddressWithOffset(Offset)) << " Value: 0x" << format("%llx", Value) << " Type: 0x" << format("%x", Type) << " Addend: 0x" << format("%llx", Addend) + << " Offset: " << format("%llx" PRIx64, Offset) + << " SID: " << format("%d", SectionID) << " SymOffset: " << format("%x", SymOffset) << "\n"); switch (Type) { @@ -141,6 +143,10 @@ int64_t RuntimeDyldELFMips::evaluateMIPS64Relocation( return ((Value + Addend + 0x8000) >> 16) & 0xffff; case ELF::R_MIPS_LO16: return (Value + Addend) & 0xffff; + case ELF::R_MIPS_HIGHER: + return ((Value + Addend + 0x80008000) >> 32) & 0xffff; + case ELF::R_MIPS_HIGHEST: + return ((Value + Addend + 0x800080008000) >> 48) & 0xffff; case ELF::R_MIPS_CALL16: case ELF::R_MIPS_GOT_DISP: case ELF::R_MIPS_GOT_PAGE: { @@ -215,6 +221,8 @@ void RuntimeDyldELFMips::applyMIPSRelocation(uint8_t *TargetPtr, int64_t Value, case ELF::R_MIPS_GPREL16: case ELF::R_MIPS_HI16: case ELF::R_MIPS_LO16: + case ELF::R_MIPS_HIGHER: + case ELF::R_MIPS_HIGHEST: case ELF::R_MIPS_PC16: case ELF::R_MIPS_PCHI16: case ELF::R_MIPS_PCLO16: @@ -304,7 +312,8 @@ void RuntimeDyldELFMips::resolveMIPSO32Relocation(const SectionEntry &Section, << format("%p", Section.getLoadAddressWithOffset(Offset)) << " Value: " << format("%x", Value) << " Type: " << format("%x", Type) - << " Addend: " << format("%x", Addend) << "\n"); + << " Addend: " << format("%x", Addend) + << " SymOffset: " << format("%x", Offset) << "\n"); Value = evaluateMIPS32Relocation(Section, Offset, Value, Type); diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp index 5df5e1eabec37..18dfa4e3c319c 100644 --- a/lib/ExecutionEngine/TargetSelect.cpp +++ b/lib/ExecutionEngine/TargetSelect.cpp @@ -95,7 +95,8 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr, Options, RelocModel, CMModel, OptLevel, - /*JIT*/ true); + /*JIT*/ true); + Target->Options.EmulatedTLS = EmulatedTLS; assert(Target && "Could not allocate target machine!"); return Target; } diff --git a/lib/FuzzMutate/FuzzerCLI.cpp b/lib/FuzzMutate/FuzzerCLI.cpp index 3b71cde5af7c3..ef6958efa0b7c 100644 --- a/lib/FuzzMutate/FuzzerCLI.cpp +++ b/lib/FuzzMutate/FuzzerCLI.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/FuzzMutate/FuzzerCLI.h" -#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" @@ -31,6 +31,42 @@ void llvm::parseFuzzerCLOpts(int ArgC, char *ArgV[]) { cl::ParseCommandLineOptions(CLArgs.size(), CLArgs.data()); } +void llvm::handleExecNameEncodedBEOpts(StringRef ExecName) { + std::vector Args{ExecName}; + + auto NameAndArgs = ExecName.split("--"); + if (NameAndArgs.second.empty()) + return; + + SmallVector Opts; + NameAndArgs.second.split(Opts, '-'); + for (StringRef Opt : Opts) { + if (Opt.equals("gisel")) { + Args.push_back("-global-isel"); + // For now we default GlobalISel to -O0 + Args.push_back("-O0"); + } else if (Opt.startswith("O")) { + Args.push_back("-" + Opt.str()); + } else if (Triple(Opt).getArch()) { + Args.push_back("-mtriple=" + Opt.str()); + } else { + errs() << ExecName << ": Unknown option: " << Opt << ".\n"; + exit(1); + } + } + errs() << NameAndArgs.first << ": Injected args:"; + for (int I = 1, E = Args.size(); I < E; ++I) + errs() << " " << Args[I]; + errs() << "\n"; + + std::vector CLArgs; + 
CLArgs.reserve(Args.size()); + for (std::string &S : Args) + CLArgs.push_back(S.c_str()); + + cl::ParseCommandLineOptions(CLArgs.size(), CLArgs.data()); +} + int llvm::runFuzzerOnInputs(int ArgC, char *ArgV[], FuzzerTestFun TestOne, FuzzerInitFun Init) { errs() << "*** This tool was not linked to libFuzzer.\n" diff --git a/lib/Fuzzer/FuzzerClangCounters.cpp b/lib/Fuzzer/FuzzerClangCounters.cpp deleted file mode 100644 index f69e922cf0042..0000000000000 --- a/lib/Fuzzer/FuzzerClangCounters.cpp +++ /dev/null @@ -1,49 +0,0 @@ -//===- FuzzerExtraCounters.cpp - Extra coverage counters ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Coverage counters from Clang's SourceBasedCodeCoverage. -//===----------------------------------------------------------------------===// - -// Support for SourceBasedCodeCoverage is experimental: -// * Works only for the main binary, not DSOs yet. -// * Works only on Linux. -// * Does not implement print_pcs/print_coverage yet. -// * Is not fully evaluated for performance and sensitivity. -// We expect large performance drop due to 64-bit counters, -// and *maybe* better sensitivity due to more fine-grained counters. -// Preliminary comparison on a single benchmark (RE2) shows -// a bit worse sensitivity though. - -#include "FuzzerDefs.h" - -#if LIBFUZZER_LINUX -__attribute__((weak)) extern uint64_t __start___llvm_prf_cnts; -__attribute__((weak)) extern uint64_t __stop___llvm_prf_cnts; -namespace fuzzer { -uint64_t *ClangCountersBegin() { return &__start___llvm_prf_cnts; } -uint64_t *ClangCountersEnd() { return &__stop___llvm_prf_cnts; } -} // namespace fuzzer -#else -// TODO: Implement on Mac (if the data shows it's worth it). -//__attribute__((visibility("hidden"))) -//extern uint64_t CountersStart __asm("section$start$__DATA$__llvm_prf_cnts"); -//__attribute__((visibility("hidden"))) -//extern uint64_t CountersEnd __asm("section$end$__DATA$__llvm_prf_cnts"); -namespace fuzzer { -uint64_t *ClangCountersBegin() { return nullptr; } -uint64_t *ClangCountersEnd() { return nullptr; } -} // namespace fuzzer -#endif - -namespace fuzzer { -ATTRIBUTE_NO_SANITIZE_ALL -void ClearClangCounters() { // hand-written memset, don't asan-ify. - for (auto P = ClangCountersBegin(); P < ClangCountersEnd(); P++) - *P = 0; -} -} diff --git a/lib/Fuzzer/FuzzerCorpus.h b/lib/Fuzzer/FuzzerCorpus.h deleted file mode 100644 index bae0aea78f13a..0000000000000 --- a/lib/Fuzzer/FuzzerCorpus.h +++ /dev/null @@ -1,275 +0,0 @@ -//===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// fuzzer::InputCorpus -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_CORPUS -#define LLVM_FUZZER_CORPUS - -#include "FuzzerDefs.h" -#include "FuzzerIO.h" -#include "FuzzerRandom.h" -#include "FuzzerSHA1.h" -#include "FuzzerTracePC.h" -#include -#include -#include -#include - -namespace fuzzer { - -struct InputInfo { - Unit U; // The actual input data. - uint8_t Sha1[kSHA1NumBytes]; // Checksum. - // Number of features that this input has and no smaller input has. 
- size_t NumFeatures = 0; - size_t Tmp = 0; // Used by ValidateFeatureSet. - // Stats. - size_t NumExecutedMutations = 0; - size_t NumSuccessfullMutations = 0; - bool MayDeleteFile = false; - bool Reduced = false; - std::vector UniqFeatureSet; -}; - -class InputCorpus { - static const size_t kFeatureSetSize = 1 << 21; - public: - InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) { - memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); - memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); - } - ~InputCorpus() { - for (auto II : Inputs) - delete II; - } - size_t size() const { return Inputs.size(); } - size_t SizeInBytes() const { - size_t Res = 0; - for (auto II : Inputs) - Res += II->U.size(); - return Res; - } - size_t NumActiveUnits() const { - size_t Res = 0; - for (auto II : Inputs) - Res += !II->U.empty(); - return Res; - } - size_t MaxInputSize() const { - size_t Res = 0; - for (auto II : Inputs) - Res = std::max(Res, II->U.size()); - return Res; - } - bool empty() const { return Inputs.empty(); } - const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } - void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, - const std::vector &FeatureSet) { - assert(!U.empty()); - if (FeatureDebug) - Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures); - Inputs.push_back(new InputInfo()); - InputInfo &II = *Inputs.back(); - II.U = U; - II.NumFeatures = NumFeatures; - II.MayDeleteFile = MayDeleteFile; - II.UniqFeatureSet = FeatureSet; - std::sort(II.UniqFeatureSet.begin(), II.UniqFeatureSet.end()); - ComputeSHA1(U.data(), U.size(), II.Sha1); - Hashes.insert(Sha1ToString(II.Sha1)); - UpdateCorpusDistribution(); - PrintCorpus(); - // ValidateFeatureSet(); - } - - // Debug-only - void PrintUnit(const Unit &U) { - if (!FeatureDebug) return; - for (uint8_t C : U) { - if (C != 'F' && C != 'U' && C != 'Z') - C = '.'; - Printf("%c", C); - } - } - - // Debug-only - void PrintFeatureSet(const std::vector &FeatureSet) { - if (!FeatureDebug) return; - Printf("{"); - for (uint32_t Feature: FeatureSet) - Printf("%u,", Feature); - Printf("}"); - } - - // Debug-only - void PrintCorpus() { - if (!FeatureDebug) return; - Printf("======= CORPUS:\n"); - int i = 0; - for (auto II : Inputs) { - if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) { - Printf("[%2d] ", i); - Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size()); - PrintUnit(II->U); - Printf(" "); - PrintFeatureSet(II->UniqFeatureSet); - Printf("\n"); - } - i++; - } - } - - void Replace(InputInfo *II, const Unit &U) { - assert(II->U.size() > U.size()); - Hashes.erase(Sha1ToString(II->Sha1)); - DeleteFile(*II); - ComputeSHA1(U.data(), U.size(), II->Sha1); - Hashes.insert(Sha1ToString(II->Sha1)); - II->U = U; - II->Reduced = true; - } - - bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } - bool HasUnit(const std::string &H) { return Hashes.count(H); } - InputInfo &ChooseUnitToMutate(Random &Rand) { - InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)]; - assert(!II.U.empty()); - return II; - }; - - // Returns an index of random unit from the corpus to mutate. - // Hypothesis: units added to the corpus last are more likely to be - // interesting. This function gives more weight to the more recent units. 
- size_t ChooseUnitIdxToMutate(Random &Rand) { - size_t Idx = static_cast(CorpusDistribution(Rand)); - assert(Idx < Inputs.size()); - return Idx; - } - - void PrintStats() { - for (size_t i = 0; i < Inputs.size(); i++) { - const auto &II = *Inputs[i]; - Printf(" [%zd %s]\tsz: %zd\truns: %zd\tsucc: %zd\n", i, - Sha1ToString(II.Sha1).c_str(), II.U.size(), - II.NumExecutedMutations, II.NumSuccessfullMutations); - } - } - - void PrintFeatureSet() { - for (size_t i = 0; i < kFeatureSetSize; i++) { - if(size_t Sz = GetFeature(i)) - Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz); - } - Printf("\n\t"); - for (size_t i = 0; i < Inputs.size(); i++) - if (size_t N = Inputs[i]->NumFeatures) - Printf(" %zd=>%zd ", i, N); - Printf("\n"); - } - - void DeleteFile(const InputInfo &II) { - if (!OutputCorpus.empty() && II.MayDeleteFile) - RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1))); - } - - void DeleteInput(size_t Idx) { - InputInfo &II = *Inputs[Idx]; - DeleteFile(II); - Unit().swap(II.U); - if (FeatureDebug) - Printf("EVICTED %zd\n", Idx); - } - - bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { - assert(NewSize); - Idx = Idx % kFeatureSetSize; - uint32_t OldSize = GetFeature(Idx); - if (OldSize == 0 || (Shrink && OldSize > NewSize)) { - if (OldSize > 0) { - size_t OldIdx = SmallestElementPerFeature[Idx]; - InputInfo &II = *Inputs[OldIdx]; - assert(II.NumFeatures > 0); - II.NumFeatures--; - if (II.NumFeatures == 0) - DeleteInput(OldIdx); - } else { - NumAddedFeatures++; - } - NumUpdatedFeatures++; - if (FeatureDebug) - Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize); - SmallestElementPerFeature[Idx] = Inputs.size(); - InputSizesPerFeature[Idx] = NewSize; - return true; - } - return false; - } - - size_t NumFeatures() const { return NumAddedFeatures; } - size_t NumFeatureUpdates() const { return NumUpdatedFeatures; } - - void ResetFeatureSet() { - assert(Inputs.empty()); - memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); - memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); - } - -private: - - static const bool FeatureDebug = false; - - size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; } - - void ValidateFeatureSet() { - if (FeatureDebug) - PrintFeatureSet(); - for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) - if (GetFeature(Idx)) - Inputs[SmallestElementPerFeature[Idx]]->Tmp++; - for (auto II: Inputs) { - if (II->Tmp != II->NumFeatures) - Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures); - assert(II->Tmp == II->NumFeatures); - II->Tmp = 0; - } - } - - // Updates the probability distribution for the units in the corpus. - // Must be called whenever the corpus or unit weights are changed. 
- void UpdateCorpusDistribution() { - size_t N = Inputs.size(); - assert(N); - Intervals.resize(N + 1); - Weights.resize(N); - std::iota(Intervals.begin(), Intervals.end(), 0); - for (size_t i = 0; i < N; i++) - Weights[i] = Inputs[i]->NumFeatures * (i + 1); - CorpusDistribution = std::piecewise_constant_distribution( - Intervals.begin(), Intervals.end(), Weights.begin()); - } - std::piecewise_constant_distribution CorpusDistribution; - - std::vector Intervals; - std::vector Weights; - - std::unordered_set Hashes; - std::vector Inputs; - - size_t NumAddedFeatures = 0; - size_t NumUpdatedFeatures = 0; - uint32_t InputSizesPerFeature[kFeatureSetSize]; - uint32_t SmallestElementPerFeature[kFeatureSetSize]; - - std::string OutputCorpus; -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_CORPUS diff --git a/lib/Fuzzer/FuzzerCrossOver.cpp b/lib/Fuzzer/FuzzerCrossOver.cpp deleted file mode 100644 index 8b0fd7d529a81..0000000000000 --- a/lib/Fuzzer/FuzzerCrossOver.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//===- FuzzerCrossOver.cpp - Cross over two test inputs -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Cross over test inputs. -//===----------------------------------------------------------------------===// - -#include "FuzzerDefs.h" -#include "FuzzerMutate.h" -#include "FuzzerRandom.h" -#include - -namespace fuzzer { - -// Cross Data1 and Data2, store the result (up to MaxOutSize bytes) in Out. -size_t MutationDispatcher::CrossOver(const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, - uint8_t *Out, size_t MaxOutSize) { - assert(Size1 || Size2); - MaxOutSize = Rand(MaxOutSize) + 1; - size_t OutPos = 0; - size_t Pos1 = 0; - size_t Pos2 = 0; - size_t *InPos = &Pos1; - size_t InSize = Size1; - const uint8_t *Data = Data1; - bool CurrentlyUsingFirstData = true; - while (OutPos < MaxOutSize && (Pos1 < Size1 || Pos2 < Size2)) { - // Merge a part of Data into Out. - size_t OutSizeLeft = MaxOutSize - OutPos; - if (*InPos < InSize) { - size_t InSizeLeft = InSize - *InPos; - size_t MaxExtraSize = std::min(OutSizeLeft, InSizeLeft); - size_t ExtraSize = Rand(MaxExtraSize) + 1; - memcpy(Out + OutPos, Data + *InPos, ExtraSize); - OutPos += ExtraSize; - (*InPos) += ExtraSize; - } - // Use the other input data on the next iteration. - InPos = CurrentlyUsingFirstData ? &Pos2 : &Pos1; - InSize = CurrentlyUsingFirstData ? Size2 : Size1; - Data = CurrentlyUsingFirstData ? Data2 : Data1; - CurrentlyUsingFirstData = !CurrentlyUsingFirstData; - } - return OutPos; -} - -} // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerDefs.h b/lib/Fuzzer/FuzzerDefs.h deleted file mode 100644 index bbb44514aab79..0000000000000 --- a/lib/Fuzzer/FuzzerDefs.h +++ /dev/null @@ -1,132 +0,0 @@ -//===- FuzzerDefs.h - Internal header for the Fuzzer ------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Basic definitions. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_DEFS_H -#define LLVM_FUZZER_DEFS_H - -#include -#include -#include -#include -#include -#include - -// Platform detection. 
-#ifdef __linux__ -#define LIBFUZZER_APPLE 0 -#define LIBFUZZER_LINUX 1 -#define LIBFUZZER_WINDOWS 0 -#elif __APPLE__ -#define LIBFUZZER_APPLE 1 -#define LIBFUZZER_LINUX 0 -#define LIBFUZZER_WINDOWS 0 -#elif _WIN32 -#define LIBFUZZER_APPLE 0 -#define LIBFUZZER_LINUX 0 -#define LIBFUZZER_WINDOWS 1 -#else -#error "Support for your platform has not been implemented" -#endif - -#ifndef __has_attribute -# define __has_attribute(x) 0 -#endif - -#define LIBFUZZER_POSIX LIBFUZZER_APPLE || LIBFUZZER_LINUX - -#ifdef __x86_64 -# if __has_attribute(target) -# define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt"))) -# else -# define ATTRIBUTE_TARGET_POPCNT -# endif -#else -# define ATTRIBUTE_TARGET_POPCNT -#endif - - -#ifdef __clang__ // avoid gcc warning. -# if __has_attribute(no_sanitize) -# define ATTRIBUTE_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory"))) -# else -# define ATTRIBUTE_NO_SANITIZE_MEMORY -# endif -# define ALWAYS_INLINE __attribute__((always_inline)) -#else -# define ATTRIBUTE_NO_SANITIZE_MEMORY -# define ALWAYS_INLINE -#endif // __clang__ - -#define ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address)) - -#if defined(__has_feature) -# if __has_feature(address_sanitizer) -# define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_ADDRESS -# elif __has_feature(memory_sanitizer) -# define ATTRIBUTE_NO_SANITIZE_ALL ATTRIBUTE_NO_SANITIZE_MEMORY -# else -# define ATTRIBUTE_NO_SANITIZE_ALL -# endif -#else -# define ATTRIBUTE_NO_SANITIZE_ALL -#endif - -#if LIBFUZZER_WINDOWS -#define ATTRIBUTE_INTERFACE __declspec(dllexport) -#else -#define ATTRIBUTE_INTERFACE __attribute__((visibility("default"))) -#endif - -namespace fuzzer { - -template T Min(T a, T b) { return a < b ? a : b; } -template T Max(T a, T b) { return a > b ? a : b; } - -class Random; -class Dictionary; -class DictionaryEntry; -class MutationDispatcher; -struct FuzzingOptions; -class InputCorpus; -struct InputInfo; -struct ExternalFunctions; - -// Global interface to functions that may or may not be available. -extern ExternalFunctions *EF; - -typedef std::vector Unit; -typedef std::vector UnitVector; -typedef int (*UserCallback)(const uint8_t *Data, size_t Size); - -int FuzzerDriver(int *argc, char ***argv, UserCallback Callback); - -struct ScopedDoingMyOwnMemOrStr { - ScopedDoingMyOwnMemOrStr() { DoingMyOwnMemOrStr++; } - ~ScopedDoingMyOwnMemOrStr() { DoingMyOwnMemOrStr--; } - static int DoingMyOwnMemOrStr; -}; - -inline uint8_t Bswap(uint8_t x) { return x; } -inline uint16_t Bswap(uint16_t x) { return __builtin_bswap16(x); } -inline uint32_t Bswap(uint32_t x) { return __builtin_bswap32(x); } -inline uint64_t Bswap(uint64_t x) { return __builtin_bswap64(x); } - -uint8_t *ExtraCountersBegin(); -uint8_t *ExtraCountersEnd(); -void ClearExtraCounters(); - -uint64_t *ClangCountersBegin(); -uint64_t *ClangCountersEnd(); -void ClearClangCounters(); - -} // namespace fuzzer - -#endif // LLVM_FUZZER_DEFS_H diff --git a/lib/Fuzzer/FuzzerDictionary.h b/lib/Fuzzer/FuzzerDictionary.h deleted file mode 100644 index 84cee87b8971f..0000000000000 --- a/lib/Fuzzer/FuzzerDictionary.h +++ /dev/null @@ -1,127 +0,0 @@ -//===- FuzzerDictionary.h - Internal header for the Fuzzer ------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// fuzzer::Dictionary -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_DICTIONARY_H -#define LLVM_FUZZER_DICTIONARY_H - -#include "FuzzerDefs.h" -#include "FuzzerIO.h" -#include "FuzzerUtil.h" -#include -#include - -namespace fuzzer { -// A simple POD sized array of bytes. -template class FixedWord { -public: - static const size_t kMaxSize = kMaxSizeT; - FixedWord() {} - FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); } - - void Set(const uint8_t *B, uint8_t S) { - assert(S <= kMaxSize); - memcpy(Data, B, S); - Size = S; - } - - bool operator==(const FixedWord &w) const { - ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_os_str; - return Size == w.Size && 0 == memcmp(Data, w.Data, Size); - } - - bool operator<(const FixedWord &w) const { - ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_os_str; - if (Size != w.Size) - return Size < w.Size; - return memcmp(Data, w.Data, Size) < 0; - } - - static size_t GetMaxSize() { return kMaxSize; } - const uint8_t *data() const { return Data; } - uint8_t size() const { return Size; } - -private: - uint8_t Size = 0; - uint8_t Data[kMaxSize]; -}; - -typedef FixedWord<64> Word; - -class DictionaryEntry { - public: - DictionaryEntry() {} - DictionaryEntry(Word W) : W(W) {} - DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {} - const Word &GetW() const { return W; } - - bool HasPositionHint() const { return PositionHint != std::numeric_limits::max(); } - size_t GetPositionHint() const { - assert(HasPositionHint()); - return PositionHint; - } - void IncUseCount() { UseCount++; } - void IncSuccessCount() { SuccessCount++; } - size_t GetUseCount() const { return UseCount; } - size_t GetSuccessCount() const {return SuccessCount; } - - void Print(const char *PrintAfter = "\n") { - PrintASCII(W.data(), W.size()); - if (HasPositionHint()) - Printf("@%zd", GetPositionHint()); - Printf("%s", PrintAfter); - } - -private: - Word W; - size_t PositionHint = std::numeric_limits::max(); - size_t UseCount = 0; - size_t SuccessCount = 0; -}; - -class Dictionary { - public: - static const size_t kMaxDictSize = 1 << 14; - - bool ContainsWord(const Word &W) const { - return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { - return DE.GetW() == W; - }); - } - const DictionaryEntry *begin() const { return &DE[0]; } - const DictionaryEntry *end() const { return begin() + Size; } - DictionaryEntry & operator[] (size_t Idx) { - assert(Idx < Size); - return DE[Idx]; - } - void push_back(DictionaryEntry DE) { - if (Size < kMaxDictSize) - this->DE[Size++] = DE; - } - void clear() { Size = 0; } - bool empty() const { return Size == 0; } - size_t size() const { return Size; } - -private: - DictionaryEntry DE[kMaxDictSize]; - size_t Size = 0; -}; - -// Parses one dictionary entry. -// If successfull, write the enty to Unit and returns true, -// otherwise returns false. -bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); -// Parses the dictionary file, fills Units, returns true iff all lines -// were parsed succesfully. 
-bool ParseDictionaryFile(const std::string &Text, std::vector *Units); - -} // namespace fuzzer - -#endif // LLVM_FUZZER_DICTIONARY_H diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp deleted file mode 100644 index 17891d29c5d5a..0000000000000 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ /dev/null @@ -1,764 +0,0 @@ -//===- FuzzerDriver.cpp - FuzzerDriver function and flags -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// FuzzerDriver and flag parsing. -//===----------------------------------------------------------------------===// - -#include "FuzzerCorpus.h" -#include "FuzzerIO.h" -#include "FuzzerInterface.h" -#include "FuzzerInternal.h" -#include "FuzzerMutate.h" -#include "FuzzerRandom.h" -#include "FuzzerShmem.h" -#include "FuzzerTracePC.h" -#include -#include -#include -#include -#include -#include -#include -#include - -// This function should be present in the libFuzzer so that the client -// binary can test for its existence. -extern "C" __attribute__((used)) void __libfuzzer_is_present() {} - -namespace fuzzer { - -// Program arguments. -struct FlagDescription { - const char *Name; - const char *Description; - int Default; - int *IntFlag; - const char **StrFlag; - unsigned int *UIntFlag; -}; - -struct { -#define FUZZER_DEPRECATED_FLAG(Name) -#define FUZZER_FLAG_INT(Name, Default, Description) int Name; -#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) unsigned int Name; -#define FUZZER_FLAG_STRING(Name, Description) const char *Name; -#include "FuzzerFlags.def" -#undef FUZZER_DEPRECATED_FLAG -#undef FUZZER_FLAG_INT -#undef FUZZER_FLAG_UNSIGNED -#undef FUZZER_FLAG_STRING -} Flags; - -static const FlagDescription FlagDescriptions [] { -#define FUZZER_DEPRECATED_FLAG(Name) \ - {#Name, "Deprecated; don't use", 0, nullptr, nullptr, nullptr}, -#define FUZZER_FLAG_INT(Name, Default, Description) \ - {#Name, Description, Default, &Flags.Name, nullptr, nullptr}, -#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) \ - {#Name, Description, static_cast(Default), \ - nullptr, nullptr, &Flags.Name}, -#define FUZZER_FLAG_STRING(Name, Description) \ - {#Name, Description, 0, nullptr, &Flags.Name, nullptr}, -#include "FuzzerFlags.def" -#undef FUZZER_DEPRECATED_FLAG -#undef FUZZER_FLAG_INT -#undef FUZZER_FLAG_UNSIGNED -#undef FUZZER_FLAG_STRING -}; - -static const size_t kNumFlags = - sizeof(FlagDescriptions) / sizeof(FlagDescriptions[0]); - -static std::vector *Inputs; -static std::string *ProgName; - -static void PrintHelp() { - Printf("Usage:\n"); - auto Prog = ProgName->c_str(); - Printf("\nTo run fuzzing pass 0 or more directories.\n"); - Printf("%s [-flag1=val1 [-flag2=val2 ...] ] [dir1 [dir2 ...] ]\n", Prog); - - Printf("\nTo run individual tests without fuzzing pass 1 or more files:\n"); - Printf("%s [-flag1=val1 [-flag2=val2 ...] 
] file1 [file2 ...]\n", Prog); - - Printf("\nFlags: (strictly in form -flag=value)\n"); - size_t MaxFlagLen = 0; - for (size_t F = 0; F < kNumFlags; F++) - MaxFlagLen = std::max(strlen(FlagDescriptions[F].Name), MaxFlagLen); - - for (size_t F = 0; F < kNumFlags; F++) { - const auto &D = FlagDescriptions[F]; - if (strstr(D.Description, "internal flag") == D.Description) continue; - Printf(" %s", D.Name); - for (size_t i = 0, n = MaxFlagLen - strlen(D.Name); i < n; i++) - Printf(" "); - Printf("\t"); - Printf("%d\t%s\n", D.Default, D.Description); - } - Printf("\nFlags starting with '--' will be ignored and " - "will be passed verbatim to subprocesses.\n"); -} - -static const char *FlagValue(const char *Param, const char *Name) { - size_t Len = strlen(Name); - if (Param[0] == '-' && strstr(Param + 1, Name) == Param + 1 && - Param[Len + 1] == '=') - return &Param[Len + 2]; - return nullptr; -} - -// Avoid calling stol as it triggers a bug in clang/glibc build. -static long MyStol(const char *Str) { - long Res = 0; - long Sign = 1; - if (*Str == '-') { - Str++; - Sign = -1; - } - for (size_t i = 0; Str[i]; i++) { - char Ch = Str[i]; - if (Ch < '0' || Ch > '9') - return Res; - Res = Res * 10 + (Ch - '0'); - } - return Res * Sign; -} - -static bool ParseOneFlag(const char *Param) { - if (Param[0] != '-') return false; - if (Param[1] == '-') { - static bool PrintedWarning = false; - if (!PrintedWarning) { - PrintedWarning = true; - Printf("INFO: libFuzzer ignores flags that start with '--'\n"); - } - for (size_t F = 0; F < kNumFlags; F++) - if (FlagValue(Param + 1, FlagDescriptions[F].Name)) - Printf("WARNING: did you mean '%s' (single dash)?\n", Param + 1); - return true; - } - for (size_t F = 0; F < kNumFlags; F++) { - const char *Name = FlagDescriptions[F].Name; - const char *Str = FlagValue(Param, Name); - if (Str) { - if (FlagDescriptions[F].IntFlag) { - int Val = MyStol(Str); - *FlagDescriptions[F].IntFlag = Val; - if (Flags.verbosity >= 2) - Printf("Flag: %s %d\n", Name, Val); - return true; - } else if (FlagDescriptions[F].UIntFlag) { - unsigned int Val = std::stoul(Str); - *FlagDescriptions[F].UIntFlag = Val; - if (Flags.verbosity >= 2) - Printf("Flag: %s %u\n", Name, Val); - return true; - } else if (FlagDescriptions[F].StrFlag) { - *FlagDescriptions[F].StrFlag = Str; - if (Flags.verbosity >= 2) - Printf("Flag: %s %s\n", Name, Str); - return true; - } else { // Deprecated flag. - Printf("Flag: %s: deprecated, don't use\n", Name); - return true; - } - } - } - Printf("\n\nWARNING: unrecognized flag '%s'; " - "use -help=1 to list all flags\n\n", Param); - return true; -} - -// We don't use any library to minimize dependencies. 
-static void ParseFlags(const std::vector &Args) { - for (size_t F = 0; F < kNumFlags; F++) { - if (FlagDescriptions[F].IntFlag) - *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default; - if (FlagDescriptions[F].UIntFlag) - *FlagDescriptions[F].UIntFlag = - static_cast(FlagDescriptions[F].Default); - if (FlagDescriptions[F].StrFlag) - *FlagDescriptions[F].StrFlag = nullptr; - } - Inputs = new std::vector; - for (size_t A = 1; A < Args.size(); A++) { - if (ParseOneFlag(Args[A].c_str())) { - if (Flags.ignore_remaining_args) - break; - continue; - } - Inputs->push_back(Args[A]); - } -} - -static std::mutex Mu; - -static void PulseThread() { - while (true) { - SleepSeconds(600); - std::lock_guard Lock(Mu); - Printf("pulse...\n"); - } -} - -static void WorkerThread(const std::string &Cmd, std::atomic *Counter, - unsigned NumJobs, std::atomic *HasErrors) { - while (true) { - unsigned C = (*Counter)++; - if (C >= NumJobs) break; - std::string Log = "fuzz-" + std::to_string(C) + ".log"; - std::string ToRun = Cmd + " > " + Log + " 2>&1\n"; - if (Flags.verbosity) - Printf("%s", ToRun.c_str()); - int ExitCode = ExecuteCommand(ToRun); - if (ExitCode != 0) - *HasErrors = true; - std::lock_guard Lock(Mu); - Printf("================== Job %u exited with exit code %d ============\n", - C, ExitCode); - fuzzer::CopyFileToErr(Log); - } -} - -std::string CloneArgsWithoutX(const std::vector &Args, - const char *X1, const char *X2) { - std::string Cmd; - for (auto &S : Args) { - if (FlagValue(S.c_str(), X1) || FlagValue(S.c_str(), X2)) - continue; - Cmd += S + " "; - } - return Cmd; -} - -static int RunInMultipleProcesses(const std::vector &Args, - unsigned NumWorkers, unsigned NumJobs) { - std::atomic Counter(0); - std::atomic HasErrors(false); - std::string Cmd = CloneArgsWithoutX(Args, "jobs", "workers"); - std::vector V; - std::thread Pulse(PulseThread); - Pulse.detach(); - for (unsigned i = 0; i < NumWorkers; i++) - V.push_back(std::thread(WorkerThread, Cmd, &Counter, NumJobs, &HasErrors)); - for (auto &T : V) - T.join(); - return HasErrors ? 
1 : 0; -} - -static void RssThread(Fuzzer *F, size_t RssLimitMb) { - while (true) { - SleepSeconds(1); - size_t Peak = GetPeakRSSMb(); - if (Peak > RssLimitMb) - F->RssLimitCallback(); - } -} - -static void StartRssThread(Fuzzer *F, size_t RssLimitMb) { - if (!RssLimitMb) return; - std::thread T(RssThread, F, RssLimitMb); - T.detach(); -} - -int RunOneTest(Fuzzer *F, const char *InputFilePath, size_t MaxLen) { - Unit U = FileToVector(InputFilePath); - if (MaxLen && MaxLen < U.size()) - U.resize(MaxLen); - F->ExecuteCallback(U.data(), U.size()); - F->TryDetectingAMemoryLeak(U.data(), U.size(), true); - return 0; -} - -static bool AllInputsAreFiles() { - if (Inputs->empty()) return false; - for (auto &Path : *Inputs) - if (!IsFile(Path)) - return false; - return true; -} - -static std::string GetDedupTokenFromFile(const std::string &Path) { - auto S = FileToString(Path); - auto Beg = S.find("DEDUP_TOKEN:"); - if (Beg == std::string::npos) - return ""; - auto End = S.find('\n', Beg); - if (End == std::string::npos) - return ""; - return S.substr(Beg, End - Beg); -} - -int CleanseCrashInput(const std::vector &Args, - const FuzzingOptions &Options) { - if (Inputs->size() != 1 || !Flags.exact_artifact_path) { - Printf("ERROR: -cleanse_crash should be given one input file and" - " -exact_artifact_path\n"); - exit(1); - } - std::string InputFilePath = Inputs->at(0); - std::string OutputFilePath = Flags.exact_artifact_path; - std::string BaseCmd = - CloneArgsWithoutX(Args, "cleanse_crash", "cleanse_crash"); - - auto InputPos = BaseCmd.find(" " + InputFilePath + " "); - assert(InputPos != std::string::npos); - BaseCmd.erase(InputPos, InputFilePath.size() + 1); - - auto LogFilePath = DirPlusFile( - TmpDir(), "libFuzzerTemp." + std::to_string(GetPid()) + ".txt"); - auto TmpFilePath = DirPlusFile( - TmpDir(), "libFuzzerTemp." 
+ std::to_string(GetPid()) + ".repro"); - auto LogFileRedirect = " > " + LogFilePath + " 2>&1 "; - - auto Cmd = BaseCmd + " " + TmpFilePath + LogFileRedirect; - - std::string CurrentFilePath = InputFilePath; - auto U = FileToVector(CurrentFilePath); - size_t Size = U.size(); - - const std::vector ReplacementBytes = {' ', 0xff}; - for (int NumAttempts = 0; NumAttempts < 5; NumAttempts++) { - bool Changed = false; - for (size_t Idx = 0; Idx < Size; Idx++) { - Printf("CLEANSE[%d]: Trying to replace byte %zd of %zd\n", NumAttempts, - Idx, Size); - uint8_t OriginalByte = U[Idx]; - if (ReplacementBytes.end() != std::find(ReplacementBytes.begin(), - ReplacementBytes.end(), - OriginalByte)) - continue; - for (auto NewByte : ReplacementBytes) { - U[Idx] = NewByte; - WriteToFile(U, TmpFilePath); - auto ExitCode = ExecuteCommand(Cmd); - RemoveFile(TmpFilePath); - if (!ExitCode) { - U[Idx] = OriginalByte; - } else { - Changed = true; - Printf("CLEANSE: Replaced byte %zd with 0x%x\n", Idx, NewByte); - WriteToFile(U, OutputFilePath); - break; - } - } - } - if (!Changed) break; - } - RemoveFile(LogFilePath); - return 0; -} - -int MinimizeCrashInput(const std::vector &Args, - const FuzzingOptions &Options) { - if (Inputs->size() != 1) { - Printf("ERROR: -minimize_crash should be given one input file\n"); - exit(1); - } - std::string InputFilePath = Inputs->at(0); - auto BaseCmd = SplitBefore( - "-ignore_remaining_args=1", - CloneArgsWithoutX(Args, "minimize_crash", "exact_artifact_path")); - auto InputPos = BaseCmd.first.find(" " + InputFilePath + " "); - assert(InputPos != std::string::npos); - BaseCmd.first.erase(InputPos, InputFilePath.size() + 1); - if (Flags.runs <= 0 && Flags.max_total_time == 0) { - Printf("INFO: you need to specify -runs=N or " - "-max_total_time=N with -minimize_crash=1\n" - "INFO: defaulting to -max_total_time=600\n"); - BaseCmd.first += " -max_total_time=600"; - } - - auto LogFilePath = DirPlusFile( - TmpDir(), "libFuzzerTemp." + std::to_string(GetPid()) + ".txt"); - auto LogFileRedirect = " > " + LogFilePath + " 2>&1 "; - - std::string CurrentFilePath = InputFilePath; - while (true) { - Unit U = FileToVector(CurrentFilePath); - Printf("CRASH_MIN: minimizing crash input: '%s' (%zd bytes)\n", - CurrentFilePath.c_str(), U.size()); - - auto Cmd = BaseCmd.first + " " + CurrentFilePath + LogFileRedirect + " " + - BaseCmd.second; - - Printf("CRASH_MIN: executing: %s\n", Cmd.c_str()); - int ExitCode = ExecuteCommand(Cmd); - if (ExitCode == 0) { - Printf("ERROR: the input %s did not crash\n", CurrentFilePath.c_str()); - exit(1); - } - Printf("CRASH_MIN: '%s' (%zd bytes) caused a crash. Will try to minimize " - "it further\n", - CurrentFilePath.c_str(), U.size()); - auto DedupToken1 = GetDedupTokenFromFile(LogFilePath); - if (!DedupToken1.empty()) - Printf("CRASH_MIN: DedupToken1: %s\n", DedupToken1.c_str()); - - std::string ArtifactPath = - Flags.exact_artifact_path - ? 
Flags.exact_artifact_path - : Options.ArtifactPrefix + "minimized-from-" + Hash(U); - Cmd += " -minimize_crash_internal_step=1 -exact_artifact_path=" + - ArtifactPath; - Printf("CRASH_MIN: executing: %s\n", Cmd.c_str()); - ExitCode = ExecuteCommand(Cmd); - CopyFileToErr(LogFilePath); - if (ExitCode == 0) { - if (Flags.exact_artifact_path) { - CurrentFilePath = Flags.exact_artifact_path; - WriteToFile(U, CurrentFilePath); - } - Printf("CRASH_MIN: failed to minimize beyond %s (%d bytes), exiting\n", - CurrentFilePath.c_str(), U.size()); - break; - } - auto DedupToken2 = GetDedupTokenFromFile(LogFilePath); - if (!DedupToken2.empty()) - Printf("CRASH_MIN: DedupToken2: %s\n", DedupToken2.c_str()); - - if (DedupToken1 != DedupToken2) { - if (Flags.exact_artifact_path) { - CurrentFilePath = Flags.exact_artifact_path; - WriteToFile(U, CurrentFilePath); - } - Printf("CRASH_MIN: mismatch in dedup tokens" - " (looks like a different bug). Won't minimize further\n"); - break; - } - - CurrentFilePath = ArtifactPath; - Printf("*********************************\n"); - } - RemoveFile(LogFilePath); - return 0; -} - -int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) { - assert(Inputs->size() == 1); - std::string InputFilePath = Inputs->at(0); - Unit U = FileToVector(InputFilePath); - Printf("INFO: Starting MinimizeCrashInputInternalStep: %zd\n", U.size()); - if (U.size() < 2) { - Printf("INFO: The input is small enough, exiting\n"); - exit(0); - } - F->SetMaxInputLen(U.size()); - F->SetMaxMutationLen(U.size() - 1); - F->MinimizeCrashLoop(U); - Printf("INFO: Done MinimizeCrashInputInternalStep, no crashes found\n"); - exit(0); - return 0; -} - -int AnalyzeDictionary(Fuzzer *F, const std::vector& Dict, - UnitVector& Corpus) { - Printf("Started dictionary minimization (up to %d tests)\n", - Dict.size() * Corpus.size() * 2); - - // Scores and usage count for each dictionary unit. - std::vector Scores(Dict.size()); - std::vector Usages(Dict.size()); - - std::vector InitialFeatures; - std::vector ModifiedFeatures; - for (auto &C : Corpus) { - // Get coverage for the testcase without modifications. - F->ExecuteCallback(C.data(), C.size()); - InitialFeatures.clear(); - TPC.CollectFeatures([&](size_t Feature) -> bool { - InitialFeatures.push_back(Feature); - return true; - }); - - for (size_t i = 0; i < Dict.size(); ++i) { - auto Data = C; - auto StartPos = std::search(Data.begin(), Data.end(), - Dict[i].begin(), Dict[i].end()); - // Skip dictionary unit, if the testcase does not contain it. - if (StartPos == Data.end()) - continue; - - ++Usages[i]; - while (StartPos != Data.end()) { - // Replace all occurrences of dictionary unit in the testcase. - auto EndPos = StartPos + Dict[i].size(); - for (auto It = StartPos; It != EndPos; ++It) - *It ^= 0xFF; - - StartPos = std::search(EndPos, Data.end(), - Dict[i].begin(), Dict[i].end()); - } - - // Get coverage for testcase with masked occurrences of dictionary unit. - F->ExecuteCallback(Data.data(), Data.size()); - ModifiedFeatures.clear(); - TPC.CollectFeatures([&](size_t Feature) -> bool { - ModifiedFeatures.push_back(Feature); - return true; - }); - - if (InitialFeatures == ModifiedFeatures) - --Scores[i]; - else - Scores[i] += 2; - } - } - - Printf("###### Useless dictionary elements. ######\n"); - for (size_t i = 0; i < Dict.size(); ++i) { - // Dictionary units with positive score are treated as useful ones. 
- if (Scores[i] > 0) - continue; - - Printf("\""); - PrintASCII(Dict[i].data(), Dict[i].size(), "\""); - Printf(" # Score: %d, Used: %d\n", Scores[i], Usages[i]); - } - Printf("###### End of useless dictionary elements. ######\n"); - return 0; -} - -int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { - using namespace fuzzer; - assert(argc && argv && "Argument pointers cannot be nullptr"); - std::string Argv0((*argv)[0]); - EF = new ExternalFunctions(); - if (EF->LLVMFuzzerInitialize) - EF->LLVMFuzzerInitialize(argc, argv); - const std::vector Args(*argv, *argv + *argc); - assert(!Args.empty()); - ProgName = new std::string(Args[0]); - if (Argv0 != *ProgName) { - Printf("ERROR: argv[0] has been modified in LLVMFuzzerInitialize\n"); - exit(1); - } - ParseFlags(Args); - if (Flags.help) { - PrintHelp(); - return 0; - } - - if (Flags.close_fd_mask & 2) - DupAndCloseStderr(); - if (Flags.close_fd_mask & 1) - CloseStdout(); - - if (Flags.jobs > 0 && Flags.workers == 0) { - Flags.workers = std::min(NumberOfCpuCores() / 2, Flags.jobs); - if (Flags.workers > 1) - Printf("Running %u workers\n", Flags.workers); - } - - if (Flags.workers > 0 && Flags.jobs > 0) - return RunInMultipleProcesses(Args, Flags.workers, Flags.jobs); - - const size_t kMaxSaneLen = 1 << 20; - const size_t kMinDefaultLen = 4096; - FuzzingOptions Options; - Options.Verbosity = Flags.verbosity; - Options.MaxLen = Flags.max_len; - Options.ExperimentalLenControl = Flags.experimental_len_control; - Options.UnitTimeoutSec = Flags.timeout; - Options.ErrorExitCode = Flags.error_exitcode; - Options.TimeoutExitCode = Flags.timeout_exitcode; - Options.MaxTotalTimeSec = Flags.max_total_time; - Options.DoCrossOver = Flags.cross_over; - Options.MutateDepth = Flags.mutate_depth; - Options.UseCounters = Flags.use_counters; - Options.UseIndirCalls = Flags.use_indir_calls; - Options.UseMemmem = Flags.use_memmem; - Options.UseCmp = Flags.use_cmp; - Options.UseValueProfile = Flags.use_value_profile; - Options.Shrink = Flags.shrink; - Options.ReduceInputs = Flags.reduce_inputs; - Options.ShuffleAtStartUp = Flags.shuffle; - Options.PreferSmall = Flags.prefer_small; - Options.ReloadIntervalSec = Flags.reload; - Options.OnlyASCII = Flags.only_ascii; - Options.DetectLeaks = Flags.detect_leaks; - Options.TraceMalloc = Flags.trace_malloc; - Options.RssLimitMb = Flags.rss_limit_mb; - if (Flags.runs >= 0) - Options.MaxNumberOfRuns = Flags.runs; - if (!Inputs->empty() && !Flags.minimize_crash_internal_step) - Options.OutputCorpus = (*Inputs)[0]; - Options.ReportSlowUnits = Flags.report_slow_units; - if (Flags.artifact_prefix) - Options.ArtifactPrefix = Flags.artifact_prefix; - if (Flags.exact_artifact_path) - Options.ExactArtifactPath = Flags.exact_artifact_path; - std::vector Dictionary; - if (Flags.dict) - if (!ParseDictionaryFile(FileToString(Flags.dict), &Dictionary)) - return 1; - if (Flags.verbosity > 0 && !Dictionary.empty()) - Printf("Dictionary: %zd entries\n", Dictionary.size()); - bool DoPlainRun = AllInputsAreFiles(); - Options.SaveArtifacts = - !DoPlainRun || Flags.minimize_crash_internal_step; - Options.PrintNewCovPcs = Flags.print_pcs; - Options.PrintFinalStats = Flags.print_final_stats; - Options.PrintCorpusStats = Flags.print_corpus_stats; - Options.PrintCoverage = Flags.print_coverage; - Options.DumpCoverage = Flags.dump_coverage; - if (Flags.exit_on_src_pos) - Options.ExitOnSrcPos = Flags.exit_on_src_pos; - if (Flags.exit_on_item) - Options.ExitOnItem = Flags.exit_on_item; - - unsigned Seed = Flags.seed; - // Initialize 
Seed. - if (Seed == 0) - Seed = - std::chrono::system_clock::now().time_since_epoch().count() + GetPid(); - if (Flags.verbosity) - Printf("INFO: Seed: %u\n", Seed); - - Random Rand(Seed); - auto *MD = new MutationDispatcher(Rand, Options); - auto *Corpus = new InputCorpus(Options.OutputCorpus); - auto *F = new Fuzzer(Callback, *Corpus, *MD, Options); - - for (auto &U: Dictionary) - if (U.size() <= Word::GetMaxSize()) - MD->AddWordToManualDictionary(Word(U.data(), U.size())); - - StartRssThread(F, Flags.rss_limit_mb); - - Options.HandleAbrt = Flags.handle_abrt; - Options.HandleBus = Flags.handle_bus; - Options.HandleFpe = Flags.handle_fpe; - Options.HandleIll = Flags.handle_ill; - Options.HandleInt = Flags.handle_int; - Options.HandleSegv = Flags.handle_segv; - Options.HandleTerm = Flags.handle_term; - Options.HandleXfsz = Flags.handle_xfsz; - SetSignalHandler(Options); - - std::atexit(Fuzzer::StaticExitCallback); - - if (Flags.minimize_crash) - return MinimizeCrashInput(Args, Options); - - if (Flags.minimize_crash_internal_step) - return MinimizeCrashInputInternalStep(F, Corpus); - - if (Flags.cleanse_crash) - return CleanseCrashInput(Args, Options); - - if (auto Name = Flags.run_equivalence_server) { - SMR.Destroy(Name); - if (!SMR.Create(Name)) { - Printf("ERROR: can't create shared memory region\n"); - return 1; - } - Printf("INFO: EQUIVALENCE SERVER UP\n"); - while (true) { - SMR.WaitClient(); - size_t Size = SMR.ReadByteArraySize(); - SMR.WriteByteArray(nullptr, 0); - const Unit tmp(SMR.GetByteArray(), SMR.GetByteArray() + Size); - F->ExecuteCallback(tmp.data(), tmp.size()); - SMR.PostServer(); - } - return 0; - } - - if (auto Name = Flags.use_equivalence_server) { - if (!SMR.Open(Name)) { - Printf("ERROR: can't open shared memory region\n"); - return 1; - } - Printf("INFO: EQUIVALENCE CLIENT UP\n"); - } - - if (DoPlainRun) { - Options.SaveArtifacts = false; - int Runs = std::max(1, Flags.runs); - Printf("%s: Running %zd inputs %d time(s) each.\n", ProgName->c_str(), - Inputs->size(), Runs); - for (auto &Path : *Inputs) { - auto StartTime = system_clock::now(); - Printf("Running: %s\n", Path.c_str()); - for (int Iter = 0; Iter < Runs; Iter++) - RunOneTest(F, Path.c_str(), Options.MaxLen); - auto StopTime = system_clock::now(); - auto MS = duration_cast(StopTime - StartTime).count(); - Printf("Executed %s in %zd ms\n", Path.c_str(), (long)MS); - } - Printf("***\n" - "*** NOTE: fuzzing was not performed, you have only\n" - "*** executed the target code on a fixed set of inputs.\n" - "***\n"); - F->PrintFinalStats(); - exit(0); - } - - if (Flags.merge) { - if (Options.MaxLen == 0) - F->SetMaxInputLen(kMaxSaneLen); - if (Flags.merge_control_file) - F->CrashResistantMergeInternalStep(Flags.merge_control_file); - else - F->CrashResistantMerge(Args, *Inputs, - Flags.load_coverage_summary, - Flags.save_coverage_summary); - exit(0); - } - - size_t TemporaryMaxLen = Options.MaxLen ? 
Options.MaxLen : kMaxSaneLen; - - UnitVector InitialCorpus; - for (auto &Inp : *Inputs) { - Printf("Loading corpus dir: %s\n", Inp.c_str()); - ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr, - TemporaryMaxLen, /*ExitOnError=*/false); - } - - if (Flags.analyze_dict) { - if (Dictionary.empty() || Inputs->empty()) { - Printf("ERROR: can't analyze dict without dict and corpus provided\n"); - return 1; - } - if (AnalyzeDictionary(F, Dictionary, InitialCorpus)) { - Printf("Dictionary analysis failed\n"); - exit(1); - } - Printf("Dictionary analysis succeeded\n"); - exit(0); - } - - if (Options.MaxLen == 0) { - size_t MaxLen = 0; - for (auto &U : InitialCorpus) - MaxLen = std::max(U.size(), MaxLen); - F->SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen)); - } - - if (InitialCorpus.empty()) { - InitialCorpus.push_back(Unit({'\n'})); // Valid ASCII input. - if (Options.Verbosity) - Printf("INFO: A corpus is not provided, starting from an empty corpus\n"); - } - F->ShuffleAndMinimize(&InitialCorpus); - InitialCorpus.clear(); // Don't need this memory any more. - F->Loop(); - - if (Flags.verbosity) - Printf("Done %zd runs in %zd second(s)\n", F->getTotalNumberOfRuns(), - F->secondsSinceProcessStartUp()); - F->PrintFinalStats(); - - exit(0); // Don't let F destroy itself. -} - -// Storage for global ExternalFunctions object. -ExternalFunctions *EF = nullptr; - -} // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerExtFunctions.def b/lib/Fuzzer/FuzzerExtFunctions.def deleted file mode 100644 index 3bc5302c31c63..0000000000000 --- a/lib/Fuzzer/FuzzerExtFunctions.def +++ /dev/null @@ -1,46 +0,0 @@ -//===- FuzzerExtFunctions.def - External functions --------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This defines the external function pointers that -// ``fuzzer::ExternalFunctions`` should contain and try to initialize. The -// EXT_FUNC macro must be defined at the point of inclusion.
The signature of -// the macro is: -// -// EXT_FUNC(<name>, <return_type>, <function_signature>, <warn_if_missing>) -//===----------------------------------------------------------------------===// - -// Optional user functions -EXT_FUNC(LLVMFuzzerInitialize, int, (int *argc, char ***argv), false); -EXT_FUNC(LLVMFuzzerCustomMutator, size_t, - (uint8_t * Data, size_t Size, size_t MaxSize, unsigned int Seed), - false); -EXT_FUNC(LLVMFuzzerCustomCrossOver, size_t, - (const uint8_t * Data1, size_t Size1, - const uint8_t * Data2, size_t Size2, - uint8_t * Out, size_t MaxOutSize, unsigned int Seed), - false); - -// Sanitizer functions -EXT_FUNC(__lsan_enable, void, (), false); -EXT_FUNC(__lsan_disable, void, (), false); -EXT_FUNC(__lsan_do_recoverable_leak_check, int, (), false); -EXT_FUNC(__sanitizer_install_malloc_and_free_hooks, int, - (void (*malloc_hook)(const volatile void *, size_t), - void (*free_hook)(const volatile void *)), - false); -EXT_FUNC(__sanitizer_print_memory_profile, int, (size_t, size_t), false); -EXT_FUNC(__sanitizer_print_stack_trace, void, (), true); -EXT_FUNC(__sanitizer_symbolize_pc, void, - (void *, const char *fmt, char *out_buf, size_t out_buf_size), false); -EXT_FUNC(__sanitizer_get_module_and_offset_for_pc, int, - (void *pc, char *module_path, - size_t module_path_len,void **pc_offset), false); -EXT_FUNC(__sanitizer_set_death_callback, void, (void (*)(void)), true); -EXT_FUNC(__sanitizer_set_report_fd, void, (void*), false); -EXT_FUNC(__sanitizer_dump_coverage, void, (const uintptr_t *, uintptr_t), - false); diff --git a/lib/Fuzzer/FuzzerExtFunctions.h b/lib/Fuzzer/FuzzerExtFunctions.h deleted file mode 100644 index 2672a385478d1..0000000000000 --- a/lib/Fuzzer/FuzzerExtFunctions.h +++ /dev/null @@ -1,35 +0,0 @@ -//===- FuzzerExtFunctions.h - Interface to external functions ---*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Defines an interface to (possibly optional) functions. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_EXT_FUNCTIONS_H -#define LLVM_FUZZER_EXT_FUNCTIONS_H - -#include <stddef.h> -#include <stdint.h> - -namespace fuzzer { - -struct ExternalFunctions { - // Initialize function pointers. Functions that are not available will be set - // to nullptr. Do not call this constructor before ``main()`` has been - // entered. - ExternalFunctions(); - -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - RETURN_TYPE(*NAME) FUNC_SIG = nullptr - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -}; -} // namespace fuzzer - -#endif diff --git a/lib/Fuzzer/FuzzerExtFunctionsDlsym.cpp b/lib/Fuzzer/FuzzerExtFunctionsDlsym.cpp deleted file mode 100644 index 06bddd5de38f3..0000000000000 --- a/lib/Fuzzer/FuzzerExtFunctionsDlsym.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//===- FuzzerExtFunctionsDlsym.cpp - Interface to external functions ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Implementation for operating systems that support dlsym(). We only use it on -// Apple platforms for now. We don't use this approach on Linux because it -// requires that clients of LibFuzzer pass ``--export-dynamic`` to the linker.
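[Editor's note] FuzzerExtFunctions.def above, like FuzzerFlags.def later in this patch, is an instance of the X-macro idiom: one list is re-expanded with a different EXT_FUNC definition at each inclusion site, so the declarations and the lookup code can never drift apart. A minimal, self-contained sketch of the idiom; all names here (COLOR_LIST, ColorNames) are hypothetical and not part of libFuzzer:

#include <cstdio>

// Normally this list would live in its own Colors.def file.
#define COLOR_LIST(X) \
  X(Red)              \
  X(Green)            \
  X(Blue)

// First expansion: declare an enum.
#define X(Name) k##Name,
enum Color { COLOR_LIST(X) kNumColors };
#undef X

// Second expansion: build a parallel name table.
#define X(Name) #Name,
static const char *ColorNames[] = { COLOR_LIST(X) };
#undef X

int main() {
  for (int C = 0; C < kNumColors; C++)
    std::printf("%d -> %s\n", C, ColorNames[C]);
  return 0;
}

Keeping the list in one place is exactly the property the multiple EXT_FUNC expansions above depend on.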
-// That is a complication we don't wish to expose to clients right now. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_APPLE - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include <dlfcn.h> - -using namespace fuzzer; - -template <typename T> -static T GetFnPtr(const char *FnName, bool WarnIfMissing) { - dlerror(); // Clear any previous errors. - void *Fn = dlsym(RTLD_DEFAULT, FnName); - if (Fn == nullptr) { - if (WarnIfMissing) { - const char *ErrorMsg = dlerror(); - Printf("WARNING: Failed to find function \"%s\".", FnName); - if (ErrorMsg) - Printf(" Reason %s.", ErrorMsg); - Printf("\n"); - } - } - return reinterpret_cast<T>(Fn); -} - -namespace fuzzer { - -ExternalFunctions::ExternalFunctions() { -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - this->NAME = GetFnPtr<decltype(ExternalFunctions::NAME)>(#NAME, WARN) - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -} - -} // namespace fuzzer - -#endif // LIBFUZZER_APPLE diff --git a/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp b/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp deleted file mode 100644 index 321b3ec5d4140..0000000000000 --- a/lib/Fuzzer/FuzzerExtFunctionsDlsymWin.cpp +++ /dev/null @@ -1,62 +0,0 @@ -//===- FuzzerExtFunctionsDlsymWin.cpp - Interface to external functions ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Implementation using dynamic loading for Windows. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_WINDOWS - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include "Windows.h" - -// This must be included after Windows.h. -#include "Psapi.h" - -namespace fuzzer { - -ExternalFunctions::ExternalFunctions() { - HMODULE Modules[1024]; - DWORD BytesNeeded; - HANDLE CurrentProcess = GetCurrentProcess(); - - if (!EnumProcessModules(CurrentProcess, Modules, sizeof(Modules), - &BytesNeeded)) { - Printf("EnumProcessModules failed (error: %d).\n", GetLastError()); - exit(1); - } - - if (sizeof(Modules) < BytesNeeded) { - Printf("Error: the array is not big enough to hold all loaded modules.\n"); - exit(1); - } - - for (size_t i = 0; i < (BytesNeeded / sizeof(HMODULE)); i++) - { - FARPROC Fn; -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - if (this->NAME == nullptr) { \ - Fn = GetProcAddress(Modules[i], #NAME); \ - if (Fn == nullptr) \ - Fn = GetProcAddress(Modules[i], #NAME "__dll"); \ - this->NAME = (decltype(ExternalFunctions::NAME)) Fn; \ - } -#include "FuzzerExtFunctions.def" -#undef EXT_FUNC - } - -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - if (this->NAME == nullptr && WARN) \ - Printf("WARNING: Failed to find function \"%s\".\n", #NAME); -#include "FuzzerExtFunctions.def" -#undef EXT_FUNC -} - -} // namespace fuzzer - -#endif // LIBFUZZER_WINDOWS diff --git a/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp b/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp deleted file mode 100644 index 503f0395cf8f8..0000000000000 --- a/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp +++ /dev/null @@ -1,54 +0,0 @@ -//===- FuzzerExtFunctionsWeak.cpp - Interface to external functions -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details.
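[Editor's note] A standalone sketch of the dlsym()-based lookup in FuzzerExtFunctionsDlsym.cpp above, assuming a POSIX system (link with -ldl on Linux); LookupOptional is a hypothetical stand-in for GetFnPtr:

#include <dlfcn.h>
#include <cstdio>

// Resolve an optional symbol anywhere in the process image; nullptr means
// "not provided", which callers treat as "feature disabled".
template <typename T>
static T LookupOptional(const char *Name, bool Warn) {
  dlerror(); // clear any stale error state
  void *Sym = dlsym(RTLD_DEFAULT, Name);
  if (!Sym && Warn) {
    const char *Err = dlerror();
    std::fprintf(stderr, "WARNING: failed to find \"%s\"%s%s\n", Name,
                 Err ? ": " : "", Err ? Err : "");
  }
  return reinterpret_cast<T>(Sym);
}

int main() {
  using InitFn = int (*)(int *, char ***);
  // LLVMFuzzerInitialize is optional for targets, so it is normally absent.
  auto Init = LookupOptional<InitFn>("LLVMFuzzerInitialize", /*Warn=*/false);
  std::printf("LLVMFuzzerInitialize is %s\n", Init ? "present" : "absent");
  return 0;
}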
-// -//===----------------------------------------------------------------------===// -// Implementation for Linux. This relies on the linker's support for weak -// symbols. We don't use this approach on Apple platforms because it requires -// clients of LibFuzzer to pass ``-U _<symbol>`` to the linker to allow -// weak symbols to be undefined. That is a complication we don't want to expose -// to clients right now. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_LINUX - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" - -extern "C" { -// Declare these symbols as weak to allow them to be optionally defined. -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - __attribute__((weak)) RETURN_TYPE NAME FUNC_SIG - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -} - -using namespace fuzzer; - -static void CheckFnPtr(void *FnPtr, const char *FnName, bool WarnIfMissing) { - if (FnPtr == nullptr && WarnIfMissing) { - Printf("WARNING: Failed to find function \"%s\".\n", FnName); - } -} - -namespace fuzzer { - -ExternalFunctions::ExternalFunctions() { -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - this->NAME = ::NAME; \ - CheckFnPtr(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(::NAME)), \ - #NAME, WARN); - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -} - -} // namespace fuzzer - -#endif // LIBFUZZER_LINUX diff --git a/lib/Fuzzer/FuzzerExtFunctionsWeakAlias.cpp b/lib/Fuzzer/FuzzerExtFunctionsWeakAlias.cpp deleted file mode 100644 index e10f7b4dcac20..0000000000000 --- a/lib/Fuzzer/FuzzerExtFunctionsWeakAlias.cpp +++ /dev/null @@ -1,56 +0,0 @@ -//===- FuzzerExtFunctionsWeakAlias.cpp - Interface to external functions --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Implementation using weak aliases. Works for Windows. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_WINDOWS - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" - -using namespace fuzzer; - -extern "C" { -// Declare these symbols as weak to allow them to be optionally defined.
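[Editor's note] The Linux implementation above hinges on one linker property: a weak declaration still links when nothing defines it, and the symbol's address is then null at runtime. A minimal sketch under GCC/Clang on ELF targets; OptionalHook is a hypothetical name:

#include <cstdio>

// Weak declaration: the link succeeds even with no definition anywhere;
// in that case the function's address compares equal to nullptr.
extern "C" __attribute__((weak)) int OptionalHook(int X);

int main() {
  if (OptionalHook) // checking the address is safe; calling a null one is not
    std::printf("hook -> %d\n", OptionalHook(42));
  else
    std::printf("hook not linked in\n");
  return 0;
}

Defining OptionalHook in any other translation unit flips the branch without touching this file, which is how optional sanitizer hooks become visible to libFuzzer.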
-#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - RETURN_TYPE NAME##Def FUNC_SIG { \ - Printf("ERROR: Function \"%s\" not defined.\n", #NAME); \ - exit(1); \ - } \ - RETURN_TYPE NAME FUNC_SIG __attribute__((weak, alias(#NAME "Def"))); - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -} - -template -static T *GetFnPtr(T *Fun, T *FunDef, const char *FnName, bool WarnIfMissing) { - if (Fun == FunDef) { - if (WarnIfMissing) - Printf("WARNING: Failed to find function \"%s\".\n", FnName); - return nullptr; - } - return Fun; -} - -namespace fuzzer { - -ExternalFunctions::ExternalFunctions() { -#define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ - this->NAME = GetFnPtr(::NAME, ::NAME##Def, #NAME, WARN); - -#include "FuzzerExtFunctions.def" - -#undef EXT_FUNC -} - -} // namespace fuzzer - -#endif // LIBFUZZER_WINDOWS diff --git a/lib/Fuzzer/FuzzerExtraCounters.cpp b/lib/Fuzzer/FuzzerExtraCounters.cpp deleted file mode 100644 index 07dbe0fdee765..0000000000000 --- a/lib/Fuzzer/FuzzerExtraCounters.cpp +++ /dev/null @@ -1,41 +0,0 @@ -//===- FuzzerExtraCounters.cpp - Extra coverage counters ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Extra coverage counters defined by user code. -//===----------------------------------------------------------------------===// - -#include "FuzzerDefs.h" - -#if LIBFUZZER_LINUX -__attribute__((weak)) extern uint8_t __start___libfuzzer_extra_counters; -__attribute__((weak)) extern uint8_t __stop___libfuzzer_extra_counters; - -namespace fuzzer { -uint8_t *ExtraCountersBegin() { return &__start___libfuzzer_extra_counters; } -uint8_t *ExtraCountersEnd() { return &__stop___libfuzzer_extra_counters; } -ATTRIBUTE_NO_SANITIZE_ALL -void ClearExtraCounters() { // hand-written memset, don't asan-ify. - uintptr_t *Beg = reinterpret_cast(ExtraCountersBegin()); - uintptr_t *End = reinterpret_cast(ExtraCountersEnd()); - for (; Beg < End; Beg++) { - *Beg = 0; - __asm__ __volatile__("" : : : "memory"); - } -} - -} // namespace fuzzer - -#else -// TODO: implement for other platforms. -namespace fuzzer { -uint8_t *ExtraCountersBegin() { return nullptr; } -uint8_t *ExtraCountersEnd() { return nullptr; } -void ClearExtraCounters() {} -} // namespace fuzzer - -#endif diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def deleted file mode 100644 index 07fdf8425fad9..0000000000000 --- a/lib/Fuzzer/FuzzerFlags.def +++ /dev/null @@ -1,139 +0,0 @@ -//===- FuzzerFlags.def - Run-time flags -------------------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Flags. FUZZER_FLAG_INT/FUZZER_FLAG_STRING macros should be defined at the -// point of inclusion. We are not using any flag parsing library for better -// portability and independence. -//===----------------------------------------------------------------------===// -FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.") -FUZZER_FLAG_UNSIGNED(seed, 0, "Random seed. If 0, seed is generated.") -FUZZER_FLAG_INT(runs, -1, - "Number of individual test runs (-1 for infinite runs).") -FUZZER_FLAG_INT(max_len, 0, "Maximum length of the test input. 
" - "If 0, libFuzzer tries to guess a good value based on the corpus " - "and reports it. ") -FUZZER_FLAG_INT(experimental_len_control, 0, "experimental flag") -FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") -FUZZER_FLAG_INT(mutate_depth, 5, - "Apply this number of consecutive mutations to each input.") -FUZZER_FLAG_INT(shuffle, 1, "Shuffle inputs at startup") -FUZZER_FLAG_INT(prefer_small, 1, - "If 1, always prefer smaller inputs during the corpus shuffle.") -FUZZER_FLAG_INT( - timeout, 1200, - "Timeout in seconds (if positive). " - "If one unit runs more than this number of seconds the process will abort.") -FUZZER_FLAG_INT(error_exitcode, 77, "When libFuzzer itself reports a bug " - "this exit code will be used.") -FUZZER_FLAG_INT(timeout_exitcode, 77, "When libFuzzer reports a timeout " - "this exit code will be used.") -FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total " - "time in seconds to run the fuzzer.") -FUZZER_FLAG_INT(help, 0, "Print help.") -FUZZER_FLAG_INT(merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be " - "merged into the 1-st corpus. Only interesting units will be taken. " - "This flag can be used to minimize a corpus.") -FUZZER_FLAG_STRING(merge_control_file, "internal flag") -FUZZER_FLAG_STRING(save_coverage_summary, "Experimental:" - " save coverage summary to a given file." - " Used with -merge=1") -FUZZER_FLAG_STRING(load_coverage_summary, "Experimental:" - " load coverage summary from a given file." - " Treat this coverage as belonging to the first corpus. " - " Used with -merge=1") -FUZZER_FLAG_INT(minimize_crash, 0, "If 1, minimizes the provided" - " crash input. Use with -runs=N or -max_total_time=N to limit " - "the number attempts." - " Use with -exact_artifact_path to specify the output." - " Combine with ASAN_OPTIONS=dedup_token_length=3 (or similar) to ensure that" - " the minimized input triggers the same crash." - ) -FUZZER_FLAG_INT(cleanse_crash, 0, "If 1, tries to cleanse the provided" - " crash input to make it contain fewer original bytes." - " Use with -exact_artifact_path to specify the output." - ) -FUZZER_FLAG_INT(minimize_crash_internal_step, 0, "internal flag") -FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters") -FUZZER_FLAG_INT(use_indir_calls, 1, "Use indirect caller-callee counters") -FUZZER_FLAG_INT(use_memmem, 1, - "Use hints from intercepting memmem, strstr, etc") -FUZZER_FLAG_INT(use_value_profile, 0, - "Experimental. Use value profile to guide fuzzing.") -FUZZER_FLAG_INT(use_cmp, 1, "Use CMP traces to guide mutations") -FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus inputs.") -FUZZER_FLAG_INT(reduce_inputs, 1, - "Try to reduce the size of inputs while preserving their full feature sets") -FUZZER_FLAG_UNSIGNED(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" - " this number of jobs in separate worker processes" - " with stdout/stderr redirected to fuzz-JOB.log.") -FUZZER_FLAG_UNSIGNED(workers, 0, - "Number of simultaneous worker processes to run the jobs." - " If zero, \"min(jobs,NumberOfCpuCores()/2)\" is used.") -FUZZER_FLAG_INT(reload, 1, - "Reload the main corpus every seconds to get new units" - " discovered by other processes. If 0, disabled") -FUZZER_FLAG_INT(report_slow_units, 10, - "Report slowest units if they run for more than this number of seconds.") -FUZZER_FLAG_INT(only_ascii, 0, - "If 1, generate only ASCII (isprint+isspace) inputs.") -FUZZER_FLAG_STRING(dict, "Experimental. 
Use the dictionary file.") -FUZZER_FLAG_STRING(artifact_prefix, "Write fuzzing artifacts (crash, " - "timeout, or slow inputs) as " - "$(artifact_prefix)file") -FUZZER_FLAG_STRING(exact_artifact_path, - "Write the single artifact on failure (crash, timeout) " - "as $(exact_artifact_path). This overrides -artifact_prefix " - "and will not use checksum in the file name. Do not " - "use the same path for several parallel processes.") -FUZZER_FLAG_INT(print_pcs, 0, "If 1, print out newly covered PCs.") -FUZZER_FLAG_INT(print_final_stats, 0, "If 1, print statistics at exit.") -FUZZER_FLAG_INT(print_corpus_stats, 0, - "If 1, print statistics on corpus elements at exit.") -FUZZER_FLAG_INT(print_coverage, 0, "If 1, print coverage information as text" - " at exit. To-be-deprecated.") -FUZZER_FLAG_INT(dump_coverage, 0, "If 1, dump coverage information as a" - " .sancov file at exit. To-be-deprecated.") -FUZZER_FLAG_INT(handle_segv, 1, "If 1, try to intercept SIGSEGV.") -FUZZER_FLAG_INT(handle_bus, 1, "If 1, try to intercept SIGBUS.") -FUZZER_FLAG_INT(handle_abrt, 1, "If 1, try to intercept SIGABRT.") -FUZZER_FLAG_INT(handle_ill, 1, "If 1, try to intercept SIGILL.") -FUZZER_FLAG_INT(handle_fpe, 1, "If 1, try to intercept SIGFPE.") -FUZZER_FLAG_INT(handle_int, 1, "If 1, try to intercept SIGINT.") -FUZZER_FLAG_INT(handle_term, 1, "If 1, try to intercept SIGTERM.") -FUZZER_FLAG_INT(handle_xfsz, 1, "If 1, try to intercept SIGXFSZ.") -FUZZER_FLAG_INT(close_fd_mask, 0, "If 1, close stdout at startup; " - "if 2, close stderr; if 3, close both. " - "Be careful, this will also close e.g. asan's stderr/stdout.") -FUZZER_FLAG_INT(detect_leaks, 1, "If 1, and if LeakSanitizer is enabled " - "try to detect memory leaks during fuzzing (i.e. not only at shut down).") -FUZZER_FLAG_INT(trace_malloc, 0, "If >= 1 will print all mallocs/frees. " - "If >= 2 will also print stack traces.") -FUZZER_FLAG_INT(rss_limit_mb, 2048, "If non-zero, the fuzzer will exit upon" - "reaching this limit of RSS memory usage.") -FUZZER_FLAG_STRING(exit_on_src_pos, "Exit if a newly found PC originates" - " from the given source location. Example: -exit_on_src_pos=foo.cc:123. " - "Used primarily for testing libFuzzer itself.") -FUZZER_FLAG_STRING(exit_on_item, "Exit if an item with a given sha1 sum" - " was added to the corpus. " - "Used primarily for testing libFuzzer itself.") -FUZZER_FLAG_INT(ignore_remaining_args, 0, "If 1, ignore all arguments passed " - "after this one. Useful for fuzzers that need to do their own " - "argument parsing.") - -FUZZER_FLAG_STRING(run_equivalence_server, "Experimental") -FUZZER_FLAG_STRING(use_equivalence_server, "Experimental") -FUZZER_FLAG_INT(analyze_dict, 0, "Experimental") - -FUZZER_DEPRECATED_FLAG(exit_on_first) -FUZZER_DEPRECATED_FLAG(save_minimized_corpus) -FUZZER_DEPRECATED_FLAG(sync_command) -FUZZER_DEPRECATED_FLAG(sync_timeout) -FUZZER_DEPRECATED_FLAG(test_single_input) -FUZZER_DEPRECATED_FLAG(drill) -FUZZER_DEPRECATED_FLAG(truncate_units) -FUZZER_DEPRECATED_FLAG(output_csv) diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp deleted file mode 100644 index 1a06d4420f9a3..0000000000000 --- a/lib/Fuzzer/FuzzerIO.cpp +++ /dev/null @@ -1,120 +0,0 @@ -//===- FuzzerIO.cpp - IO utils. -------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
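[Editor's note] FuzzerFlags.def above is consumed the same way as FuzzerExtFunctions.def: the FUZZER_FLAG_* macros are defined at each point of inclusion. A hypothetical miniature of how one such list can drive both flag storage and "-name=value" parsing; MY_FLAG_LIST and ParseOneFlag are illustrative names, not libFuzzer's actual expansion:

#include <cstdio>
#include <cstdlib>
#include <cstring>

// One list, expanded twice below: once into struct fields, once into a parser.
#define MY_FLAG_LIST(X) \
  X(verbosity, 1)       \
  X(runs, -1)           \
  X(max_len, 0)

struct Flags {
#define X(Name, Default) int Name = Default;
  MY_FLAG_LIST(X)
#undef X
};

// Accepts "-name=value"; returns false for anything it does not recognize.
static bool ParseOneFlag(Flags *F, const char *Arg) {
  if (Arg[0] != '-') return false;
  Arg++;
#define X(Name, Default)                                     \
  {                                                          \
    const auto Len = std::strlen(#Name);                     \
    if (!std::strncmp(Arg, #Name, Len) && Arg[Len] == '=') { \
      F->Name = std::atoi(Arg + Len + 1);                    \
      return true;                                           \
    }                                                        \
  }
  MY_FLAG_LIST(X)
#undef X
  return false;
}

int main(int argc, char **argv) {
  Flags F;
  for (int I = 1; I < argc; I++)
    if (!ParseOneFlag(&F, argv[I]))
      std::printf("unrecognized argument: %s\n", argv[I]);
  std::printf("verbosity=%d runs=%d max_len=%d\n", F.verbosity, F.runs,
              F.max_len);
  return 0;
}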
-// -//===----------------------------------------------------------------------===// -// IO functions. -//===----------------------------------------------------------------------===// - -#include "FuzzerIO.h" -#include "FuzzerDefs.h" -#include "FuzzerExtFunctions.h" -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -static FILE *OutputFile = stderr; - -long GetEpoch(const std::string &Path) { - struct stat St; - if (stat(Path.c_str(), &St)) - return 0; // Can't stat, be conservative. - return St.st_mtime; -} - -Unit FileToVector(const std::string &Path, size_t MaxSize, bool ExitOnError) { - std::ifstream T(Path); - if (ExitOnError && !T) { - Printf("No such directory: %s; exiting\n", Path.c_str()); - exit(1); - } - - T.seekg(0, T.end); - auto EndPos = T.tellg(); - if (EndPos < 0) return {}; - size_t FileLen = EndPos; - if (MaxSize) - FileLen = std::min(FileLen, MaxSize); - - T.seekg(0, T.beg); - Unit Res(FileLen); - T.read(reinterpret_cast(Res.data()), FileLen); - return Res; -} - -std::string FileToString(const std::string &Path) { - std::ifstream T(Path); - return std::string((std::istreambuf_iterator(T)), - std::istreambuf_iterator()); -} - -void CopyFileToErr(const std::string &Path) { - Printf("%s", FileToString(Path).c_str()); -} - -void WriteToFile(const Unit &U, const std::string &Path) { - // Use raw C interface because this function may be called from a sig handler. - FILE *Out = fopen(Path.c_str(), "w"); - if (!Out) return; - fwrite(U.data(), sizeof(U[0]), U.size(), Out); - fclose(Out); -} - -void ReadDirToVectorOfUnits(const char *Path, std::vector *V, - long *Epoch, size_t MaxSize, bool ExitOnError) { - long E = Epoch ? *Epoch : 0; - std::vector Files; - ListFilesInDirRecursive(Path, Epoch, &Files, /*TopDir*/true); - size_t NumLoaded = 0; - for (size_t i = 0; i < Files.size(); i++) { - auto &X = Files[i]; - if (Epoch && GetEpoch(X) < E) continue; - NumLoaded++; - if ((NumLoaded & (NumLoaded - 1)) == 0 && NumLoaded >= 1024) - Printf("Loaded %zd/%zd files from %s\n", NumLoaded, Files.size(), Path); - auto S = FileToVector(X, MaxSize, ExitOnError); - if (!S.empty()) - V->push_back(S); - } -} - -std::string DirPlusFile(const std::string &DirPath, - const std::string &FileName) { - return DirPath + GetSeparator() + FileName; -} - -void DupAndCloseStderr() { - int OutputFd = DuplicateFile(2); - if (OutputFd > 0) { - FILE *NewOutputFile = OpenFile(OutputFd, "w"); - if (NewOutputFile) { - OutputFile = NewOutputFile; - if (EF->__sanitizer_set_report_fd) - EF->__sanitizer_set_report_fd( - reinterpret_cast(GetHandleFromFd(OutputFd))); - DiscardOutput(2); - } - } -} - -void CloseStdout() { - DiscardOutput(1); -} - -void Printf(const char *Fmt, ...) { - va_list ap; - va_start(ap, Fmt); - vfprintf(OutputFile, Fmt, ap); - va_end(ap); - fflush(OutputFile); -} - -} // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerIO.h b/lib/Fuzzer/FuzzerIO.h deleted file mode 100644 index 3b66a52d1a649..0000000000000 --- a/lib/Fuzzer/FuzzerIO.h +++ /dev/null @@ -1,76 +0,0 @@ -//===- FuzzerIO.h - Internal header for IO utils ----------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// IO interface. 
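[Editor's note] DupAndCloseStderr() in FuzzerIO.cpp above keeps a private handle to the old stderr before fd 2 is pointed at /dev/null, so the target's noise disappears while the fuzzer's own reports still get out. A simplified POSIX sketch; StealStderr is a hypothetical name, and the real function additionally passes the new fd to __sanitizer_set_report_fd:

#include <cstdio>
#include <unistd.h>

static FILE *StealStderr() {
  int Saved = dup(2); // private copy of the current stderr fd
  if (Saved < 0) return nullptr;
  FILE *Out = fdopen(Saved, "w");
  if (!Out) return nullptr;
  if (FILE *Null = fopen("/dev/null", "w")) {
    dup2(fileno(Null), 2); // fd 2 now discards everything
    fclose(Null);
  }
  return Out;
}

int main() {
  FILE *Report = StealStderr();
  std::fprintf(stderr, "this line is discarded\n");
  if (Report) std::fprintf(Report, "this line still reaches the old stderr\n");
  return 0;
}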
-//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_IO_H -#define LLVM_FUZZER_IO_H - -#include "FuzzerDefs.h" - -namespace fuzzer { - -long GetEpoch(const std::string &Path); - -Unit FileToVector(const std::string &Path, size_t MaxSize = 0, - bool ExitOnError = true); - -std::string FileToString(const std::string &Path); - -void CopyFileToErr(const std::string &Path); - -void WriteToFile(const Unit &U, const std::string &Path); - -void ReadDirToVectorOfUnits(const char *Path, std::vector *V, - long *Epoch, size_t MaxSize, bool ExitOnError); - -// Returns "Dir/FileName" or equivalent for the current OS. -std::string DirPlusFile(const std::string &DirPath, - const std::string &FileName); - -// Returns the name of the dir, similar to the 'dirname' utility. -std::string DirName(const std::string &FileName); - -// Returns path to a TmpDir. -std::string TmpDir(); - -bool IsInterestingCoverageFile(const std::string &FileName); - -void DupAndCloseStderr(); - -void CloseStdout(); - -void Printf(const char *Fmt, ...); - -// Print using raw syscalls, useful when printing at early init stages. -void RawPrint(const char *Str); - -// Platform specific functions: -bool IsFile(const std::string &Path); - -void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, - std::vector *V, bool TopDir); - -char GetSeparator(); - -FILE* OpenFile(int Fd, const char *Mode); - -int CloseFile(int Fd); - -int DuplicateFile(int Fd); - -void RemoveFile(const std::string &Path); - -void DiscardOutput(int Fd); - -intptr_t GetHandleFromFd(int fd); - -} // namespace fuzzer - -#endif // LLVM_FUZZER_IO_H diff --git a/lib/Fuzzer/FuzzerIOPosix.cpp b/lib/Fuzzer/FuzzerIOPosix.cpp deleted file mode 100644 index c5ebdbac467bf..0000000000000 --- a/lib/Fuzzer/FuzzerIOPosix.cpp +++ /dev/null @@ -1,123 +0,0 @@ -//===- FuzzerIOPosix.cpp - IO utils for Posix. ----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// IO functions implementation using Posix API. 
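[Editor's note] Among the declarations above, DirPlusFile() and GetSeparator() isolate the only platform-specific piece of path joining. A self-contained sketch of that split; the separator values match the POSIX and Windows implementations in this patch:

#include <cstdio>
#include <string>

static char GetSeparator() {
#ifdef _WIN32
  return '\\';
#else
  return '/';
#endif
}

// Join a directory and a file name with the platform's separator.
static std::string DirPlusFile(const std::string &Dir,
                               const std::string &File) {
  return Dir + GetSeparator() + File;
}

int main() {
  std::printf("%s\n", DirPlusFile("corpus", "crash-123").c_str());
  return 0;
}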
-//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_POSIX - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -bool IsFile(const std::string &Path) { - struct stat St; - if (stat(Path.c_str(), &St)) - return false; - return S_ISREG(St.st_mode); -} - -void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, - std::vector *V, bool TopDir) { - auto E = GetEpoch(Dir); - if (Epoch) - if (E && *Epoch >= E) return; - - DIR *D = opendir(Dir.c_str()); - if (!D) { - Printf("No such directory: %s; exiting\n", Dir.c_str()); - exit(1); - } - while (auto E = readdir(D)) { - std::string Path = DirPlusFile(Dir, E->d_name); - if (E->d_type == DT_REG || E->d_type == DT_LNK) - V->push_back(Path); - else if (E->d_type == DT_DIR && *E->d_name != '.') - ListFilesInDirRecursive(Path, Epoch, V, false); - } - closedir(D); - if (Epoch && TopDir) - *Epoch = E; -} - -char GetSeparator() { - return '/'; -} - -FILE* OpenFile(int Fd, const char* Mode) { - return fdopen(Fd, Mode); -} - -int CloseFile(int fd) { - return close(fd); -} - -int DuplicateFile(int Fd) { - return dup(Fd); -} - -void RemoveFile(const std::string &Path) { - unlink(Path.c_str()); -} - -void DiscardOutput(int Fd) { - FILE* Temp = fopen("/dev/null", "w"); - if (!Temp) - return; - dup2(fileno(Temp), Fd); - fclose(Temp); -} - -intptr_t GetHandleFromFd(int fd) { - return static_cast(fd); -} - -std::string DirName(const std::string &FileName) { - char *Tmp = new char[FileName.size() + 1]; - memcpy(Tmp, FileName.c_str(), FileName.size() + 1); - std::string Res = dirname(Tmp); - delete [] Tmp; - return Res; -} - -std::string TmpDir() { - if (auto Env = getenv("TMPDIR")) - return Env; - return "/tmp"; -} - -bool IsInterestingCoverageFile(const std::string &FileName) { - if (FileName.find("compiler-rt/lib/") != std::string::npos) - return false; // sanitizer internal. - if (FileName.find("/usr/lib/") != std::string::npos) - return false; - if (FileName.find("/usr/include/") != std::string::npos) - return false; - if (FileName == "") - return false; - return true; -} - - -void RawPrint(const char *Str) { - write(2, Str, strlen(Str)); -} - -} // namespace fuzzer - -#endif // LIBFUZZER_POSIX diff --git a/lib/Fuzzer/FuzzerIOWindows.cpp b/lib/Fuzzer/FuzzerIOWindows.cpp deleted file mode 100644 index 742520267b73f..0000000000000 --- a/lib/Fuzzer/FuzzerIOWindows.cpp +++ /dev/null @@ -1,323 +0,0 @@ -//===- FuzzerIOWindows.cpp - IO utils for Windows. ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// IO functions implementation for Windows. 
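[Editor's note] For contrast with the FindFirstFileA/FindNextFileA walk below, here is a compressed sketch of the POSIX ListFilesInDirRecursive() above. Note that d_type is a BSD/Linux extension and can be DT_UNKNOWN on some filesystems, and the real code exits with an error on a missing directory instead of returning silently:

#include <dirent.h>
#include <cstdio>
#include <string>
#include <vector>

// Collect regular files, recursing into subdirectories whose names
// do not start with '.'.
static void ListFiles(const std::string &Dir, std::vector<std::string> *Out) {
  DIR *D = opendir(Dir.c_str());
  if (!D) return;
  while (dirent *E = readdir(D)) {
    std::string Path = Dir + "/" + E->d_name;
    if (E->d_type == DT_REG || E->d_type == DT_LNK)
      Out->push_back(Path);
    else if (E->d_type == DT_DIR && E->d_name[0] != '.')
      ListFiles(Path, Out);
  }
  closedir(D);
}

int main(int argc, char **argv) {
  std::vector<std::string> Files;
  ListFiles(argc > 1 ? argv[1] : ".", &Files);
  for (auto &F : Files) std::printf("%s\n", F.c_str());
  return 0;
}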
-//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_WINDOWS - -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -static bool IsFile(const std::string &Path, const DWORD &FileAttributes) { - - if (FileAttributes & FILE_ATTRIBUTE_NORMAL) - return true; - - if (FileAttributes & FILE_ATTRIBUTE_DIRECTORY) - return false; - - HANDLE FileHandle( - CreateFileA(Path.c_str(), 0, FILE_SHARE_READ, NULL, OPEN_EXISTING, - FILE_FLAG_BACKUP_SEMANTICS, 0)); - - if (FileHandle == INVALID_HANDLE_VALUE) { - Printf("CreateFileA() failed for \"%s\" (Error code: %lu).\n", Path.c_str(), - GetLastError()); - return false; - } - - DWORD FileType = GetFileType(FileHandle); - - if (FileType == FILE_TYPE_UNKNOWN) { - Printf("GetFileType() failed for \"%s\" (Error code: %lu).\n", Path.c_str(), - GetLastError()); - CloseHandle(FileHandle); - return false; - } - - if (FileType != FILE_TYPE_DISK) { - CloseHandle(FileHandle); - return false; - } - - CloseHandle(FileHandle); - return true; -} - -bool IsFile(const std::string &Path) { - DWORD Att = GetFileAttributesA(Path.c_str()); - - if (Att == INVALID_FILE_ATTRIBUTES) { - Printf("GetFileAttributesA() failed for \"%s\" (Error code: %lu).\n", - Path.c_str(), GetLastError()); - return false; - } - - return IsFile(Path, Att); -} - -void ListFilesInDirRecursive(const std::string &Dir, long *Epoch, - std::vector *V, bool TopDir) { - auto E = GetEpoch(Dir); - if (Epoch) - if (E && *Epoch >= E) return; - - std::string Path(Dir); - assert(!Path.empty()); - if (Path.back() != '\\') - Path.push_back('\\'); - Path.push_back('*'); - - // Get the first directory entry. - WIN32_FIND_DATAA FindInfo; - HANDLE FindHandle(FindFirstFileA(Path.c_str(), &FindInfo)); - if (FindHandle == INVALID_HANDLE_VALUE) - { - if (GetLastError() == ERROR_FILE_NOT_FOUND) - return; - Printf("No such directory: %s; exiting\n", Dir.c_str()); - exit(1); - } - - do { - std::string FileName = DirPlusFile(Dir, FindInfo.cFileName); - - if (FindInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { - size_t FilenameLen = strlen(FindInfo.cFileName); - if ((FilenameLen == 1 && FindInfo.cFileName[0] == '.') || - (FilenameLen == 2 && FindInfo.cFileName[0] == '.' && - FindInfo.cFileName[1] == '.')) - continue; - - ListFilesInDirRecursive(FileName, Epoch, V, false); - } - else if (IsFile(FileName, FindInfo.dwFileAttributes)) - V->push_back(FileName); - } while (FindNextFileA(FindHandle, &FindInfo)); - - DWORD LastError = GetLastError(); - if (LastError != ERROR_NO_MORE_FILES) - Printf("FindNextFileA failed (Error code: %lu).\n", LastError); - - FindClose(FindHandle); - - if (Epoch && TopDir) - *Epoch = E; -} - -char GetSeparator() { - return '\\'; -} - -FILE* OpenFile(int Fd, const char* Mode) { - return _fdopen(Fd, Mode); -} - -int CloseFile(int Fd) { - return _close(Fd); -} - -int DuplicateFile(int Fd) { - return _dup(Fd); -} - -void RemoveFile(const std::string &Path) { - _unlink(Path.c_str()); -} - -void DiscardOutput(int Fd) { - FILE* Temp = fopen("nul", "w"); - if (!Temp) - return; - _dup2(_fileno(Temp), Fd); - fclose(Temp); -} - -intptr_t GetHandleFromFd(int fd) { - return _get_osfhandle(fd); -} - -static bool IsSeparator(char C) { - return C == '\\' || C == '/'; -} - -// Parse disk designators, like "C:\". If Relative == true, also accepts: "C:". -// Returns number of characters considered if successful. 
-static size_t ParseDrive(const std::string &FileName, const size_t Offset, - bool Relative = true) { - if (Offset + 1 >= FileName.size() || FileName[Offset + 1] != ':') - return 0; - if (Offset + 2 >= FileName.size() || !IsSeparator(FileName[Offset + 2])) { - if (!Relative) // Accept relative path? - return 0; - else - return 2; - } - return 3; -} - -// Parse a file name, like: SomeFile.txt -// Returns number of characters considered if successful. -static size_t ParseFileName(const std::string &FileName, const size_t Offset) { - size_t Pos = Offset; - const size_t End = FileName.size(); - for(; Pos < End && !IsSeparator(FileName[Pos]); ++Pos) - ; - return Pos - Offset; -} - -// Parse a directory ending in separator, like: `SomeDir\` -// Returns number of characters considered if successful. -static size_t ParseDir(const std::string &FileName, const size_t Offset) { - size_t Pos = Offset; - const size_t End = FileName.size(); - if (Pos >= End || IsSeparator(FileName[Pos])) - return 0; - for(; Pos < End && !IsSeparator(FileName[Pos]); ++Pos) - ; - if (Pos >= End) - return 0; - ++Pos; // Include separator. - return Pos - Offset; -} - -// Parse a servername and share, like: `SomeServer\SomeShare\` -// Returns number of characters considered if successful. -static size_t ParseServerAndShare(const std::string &FileName, - const size_t Offset) { - size_t Pos = Offset, Res; - if (!(Res = ParseDir(FileName, Pos))) - return 0; - Pos += Res; - if (!(Res = ParseDir(FileName, Pos))) - return 0; - Pos += Res; - return Pos - Offset; -} - -// Parse the given Ref string from the position Offset, to exactly match the given -// string Patt. -// Returns number of characters considered if successful. -static size_t ParseCustomString(const std::string &Ref, size_t Offset, - const char *Patt) { - size_t Len = strlen(Patt); - if (Offset + Len > Ref.size()) - return 0; - return Ref.compare(Offset, Len, Patt) == 0 ? Len : 0; -} - -// Parse a location, like: -// \\?\UNC\Server\Share\ \\?\C:\ \\Server\Share\ \ C:\ C: -// Returns number of characters considered if successful. -static size_t ParseLocation(const std::string &FileName) { - size_t Pos = 0, Res; - - if ((Res = ParseCustomString(FileName, Pos, R"(\\?\)"))) { - Pos += Res; - if ((Res = ParseCustomString(FileName, Pos, R"(UNC\)"))) { - Pos += Res; - if ((Res = ParseServerAndShare(FileName, Pos))) - return Pos + Res; - return 0; - } - if ((Res = ParseDrive(FileName, Pos, false))) - return Pos + Res; - return 0; - } - - if (Pos < FileName.size() && IsSeparator(FileName[Pos])) { - ++Pos; - if (Pos < FileName.size() && IsSeparator(FileName[Pos])) { - ++Pos; - if ((Res = ParseServerAndShare(FileName, Pos))) - return Pos + Res; - return 0; - } - return Pos; - } - - if ((Res = ParseDrive(FileName, Pos))) - return Pos + Res; - - return Pos; -} - -std::string DirName(const std::string &FileName) { - size_t LocationLen = ParseLocation(FileName); - size_t DirLen = 0, Res; - while ((Res = ParseDir(FileName, LocationLen + DirLen))) - DirLen += Res; - size_t FileLen = ParseFileName(FileName, LocationLen + DirLen); - - if (LocationLen + DirLen + FileLen != FileName.size()) { - Printf("DirName() failed for \"%s\", invalid path.\n", FileName.c_str()); - exit(1); - } - - if (DirLen) { - --DirLen; // Remove trailing separator. - if (!FileLen) { // Path ended in separator. - assert(DirLen); - // Remove file name from Dir. - while (DirLen && !IsSeparator(FileName[LocationLen + DirLen - 1])) - --DirLen; - if (DirLen) // Remove trailing separator. 
- --DirLen; - } - } - - if (!LocationLen) { // Relative path. - if (!DirLen) - return "."; - return std::string(".\\").append(FileName, 0, DirLen); - } - - return FileName.substr(0, LocationLen + DirLen); -} - -std::string TmpDir() { - std::string Tmp; - Tmp.resize(MAX_PATH + 1); - DWORD Size = GetTempPathA(Tmp.size(), &Tmp[0]); - if (Size == 0) { - Printf("Couldn't get Tmp path.\n"); - exit(1); - } - Tmp.resize(Size); - return Tmp; -} - -bool IsInterestingCoverageFile(const std::string &FileName) { - if (FileName.find("Program Files") != std::string::npos) - return false; - if (FileName.find("compiler-rt\\lib\\") != std::string::npos) - return false; // sanitizer internal. - if (FileName == "") - return false; - return true; -} - -void RawPrint(const char *Str) { - // Not tested, may or may not work. Fix if needed. - Printf("%s", Str); -} - -} // namespace fuzzer - -#endif // LIBFUZZER_WINDOWS diff --git a/lib/Fuzzer/FuzzerInterface.h b/lib/Fuzzer/FuzzerInterface.h deleted file mode 100644 index c2c0a39843c04..0000000000000 --- a/lib/Fuzzer/FuzzerInterface.h +++ /dev/null @@ -1,67 +0,0 @@ -//===- FuzzerInterface.h - Interface header for the Fuzzer ------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Define the interface between libFuzzer and the library being tested. -//===----------------------------------------------------------------------===// - -// NOTE: the libFuzzer interface is thin and in the majority of cases -// you should not include this file into your target. In 95% of cases -// all you need is to define the following function in your file: -// extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); - -// WARNING: keep the interface in C. - -#ifndef LLVM_FUZZER_INTERFACE_H -#define LLVM_FUZZER_INTERFACE_H - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - -// Mandatory user-provided target function. -// Executes the code under test with [Data, Data+Size) as the input. -// libFuzzer will invoke this function *many* times with different inputs. -// Must return 0. -int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); - -// Optional user-provided initialization function. -// If provided, this function will be called by libFuzzer once at startup. -// It may read and modify argc/argv. -// Must return 0. -int LLVMFuzzerInitialize(int *argc, char ***argv); - -// Optional user-provided custom mutator. -// Mutates raw data in [Data, Data+Size) inplace. -// Returns the new size, which is not greater than MaxSize. -// Given the same Seed produces the same mutation. -size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size, size_t MaxSize, - unsigned int Seed); - -// Optional user-provided custom cross-over function. -// Combines pieces of Data1 & Data2 together into Out. -// Returns the new size, which is not greater than MaxOutSize. -// Should produce the same mutation given the same Seed. -size_t LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, - uint8_t *Out, size_t MaxOutSize, - unsigned int Seed); - -// Experimental, may go away in future. -// libFuzzer-provided function to be used inside LLVMFuzzerCustomMutator. -// Mutates raw data in [Data, Data+Size) inplace. -// Returns the new size, which is not greater than MaxSize. 
-size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize); - -#ifdef __cplusplus -} // extern "C" -#endif // __cplusplus - -#endif // LLVM_FUZZER_INTERFACE_H diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h deleted file mode 100644 index 1d68c01908f0f..0000000000000 --- a/lib/Fuzzer/FuzzerInternal.h +++ /dev/null @@ -1,150 +0,0 @@ -//===- FuzzerInternal.h - Internal header for the Fuzzer --------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Define the main class fuzzer::Fuzzer and most functions. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_INTERNAL_H -#define LLVM_FUZZER_INTERNAL_H - -#include "FuzzerDefs.h" -#include "FuzzerExtFunctions.h" -#include "FuzzerInterface.h" -#include "FuzzerOptions.h" -#include "FuzzerSHA1.h" -#include "FuzzerValueBitMap.h" -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -using namespace std::chrono; - -class Fuzzer { -public: - - Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, - FuzzingOptions Options); - ~Fuzzer(); - void Loop(); - void MinimizeCrashLoop(const Unit &U); - void ShuffleAndMinimize(UnitVector *V); - void RereadOutputCorpus(size_t MaxSize); - - size_t secondsSinceProcessStartUp() { - return duration_cast(system_clock::now() - ProcessStartTime) - .count(); - } - - bool TimedOut() { - return Options.MaxTotalTimeSec > 0 && - secondsSinceProcessStartUp() > - static_cast(Options.MaxTotalTimeSec); - } - - size_t execPerSec() { - size_t Seconds = secondsSinceProcessStartUp(); - return Seconds ? TotalNumberOfRuns / Seconds : 0; - } - - size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; } - - static void StaticAlarmCallback(); - static void StaticCrashSignalCallback(); - static void StaticExitCallback(); - static void StaticInterruptCallback(); - static void StaticFileSizeExceedCallback(); - - void ExecuteCallback(const uint8_t *Data, size_t Size); - bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false, - InputInfo *II = nullptr); - - // Merge Corpora[1:] into Corpora[0]. 
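[Editor's note] As FuzzerInterface.h above says, in the vast majority of cases a target only needs to define LLVMFuzzerTestOneInput. A minimal hypothetical target matching that contract; the build line mirrors the -fsanitize=fuzzer flag added to add_llvm_fuzzer in this patch, and no main() is defined because libFuzzer supplies one:

#include <cstddef>
#include <cstdint>

// Hypothetical target: ignore short inputs, then crash on a specific 3-byte
// prefix so the fuzzer has a bug to find.
// One way to build: clang++ -g -fsanitize=fuzzer target.cpp
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  if (Size < 3) return 0;
  if (Data[0] == 'F' && Data[1] == 'U' && Data[2] == 'Z')
    __builtin_trap(); // simulated bug
  return 0;           // must always return 0
}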
- void Merge(const std::vector &Corpora); - void CrashResistantMerge(const std::vector &Args, - const std::vector &Corpora, - const char *CoverageSummaryInputPathOrNull, - const char *CoverageSummaryOutputPathOrNull); - void CrashResistantMergeInternalStep(const std::string &ControlFilePath); - MutationDispatcher &GetMD() { return MD; } - void PrintFinalStats(); - void SetMaxInputLen(size_t MaxInputLen); - void SetMaxMutationLen(size_t MaxMutationLen); - void RssLimitCallback(); - - bool InFuzzingThread() const { return IsMyThread; } - size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const; - void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, - bool DuringInitialCorpusExecution); - - void HandleMalloc(size_t Size); - void AnnounceOutput(const uint8_t *Data, size_t Size); - -private: - void AlarmCallback(); - void CrashCallback(); - void ExitCallback(); - void CrashOnOverwrittenData(); - void InterruptCallback(); - void MutateAndTestOne(); - void ReportNewCoverage(InputInfo *II, const Unit &U); - void PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size); - void WriteToOutputCorpus(const Unit &U); - void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); - void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0); - void PrintStatusForNewUnit(const Unit &U, const char *Text); - void ShuffleCorpus(UnitVector *V); - void CheckExitOnSrcPosOrItem(); - - static void StaticDeathCallback(); - void DumpCurrentUnit(const char *Prefix); - void DeathCallback(); - - void AllocateCurrentUnitData(); - uint8_t *CurrentUnitData = nullptr; - std::atomic CurrentUnitSize; - uint8_t BaseSha1[kSHA1NumBytes]; // Checksum of the base unit. - bool RunningCB = false; - - size_t TotalNumberOfRuns = 0; - size_t NumberOfNewUnitsAdded = 0; - - size_t LastCorpusUpdateRun = 0; - system_clock::time_point LastCorpusUpdateTime = system_clock::now(); - - - bool HasMoreMallocsThanFrees = false; - size_t NumberOfLeakDetectionAttempts = 0; - - UserCallback CB; - InputCorpus &Corpus; - MutationDispatcher &MD; - FuzzingOptions Options; - - system_clock::time_point ProcessStartTime = system_clock::now(); - system_clock::time_point UnitStartTime, UnitStopTime; - long TimeOfLongestUnitInSeconds = 0; - long EpochOfLastReadOfOutputCorpus = 0; - - size_t MaxInputLen = 0; - size_t MaxMutationLen = 0; - size_t TmpMaxMutationLen = 0; - - std::vector UniqFeatureSetTmp; - - // Need to know our own thread. - static thread_local bool IsMyThread; -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_INTERNAL_H diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp deleted file mode 100644 index 234945932bb4e..0000000000000 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ /dev/null @@ -1,722 +0,0 @@ -//===- FuzzerLoop.cpp - Fuzzer's main loop --------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Fuzzer's main loop. 
-//===----------------------------------------------------------------------===// - -#include "FuzzerCorpus.h" -#include "FuzzerIO.h" -#include "FuzzerInternal.h" -#include "FuzzerMutate.h" -#include "FuzzerRandom.h" -#include "FuzzerShmem.h" -#include "FuzzerTracePC.h" -#include -#include -#include -#include - -#if defined(__has_include) -#if __has_include() -#include -#endif -#endif - -#define NO_SANITIZE_MEMORY -#if defined(__has_feature) -#if __has_feature(memory_sanitizer) -#undef NO_SANITIZE_MEMORY -#define NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory)) -#endif -#endif - -namespace fuzzer { -static const size_t kMaxUnitSizeToPrint = 256; - -thread_local bool Fuzzer::IsMyThread; - -SharedMemoryRegion SMR; - -// Only one Fuzzer per process. -static Fuzzer *F; - -// Leak detection is expensive, so we first check if there were more mallocs -// than frees (using the sanitizer malloc hooks) and only then try to call lsan. -struct MallocFreeTracer { - void Start(int TraceLevel) { - this->TraceLevel = TraceLevel; - if (TraceLevel) - Printf("MallocFreeTracer: START\n"); - Mallocs = 0; - Frees = 0; - } - // Returns true if there were more mallocs than frees. - bool Stop() { - if (TraceLevel) - Printf("MallocFreeTracer: STOP %zd %zd (%s)\n", Mallocs.load(), - Frees.load(), Mallocs == Frees ? "same" : "DIFFERENT"); - bool Result = Mallocs > Frees; - Mallocs = 0; - Frees = 0; - TraceLevel = 0; - return Result; - } - std::atomic Mallocs; - std::atomic Frees; - int TraceLevel = 0; -}; - -static MallocFreeTracer AllocTracer; - -ATTRIBUTE_NO_SANITIZE_MEMORY -void MallocHook(const volatile void *ptr, size_t size) { - size_t N = AllocTracer.Mallocs++; - F->HandleMalloc(size); - if (int TraceLevel = AllocTracer.TraceLevel) { - Printf("MALLOC[%zd] %p %zd\n", N, ptr, size); - if (TraceLevel >= 2 && EF) - EF->__sanitizer_print_stack_trace(); - } -} - -ATTRIBUTE_NO_SANITIZE_MEMORY -void FreeHook(const volatile void *ptr) { - size_t N = AllocTracer.Frees++; - if (int TraceLevel = AllocTracer.TraceLevel) { - Printf("FREE[%zd] %p\n", N, ptr); - if (TraceLevel >= 2 && EF) - EF->__sanitizer_print_stack_trace(); - } -} - -// Crash on a single malloc that exceeds the rss limit. -void Fuzzer::HandleMalloc(size_t Size) { - if (!Options.RssLimitMb || (Size >> 20) < (size_t)Options.RssLimitMb) - return; - Printf("==%d== ERROR: libFuzzer: out-of-memory (malloc(%zd))\n", GetPid(), - Size); - Printf(" To change the out-of-memory limit use -rss_limit_mb=\n\n"); - if (EF->__sanitizer_print_stack_trace) - EF->__sanitizer_print_stack_trace(); - DumpCurrentUnit("oom-"); - Printf("SUMMARY: libFuzzer: out-of-memory\n"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); // Stop right now. 
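[Editor's note] MallocFreeTracer above exists because a full LeakSanitizer pass is expensive: a plain mallocs-greater-than-frees imbalance after one callback run is the cheap trigger that decides whether the costly check is worth it. A self-contained sketch of that heuristic, with the __sanitizer_install_malloc_and_free_hooks wiring replaced by direct calls:

#include <atomic>
#include <cstdio>

static std::atomic<size_t> Mallocs{0}, Frees{0};

// In libFuzzer these run as sanitizer malloc/free hooks; called by hand here.
static void OnMalloc() { Mallocs++; }
static void OnFree()   { Frees++; }

// Returns true if the callback looked leaky (more mallocs than frees),
// i.e. the case where the real code would go on to invoke LSan.
static bool RunAndCheck(void (*Callback)()) {
  Mallocs = 0;
  Frees = 0;
  Callback();
  return Mallocs > Frees;
}

static void LeakyCallback()    { OnMalloc(); }           // alloc, no free
static void BalancedCallback() { OnMalloc(); OnFree(); } // alloc and free

int main() {
  std::printf("leaky: %d\n", RunAndCheck(LeakyCallback));
  std::printf("balanced: %d\n", RunAndCheck(BalancedCallback));
  return 0;
}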
-} - -Fuzzer::Fuzzer(UserCallback CB, InputCorpus &Corpus, MutationDispatcher &MD, - FuzzingOptions Options) - : CB(CB), Corpus(Corpus), MD(MD), Options(Options) { - if (EF->__sanitizer_set_death_callback) - EF->__sanitizer_set_death_callback(StaticDeathCallback); - assert(!F); - F = this; - TPC.ResetMaps(); - IsMyThread = true; - if (Options.DetectLeaks && EF->__sanitizer_install_malloc_and_free_hooks) - EF->__sanitizer_install_malloc_and_free_hooks(MallocHook, FreeHook); - TPC.SetUseCounters(Options.UseCounters); - TPC.SetUseValueProfile(Options.UseValueProfile); - - if (Options.Verbosity) - TPC.PrintModuleInfo(); - if (!Options.OutputCorpus.empty() && Options.ReloadIntervalSec) - EpochOfLastReadOfOutputCorpus = GetEpoch(Options.OutputCorpus); - MaxInputLen = MaxMutationLen = Options.MaxLen; - TmpMaxMutationLen = Max(size_t(4), Corpus.MaxInputSize()); - AllocateCurrentUnitData(); - CurrentUnitSize = 0; - memset(BaseSha1, 0, sizeof(BaseSha1)); -} - -Fuzzer::~Fuzzer() { } - -void Fuzzer::AllocateCurrentUnitData() { - if (CurrentUnitData || MaxInputLen == 0) return; - CurrentUnitData = new uint8_t[MaxInputLen]; -} - -void Fuzzer::StaticDeathCallback() { - assert(F); - F->DeathCallback(); -} - -void Fuzzer::DumpCurrentUnit(const char *Prefix) { - if (!CurrentUnitData) return; // Happens when running individual inputs. - MD.PrintMutationSequence(); - Printf("; base unit: %s\n", Sha1ToString(BaseSha1).c_str()); - size_t UnitSize = CurrentUnitSize; - if (UnitSize <= kMaxUnitSizeToPrint) { - PrintHexArray(CurrentUnitData, UnitSize, "\n"); - PrintASCII(CurrentUnitData, UnitSize, "\n"); - } - WriteUnitToFileWithPrefix({CurrentUnitData, CurrentUnitData + UnitSize}, - Prefix); -} - -NO_SANITIZE_MEMORY -void Fuzzer::DeathCallback() { - DumpCurrentUnit("crash-"); - PrintFinalStats(); -} - -void Fuzzer::StaticAlarmCallback() { - assert(F); - F->AlarmCallback(); -} - -void Fuzzer::StaticCrashSignalCallback() { - assert(F); - F->CrashCallback(); -} - -void Fuzzer::StaticExitCallback() { - assert(F); - F->ExitCallback(); -} - -void Fuzzer::StaticInterruptCallback() { - assert(F); - F->InterruptCallback(); -} - -void Fuzzer::StaticFileSizeExceedCallback() { - Printf("==%lu== ERROR: libFuzzer: file size exceeded\n", GetPid()); - exit(1); -} - -void Fuzzer::CrashCallback() { - Printf("==%lu== ERROR: libFuzzer: deadly signal\n", GetPid()); - if (EF->__sanitizer_print_stack_trace) - EF->__sanitizer_print_stack_trace(); - Printf("NOTE: libFuzzer has rudimentary signal handlers.\n" - " Combine libFuzzer with AddressSanitizer or similar for better " - "crash reports.\n"); - Printf("SUMMARY: libFuzzer: deadly signal\n"); - DumpCurrentUnit("crash-"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); // Stop right now. -} - -void Fuzzer::ExitCallback() { - if (!RunningCB) - return; // This exit did not come from the user callback - Printf("==%lu== ERROR: libFuzzer: fuzz target exited\n", GetPid()); - if (EF->__sanitizer_print_stack_trace) - EF->__sanitizer_print_stack_trace(); - Printf("SUMMARY: libFuzzer: fuzz target exited\n"); - DumpCurrentUnit("crash-"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); -} - - -void Fuzzer::InterruptCallback() { - Printf("==%lu== libFuzzer: run interrupted; exiting\n", GetPid()); - PrintFinalStats(); - _Exit(0); // Stop right now, don't perform any at-exit actions. -} - -NO_SANITIZE_MEMORY -void Fuzzer::AlarmCallback() { - assert(Options.UnitTimeoutSec > 0); - // In Windows Alarm callback is executed by a different thread. 
-#if !LIBFUZZER_WINDOWS - if (!InFuzzingThread()) return; -#endif - if (!RunningCB) - return; // We have not started running units yet. - size_t Seconds = - duration_cast(system_clock::now() - UnitStartTime).count(); - if (Seconds == 0) - return; - if (Options.Verbosity >= 2) - Printf("AlarmCallback %zd\n", Seconds); - if (Seconds >= (size_t)Options.UnitTimeoutSec) { - Printf("ALARM: working on the last Unit for %zd seconds\n", Seconds); - Printf(" and the timeout value is %d (use -timeout=N to change)\n", - Options.UnitTimeoutSec); - DumpCurrentUnit("timeout-"); - Printf("==%lu== ERROR: libFuzzer: timeout after %d seconds\n", GetPid(), - Seconds); - if (EF->__sanitizer_print_stack_trace) - EF->__sanitizer_print_stack_trace(); - Printf("SUMMARY: libFuzzer: timeout\n"); - PrintFinalStats(); - _Exit(Options.TimeoutExitCode); // Stop right now. - } -} - -void Fuzzer::RssLimitCallback() { - Printf( - "==%lu== ERROR: libFuzzer: out-of-memory (used: %zdMb; limit: %zdMb)\n", - GetPid(), GetPeakRSSMb(), Options.RssLimitMb); - Printf(" To change the out-of-memory limit use -rss_limit_mb=\n\n"); - if (EF->__sanitizer_print_memory_profile) - EF->__sanitizer_print_memory_profile(95, 8); - DumpCurrentUnit("oom-"); - Printf("SUMMARY: libFuzzer: out-of-memory\n"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); // Stop right now. -} - -void Fuzzer::PrintStats(const char *Where, const char *End, size_t Units) { - size_t ExecPerSec = execPerSec(); - if (!Options.Verbosity) - return; - Printf("#%zd\t%s", TotalNumberOfRuns, Where); - if (size_t N = TPC.GetTotalPCCoverage()) - Printf(" cov: %zd", N); - if (size_t N = Corpus.NumFeatures()) - Printf( " ft: %zd", N); - if (!Corpus.empty()) { - Printf(" corp: %zd", Corpus.NumActiveUnits()); - if (size_t N = Corpus.SizeInBytes()) { - if (N < (1<<14)) - Printf("/%zdb", N); - else if (N < (1 << 24)) - Printf("/%zdKb", N >> 10); - else - Printf("/%zdMb", N >> 20); - } - } - if (Units) - Printf(" units: %zd", Units); - - Printf(" exec/s: %zd", ExecPerSec); - Printf(" rss: %zdMb", GetPeakRSSMb()); - Printf("%s", End); -} - -void Fuzzer::PrintFinalStats() { - if (Options.PrintCoverage) - TPC.PrintCoverage(); - if (Options.DumpCoverage) - TPC.DumpCoverage(); - if (Options.PrintCorpusStats) - Corpus.PrintStats(); - if (!Options.PrintFinalStats) return; - size_t ExecPerSec = execPerSec(); - Printf("stat::number_of_executed_units: %zd\n", TotalNumberOfRuns); - Printf("stat::average_exec_per_sec: %zd\n", ExecPerSec); - Printf("stat::new_units_added: %zd\n", NumberOfNewUnitsAdded); - Printf("stat::slowest_unit_time_sec: %zd\n", TimeOfLongestUnitInSeconds); - Printf("stat::peak_rss_mb: %zd\n", GetPeakRSSMb()); -} - -void Fuzzer::SetMaxInputLen(size_t MaxInputLen) { - assert(this->MaxInputLen == 0); // Can only reset MaxInputLen from 0 to non-0. 
- assert(MaxInputLen); - this->MaxInputLen = MaxInputLen; - this->MaxMutationLen = MaxInputLen; - AllocateCurrentUnitData(); - Printf("INFO: -max_len is not provided; " - "libFuzzer will not generate inputs larger than %zd bytes\n", - MaxInputLen); -} - -void Fuzzer::SetMaxMutationLen(size_t MaxMutationLen) { - assert(MaxMutationLen && MaxMutationLen <= MaxInputLen); - this->MaxMutationLen = MaxMutationLen; -} - -void Fuzzer::CheckExitOnSrcPosOrItem() { - if (!Options.ExitOnSrcPos.empty()) { - static auto *PCsSet = new std::set; - auto HandlePC = [&](uintptr_t PC) { - if (!PCsSet->insert(PC).second) return; - std::string Descr = DescribePC("%F %L", PC + 1); - if (Descr.find(Options.ExitOnSrcPos) != std::string::npos) { - Printf("INFO: found line matching '%s', exiting.\n", - Options.ExitOnSrcPos.c_str()); - _Exit(0); - } - }; - TPC.ForEachObservedPC(HandlePC); - } - if (!Options.ExitOnItem.empty()) { - if (Corpus.HasUnit(Options.ExitOnItem)) { - Printf("INFO: found item with checksum '%s', exiting.\n", - Options.ExitOnItem.c_str()); - _Exit(0); - } - } -} - -void Fuzzer::RereadOutputCorpus(size_t MaxSize) { - if (Options.OutputCorpus.empty() || !Options.ReloadIntervalSec) return; - std::vector AdditionalCorpus; - ReadDirToVectorOfUnits(Options.OutputCorpus.c_str(), &AdditionalCorpus, - &EpochOfLastReadOfOutputCorpus, MaxSize, - /*ExitOnError*/ false); - if (Options.Verbosity >= 2) - Printf("Reload: read %zd new units.\n", AdditionalCorpus.size()); - bool Reloaded = false; - for (auto &U : AdditionalCorpus) { - if (U.size() > MaxSize) - U.resize(MaxSize); - if (!Corpus.HasUnit(U)) { - if (RunOne(U.data(), U.size())) { - CheckExitOnSrcPosOrItem(); - Reloaded = true; - } - } - } - if (Reloaded) - PrintStats("RELOAD"); -} - -void Fuzzer::ShuffleCorpus(UnitVector *V) { - std::shuffle(V->begin(), V->end(), MD.GetRand()); - if (Options.PreferSmall) - std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) { - return A.size() < B.size(); - }); -} - -void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { - Printf("#0\tREAD units: %zd\n", InitialCorpus->size()); - if (Options.ShuffleAtStartUp) - ShuffleCorpus(InitialCorpus); - - // Test the callback with empty input and never try it again. - uint8_t dummy; - ExecuteCallback(&dummy, 0); - - for (auto &U : *InitialCorpus) { - RunOne(U.data(), U.size()); - CheckExitOnSrcPosOrItem(); - TryDetectingAMemoryLeak(U.data(), U.size(), - /*DuringInitialCorpusExecution*/ true); - U.clear(); - } - PrintStats("INITED"); - if (Corpus.empty()) { - Printf("ERROR: no interesting inputs were found. " - "Is the code instrumented for coverage? 
Exiting.\n"); - exit(1); - } -} - -void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) { - auto TimeOfUnit = - duration_cast(UnitStopTime - UnitStartTime).count(); - if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && - secondsSinceProcessStartUp() >= 2) - PrintStats("pulse "); - if (TimeOfUnit > TimeOfLongestUnitInSeconds * 1.1 && - TimeOfUnit >= Options.ReportSlowUnits) { - TimeOfLongestUnitInSeconds = TimeOfUnit; - Printf("Slowest unit: %zd s:\n", TimeOfLongestUnitInSeconds); - WriteUnitToFileWithPrefix({Data, Data + Size}, "slow-unit-"); - } -} - -bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, - InputInfo *II) { - if (!Size) return false; - - ExecuteCallback(Data, Size); - - UniqFeatureSetTmp.clear(); - size_t FoundUniqFeaturesOfII = 0; - size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); - TPC.CollectFeatures([&](size_t Feature) { - if (Corpus.AddFeature(Feature, Size, Options.Shrink)) - UniqFeatureSetTmp.push_back(Feature); - if (Options.ReduceInputs && II) - if (std::binary_search(II->UniqFeatureSet.begin(), - II->UniqFeatureSet.end(), Feature)) - FoundUniqFeaturesOfII++; - }); - PrintPulseAndReportSlowInput(Data, Size); - size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore; - if (NumNewFeatures) { - TPC.UpdateObservedPCs(); - Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile, - UniqFeatureSetTmp); - return true; - } - if (II && FoundUniqFeaturesOfII && - FoundUniqFeaturesOfII == II->UniqFeatureSet.size() && - II->U.size() > Size) { - Corpus.Replace(II, {Data, Data + Size}); - return true; - } - return false; -} - -size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const { - assert(InFuzzingThread()); - *Data = CurrentUnitData; - return CurrentUnitSize; -} - -void Fuzzer::CrashOnOverwrittenData() { - Printf("==%d== ERROR: libFuzzer: fuzz target overwrites it's const input\n", - GetPid()); - DumpCurrentUnit("crash-"); - Printf("SUMMARY: libFuzzer: out-of-memory\n"); - _Exit(Options.ErrorExitCode); // Stop right now. -} - -// Compare two arrays, but not all bytes if the arrays are large. -static bool LooseMemeq(const uint8_t *A, const uint8_t *B, size_t Size) { - const size_t Limit = 64; - if (Size <= 64) - return !memcmp(A, B, Size); - // Compare first and last Limit/2 bytes. - return !memcmp(A, B, Limit / 2) && - !memcmp(A + Size - Limit / 2, B + Size - Limit / 2, Limit / 2); -} - -void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) { - TPC.RecordInitialStack(); - TotalNumberOfRuns++; - assert(InFuzzingThread()); - if (SMR.IsClient()) - SMR.WriteByteArray(Data, Size); - // We copy the contents of Unit into a separate heap buffer - // so that we reliably find buffer overflows in it. 
- uint8_t *DataCopy = new uint8_t[Size]; - memcpy(DataCopy, Data, Size); - if (CurrentUnitData && CurrentUnitData != Data) - memcpy(CurrentUnitData, Data, Size); - CurrentUnitSize = Size; - AllocTracer.Start(Options.TraceMalloc); - UnitStartTime = system_clock::now(); - TPC.ResetMaps(); - RunningCB = true; - int Res = CB(DataCopy, Size); - RunningCB = false; - UnitStopTime = system_clock::now(); - (void)Res; - assert(Res == 0); - HasMoreMallocsThanFrees = AllocTracer.Stop(); - if (!LooseMemeq(DataCopy, Data, Size)) - CrashOnOverwrittenData(); - CurrentUnitSize = 0; - delete[] DataCopy; -} - -void Fuzzer::WriteToOutputCorpus(const Unit &U) { - if (Options.OnlyASCII) - assert(IsASCII(U)); - if (Options.OutputCorpus.empty()) - return; - std::string Path = DirPlusFile(Options.OutputCorpus, Hash(U)); - WriteToFile(U, Path); - if (Options.Verbosity >= 2) - Printf("Written %zd bytes to %s\n", U.size(), Path.c_str()); -} - -void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) { - if (!Options.SaveArtifacts) - return; - std::string Path = Options.ArtifactPrefix + Prefix + Hash(U); - if (!Options.ExactArtifactPath.empty()) - Path = Options.ExactArtifactPath; // Overrides ArtifactPrefix. - WriteToFile(U, Path); - Printf("artifact_prefix='%s'; Test unit written to %s\n", - Options.ArtifactPrefix.c_str(), Path.c_str()); - if (U.size() <= kMaxUnitSizeToPrint) - Printf("Base64: %s\n", Base64(U).c_str()); -} - -void Fuzzer::PrintStatusForNewUnit(const Unit &U, const char *Text) { - if (!Options.PrintNEW) - return; - PrintStats(Text, ""); - if (Options.Verbosity) { - Printf(" L: %zd/%zd ", U.size(), Corpus.MaxInputSize()); - MD.PrintMutationSequence(); - Printf("\n"); - } -} - -void Fuzzer::ReportNewCoverage(InputInfo *II, const Unit &U) { - II->NumSuccessfullMutations++; - MD.RecordSuccessfulMutationSequence(); - PrintStatusForNewUnit(U, II->Reduced ? "REDUCE" : - "NEW "); - WriteToOutputCorpus(U); - NumberOfNewUnitsAdded++; - CheckExitOnSrcPosOrItem(); // Check only after the unit is saved to corpus. - LastCorpusUpdateRun = TotalNumberOfRuns; - LastCorpusUpdateTime = system_clock::now(); -} - -// Tries detecting a memory leak on the particular input that we have just -// executed before calling this function. -void Fuzzer::TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, - bool DuringInitialCorpusExecution) { - if (!HasMoreMallocsThanFrees) return; // mallocs==frees, a leak is unlikely. - if (!Options.DetectLeaks) return; - if (!&(EF->__lsan_enable) || !&(EF->__lsan_disable) || - !(EF->__lsan_do_recoverable_leak_check)) - return; // No lsan. - // Run the target once again, but with lsan disabled so that if there is - // a real leak we do not report it twice. - EF->__lsan_disable(); - ExecuteCallback(Data, Size); - EF->__lsan_enable(); - if (!HasMoreMallocsThanFrees) return; // a leak is unlikely. - if (NumberOfLeakDetectionAttempts++ > 1000) { - Options.DetectLeaks = false; - Printf("INFO: libFuzzer disabled leak detection after every mutation.\n" - " Most likely the target function accumulates allocated\n" - " memory in a global state w/o actually leaking it.\n" - " You may try running this binary with -trace_malloc=[12]" - " to get a trace of mallocs and frees.\n" - " If LeakSanitizer is enabled in this process it will still\n" - " run on the process shutdown.\n"); - return; - } - // Now perform the actual lsan pass. This is expensive and we must ensure - // we don't call it too often. - if (EF->__lsan_do_recoverable_leak_check()) { // Leak is found, report it. 
- if (DuringInitialCorpusExecution) - Printf("\nINFO: a leak has been found in the initial corpus.\n\n"); - Printf("INFO: to ignore leaks on libFuzzer side use -detect_leaks=0.\n\n"); - CurrentUnitSize = Size; - DumpCurrentUnit("leak-"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); // not exit() to disable lsan further on. - } -} - -void Fuzzer::MutateAndTestOne() { - MD.StartMutationSequence(); - - auto &II = Corpus.ChooseUnitToMutate(MD.GetRand()); - const auto &U = II.U; - memcpy(BaseSha1, II.Sha1, sizeof(BaseSha1)); - assert(CurrentUnitData); - size_t Size = U.size(); - assert(Size <= MaxInputLen && "Oversized Unit"); - memcpy(CurrentUnitData, U.data(), Size); - - assert(MaxMutationLen > 0); - - size_t CurrentMaxMutationLen = - Min(MaxMutationLen, Max(U.size(), TmpMaxMutationLen)); - assert(CurrentMaxMutationLen > 0); - - for (int i = 0; i < Options.MutateDepth; i++) { - if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) - break; - size_t NewSize = 0; - NewSize = MD.Mutate(CurrentUnitData, Size, CurrentMaxMutationLen); - assert(NewSize > 0 && "Mutator returned empty unit"); - assert(NewSize <= CurrentMaxMutationLen && "Mutator returned oversized unit"); - Size = NewSize; - II.NumExecutedMutations++; - if (RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II)) - ReportNewCoverage(&II, {CurrentUnitData, CurrentUnitData + Size}); - - TryDetectingAMemoryLeak(CurrentUnitData, Size, - /*DuringInitialCorpusExecution*/ false); - } -} - -void Fuzzer::Loop() { - TPC.SetPrintNewPCs(Options.PrintNewCovPcs); - system_clock::time_point LastCorpusReload = system_clock::now(); - if (Options.DoCrossOver) - MD.SetCorpus(&Corpus); - while (true) { - auto Now = system_clock::now(); - if (duration_cast<seconds>(Now - LastCorpusReload).count() >= - Options.ReloadIntervalSec) { - RereadOutputCorpus(MaxInputLen); - LastCorpusReload = system_clock::now(); - } - if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) - break; - if (TimedOut()) break; - - // Update TmpMaxMutationLen. - if (Options.ExperimentalLenControl) { - if (TmpMaxMutationLen < MaxMutationLen && - (TotalNumberOfRuns - LastCorpusUpdateRun > 1000 && - duration_cast<seconds>(Now - LastCorpusUpdateTime).count() >= 1)) { - LastCorpusUpdateRun = TotalNumberOfRuns; - LastCorpusUpdateTime = Now; - TmpMaxMutationLen = - Min(MaxMutationLen, - TmpMaxMutationLen + Max(size_t(4), TmpMaxMutationLen / 8)); - if (TmpMaxMutationLen <= MaxMutationLen) - Printf("#%zd\tTEMP_MAX_LEN: %zd\n", TotalNumberOfRuns, - TmpMaxMutationLen); - } - } else { - TmpMaxMutationLen = MaxMutationLen; - } - - // Perform several mutations and runs.
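// A worked illustration of the length schedule above (editorial, not in the
// original source): the cap grows by Max(4, TmpMaxMutationLen / 8) at most
// once per second, and only after 1000 runs without a corpus update, e.g.
// 4 -> 8 -> ... -> 36 -> 40 -> 45 -> 50 -> 56; that is, additive +4 while the
// cap is small, then roughly +12.5% per step, never exceeding MaxMutationLen.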
- MutateAndTestOne(); - } - - PrintStats("DONE ", "\n"); - MD.PrintRecommendedDictionary(); -} - -void Fuzzer::MinimizeCrashLoop(const Unit &U) { - if (U.size() <= 1) return; - while (!TimedOut() && TotalNumberOfRuns < Options.MaxNumberOfRuns) { - MD.StartMutationSequence(); - memcpy(CurrentUnitData, U.data(), U.size()); - for (int i = 0; i < Options.MutateDepth; i++) { - size_t NewSize = MD.Mutate(CurrentUnitData, U.size(), MaxMutationLen); - assert(NewSize > 0 && NewSize <= MaxMutationLen); - ExecuteCallback(CurrentUnitData, NewSize); - PrintPulseAndReportSlowInput(CurrentUnitData, NewSize); - TryDetectingAMemoryLeak(CurrentUnitData, NewSize, - /*DuringInitialCorpusExecution*/ false); - } - } -} - -void Fuzzer::AnnounceOutput(const uint8_t *Data, size_t Size) { - if (SMR.IsServer()) { - SMR.WriteByteArray(Data, Size); - } else if (SMR.IsClient()) { - SMR.PostClient(); - SMR.WaitServer(); - size_t OtherSize = SMR.ReadByteArraySize(); - uint8_t *OtherData = SMR.GetByteArray(); - if (Size != OtherSize || memcmp(Data, OtherData, Size) != 0) { - size_t i = 0; - for (i = 0; i < Min(Size, OtherSize); i++) - if (Data[i] != OtherData[i]) - break; - Printf("==%lu== ERROR: libFuzzer: equivalence-mismatch. Sizes: %zd %zd; " - "offset %zd\n", GetPid(), Size, OtherSize, i); - DumpCurrentUnit("mismatch-"); - Printf("SUMMARY: libFuzzer: equivalence-mismatch\n"); - PrintFinalStats(); - _Exit(Options.ErrorExitCode); - } - } -} - -} // namespace fuzzer - -extern "C" { - -size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) { - assert(fuzzer::F); - return fuzzer::F->GetMD().DefaultMutate(Data, Size, MaxSize); -} - -// Experimental -void LLVMFuzzerAnnounceOutput(const uint8_t *Data, size_t Size) { - assert(fuzzer::F); - fuzzer::F->AnnounceOutput(Data, Size); -} -} // extern "C" diff --git a/lib/Fuzzer/FuzzerMain.cpp b/lib/Fuzzer/FuzzerMain.cpp deleted file mode 100644 index af8657200be29..0000000000000 --- a/lib/Fuzzer/FuzzerMain.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===- FuzzerMain.cpp - main() function and flags -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// main() and flags. -//===----------------------------------------------------------------------===// - -#include "FuzzerDefs.h" - -extern "C" { -// This function should be defined by the user. -int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); -} // extern "C" - -int main(int argc, char **argv) { - return fuzzer::FuzzerDriver(&argc, &argv, LLVMFuzzerTestOneInput); -} diff --git a/lib/Fuzzer/FuzzerMerge.cpp b/lib/Fuzzer/FuzzerMerge.cpp deleted file mode 100644 index 616c0999aa39d..0000000000000 --- a/lib/Fuzzer/FuzzerMerge.cpp +++ /dev/null @@ -1,338 +0,0 @@ -//===- FuzzerMerge.cpp - merging corpora ----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Merging corpora. 
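// (For context, a hedged note: this machinery backs libFuzzer's -merge=1
// mode, e.g. "./my_fuzzer -merge=1 CORPUS_DIR NEW_INPUTS_DIR", which ends up
// in CrashResistantMerge() below.)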
-//===----------------------------------------------------------------------===// - -#include "FuzzerMerge.h" -#include "FuzzerIO.h" -#include "FuzzerInternal.h" -#include "FuzzerTracePC.h" -#include "FuzzerUtil.h" - -#include <fstream> -#include <iterator> -#include <set> -#include <sstream> - -namespace fuzzer { - -bool Merger::Parse(const std::string &Str, bool ParseCoverage) { - std::istringstream SS(Str); - return Parse(SS, ParseCoverage); -} - -void Merger::ParseOrExit(std::istream &IS, bool ParseCoverage) { - if (!Parse(IS, ParseCoverage)) { - Printf("MERGE: failed to parse the control file (unexpected error)\n"); - exit(1); - } -} - -// The control file example: -// -// 3 # The number of inputs -// 1 # The number of inputs in the first corpus, <= the previous number -// file0 -// file1 -// file2 # One file name per line. -// STARTED 0 123 # FileID, file size -// DONE 0 1 4 6 8 # FileID COV1 COV2 ... -// STARTED 1 456 # If DONE is missing, the input crashed while processing. -// STARTED 2 567 -// DONE 2 8 9 -bool Merger::Parse(std::istream &IS, bool ParseCoverage) { - LastFailure.clear(); - std::string Line; - - // Parse NumFiles. - if (!std::getline(IS, Line, '\n')) return false; - std::istringstream L1(Line); - size_t NumFiles = 0; - L1 >> NumFiles; - if (NumFiles == 0 || NumFiles > 10000000) return false; - - // Parse NumFilesInFirstCorpus. - if (!std::getline(IS, Line, '\n')) return false; - std::istringstream L2(Line); - NumFilesInFirstCorpus = NumFiles + 1; - L2 >> NumFilesInFirstCorpus; - if (NumFilesInFirstCorpus > NumFiles) return false; - - // Parse file names. - Files.resize(NumFiles); - for (size_t i = 0; i < NumFiles; i++) - if (!std::getline(IS, Files[i].Name, '\n')) - return false; - - // Parse STARTED and DONE lines. - size_t ExpectedStartMarker = 0; - const size_t kInvalidStartMarker = -1; - size_t LastSeenStartMarker = kInvalidStartMarker; - std::vector<uint32_t> TmpFeatures; - while (std::getline(IS, Line, '\n')) { - std::istringstream ISS1(Line); - std::string Marker; - size_t N; - ISS1 >> Marker; - ISS1 >> N; - if (Marker == "STARTED") { - // STARTED FILE_ID FILE_SIZE - if (ExpectedStartMarker != N) - return false; - ISS1 >> Files[ExpectedStartMarker].Size; - LastSeenStartMarker = ExpectedStartMarker; - assert(ExpectedStartMarker < Files.size()); - ExpectedStartMarker++; - } else if (Marker == "DONE") { - // DONE FILE_ID COV1 COV2 COV3 ... - size_t CurrentFileIdx = N; - if (CurrentFileIdx != LastSeenStartMarker) - return false; - LastSeenStartMarker = kInvalidStartMarker; - if (ParseCoverage) { - TmpFeatures.clear(); // use a vector from outer scope to avoid resizes. - while (ISS1 >> std::hex >> N) - TmpFeatures.push_back(N); - std::sort(TmpFeatures.begin(), TmpFeatures.end()); - Files[CurrentFileIdx].Features = TmpFeatures; - } - } else { - return false; - } - } - if (LastSeenStartMarker != kInvalidStartMarker) - LastFailure = Files[LastSeenStartMarker].Name; - - FirstNotProcessedFile = ExpectedStartMarker; - return true; -} - -size_t Merger::ApproximateMemoryConsumption() const { - size_t Res = 0; - for (const auto &F: Files) - Res += sizeof(F) + F.Features.size() * sizeof(F.Features[0]); - return Res; -} - -// Decides which files need to be merged (adds those to NewFiles). -// Returns the number of new features added. -size_t Merger::Merge(const std::set<uint32_t> &InitialFeatures, - std::vector<std::string> *NewFiles) { - NewFiles->clear(); - assert(NumFilesInFirstCorpus <= Files.size()); - std::set<uint32_t> AllFeatures(InitialFeatures); - - // What features are in the initial corpus?
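// A hedged usage sketch of the parser above, with made-up values:
//   Merger M;
//   M.Parse("2\n1\na\nb\n"
//           "STARTED 0 3\nDONE 0 1\n"
//           "STARTED 1 3\nDONE 1 2\n", /*ParseCoverage=*/true);
// after which M.Files[1].Features == {2} and M.FirstNotProcessedFile == 2.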
- for (size_t i = 0; i < NumFilesInFirstCorpus; i++) { - auto &Cur = Files[i].Features; - AllFeatures.insert(Cur.begin(), Cur.end()); - } - size_t InitialNumFeatures = AllFeatures.size(); - - // Remove all features that we already know from all other inputs. - for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { - auto &Cur = Files[i].Features; - std::vector Tmp; - std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(), - AllFeatures.end(), std::inserter(Tmp, Tmp.begin())); - Cur.swap(Tmp); - } - - // Sort. Give preference to - // * smaller files - // * files with more features. - std::sort(Files.begin() + NumFilesInFirstCorpus, Files.end(), - [&](const MergeFileInfo &a, const MergeFileInfo &b) -> bool { - if (a.Size != b.Size) - return a.Size < b.Size; - return a.Features.size() > b.Features.size(); - }); - - // One greedy pass: add the file's features to AllFeatures. - // If new features were added, add this file to NewFiles. - for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) { - auto &Cur = Files[i].Features; - // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(), - // Files[i].Size, Cur.size()); - size_t OldSize = AllFeatures.size(); - AllFeatures.insert(Cur.begin(), Cur.end()); - if (AllFeatures.size() > OldSize) - NewFiles->push_back(Files[i].Name); - } - return AllFeatures.size() - InitialNumFeatures; -} - -void Merger::PrintSummary(std::ostream &OS) { - for (auto &File : Files) { - OS << std::hex; - OS << File.Name << " size: " << File.Size << " features: "; - for (auto Feature : File.Features) - OS << " " << Feature; - OS << "\n"; - } -} - -std::set Merger::AllFeatures() const { - std::set S; - for (auto &File : Files) - S.insert(File.Features.begin(), File.Features.end()); - return S; -} - -std::set Merger::ParseSummary(std::istream &IS) { - std::string Line, Tmp; - std::set Res; - while (std::getline(IS, Line, '\n')) { - size_t N; - std::istringstream ISS1(Line); - ISS1 >> Tmp; // Name - ISS1 >> Tmp; // size: - assert(Tmp == "size:" && "Corrupt summary file"); - ISS1 >> std::hex; - ISS1 >> N; // File Size - ISS1 >> Tmp; // features: - assert(Tmp == "features:" && "Corrupt summary file"); - while (ISS1 >> std::hex >> N) - Res.insert(N); - } - return Res; -} - -// Inner process. May crash if the target crashes. -void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) { - Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str()); - Merger M; - std::ifstream IF(CFPath); - M.ParseOrExit(IF, false); - IF.close(); - if (!M.LastFailure.empty()) - Printf("MERGE-INNER: '%s' caused a failure at the previous merge step\n", - M.LastFailure.c_str()); - - Printf("MERGE-INNER: %zd total files;" - " %zd processed earlier; will process %zd files now\n", - M.Files.size(), M.FirstNotProcessedFile, - M.Files.size() - M.FirstNotProcessedFile); - - std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app); - for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) { - auto U = FileToVector(M.Files[i].Name); - if (U.size() > MaxInputLen) { - U.resize(MaxInputLen); - U.shrink_to_fit(); - } - std::ostringstream StartedLine; - // Write the pre-run marker. - OF << "STARTED " << std::dec << i << " " << U.size() << "\n"; - OF.flush(); // Flush is important since ExecuteCommand may crash. - // Run. - TPC.ResetMaps(); - ExecuteCallback(U.data(), U.size()); - // Collect coverage. - std::set Features; - TPC.CollectFeatures([&](size_t Feature) -> bool { - Features.insert(Feature); - return true; - }); - // Show stats. 
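// A hedged example of the crash resistance this buys: if the target dies on
// file 2, the control file ends with "STARTED 2 567" and no matching DONE, so
// the next inner invocation sees file 2 in M.LastFailure and resumes at
// FirstNotProcessedFile == 3.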
- if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1))) - PrintStats("pulse "); - // Write the post-run marker and the coverage. - OF << "DONE " << i; - for (size_t F : Features) - OF << " " << std::hex << F; - OF << "\n"; - } -} - -// Outer process. Does not call the target code and thus should not fail. -void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args, - const std::vector<std::string> &Corpora, - const char *CoverageSummaryInputPathOrNull, - const char *CoverageSummaryOutputPathOrNull) { - if (Corpora.size() <= 1) { - Printf("Merge requires two or more corpus dirs\n"); - return; - } - std::vector<std::string> AllFiles; - ListFilesInDirRecursive(Corpora[0], nullptr, &AllFiles, /*TopDir*/true); - size_t NumFilesInFirstCorpus = AllFiles.size(); - for (size_t i = 1; i < Corpora.size(); i++) - ListFilesInDirRecursive(Corpora[i], nullptr, &AllFiles, /*TopDir*/true); - Printf("MERGE-OUTER: %zd files, %zd in the initial corpus\n", - AllFiles.size(), NumFilesInFirstCorpus); - auto CFPath = DirPlusFile(TmpDir(), - "libFuzzerTemp." + std::to_string(GetPid()) + ".txt"); - // Write the control file. - RemoveFile(CFPath); - std::ofstream ControlFile(CFPath); - ControlFile << AllFiles.size() << "\n"; - ControlFile << NumFilesInFirstCorpus << "\n"; - for (auto &Path: AllFiles) - ControlFile << Path << "\n"; - if (!ControlFile) { - Printf("MERGE-OUTER: failed to write to the control file: %s\n", - CFPath.c_str()); - exit(1); - } - ControlFile.close(); - - // Execute the inner process until it passes. - // Every inner process should execute at least one input. - auto BaseCmd = SplitBefore("-ignore_remaining_args=1", - CloneArgsWithoutX(Args, "keep-all-flags")); - bool Success = false; - for (size_t i = 1; i <= AllFiles.size(); i++) { - Printf("MERGE-OUTER: attempt %zd\n", i); - auto ExitCode = ExecuteCommand(BaseCmd.first + " -merge_control_file=" + - CFPath + " " + BaseCmd.second); - if (!ExitCode) { - Printf("MERGE-OUTER: successful in %zd attempt(s)\n", i); - Success = true; - break; - } - } - if (!Success) { - Printf("MERGE-OUTER: zero successful attempts, exiting\n"); - exit(1); - } - // Read the control file and do the merge. - Merger M; - std::ifstream IF(CFPath); - IF.seekg(0, IF.end); - Printf("MERGE-OUTER: the control file has %zd bytes\n", (size_t)IF.tellg()); - IF.seekg(0, IF.beg); - M.ParseOrExit(IF, true); - IF.close(); - Printf("MERGE-OUTER: consumed %zdMb (%zdMb rss) to parse the control file\n", - M.ApproximateMemoryConsumption() >> 20, GetPeakRSSMb()); - if (CoverageSummaryOutputPathOrNull) { - Printf("MERGE-OUTER: writing coverage summary for %zd files to %s\n", - M.Files.size(), CoverageSummaryOutputPathOrNull); - std::ofstream SummaryOut(CoverageSummaryOutputPathOrNull); - M.PrintSummary(SummaryOut); - } - std::vector<std::string> NewFiles; - std::set<uint32_t> InitialFeatures; - if (CoverageSummaryInputPathOrNull) { - std::ifstream SummaryIn(CoverageSummaryInputPathOrNull); - InitialFeatures = M.ParseSummary(SummaryIn); - Printf("MERGE-OUTER: coverage summary loaded from %s, %zd features found\n", - CoverageSummaryInputPathOrNull, InitialFeatures.size()); - } - size_t NumNewFeatures = M.Merge(InitialFeatures, &NewFiles); - Printf("MERGE-OUTER: %zd new files with %zd new features added\n", - NewFiles.size(), NumNewFeatures); - for (auto &F: NewFiles) - WriteToOutputCorpus(FileToVector(F)); - // We are done, delete the control file.
- RemoveFile(CFPath); -} - -} // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerMerge.h b/lib/Fuzzer/FuzzerMerge.h deleted file mode 100644 index dd4c37b6e39c7..0000000000000 --- a/lib/Fuzzer/FuzzerMerge.h +++ /dev/null @@ -1,80 +0,0 @@ -//===- FuzzerMerge.h - merging corpora --------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Merging Corpora. -// -// The task: -// Take the existing corpus (possibly empty) and merge new inputs into -// it so that only inputs with new coverage ('features') are added. -// The process should tolerate crashes, OOMs, leaks, etc. -// -// Algorithm: -// The outer process collects the set of files and writes their names -// into a temporary "control" file, then repeatedly launches the inner -// process until all inputs are processed. -// The outer process does not actually execute the target code. -// -// The inner process reads the control file and sees a) the list of all the -// inputs and b) the last processed input. Then it starts processing the inputs -// one by one. Before processing every input it writes one line to the control -// file: -// STARTED INPUT_ID INPUT_SIZE -// After processing an input it writes another line: -// DONE INPUT_ID Feature1 Feature2 Feature3 ... -// If a crash happens while processing an input the last line in the control -// file will be "STARTED INPUT_ID" and so the next process will know -// where to resume. -// -// Once all inputs are processed by the inner process(es) the outer process -// reads the control file and does the merge based entirely on its contents. -// It uses a single-pass greedy algorithm, choosing first the smallest inputs -// and, within the same size, the inputs that have more new features. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_MERGE_H -#define LLVM_FUZZER_MERGE_H - -#include "FuzzerDefs.h" - -#include <istream> -#include <ostream> -#include <set> -#include <vector> - -namespace fuzzer { - -struct MergeFileInfo { - std::string Name; - size_t Size = 0; - std::vector<uint32_t> Features; -}; - -struct Merger { - std::vector<MergeFileInfo> Files; - size_t NumFilesInFirstCorpus = 0; - size_t FirstNotProcessedFile = 0; - std::string LastFailure; - - bool Parse(std::istream &IS, bool ParseCoverage); - bool Parse(const std::string &Str, bool ParseCoverage); - void ParseOrExit(std::istream &IS, bool ParseCoverage); - void PrintSummary(std::ostream &OS); - std::set<uint32_t> ParseSummary(std::istream &IS); - size_t Merge(const std::set<uint32_t> &InitialFeatures, - std::vector<std::string> *NewFiles); - size_t Merge(std::vector<std::string> *NewFiles) { - return Merge(std::set<uint32_t>{}, NewFiles); - } - size_t ApproximateMemoryConsumption() const; - std::set<uint32_t> AllFeatures() const; -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_MERGE_H diff --git a/lib/Fuzzer/FuzzerMutate.cpp b/lib/Fuzzer/FuzzerMutate.cpp deleted file mode 100644 index 5998ef9d3193d..0000000000000 --- a/lib/Fuzzer/FuzzerMutate.cpp +++ /dev/null @@ -1,533 +0,0 @@ -//===- FuzzerMutate.cpp - Mutate a test input -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Mutate a test input.
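// (A hedged note on the optional user hooks consulted below through EF; their
// C signatures, per libFuzzer's public interface, are:
//   size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size,
//                                  size_t MaxSize, unsigned int Seed);
//   size_t LLVMFuzzerCustomCrossOver(const uint8_t *Data1, size_t Size1,
//                                    const uint8_t *Data2, size_t Size2,
//                                    uint8_t *Out, size_t MaxOutSize,
//                                    unsigned int Seed);
// defining a custom mutator replaces the default mutator list in the
// constructor below, while a custom crossover is appended to it.)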
-//===----------------------------------------------------------------------===// - -#include "FuzzerMutate.h" -#include "FuzzerCorpus.h" -#include "FuzzerDefs.h" -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include "FuzzerOptions.h" - -namespace fuzzer { - -const size_t Dictionary::kMaxDictSize; - -static void PrintASCII(const Word &W, const char *PrintAfter) { - PrintASCII(W.data(), W.size(), PrintAfter); -} - -MutationDispatcher::MutationDispatcher(Random &Rand, - const FuzzingOptions &Options) - : Rand(Rand), Options(Options) { - DefaultMutators.insert( - DefaultMutators.begin(), - { - {&MutationDispatcher::Mutate_EraseBytes, "EraseBytes"}, - {&MutationDispatcher::Mutate_InsertByte, "InsertByte"}, - {&MutationDispatcher::Mutate_InsertRepeatedBytes, - "InsertRepeatedBytes"}, - {&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"}, - {&MutationDispatcher::Mutate_ChangeBit, "ChangeBit"}, - {&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"}, - {&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"}, - {&MutationDispatcher::Mutate_ChangeBinaryInteger, "ChangeBinInt"}, - {&MutationDispatcher::Mutate_CopyPart, "CopyPart"}, - {&MutationDispatcher::Mutate_CrossOver, "CrossOver"}, - {&MutationDispatcher::Mutate_AddWordFromManualDictionary, - "ManualDict"}, - {&MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary, - "PersAutoDict"}, - }); - if(Options.UseCmp) - DefaultMutators.push_back( - {&MutationDispatcher::Mutate_AddWordFromTORC, "CMP"}); - - if (EF->LLVMFuzzerCustomMutator) - Mutators.push_back({&MutationDispatcher::Mutate_Custom, "Custom"}); - else - Mutators = DefaultMutators; - - if (EF->LLVMFuzzerCustomCrossOver) - Mutators.push_back( - {&MutationDispatcher::Mutate_CustomCrossOver, "CustomCrossOver"}); -} - -static char RandCh(Random &Rand) { - if (Rand.RandBool()) return Rand(256); - const char *Special = "!*'();:@&=+$,/?%#[]012Az-`~.\xff\x00"; - return Special[Rand(sizeof(Special) - 1)]; -} - -size_t MutationDispatcher::Mutate_Custom(uint8_t *Data, size_t Size, - size_t MaxSize) { - return EF->LLVMFuzzerCustomMutator(Data, Size, MaxSize, Rand.Rand()); -} - -size_t MutationDispatcher::Mutate_CustomCrossOver(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (!Corpus || Corpus->size() < 2 || Size == 0) - return 0; - size_t Idx = Rand(Corpus->size()); - const Unit &Other = (*Corpus)[Idx]; - if (Other.empty()) - return 0; - CustomCrossOverInPlaceHere.resize(MaxSize); - auto &U = CustomCrossOverInPlaceHere; - size_t NewSize = EF->LLVMFuzzerCustomCrossOver( - Data, Size, Other.data(), Other.size(), U.data(), U.size(), Rand.Rand()); - if (!NewSize) - return 0; - assert(NewSize <= MaxSize && "CustomCrossOver returned overisized unit"); - memcpy(Data, U.data(), NewSize); - return NewSize; -} - -size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize || Size == 0) return 0; - size_t ShuffleAmount = - Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size. - size_t ShuffleStart = Rand(Size - ShuffleAmount); - assert(ShuffleStart + ShuffleAmount <= Size); - std::shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount, Rand); - return Size; -} - -size_t MutationDispatcher::Mutate_EraseBytes(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size <= 1) return 0; - size_t N = Rand(Size / 2) + 1; - assert(N < Size); - size_t Idx = Rand(Size - N + 1); - // Erase Data[Idx:Idx+N]. 
- memmove(Data + Idx, Data + Idx + N, Size - Idx - N); - // Printf("Erase: %zd %zd => %zd; Idx %zd\n", N, Size, Size - N, Idx); - return Size - N; -} - -size_t MutationDispatcher::Mutate_InsertByte(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size >= MaxSize) return 0; - size_t Idx = Rand(Size + 1); - // Insert new value at Data[Idx]. - memmove(Data + Idx + 1, Data + Idx, Size - Idx); - Data[Idx] = RandCh(Rand); - return Size + 1; -} - -size_t MutationDispatcher::Mutate_InsertRepeatedBytes(uint8_t *Data, - size_t Size, - size_t MaxSize) { - const size_t kMinBytesToInsert = 3; - if (Size + kMinBytesToInsert >= MaxSize) return 0; - size_t MaxBytesToInsert = std::min(MaxSize - Size, (size_t)128); - size_t N = Rand(MaxBytesToInsert - kMinBytesToInsert + 1) + kMinBytesToInsert; - assert(Size + N <= MaxSize && N); - size_t Idx = Rand(Size + 1); - // Insert new values at Data[Idx]. - memmove(Data + Idx + N, Data + Idx, Size - Idx); - // Give preference to 0x00 and 0xff. - uint8_t Byte = Rand.RandBool() ? Rand(256) : (Rand.RandBool() ? 0 : 255); - for (size_t i = 0; i < N; i++) - Data[Idx + i] = Byte; - return Size + N; -} - -size_t MutationDispatcher::Mutate_ChangeByte(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - size_t Idx = Rand(Size); - Data[Idx] = RandCh(Rand); - return Size; -} - -size_t MutationDispatcher::Mutate_ChangeBit(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - size_t Idx = Rand(Size); - Data[Idx] ^= 1 << Rand(8); - return Size; -} - -size_t MutationDispatcher::Mutate_AddWordFromManualDictionary(uint8_t *Data, - size_t Size, - size_t MaxSize) { - return AddWordFromDictionary(ManualDictionary, Data, Size, MaxSize); -} - -size_t MutationDispatcher::ApplyDictionaryEntry(uint8_t *Data, size_t Size, - size_t MaxSize, - DictionaryEntry &DE) { - const Word &W = DE.GetW(); - bool UsePositionHint = DE.HasPositionHint() && - DE.GetPositionHint() + W.size() < Size && - Rand.RandBool(); - if (Rand.RandBool()) { // Insert W. - if (Size + W.size() > MaxSize) return 0; - size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size + 1); - memmove(Data + Idx + W.size(), Data + Idx, Size - Idx); - memcpy(Data + Idx, W.data(), W.size()); - Size += W.size(); - } else { // Overwrite some bytes with W. - if (W.size() > Size) return 0; - size_t Idx = UsePositionHint ? DE.GetPositionHint() : Rand(Size - W.size()); - memcpy(Data + Idx, W.data(), W.size()); - } - return Size; -} - -// Somewhere in the past we have observed a comparison instructions -// with arguments Arg1 Arg2. This function tries to guess a dictionary -// entry that will satisfy that comparison. -// It first tries to find one of the arguments (possibly swapped) in the -// input and if it succeeds it creates a DE with a position hint. -// Otherwise it creates a DE with one of the arguments w/o a position hint. -DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( - const void *Arg1, const void *Arg2, - const void *Arg1Mutation, const void *Arg2Mutation, - size_t ArgSize, const uint8_t *Data, - size_t Size) { - ScopedDoingMyOwnMemOrStr scoped_doing_my_own_mem_os_str; - bool HandleFirst = Rand.RandBool(); - const void *ExistingBytes, *DesiredBytes; - Word W; - const uint8_t *End = Data + Size; - for (int Arg = 0; Arg < 2; Arg++) { - ExistingBytes = HandleFirst ? Arg1 : Arg2; - DesiredBytes = HandleFirst ? 
Arg2Mutation : Arg1Mutation; - HandleFirst = !HandleFirst; - W.Set(reinterpret_cast(DesiredBytes), ArgSize); - const size_t kMaxNumPositions = 8; - size_t Positions[kMaxNumPositions]; - size_t NumPositions = 0; - for (const uint8_t *Cur = Data; - Cur < End && NumPositions < kMaxNumPositions; Cur++) { - Cur = - (const uint8_t *)SearchMemory(Cur, End - Cur, ExistingBytes, ArgSize); - if (!Cur) break; - Positions[NumPositions++] = Cur - Data; - } - if (!NumPositions) continue; - return DictionaryEntry(W, Positions[Rand(NumPositions)]); - } - DictionaryEntry DE(W); - return DE; -} - - -template -DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( - T Arg1, T Arg2, const uint8_t *Data, size_t Size) { - if (Rand.RandBool()) Arg1 = Bswap(Arg1); - if (Rand.RandBool()) Arg2 = Bswap(Arg2); - T Arg1Mutation = Arg1 + Rand(-1, 1); - T Arg2Mutation = Arg2 + Rand(-1, 1); - return MakeDictionaryEntryFromCMP(&Arg1, &Arg2, &Arg1Mutation, &Arg2Mutation, - sizeof(Arg1), Data, Size); -} - -DictionaryEntry MutationDispatcher::MakeDictionaryEntryFromCMP( - const Word &Arg1, const Word &Arg2, const uint8_t *Data, size_t Size) { - return MakeDictionaryEntryFromCMP(Arg1.data(), Arg2.data(), Arg1.data(), - Arg2.data(), Arg1.size(), Data, Size); -} - -size_t MutationDispatcher::Mutate_AddWordFromTORC( - uint8_t *Data, size_t Size, size_t MaxSize) { - Word W; - DictionaryEntry DE; - switch (Rand(4)) { - case 0: { - auto X = TPC.TORC8.Get(Rand.Rand()); - DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); - } break; - case 1: { - auto X = TPC.TORC4.Get(Rand.Rand()); - if ((X.A >> 16) == 0 && (X.B >> 16) == 0 && Rand.RandBool()) - DE = MakeDictionaryEntryFromCMP((uint16_t)X.A, (uint16_t)X.B, Data, Size); - else - DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); - } break; - case 2: { - auto X = TPC.TORCW.Get(Rand.Rand()); - DE = MakeDictionaryEntryFromCMP(X.A, X.B, Data, Size); - } break; - case 3: if (Options.UseMemmem) { - auto X = TPC.MMT.Get(Rand.Rand()); - DE = DictionaryEntry(X); - } break; - default: - assert(0); - } - if (!DE.GetW().size()) return 0; - Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); - if (!Size) return 0; - DictionaryEntry &DERef = - CmpDictionaryEntriesDeque[CmpDictionaryEntriesDequeIdx++ % - kCmpDictionaryEntriesDequeSize]; - DERef = DE; - CurrentDictionaryEntrySequence.push_back(&DERef); - return Size; -} - -size_t MutationDispatcher::Mutate_AddWordFromPersistentAutoDictionary( - uint8_t *Data, size_t Size, size_t MaxSize) { - return AddWordFromDictionary(PersistentAutoDictionary, Data, Size, MaxSize); -} - -size_t MutationDispatcher::AddWordFromDictionary(Dictionary &D, uint8_t *Data, - size_t Size, size_t MaxSize) { - if (Size > MaxSize) return 0; - if (D.empty()) return 0; - DictionaryEntry &DE = D[Rand(D.size())]; - Size = ApplyDictionaryEntry(Data, Size, MaxSize, DE); - if (!Size) return 0; - DE.IncUseCount(); - CurrentDictionaryEntrySequence.push_back(&DE); - return Size; -} - -// Overwrites part of To[0,ToSize) with a part of From[0,FromSize). -// Returns ToSize. -size_t MutationDispatcher::CopyPartOf(const uint8_t *From, size_t FromSize, - uint8_t *To, size_t ToSize) { - // Copy From[FromBeg, FromBeg + CopySize) into To[ToBeg, ToBeg + CopySize). 
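// A hedged worked example of the copy below: with From == To == "abcdef"
// (FromSize == ToSize == 6), drawing ToBeg == 1, CopySize == 3, FromBeg == 2
// overwrites bytes 1..3 with "cde", yielding "acdeef"; the size never changes.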
- size_t ToBeg = Rand(ToSize); - size_t CopySize = Rand(ToSize - ToBeg) + 1; - assert(ToBeg + CopySize <= ToSize); - CopySize = std::min(CopySize, FromSize); - size_t FromBeg = Rand(FromSize - CopySize + 1); - assert(FromBeg + CopySize <= FromSize); - memmove(To + ToBeg, From + FromBeg, CopySize); - return ToSize; -} - -// Inserts part of From[0,FromSize) into To. -// Returns new size of To on success or 0 on failure. -size_t MutationDispatcher::InsertPartOf(const uint8_t *From, size_t FromSize, - uint8_t *To, size_t ToSize, - size_t MaxToSize) { - if (ToSize >= MaxToSize) return 0; - size_t AvailableSpace = MaxToSize - ToSize; - size_t MaxCopySize = std::min(AvailableSpace, FromSize); - size_t CopySize = Rand(MaxCopySize) + 1; - size_t FromBeg = Rand(FromSize - CopySize + 1); - assert(FromBeg + CopySize <= FromSize); - size_t ToInsertPos = Rand(ToSize + 1); - assert(ToInsertPos + CopySize <= MaxToSize); - size_t TailSize = ToSize - ToInsertPos; - if (To == From) { - MutateInPlaceHere.resize(MaxToSize); - memcpy(MutateInPlaceHere.data(), From + FromBeg, CopySize); - memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); - memmove(To + ToInsertPos, MutateInPlaceHere.data(), CopySize); - } else { - memmove(To + ToInsertPos + CopySize, To + ToInsertPos, TailSize); - memmove(To + ToInsertPos, From + FromBeg, CopySize); - } - return ToSize + CopySize; -} - -size_t MutationDispatcher::Mutate_CopyPart(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize || Size == 0) return 0; - if (Rand.RandBool()) - return CopyPartOf(Data, Size, Data, Size); - else - return InsertPartOf(Data, Size, Data, Size, MaxSize); -} - -size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - size_t B = Rand(Size); - while (B < Size && !isdigit(Data[B])) B++; - if (B == Size) return 0; - size_t E = B; - while (E < Size && isdigit(Data[E])) E++; - assert(B < E); - // now we have digits in [B, E). - // strtol and friends don't accept non-zero-terminated data; parse it manually. - uint64_t Val = Data[B] - '0'; - for (size_t i = B + 1; i < E; i++) - Val = Val * 10 + Data[i] - '0'; - - // Mutate the integer value. - switch(Rand(5)) { - case 0: Val++; break; - case 1: Val--; break; - case 2: Val /= 2; break; - case 3: Val *= 2; break; - case 4: Val = Rand(Val * Val); break; - default: assert(0); - } - // Just replace the bytes with the new ones, don't bother moving bytes. - for (size_t i = B; i < E; i++) { - size_t Idx = E + B - i - 1; - assert(Idx >= B && Idx < E); - Data[Idx] = (Val % 10) + '0'; - Val /= 10; - } - return Size; -} - -template <class T> -size_t ChangeBinaryInteger(uint8_t *Data, size_t Size, Random &Rand) { - if (Size < sizeof(T)) return 0; - size_t Off = Rand(Size - sizeof(T) + 1); - assert(Off + sizeof(T) <= Size); - T Val; - if (Off < 64 && !Rand(4)) { - Val = Size; - if (Rand.RandBool()) - Val = Bswap(Val); - } else { - memcpy(&Val, Data + Off, sizeof(Val)); - T Add = Rand(21); - Add -= 10; - if (Rand.RandBool()) - Val = Bswap(T(Bswap(Val) + Add)); // Add assuming different endianness. - else - Val = Val + Add; // Add assuming current endianness. - if (Add == 0 || Rand.RandBool()) // Maybe negate.
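// A hedged trace of this mutator (little-endian host, T == uint16_t): bytes
// {0x10, 0x00} at Off give Val == 16; drawing Add == 3 with no byte swap
// yields 19, i.e. {0x13, 0x00}; the optional negation below would instead
// store -19, i.e. 0xFFED.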
- Val = -Val; - } - memcpy(Data + Off, &Val, sizeof(Val)); - return Size; -} - -size_t MutationDispatcher::Mutate_ChangeBinaryInteger(uint8_t *Data, - size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - switch (Rand(4)) { - case 3: return ChangeBinaryInteger<uint64_t>(Data, Size, Rand); - case 2: return ChangeBinaryInteger<uint32_t>(Data, Size, Rand); - case 1: return ChangeBinaryInteger<uint16_t>(Data, Size, Rand); - case 0: return ChangeBinaryInteger<uint8_t>(Data, Size, Rand); - default: assert(0); - } - return 0; -} - -size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size, - size_t MaxSize) { - if (Size > MaxSize) return 0; - if (!Corpus || Corpus->size() < 2 || Size == 0) return 0; - size_t Idx = Rand(Corpus->size()); - const Unit &O = (*Corpus)[Idx]; - if (O.empty()) return 0; - MutateInPlaceHere.resize(MaxSize); - auto &U = MutateInPlaceHere; - size_t NewSize = 0; - switch(Rand(3)) { - case 0: - NewSize = CrossOver(Data, Size, O.data(), O.size(), U.data(), U.size()); - break; - case 1: - NewSize = InsertPartOf(O.data(), O.size(), U.data(), U.size(), MaxSize); - if (!NewSize) - NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size()); - break; - case 2: - NewSize = CopyPartOf(O.data(), O.size(), U.data(), U.size()); - break; - default: assert(0); - } - assert(NewSize > 0 && "CrossOver returned empty unit"); - assert(NewSize <= MaxSize && "CrossOver returned oversized unit"); - memcpy(Data, U.data(), NewSize); - return NewSize; -} - -void MutationDispatcher::StartMutationSequence() { - CurrentMutatorSequence.clear(); - CurrentDictionaryEntrySequence.clear(); -} - -// Copy successful dictionary entries to PersistentAutoDictionary. -void MutationDispatcher::RecordSuccessfulMutationSequence() { - for (auto DE : CurrentDictionaryEntrySequence) { - // PersistentAutoDictionary.AddWithSuccessCountOne(DE); - DE->IncSuccessCount(); - assert(DE->GetW().size()); - // Linear search is fine here as this happens seldom. - if (!PersistentAutoDictionary.ContainsWord(DE->GetW())) - PersistentAutoDictionary.push_back({DE->GetW(), 1}); - } -} - -void MutationDispatcher::PrintRecommendedDictionary() { - std::vector<DictionaryEntry> V; - for (auto &DE : PersistentAutoDictionary) - if (!ManualDictionary.ContainsWord(DE.GetW())) - V.push_back(DE); - if (V.empty()) return; - Printf("###### Recommended dictionary. ######\n"); - for (auto &DE: V) { - assert(DE.GetW().size()); - Printf("\""); - PrintASCII(DE.GetW(), "\""); - Printf(" # Uses: %zd\n", DE.GetUseCount()); - } - Printf("###### End of recommended dictionary. ######\n"); -} - -void MutationDispatcher::PrintMutationSequence() { - Printf("MS: %zd ", CurrentMutatorSequence.size()); - for (auto M : CurrentMutatorSequence) - Printf("%s-", M.Name); - if (!CurrentDictionaryEntrySequence.empty()) { - Printf(" DE: "); - for (auto DE : CurrentDictionaryEntrySequence) { - Printf("\""); - PrintASCII(DE->GetW(), "\"-"); - } - } -} - -size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { - return MutateImpl(Data, Size, MaxSize, Mutators); -} - -size_t MutationDispatcher::DefaultMutate(uint8_t *Data, size_t Size, - size_t MaxSize) { - return MutateImpl(Data, Size, MaxSize, DefaultMutators); -} - -// Mutates Data in place, returns new size. -size_t MutationDispatcher::MutateImpl(uint8_t *Data, size_t Size, - size_t MaxSize, - const std::vector<Mutator> &Mutators) { - assert(MaxSize > 0); - // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize), - // in which case they will return 0. - // Try several times before returning un-mutated data.
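// A concrete example of such a failure (editorial, implied by the code above):
// when Size == MaxSize, Mutate_InsertByte and Mutate_InsertRepeatedBytes
// always return 0, so the loop below simply draws another mutator, up to 100
// times, before giving up.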
- for (int Iter = 0; Iter < 100; Iter++) { - auto M = Mutators[Rand(Mutators.size())]; - size_t NewSize = (this->*(M.Fn))(Data, Size, MaxSize); - if (NewSize && NewSize <= MaxSize) { - if (Options.OnlyASCII) - ToASCII(Data, NewSize); - CurrentMutatorSequence.push_back(M); - return NewSize; - } - } - *Data = ' '; - return 1; // Fallback, should not happen frequently. -} - -void MutationDispatcher::AddWordToManualDictionary(const Word &W) { - ManualDictionary.push_back( - {W, std::numeric_limits<size_t>::max()}); -} - -} // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerMutate.h b/lib/Fuzzer/FuzzerMutate.h deleted file mode 100644 index 84b04c0dbf3ea..0000000000000 --- a/lib/Fuzzer/FuzzerMutate.h +++ /dev/null @@ -1,150 +0,0 @@ -//===- FuzzerMutate.h - Internal header for the Fuzzer ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// fuzzer::MutationDispatcher -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_MUTATE_H -#define LLVM_FUZZER_MUTATE_H - -#include "FuzzerDefs.h" -#include "FuzzerDictionary.h" -#include "FuzzerOptions.h" -#include "FuzzerRandom.h" - -namespace fuzzer { - -class MutationDispatcher { -public: - MutationDispatcher(Random &Rand, const FuzzingOptions &Options); - ~MutationDispatcher() {} - /// Indicate that we are about to start a new sequence of mutations. - void StartMutationSequence(); - /// Print the current sequence of mutations. - void PrintMutationSequence(); - /// Indicate that the current sequence of mutations was successful. - void RecordSuccessfulMutationSequence(); - /// Mutates data by invoking user-provided mutator. - size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by invoking user-provided crossover. - size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by shuffling bytes. - size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by erasing bytes. - size_t Mutate_EraseBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by inserting a byte. - size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by inserting several repeated bytes. - size_t Mutate_InsertRepeatedBytes(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by changing one byte. - size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by changing one bit. - size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); - /// Mutates data by copying/inserting a part of data into a different place. - size_t Mutate_CopyPart(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Mutates data by adding a word from the manual dictionary. - size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, - size_t MaxSize); - - /// Mutates data by adding a word from the TORC. - size_t Mutate_AddWordFromTORC(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Mutates data by adding a word from the persistent automatic dictionary. - size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, - size_t MaxSize); - - /// Tries to find an ASCII integer in Data, changes it to another ASCII int.
- size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); - /// Change a 1-, 2-, 4-, or 8-byte integer in interesting ways. - size_t Mutate_ChangeBinaryInteger(uint8_t *Data, size_t Size, size_t MaxSize); - - /// CrossOver Data with some other element of the corpus. - size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Applies one of the configured mutations. - /// Returns the new size of data which could be up to MaxSize. - size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); - /// Applies one of the default mutations. Provided as a service - /// to mutation authors. - size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); - - /// Creates a cross-over of two pieces of Data, returns its size. - size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, - size_t Size2, uint8_t *Out, size_t MaxOutSize); - - void AddWordToManualDictionary(const Word &W); - - void PrintRecommendedDictionary(); - - void SetCorpus(const InputCorpus *Corpus) { this->Corpus = Corpus; } - - Random &GetRand() { return Rand; } - -private: - - struct Mutator { - size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); - const char *Name; - }; - - size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, - size_t MaxSize); - size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, - const std::vector &Mutators); - - size_t InsertPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, - size_t ToSize, size_t MaxToSize); - size_t CopyPartOf(const uint8_t *From, size_t FromSize, uint8_t *To, - size_t ToSize); - size_t ApplyDictionaryEntry(uint8_t *Data, size_t Size, size_t MaxSize, - DictionaryEntry &DE); - - template - DictionaryEntry MakeDictionaryEntryFromCMP(T Arg1, T Arg2, - const uint8_t *Data, size_t Size); - DictionaryEntry MakeDictionaryEntryFromCMP(const Word &Arg1, const Word &Arg2, - const uint8_t *Data, size_t Size); - DictionaryEntry MakeDictionaryEntryFromCMP(const void *Arg1, const void *Arg2, - const void *Arg1Mutation, - const void *Arg2Mutation, - size_t ArgSize, - const uint8_t *Data, size_t Size); - - Random &Rand; - const FuzzingOptions Options; - - // Dictionary provided by the user via -dict=DICT_FILE. - Dictionary ManualDictionary; - // Temporary dictionary modified by the fuzzer itself, - // recreated periodically. - Dictionary TempAutoDictionary; - // Persistent dictionary modified by the fuzzer, consists of - // entries that led to successfull discoveries in the past mutations. - Dictionary PersistentAutoDictionary; - - std::vector CurrentMutatorSequence; - std::vector CurrentDictionaryEntrySequence; - - static const size_t kCmpDictionaryEntriesDequeSize = 16; - DictionaryEntry CmpDictionaryEntriesDeque[kCmpDictionaryEntriesDequeSize]; - size_t CmpDictionaryEntriesDequeIdx = 0; - - const InputCorpus *Corpus = nullptr; - std::vector MutateInPlaceHere; - // CustomCrossOver needs its own buffer as a custom implementation may call - // LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere. - std::vector CustomCrossOverInPlaceHere; - - std::vector Mutators; - std::vector DefaultMutators; -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_MUTATE_H diff --git a/lib/Fuzzer/FuzzerOptions.h b/lib/Fuzzer/FuzzerOptions.h deleted file mode 100644 index 9500235e2b1f3..0000000000000 --- a/lib/Fuzzer/FuzzerOptions.h +++ /dev/null @@ -1,68 +0,0 @@ -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// fuzzer::FuzzingOptions -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_OPTIONS_H -#define LLVM_FUZZER_OPTIONS_H - -#include "FuzzerDefs.h" - -namespace fuzzer { - -struct FuzzingOptions { - int Verbosity = 1; - size_t MaxLen = 0; - bool ExperimentalLenControl = false; - int UnitTimeoutSec = 300; - int TimeoutExitCode = 77; - int ErrorExitCode = 77; - int MaxTotalTimeSec = 0; - int RssLimitMb = 0; - bool DoCrossOver = true; - int MutateDepth = 5; - bool UseCounters = false; - bool UseIndirCalls = true; - bool UseMemmem = true; - bool UseCmp = false; - bool UseValueProfile = false; - bool Shrink = false; - bool ReduceInputs = false; - int ReloadIntervalSec = 1; - bool ShuffleAtStartUp = true; - bool PreferSmall = true; - size_t MaxNumberOfRuns = -1L; - int ReportSlowUnits = 10; - bool OnlyASCII = false; - std::string OutputCorpus; - std::string ArtifactPrefix = "./"; - std::string ExactArtifactPath; - std::string ExitOnSrcPos; - std::string ExitOnItem; - bool SaveArtifacts = true; - bool PrintNEW = true; // Print a status line when new units are found; - bool PrintNewCovPcs = false; - bool PrintFinalStats = false; - bool PrintCorpusStats = false; - bool PrintCoverage = false; - bool DumpCoverage = false; - bool DetectLeaks = true; - int TraceMalloc = 0; - bool HandleAbrt = false; - bool HandleBus = false; - bool HandleFpe = false; - bool HandleIll = false; - bool HandleInt = false; - bool HandleSegv = false; - bool HandleTerm = false; - bool HandleXfsz = false; -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_OPTIONS_H diff --git a/lib/Fuzzer/FuzzerRandom.h b/lib/Fuzzer/FuzzerRandom.h deleted file mode 100644 index 8a1aa3ef5fdc1..0000000000000 --- a/lib/Fuzzer/FuzzerRandom.h +++ /dev/null @@ -1,34 +0,0 @@ -//===- FuzzerRandom.h - Internal header for the Fuzzer ----------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// fuzzer::Random -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_RANDOM_H -#define LLVM_FUZZER_RANDOM_H - -#include - -namespace fuzzer { -class Random : public std::mt19937 { - public: - Random(unsigned int seed) : std::mt19937(seed) {} - result_type operator()() { return this->std::mt19937::operator()(); } - size_t Rand() { return this->operator()(); } - size_t RandBool() { return Rand() % 2; } - size_t operator()(size_t n) { return n ? Rand() % n : 0; } - intptr_t operator()(intptr_t From, intptr_t To) { - assert(From < To); - intptr_t RangeSize = To - From + 1; - return operator()(RangeSize) + From; - } -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_RANDOM_H diff --git a/lib/Fuzzer/FuzzerSHA1.cpp b/lib/Fuzzer/FuzzerSHA1.cpp deleted file mode 100644 index d2f8e811bbf8b..0000000000000 --- a/lib/Fuzzer/FuzzerSHA1.cpp +++ /dev/null @@ -1,222 +0,0 @@ -//===- FuzzerSHA1.h - Private copy of the SHA1 implementation ---*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
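// (A hedged aside on fuzzer::Random above, with illustrative values only:
//   fuzzer::Random Rand(Seed);
//   Rand.RandBool(); // 0 or 1
//   Rand(10);        // uniform in [0, 10)
//   Rand(-5, 5);     // uniform in [-5, 5], inclusive
// all thin wrappers around std::mt19937.)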
-// -//===----------------------------------------------------------------------===// -// This code is taken from public domain -// (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c) -// and modified by adding anonymous namespace, adding an interface -// function fuzzer::ComputeSHA1() and removing unnecessary code. -// -// lib/Fuzzer can not use SHA1 implementation from openssl because -// openssl may not be available and because we may be fuzzing openssl itself. -// For the same reason we do not want to depend on SHA1 from LLVM tree. -//===----------------------------------------------------------------------===// - -#include "FuzzerSHA1.h" -#include "FuzzerDefs.h" - -/* This code is public-domain - it is based on libcrypt - * placed in the public domain by Wei Dai and other contributors. - */ - -#include -#include -#include -#include - -namespace { // Added for LibFuzzer - -#ifdef __BIG_ENDIAN__ -# define SHA_BIG_ENDIAN -#elif defined __LITTLE_ENDIAN__ -/* override */ -#elif defined __BYTE_ORDER -# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -# define SHA_BIG_ENDIAN -# endif -#else // ! defined __LITTLE_ENDIAN__ -# include // machine/endian.h -# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -# define SHA_BIG_ENDIAN -# endif -#endif - - -/* header */ - -#define HASH_LENGTH 20 -#define BLOCK_LENGTH 64 - -typedef struct sha1nfo { - uint32_t buffer[BLOCK_LENGTH/4]; - uint32_t state[HASH_LENGTH/4]; - uint32_t byteCount; - uint8_t bufferOffset; - uint8_t keyBuffer[BLOCK_LENGTH]; - uint8_t innerHash[HASH_LENGTH]; -} sha1nfo; - -/* public API - prototypes - TODO: doxygen*/ - -/** - */ -void sha1_init(sha1nfo *s); -/** - */ -void sha1_writebyte(sha1nfo *s, uint8_t data); -/** - */ -void sha1_write(sha1nfo *s, const char *data, size_t len); -/** - */ -uint8_t* sha1_result(sha1nfo *s); - - -/* code */ -#define SHA1_K0 0x5a827999 -#define SHA1_K20 0x6ed9eba1 -#define SHA1_K40 0x8f1bbcdc -#define SHA1_K60 0xca62c1d6 - -void sha1_init(sha1nfo *s) { - s->state[0] = 0x67452301; - s->state[1] = 0xefcdab89; - s->state[2] = 0x98badcfe; - s->state[3] = 0x10325476; - s->state[4] = 0xc3d2e1f0; - s->byteCount = 0; - s->bufferOffset = 0; -} - -uint32_t sha1_rol32(uint32_t number, uint8_t bits) { - return ((number << bits) | (number >> (32-bits))); -} - -void sha1_hashBlock(sha1nfo *s) { - uint8_t i; - uint32_t a,b,c,d,e,t; - - a=s->state[0]; - b=s->state[1]; - c=s->state[2]; - d=s->state[3]; - e=s->state[4]; - for (i=0; i<80; i++) { - if (i>=16) { - t = s->buffer[(i+13)&15] ^ s->buffer[(i+8)&15] ^ s->buffer[(i+2)&15] ^ s->buffer[i&15]; - s->buffer[i&15] = sha1_rol32(t,1); - } - if (i<20) { - t = (d ^ (b & (c ^ d))) + SHA1_K0; - } else if (i<40) { - t = (b ^ c ^ d) + SHA1_K20; - } else if (i<60) { - t = ((b & c) | (d & (b | c))) + SHA1_K40; - } else { - t = (b ^ c ^ d) + SHA1_K60; - } - t+=sha1_rol32(a,5) + e + s->buffer[i&15]; - e=d; - d=c; - c=sha1_rol32(b,30); - b=a; - a=t; - } - s->state[0] += a; - s->state[1] += b; - s->state[2] += c; - s->state[3] += d; - s->state[4] += e; -} - -void sha1_addUncounted(sha1nfo *s, uint8_t data) { - uint8_t * const b = (uint8_t*) s->buffer; -#ifdef SHA_BIG_ENDIAN - b[s->bufferOffset] = data; -#else - b[s->bufferOffset ^ 3] = data; -#endif - s->bufferOffset++; - if (s->bufferOffset == BLOCK_LENGTH) { - sha1_hashBlock(s); - s->bufferOffset = 0; - } -} - -void sha1_writebyte(sha1nfo *s, uint8_t data) { - ++s->byteCount; - sha1_addUncounted(s, data); -} - -void sha1_write(sha1nfo *s, const char *data, size_t len) { - for (;len--;) sha1_writebyte(s, (uint8_t) *data++); -} - 
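// A hedged usage sketch of this streaming API, mirroring ComputeSHA1() below:
//   sha1nfo S;
//   sha1_init(&S);
//   sha1_write(&S, "abc", 3);
//   uint8_t *Digest = sha1_result(&S); // 20 bytes, valid until the next init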
-void sha1_pad(sha1nfo *s) { - // Implement SHA-1 padding (fips180-2 §5.1.1) - - // Pad with 0x80 followed by 0x00 until the end of the block - sha1_addUncounted(s, 0x80); - while (s->bufferOffset != 56) sha1_addUncounted(s, 0x00); - - // Append length in the last 8 bytes - sha1_addUncounted(s, 0); // We're only using 32 bit lengths - sha1_addUncounted(s, 0); // But SHA-1 supports 64 bit lengths - sha1_addUncounted(s, 0); // So zero pad the top bits - sha1_addUncounted(s, s->byteCount >> 29); // Shifting to multiply by 8 - sha1_addUncounted(s, s->byteCount >> 21); // as SHA-1 supports bitstreams as well as - sha1_addUncounted(s, s->byteCount >> 13); // byte. - sha1_addUncounted(s, s->byteCount >> 5); - sha1_addUncounted(s, s->byteCount << 3); -} - -uint8_t* sha1_result(sha1nfo *s) { - // Pad to complete the last block - sha1_pad(s); - -#ifndef SHA_BIG_ENDIAN - // Swap byte order back - int i; - for (i=0; i<5; i++) { - s->state[i]= - (((s->state[i])<<24)& 0xff000000) - | (((s->state[i])<<8) & 0x00ff0000) - | (((s->state[i])>>8) & 0x0000ff00) - | (((s->state[i])>>24)& 0x000000ff); - } -#endif - - // Return pointer to hash (20 characters) - return (uint8_t*) s->state; -} - -} // namespace; Added for LibFuzzer - -namespace fuzzer { - -// The rest is added for LibFuzzer -void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out) { - sha1nfo s; - sha1_init(&s); - sha1_write(&s, (const char*)Data, Len); - memcpy(Out, sha1_result(&s), HASH_LENGTH); -} - -std::string Sha1ToString(const uint8_t Sha1[kSHA1NumBytes]) { - std::stringstream SS; - for (int i = 0; i < kSHA1NumBytes; i++) - SS << std::hex << std::setfill('0') << std::setw(2) << (unsigned)Sha1[i]; - return SS.str(); -} - -std::string Hash(const Unit &U) { - uint8_t Hash[kSHA1NumBytes]; - ComputeSHA1(U.data(), U.size(), Hash); - return Sha1ToString(Hash); -} - -} diff --git a/lib/Fuzzer/FuzzerSHA1.h b/lib/Fuzzer/FuzzerSHA1.h deleted file mode 100644 index 3b5e6e807f420..0000000000000 --- a/lib/Fuzzer/FuzzerSHA1.h +++ /dev/null @@ -1,33 +0,0 @@ -//===- FuzzerSHA1.h - Internal header for the SHA1 utils --------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// SHA1 utils. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_SHA1_H -#define LLVM_FUZZER_SHA1_H - -#include "FuzzerDefs.h" -#include -#include - -namespace fuzzer { - -// Private copy of SHA1 implementation. -static const int kSHA1NumBytes = 20; - -// Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'. -void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out); - -std::string Sha1ToString(const uint8_t Sha1[kSHA1NumBytes]); - -std::string Hash(const Unit &U); - -} // namespace fuzzer - -#endif // LLVM_FUZZER_SHA1_H diff --git a/lib/Fuzzer/FuzzerShmem.h b/lib/Fuzzer/FuzzerShmem.h deleted file mode 100644 index 53568e0acb69c..0000000000000 --- a/lib/Fuzzer/FuzzerShmem.h +++ /dev/null @@ -1,69 +0,0 @@ -//===- FuzzerShmem.h - shared memory interface ------------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// SharedMemoryRegion -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_SHMEM_H -#define LLVM_FUZZER_SHMEM_H - -#include -#include -#include - -#include "FuzzerDefs.h" - -namespace fuzzer { - -class SharedMemoryRegion { - public: - bool Create(const char *Name); - bool Open(const char *Name); - bool Destroy(const char *Name); - uint8_t *GetData() { return Data; } - void PostServer() {Post(0);} - void WaitServer() {Wait(0);} - void PostClient() {Post(1);} - void WaitClient() {Wait(1);} - - size_t WriteByteArray(const uint8_t *Bytes, size_t N) { - assert(N <= kShmemSize - sizeof(N)); - memcpy(GetData(), &N, sizeof(N)); - memcpy(GetData() + sizeof(N), Bytes, N); - assert(N == ReadByteArraySize()); - return N; - } - size_t ReadByteArraySize() { - size_t Res; - memcpy(&Res, GetData(), sizeof(Res)); - return Res; - } - uint8_t *GetByteArray() { return GetData() + sizeof(size_t); } - - bool IsServer() const { return Data && IAmServer; } - bool IsClient() const { return Data && !IAmServer; } - -private: - - static const size_t kShmemSize = 1 << 22; - bool IAmServer; - std::string Path(const char *Name); - std::string SemName(const char *Name, int Idx); - void Post(int Idx); - void Wait(int Idx); - - bool Map(int fd); - uint8_t *Data = nullptr; - void *Semaphore[2]; -}; - -extern SharedMemoryRegion SMR; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_SHMEM_H diff --git a/lib/Fuzzer/FuzzerShmemPosix.cpp b/lib/Fuzzer/FuzzerShmemPosix.cpp deleted file mode 100644 index 50cdcfb509dc2..0000000000000 --- a/lib/Fuzzer/FuzzerShmemPosix.cpp +++ /dev/null @@ -1,103 +0,0 @@ -//===- FuzzerShmemPosix.cpp - Posix shared memory ---------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
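The SharedMemoryRegion interface above frames every message as a size_t length prefix followed by the payload, and synchronizes the two processes with a pair of semaphores: index 0 wakes the server, index 1 wakes the client. A sketch of one round trip through the global SMR declared above (error handling omitted; the region name is made up):

// Server process (after SMR.Create("fuzz-shm")):
uint8_t Unit[3] = {'a', 'b', 'c'};
SMR.WriteByteArray(Unit, sizeof(Unit));    // Writes [size_t N][N bytes] at GetData().
SMR.PostClient();                          // sem_post on semaphore 1.
SMR.WaitServer();                          // Block until the client posts semaphore 0.

// Client process (after SMR.Open("fuzz-shm")):
SMR.WaitClient();                          // Block until the server posts semaphore 1.
size_t N = SMR.ReadByteArraySize();        // Reads the leading size_t.
const uint8_t *Bytes = SMR.GetByteArray(); // Payload begins right after the prefix.
// ... consume N bytes ...
SMR.PostServer();                          // Hand the turn back to the server.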
-// -//===----------------------------------------------------------------------===// -// SharedMemoryRegion -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_POSIX - -#include "FuzzerIO.h" -#include "FuzzerShmem.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -std::string SharedMemoryRegion::Path(const char *Name) { - return DirPlusFile(TmpDir(), Name); -} - -std::string SharedMemoryRegion::SemName(const char *Name, int Idx) { - std::string Res(Name); - return Res + (char)('0' + Idx); -} - -bool SharedMemoryRegion::Map(int fd) { - Data = - (uint8_t *)mmap(0, kShmemSize, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); - if (Data == (uint8_t*)-1) - return false; - return true; -} - -bool SharedMemoryRegion::Create(const char *Name) { - int fd = open(Path(Name).c_str(), O_CREAT | O_RDWR, 0777); - if (fd < 0) return false; - if (ftruncate(fd, kShmemSize) < 0) return false; - if (!Map(fd)) - return false; - for (int i = 0; i < 2; i++) { - sem_unlink(SemName(Name, i).c_str()); - Semaphore[i] = sem_open(SemName(Name, i).c_str(), O_CREAT, 0644, 0); - if (Semaphore[i] == (void *)-1) - return false; - } - IAmServer = true; - return true; -} - -bool SharedMemoryRegion::Open(const char *Name) { - int fd = open(Path(Name).c_str(), O_RDWR); - if (fd < 0) return false; - struct stat stat_res; - if (0 != fstat(fd, &stat_res)) - return false; - assert(stat_res.st_size == kShmemSize); - if (!Map(fd)) - return false; - for (int i = 0; i < 2; i++) { - Semaphore[i] = sem_open(SemName(Name, i).c_str(), 0); - if (Semaphore[i] == (void *)-1) - return false; - } - IAmServer = false; - return true; -} - -bool SharedMemoryRegion::Destroy(const char *Name) { - return 0 == unlink(Path(Name).c_str()); -} - -void SharedMemoryRegion::Post(int Idx) { - assert(Idx == 0 || Idx == 1); - sem_post((sem_t*)Semaphore[Idx]); -} - -void SharedMemoryRegion::Wait(int Idx) { - assert(Idx == 0 || Idx == 1); - for (int i = 0; i < 10 && sem_wait((sem_t*)Semaphore[Idx]); i++) { - // sem_wait may fail if interrupted by a signal. - sleep(i); - if (i) - Printf("%s: sem_wait[%d] failed %s\n", i < 9 ? "WARNING" : "ERROR", i, - strerror(errno)); - if (i == 9) abort(); - } -} - -} // namespace fuzzer - -#endif // LIBFUZZER_POSIX diff --git a/lib/Fuzzer/FuzzerShmemWindows.cpp b/lib/Fuzzer/FuzzerShmemWindows.cpp deleted file mode 100644 index d330ebf4fd07a..0000000000000 --- a/lib/Fuzzer/FuzzerShmemWindows.cpp +++ /dev/null @@ -1,64 +0,0 @@ -//===- FuzzerShmemWindows.cpp - Posix shared memory -------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// SharedMemoryRegion -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_WINDOWS - -#include "FuzzerIO.h" -#include "FuzzerShmem.h" - -#include -#include -#include -#include - -namespace fuzzer { - -std::string SharedMemoryRegion::Path(const char *Name) { - return DirPlusFile(TmpDir(), Name); -} - -std::string SharedMemoryRegion::SemName(const char *Name, int Idx) { - std::string Res(Name); - return Res + (char)('0' + Idx); -} - -bool SharedMemoryRegion::Map(int fd) { - assert(0 && "UNIMPLEMENTED"); - return false; -} - -bool SharedMemoryRegion::Create(const char *Name) { - assert(0 && "UNIMPLEMENTED"); - return false; -} - -bool SharedMemoryRegion::Open(const char *Name) { - assert(0 && "UNIMPLEMENTED"); - return false; -} - -bool SharedMemoryRegion::Destroy(const char *Name) { - assert(0 && "UNIMPLEMENTED"); - return false; -} - -void SharedMemoryRegion::Post(int Idx) { - assert(0 && "UNIMPLEMENTED"); -} - -void SharedMemoryRegion::Wait(int Idx) { - Semaphore[1] = nullptr; - assert(0 && "UNIMPLEMENTED"); -} - -} // namespace fuzzer - -#endif // LIBFUZZER_WINDOWS diff --git a/lib/Fuzzer/FuzzerTracePC.cpp b/lib/Fuzzer/FuzzerTracePC.cpp deleted file mode 100644 index ebd33d3ec886f..0000000000000 --- a/lib/Fuzzer/FuzzerTracePC.cpp +++ /dev/null @@ -1,583 +0,0 @@ -//===- FuzzerTracePC.cpp - PC tracing--------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Trace PCs. -// This module implements __sanitizer_cov_trace_pc_guard[_init], -// the callback required for -fsanitize-coverage=trace-pc-guard instrumentation. -// -//===----------------------------------------------------------------------===// - -#include "FuzzerTracePC.h" -#include "FuzzerCorpus.h" -#include "FuzzerDefs.h" -#include "FuzzerDictionary.h" -#include "FuzzerExtFunctions.h" -#include "FuzzerIO.h" -#include "FuzzerUtil.h" -#include "FuzzerValueBitMap.h" -#include - -// The coverage counters and PCs. -// These are declared as global variables named "__sancov_*" to simplify -// experiments with inlined instrumentation. 
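These callbacks are the runtime half of -fsanitize-coverage=trace-pc-guard: the compiler gives every edge a 32-bit guard slot and emits a call into the functions defined below. A self-contained miniature of the mechanism, including how a raw 8-bit counter is later bucketed into one of eight features (thresholds copied from CounterToFeature in FuzzerTracePC.h; the Toy* names are invented for this sketch):

#include <cstdint>

static uint8_t Counters[1 << 21]; // stands in for __sancov_trace_pc_guard_8bit_counters

// What __sanitizer_cov_trace_pc_guard boils down to: bump the counter
// for the edge whose guard the compiler passed in.
static void ToyTracePcGuard(uint32_t *Guard) { Counters[*Guard]++; }

// Counter value -> feature bucket in [0,7]; same thresholds as CounterToFeature.
static unsigned ToyCounterToFeature(uint8_t C) {
  if (C >= 128) return 7;
  if (C >= 32)  return 6;
  if (C >= 16)  return 5;
  if (C >= 8)   return 4;
  if (C >= 4)   return 3;
  if (C >= 3)   return 2;
  if (C >= 2)   return 1;
  return 0; // exactly one hit
}

int main() {
  uint32_t Guard = 42;            // in real builds the guard ID is assigned at startup
  for (int i = 0; i < 5; i++)
    ToyTracePcGuard(&Guard);
  return (int)ToyCounterToFeature(Counters[42]); // 5 hits -> bucket 3
}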
-alignas(64) ATTRIBUTE_INTERFACE -uint8_t __sancov_trace_pc_guard_8bit_counters[fuzzer::TracePC::kNumPCs]; - -ATTRIBUTE_INTERFACE -uintptr_t __sancov_trace_pc_pcs[fuzzer::TracePC::kNumPCs]; - -// Used by -fsanitize-coverage=stack-depth to track stack depth -ATTRIBUTE_INTERFACE thread_local uintptr_t __sancov_lowest_stack; - -namespace fuzzer { - -TracePC TPC; - -int ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr; - -uint8_t *TracePC::Counters() const { - return __sancov_trace_pc_guard_8bit_counters; -} - -uintptr_t *TracePC::PCs() const { - return __sancov_trace_pc_pcs; -} - -size_t TracePC::GetTotalPCCoverage() { - if (ObservedPCs.size()) - return ObservedPCs.size(); - size_t Res = 0; - for (size_t i = 1, N = GetNumPCs(); i < N; i++) - if (PCs()[i]) - Res++; - return Res; -} - - -void TracePC::HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop) { - if (Start == Stop) return; - if (NumModulesWithInline8bitCounters && - ModuleCounters[NumModulesWithInline8bitCounters-1].Start == Start) return; - assert(NumModulesWithInline8bitCounters < - sizeof(ModuleCounters) / sizeof(ModuleCounters[0])); - ModuleCounters[NumModulesWithInline8bitCounters++] = {Start, Stop}; - NumInline8bitCounters += Stop - Start; -} - -void TracePC::HandlePCsInit(const uint8_t *Start, const uint8_t *Stop) { - const uintptr_t *B = reinterpret_cast(Start); - const uintptr_t *E = reinterpret_cast(Stop); - if (NumPCTables && ModulePCTable[NumPCTables - 1].Start == B) return; - assert(NumPCTables < sizeof(ModulePCTable) / sizeof(ModulePCTable[0])); - ModulePCTable[NumPCTables++] = {B, E}; - NumPCsInPCTables += E - B; -} - -void TracePC::HandleInit(uint32_t *Start, uint32_t *Stop) { - if (Start == Stop || *Start) return; - assert(NumModules < sizeof(Modules) / sizeof(Modules[0])); - for (uint32_t *P = Start; P < Stop; P++) { - NumGuards++; - if (NumGuards == kNumPCs) { - RawPrint( - "WARNING: The binary has too many instrumented PCs.\n" - " You may want to reduce the size of the binary\n" - " for more efficient fuzzing and precise coverage data\n"); - } - *P = NumGuards % kNumPCs; - } - Modules[NumModules].Start = Start; - Modules[NumModules].Stop = Stop; - NumModules++; -} - -void TracePC::PrintModuleInfo() { - if (NumGuards) { - Printf("INFO: Loaded %zd modules (%zd guards): ", NumModules, NumGuards); - for (size_t i = 0; i < NumModules; i++) - Printf("%zd [%p, %p), ", Modules[i].Stop - Modules[i].Start, - Modules[i].Start, Modules[i].Stop); - Printf("\n"); - } - if (NumModulesWithInline8bitCounters) { - Printf("INFO: Loaded %zd modules (%zd inline 8-bit counters): ", - NumModulesWithInline8bitCounters, NumInline8bitCounters); - for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) - Printf("%zd [%p, %p), ", ModuleCounters[i].Stop - ModuleCounters[i].Start, - ModuleCounters[i].Start, ModuleCounters[i].Stop); - Printf("\n"); - } - if (NumPCTables) { - Printf("INFO: Loaded %zd PC tables (%zd PCs): ", NumPCTables, - NumPCsInPCTables); - for (size_t i = 0; i < NumPCTables; i++) { - Printf("%zd [%p,%p), ", ModulePCTable[i].Stop - ModulePCTable[i].Start, - ModulePCTable[i].Start, ModulePCTable[i].Stop); - } - Printf("\n"); - - if ((NumGuards && NumGuards != NumPCsInPCTables) || - (NumInline8bitCounters && NumInline8bitCounters != NumPCsInPCTables)) { - Printf("ERROR: The size of coverage PC tables does not match the" - " number of instrumented PCs. 
This might be a bug in the compiler," - " please contact the libFuzzer developers.\n"); - _Exit(1); - } - } - if (size_t NumClangCounters = ClangCountersEnd() - ClangCountersBegin()) - Printf("INFO: %zd Clang Coverage Counters\n", NumClangCounters); -} - -ATTRIBUTE_NO_SANITIZE_ALL -void TracePC::HandleCallerCallee(uintptr_t Caller, uintptr_t Callee) { - const uintptr_t kBits = 12; - const uintptr_t kMask = (1 << kBits) - 1; - uintptr_t Idx = (Caller & kMask) | ((Callee & kMask) << kBits); - ValueProfileMap.AddValueModPrime(Idx); -} - -void TracePC::UpdateObservedPCs() { - auto Observe = [&](uintptr_t PC) { - bool Inserted = ObservedPCs.insert(PC).second; - if (Inserted && DoPrintNewPCs) - PrintPC("\tNEW_PC: %p %F %L\n", "\tNEW_PC: %p\n", PC + 1); - }; - if (NumPCsInPCTables) { - if (NumInline8bitCounters == NumPCsInPCTables) { - for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) { - uint8_t *Beg = ModuleCounters[i].Start; - size_t Size = ModuleCounters[i].Stop - Beg; - assert(Size == - (size_t)(ModulePCTable[i].Stop - ModulePCTable[i].Start)); - for (size_t j = 0; j < Size; j++) - if (Beg[j]) - Observe(ModulePCTable[i].Start[j]); - } - } else if (NumGuards == NumPCsInPCTables) { - size_t GuardIdx = 1; - for (size_t i = 0; i < NumModules; i++) { - uint32_t *Beg = Modules[i].Start; - size_t Size = Modules[i].Stop - Beg; - assert(Size == - (size_t)(ModulePCTable[i].Stop - ModulePCTable[i].Start)); - for (size_t j = 0; j < Size; j++, GuardIdx++) - if (Counters()[GuardIdx]) - Observe(ModulePCTable[i].Start[j]); - } - } - } - if (size_t NumClangCounters = - ClangCountersEnd() - ClangCountersBegin()) { - auto P = ClangCountersBegin(); - for (size_t Idx = 0; Idx < NumClangCounters; Idx++) - if (P[Idx]) - Observe((uintptr_t)Idx); - } -} - -inline ALWAYS_INLINE uintptr_t GetPreviousInstructionPc(uintptr_t PC) { - // TODO: this implementation is x86 only. - // see sanitizer_common GetPreviousInstructionPc for full implementation. - return PC - 1; -} - -inline ALWAYS_INLINE uintptr_t GetNextInstructionPc(uintptr_t PC) { - // TODO: this implementation is x86 only. - // see sanitizer_common GetPreviousInstructionPc for full implementation. - return PC + 1; -} - -static std::string GetModuleName(uintptr_t PC) { - char ModulePathRaw[4096] = ""; // What's PATH_MAX in portable C++? 
- void *OffsetRaw = nullptr; - if (!EF->__sanitizer_get_module_and_offset_for_pc( - reinterpret_cast(PC), ModulePathRaw, - sizeof(ModulePathRaw), &OffsetRaw)) - return ""; - return ModulePathRaw; -} - -void TracePC::PrintCoverage() { - if (!EF->__sanitizer_symbolize_pc || - !EF->__sanitizer_get_module_and_offset_for_pc) { - Printf("INFO: __sanitizer_symbolize_pc or " - "__sanitizer_get_module_and_offset_for_pc is not available," - " not printing coverage\n"); - return; - } - Printf("COVERAGE:\n"); - std::string LastFunctionName = ""; - std::string LastFileStr = ""; - std::set UncoveredLines; - std::set CoveredLines; - - auto FunctionEndCallback = [&](const std::string &CurrentFunc, - const std::string &CurrentFile) { - if (LastFunctionName != CurrentFunc) { - if (CoveredLines.empty() && !UncoveredLines.empty()) { - Printf("UNCOVERED_FUNC: %s\n", LastFunctionName.c_str()); - } else { - for (auto Line : UncoveredLines) { - if (!CoveredLines.count(Line)) - Printf("UNCOVERED_LINE: %s %s:%zd\n", LastFunctionName.c_str(), - LastFileStr.c_str(), Line); - } - } - - UncoveredLines.clear(); - CoveredLines.clear(); - LastFunctionName = CurrentFunc; - LastFileStr = CurrentFile; - } - }; - - for (size_t i = 0; i < NumPCTables; i++) { - auto &M = ModulePCTable[i]; - assert(M.Start < M.Stop); - auto ModuleName = GetModuleName(*M.Start); - for (auto Ptr = M.Start; Ptr < M.Stop; Ptr++) { - auto PC = *Ptr; - auto VisualizePC = GetNextInstructionPc(PC); - bool IsObserved = ObservedPCs.count(PC); - std::string FileStr = DescribePC("%s", VisualizePC); - if (!IsInterestingCoverageFile(FileStr)) continue; - std::string FunctionStr = DescribePC("%F", VisualizePC); - FunctionEndCallback(FunctionStr, FileStr); - std::string LineStr = DescribePC("%l", VisualizePC); - size_t Line = std::stoul(LineStr); - if (IsObserved && CoveredLines.insert(Line).second) - Printf("COVERED: %s %s:%zd\n", FunctionStr.c_str(), FileStr.c_str(), - Line); - else - UncoveredLines.insert(Line); - } - } - FunctionEndCallback("", ""); -} - -void TracePC::DumpCoverage() { - if (EF->__sanitizer_dump_coverage) { - std::vector PCsCopy(GetNumPCs()); - for (size_t i = 0; i < GetNumPCs(); i++) - PCsCopy[i] = PCs()[i] ? GetPreviousInstructionPc(PCs()[i]) : 0; - EF->__sanitizer_dump_coverage(PCsCopy.data(), PCsCopy.size()); - } -} - -// Value profile. -// We keep track of various values that affect control flow. -// These values are inserted into a bit-set-based hash map. -// Every new bit in the map is treated as a new coverage. -// -// For memcmp/strcmp/etc the interesting value is the length of the common -// prefix of the parameters. -// For cmp instructions the interesting value is a XOR of the parameters. -// The interesting value is mixed up with the PC and is then added to the map. - -ATTRIBUTE_NO_SANITIZE_ALL -void TracePC::AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2, - size_t n, bool StopAtZero) { - if (!n) return; - size_t Len = std::min(n, Word::GetMaxSize()); - const uint8_t *A1 = reinterpret_cast(s1); - const uint8_t *A2 = reinterpret_cast(s2); - uint8_t B1[Word::kMaxSize]; - uint8_t B2[Word::kMaxSize]; - // Copy the data into locals in this non-msan-instrumented function - // to avoid msan complaining further. - size_t Hash = 0; // Compute some simple hash of both strings. 
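// Worked example for the scan below (values invented for illustration):
// for memcmp("foo1", "foo2", 4) the copy loop leaves B1/B2 holding both
// buffers, the prefix scan stops at I == 3 (the first differing byte), and
//   Idx = (PC & 4095) | (3 << 12)
// so every extra byte of matching prefix produces a fresh feature,
// rewarding mutations that get deeper into the comparison.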
- for (size_t i = 0; i < Len; i++) { - B1[i] = A1[i]; - B2[i] = A2[i]; - size_t T = B1[i]; - Hash ^= (T << 8) | B2[i]; - } - size_t I = 0; - for (; I < Len; I++) - if (B1[I] != B2[I] || (StopAtZero && B1[I] == 0)) - break; - size_t PC = reinterpret_cast(caller_pc); - size_t Idx = (PC & 4095) | (I << 12); - ValueProfileMap.AddValue(Idx); - TORCW.Insert(Idx ^ Hash, Word(B1, Len), Word(B2, Len)); -} - -template -ATTRIBUTE_TARGET_POPCNT ALWAYS_INLINE -ATTRIBUTE_NO_SANITIZE_ALL -void TracePC::HandleCmp(uintptr_t PC, T Arg1, T Arg2) { - uint64_t ArgXor = Arg1 ^ Arg2; - uint64_t ArgDistance = __builtin_popcountll(ArgXor) + 1; // [1,65] - uintptr_t Idx = ((PC & 4095) + 1) * ArgDistance; - if (sizeof(T) == 4) - TORC4.Insert(ArgXor, Arg1, Arg2); - else if (sizeof(T) == 8) - TORC8.Insert(ArgXor, Arg1, Arg2); - ValueProfileMap.AddValue(Idx); -} - -static size_t InternalStrnlen(const char *S, size_t MaxLen) { - size_t Len = 0; - for (; Len < MaxLen && S[Len]; Len++) {} - return Len; -} - -// Finds min of (strlen(S1), strlen(S2)). -// Needed bacause one of these strings may actually be non-zero terminated. -static size_t InternalStrnlen2(const char *S1, const char *S2) { - size_t Len = 0; - for (; S1[Len] && S2[Len]; Len++) {} - return Len; -} - -void TracePC::ClearInlineCounters() { - for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) { - uint8_t *Beg = ModuleCounters[i].Start; - size_t Size = ModuleCounters[i].Stop - Beg; - memset(Beg, 0, Size); - } -} - -void TracePC::RecordInitialStack() { - InitialStack = __sancov_lowest_stack; -} - -uintptr_t TracePC::GetMaxStackOffset() const { - return InitialStack - __sancov_lowest_stack; // Stack grows down -} - -} // namespace fuzzer - -extern "C" { -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -void __sanitizer_cov_trace_pc_guard(uint32_t *Guard) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - uint32_t Idx = *Guard; - __sancov_trace_pc_pcs[Idx] = PC; - __sancov_trace_pc_guard_8bit_counters[Idx]++; -} - -// Best-effort support for -fsanitize-coverage=trace-pc, which is available -// in both Clang and GCC. -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -void __sanitizer_cov_trace_pc() { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - uintptr_t Idx = PC & (((uintptr_t)1 << fuzzer::TracePC::kTracePcBits) - 1); - __sancov_trace_pc_pcs[Idx] = PC; - __sancov_trace_pc_guard_8bit_counters[Idx]++; -} - -ATTRIBUTE_INTERFACE -void __sanitizer_cov_trace_pc_guard_init(uint32_t *Start, uint32_t *Stop) { - fuzzer::TPC.HandleInit(Start, Stop); -} - -ATTRIBUTE_INTERFACE -void __sanitizer_cov_8bit_counters_init(uint8_t *Start, uint8_t *Stop) { - fuzzer::TPC.HandleInline8bitCountersInit(Start, Stop); -} - -ATTRIBUTE_INTERFACE -void __sanitizer_cov_pcs_init(const uint8_t *pcs_beg, const uint8_t *pcs_end) { - fuzzer::TPC.HandlePCsInit(pcs_beg, pcs_end); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -void __sanitizer_cov_trace_pc_indir(uintptr_t Callee) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCallerCallee(PC, Callee); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_cmp8(uint64_t Arg1, uint64_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -// Now the __sanitizer_cov_trace_const_cmp[1248] callbacks just mimic -// the behaviour of __sanitizer_cov_trace_cmp[1248] ones. 
This, however, -// should be changed later to make full use of instrumentation. -void __sanitizer_cov_trace_const_cmp8(uint64_t Arg1, uint64_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_cmp4(uint32_t Arg1, uint32_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_const_cmp4(uint32_t Arg1, uint32_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_cmp2(uint16_t Arg1, uint16_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_const_cmp2(uint16_t Arg1, uint16_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_cmp1(uint8_t Arg1, uint8_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_const_cmp1(uint8_t Arg1, uint8_t Arg2) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Arg1, Arg2); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) { - uint64_t N = Cases[0]; - uint64_t ValSizeInBits = Cases[1]; - uint64_t *Vals = Cases + 2; - // Skip the most common and the most boring case. - if (Vals[N - 1] < 256 && Val < 256) - return; - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - size_t i; - uint64_t Token = 0; - for (i = 0; i < N; i++) { - Token = Val ^ Vals[i]; - if (Val < Vals[i]) - break; - } - - if (ValSizeInBits == 16) - fuzzer::TPC.HandleCmp(PC + i, static_cast(Token), (uint16_t)(0)); - else if (ValSizeInBits == 32) - fuzzer::TPC.HandleCmp(PC + i, static_cast(Token), (uint32_t)(0)); - else - fuzzer::TPC.HandleCmp(PC + i, Token, (uint64_t)(0)); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_div4(uint32_t Val) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Val, (uint32_t)0); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_div8(uint64_t Val) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Val, (uint64_t)0); -} - -ATTRIBUTE_INTERFACE -ATTRIBUTE_NO_SANITIZE_ALL -ATTRIBUTE_TARGET_POPCNT -void __sanitizer_cov_trace_gep(uintptr_t Idx) { - uintptr_t PC = reinterpret_cast(__builtin_return_address(0)); - fuzzer::TPC.HandleCmp(PC, Idx, (uintptr_t)0); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1, - const void *s2, size_t n, int result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - if (result == 0) return; // No reason to mutate. 
- if (n <= 1) return; // Not interesting. - fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/false); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1, - const char *s2, size_t n, int result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - if (result == 0) return; // No reason to mutate. - size_t Len1 = fuzzer::InternalStrnlen(s1, n); - size_t Len2 = fuzzer::InternalStrnlen(s2, n); - n = std::min(n, Len1); - n = std::min(n, Len2); - if (n <= 1) return; // Not interesting. - fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, n, /*StopAtZero*/true); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1, - const char *s2, int result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - if (result == 0) return; // No reason to mutate. - size_t N = fuzzer::InternalStrnlen2(s1, s2); - if (N <= 1) return; // Not interesting. - fuzzer::TPC.AddValueForMemcmp(caller_pc, s1, s2, N, /*StopAtZero*/true); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strncasecmp(void *called_pc, const char *s1, - const char *s2, size_t n, int result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - return __sanitizer_weak_hook_strncmp(called_pc, s1, s2, n, result); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strcasecmp(void *called_pc, const char *s1, - const char *s2, int result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - return __sanitizer_weak_hook_strcmp(called_pc, s1, s2, result); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strstr(void *called_pc, const char *s1, - const char *s2, char *result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - fuzzer::TPC.MMT.Add(reinterpret_cast(s2), strlen(s2)); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_strcasestr(void *called_pc, const char *s1, - const char *s2, char *result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - fuzzer::TPC.MMT.Add(reinterpret_cast(s2), strlen(s2)); -} - -ATTRIBUTE_INTERFACE ATTRIBUTE_NO_SANITIZE_MEMORY -void __sanitizer_weak_hook_memmem(void *called_pc, const void *s1, size_t len1, - const void *s2, size_t len2, void *result) { - if (fuzzer::ScopedDoingMyOwnMemOrStr::DoingMyOwnMemOrStr) return; - fuzzer::TPC.MMT.Add(reinterpret_cast(s2), len2); -} -} // extern "C" diff --git a/lib/Fuzzer/FuzzerTracePC.h b/lib/Fuzzer/FuzzerTracePC.h deleted file mode 100644 index 56f1820f79e75..0000000000000 --- a/lib/Fuzzer/FuzzerTracePC.h +++ /dev/null @@ -1,257 +0,0 @@ -//===- FuzzerTracePC.h - Internal header for the Fuzzer ---------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// fuzzer::TracePC -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_TRACE_PC -#define LLVM_FUZZER_TRACE_PC - -#include "FuzzerDefs.h" -#include "FuzzerDictionary.h" -#include "FuzzerValueBitMap.h" - -#include - -namespace fuzzer { - -// TableOfRecentCompares (TORC) remembers the most recently performed -// comparisons of type T. 
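// Worked example (values invented): __sanitizer_cov_trace_cmp8(0x1234, 0x1230)
// computes ArgXor = 0x4, so ArgDistance = popcount(0x4) + 1 = 2, the pair
// lands in TORC8 via TORC8.Insert(0x4, 0x1234, 0x1230), and the value-profile
// feature index is ((PC & 4095) + 1) * 2. A later mutation can pull
// 0x1234/0x1230 back out of the table and splice them into the test input.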
-// We record the arguments of CMP instructions in this table unconditionally -// because it seems cheaper this way than to compute some expensive -// conditions inside __sanitizer_cov_trace_cmp*. -// After the unit has been executed we may decide to use the contents of -// this table to populate a Dictionary. -template -struct TableOfRecentCompares { - static const size_t kSize = kSizeT; - struct Pair { - T A, B; - }; - ATTRIBUTE_NO_SANITIZE_ALL - void Insert(size_t Idx, const T &Arg1, const T &Arg2) { - Idx = Idx % kSize; - Table[Idx].A = Arg1; - Table[Idx].B = Arg2; - } - - Pair Get(size_t I) { return Table[I % kSize]; } - - Pair Table[kSize]; -}; - -template -struct MemMemTable { - static const size_t kSize = kSizeT; - Word MemMemWords[kSize]; - Word EmptyWord; - - void Add(const uint8_t *Data, size_t Size) { - if (Size <= 2) return; - Size = std::min(Size, Word::GetMaxSize()); - size_t Idx = SimpleFastHash(Data, Size) % kSize; - MemMemWords[Idx].Set(Data, Size); - } - const Word &Get(size_t Idx) { - for (size_t i = 0; i < kSize; i++) { - const Word &W = MemMemWords[(Idx + i) % kSize]; - if (W.size()) return W; - } - EmptyWord.Set(nullptr, 0); - return EmptyWord; - } -}; - -class TracePC { - public: - static const size_t kNumPCs = 1 << 21; - // How many bits of PC are used from __sanitizer_cov_trace_pc. - static const size_t kTracePcBits = 18; - - void HandleInit(uint32_t *Start, uint32_t *Stop); - void HandleInline8bitCountersInit(uint8_t *Start, uint8_t *Stop); - void HandlePCsInit(const uint8_t *Start, const uint8_t *Stop); - void HandleCallerCallee(uintptr_t Caller, uintptr_t Callee); - template void HandleCmp(uintptr_t PC, T Arg1, T Arg2); - size_t GetTotalPCCoverage(); - void SetUseCounters(bool UC) { UseCounters = UC; } - void SetUseValueProfile(bool VP) { UseValueProfile = VP; } - void SetPrintNewPCs(bool P) { DoPrintNewPCs = P; } - void UpdateObservedPCs(); - template void CollectFeatures(Callback CB) const; - - void ResetMaps() { - ValueProfileMap.Reset(); - if (NumModules) - memset(Counters(), 0, GetNumPCs()); - ClearExtraCounters(); - ClearInlineCounters(); - ClearClangCounters(); - } - - void ClearInlineCounters(); - - void UpdateFeatureSet(size_t CurrentElementIdx, size_t CurrentElementSize); - void PrintFeatureSet(); - - void PrintModuleInfo(); - - void PrintCoverage(); - void DumpCoverage(); - - void AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2, - size_t n, bool StopAtZero); - - TableOfRecentCompares TORC4; - TableOfRecentCompares TORC8; - TableOfRecentCompares TORCW; - MemMemTable<1024> MMT; - - size_t GetNumPCs() const { - return NumGuards == 0 ? (1 << kTracePcBits) : Min(kNumPCs, NumGuards + 1); - } - uintptr_t GetPC(size_t Idx) { - assert(Idx < GetNumPCs()); - return PCs()[Idx]; - } - - void RecordInitialStack(); - uintptr_t GetMaxStackOffset() const; - - template - void ForEachObservedPC(CallBack CB) { - for (auto PC : ObservedPCs) - CB(PC); - } - -private: - bool UseCounters = false; - bool UseValueProfile = false; - bool DoPrintNewPCs = false; - - struct Module { - uint32_t *Start, *Stop; - }; - - Module Modules[4096]; - size_t NumModules; // linker-initialized. - size_t NumGuards; // linker-initialized. - - struct { uint8_t *Start, *Stop; } ModuleCounters[4096]; - size_t NumModulesWithInline8bitCounters; // linker-initialized. 
- size_t NumInline8bitCounters; - - struct { const uintptr_t *Start, *Stop; } ModulePCTable[4096]; - size_t NumPCTables; - size_t NumPCsInPCTables; - - uint8_t *Counters() const; - uintptr_t *PCs() const; - - std::set ObservedPCs; - - ValueBitMap ValueProfileMap; - uintptr_t InitialStack; -}; - -template -// void Callback(size_t FirstFeature, size_t Idx, uint8_t Value); -ATTRIBUTE_NO_SANITIZE_ALL -void ForEachNonZeroByte(const uint8_t *Begin, const uint8_t *End, - size_t FirstFeature, Callback Handle8bitCounter) { - typedef uintptr_t LargeType; - const size_t Step = sizeof(LargeType) / sizeof(uint8_t); - const size_t StepMask = Step - 1; - auto P = Begin; - // Iterate by 1 byte until either the alignment boundary or the end. - for (; reinterpret_cast(P) & StepMask && P < End; P++) - if (uint8_t V = *P) - Handle8bitCounter(FirstFeature, P - Begin, V); - - // Iterate by Step bytes at a time. - for (; P < End; P += Step) - if (LargeType Bundle = *reinterpret_cast(P)) - for (size_t I = 0; I < Step; I++, Bundle >>= 8) - if (uint8_t V = Bundle & 0xff) - Handle8bitCounter(FirstFeature, P - Begin + I, V); - - // Iterate by 1 byte until the end. - for (; P < End; P++) - if (uint8_t V = *P) - Handle8bitCounter(FirstFeature, P - Begin, V); -} - -// Given a non-zero Counters returns a number in [0,7]. -template -unsigned CounterToFeature(T Counter) { - assert(Counter); - unsigned Bit = 0; - /**/ if (Counter >= 128) Bit = 7; - else if (Counter >= 32) Bit = 6; - else if (Counter >= 16) Bit = 5; - else if (Counter >= 8) Bit = 4; - else if (Counter >= 4) Bit = 3; - else if (Counter >= 3) Bit = 2; - else if (Counter >= 2) Bit = 1; - return Bit; -} - -template // bool Callback(size_t Feature) -ATTRIBUTE_NO_SANITIZE_ADDRESS -__attribute__((noinline)) -void TracePC::CollectFeatures(Callback HandleFeature) const { - uint8_t *Counters = this->Counters(); - size_t N = GetNumPCs(); - auto Handle8bitCounter = [&](size_t FirstFeature, - size_t Idx, uint8_t Counter) { - HandleFeature(FirstFeature + Idx * 8 + CounterToFeature(Counter)); - }; - - size_t FirstFeature = 0; - - if (!NumInline8bitCounters) { - ForEachNonZeroByte(Counters, Counters + N, FirstFeature, Handle8bitCounter); - FirstFeature += N * 8; - } - - if (NumInline8bitCounters) { - for (size_t i = 0; i < NumModulesWithInline8bitCounters; i++) { - ForEachNonZeroByte(ModuleCounters[i].Start, ModuleCounters[i].Stop, - FirstFeature, Handle8bitCounter); - FirstFeature += 8 * (ModuleCounters[i].Stop - ModuleCounters[i].Start); - } - } - - if (size_t NumClangCounters = ClangCountersEnd() - ClangCountersBegin()) { - auto P = ClangCountersBegin(); - for (size_t Idx = 0; Idx < NumClangCounters; Idx++) - if (auto Cnt = P[Idx]) - HandleFeature(FirstFeature + Idx * 8 + CounterToFeature(Cnt)); - FirstFeature += NumClangCounters; - } - - ForEachNonZeroByte(ExtraCountersBegin(), ExtraCountersEnd(), FirstFeature, - Handle8bitCounter); - FirstFeature += (ExtraCountersEnd() - ExtraCountersBegin()) * 8; - - if (UseValueProfile) { - ValueProfileMap.ForEach([&](size_t Idx) { - HandleFeature(FirstFeature + Idx); - }); - FirstFeature += ValueProfileMap.SizeInBits(); - } - - if (auto MaxStackOffset = GetMaxStackOffset()) - HandleFeature(FirstFeature + MaxStackOffset); -} - -extern TracePC TPC; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_TRACE_PC diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp deleted file mode 100644 index f5a7773744932..0000000000000 --- a/lib/Fuzzer/FuzzerUtil.cpp +++ /dev/null @@ -1,215 +0,0 @@ -//===- FuzzerUtil.cpp - Misc utils 
----------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Misc utils. -//===----------------------------------------------------------------------===// - -#include "FuzzerUtil.h" -#include "FuzzerIO.h" -#include "FuzzerInternal.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -void PrintHexArray(const uint8_t *Data, size_t Size, - const char *PrintAfter) { - for (size_t i = 0; i < Size; i++) - Printf("0x%x,", (unsigned)Data[i]); - Printf("%s", PrintAfter); -} - -void Print(const Unit &v, const char *PrintAfter) { - PrintHexArray(v.data(), v.size(), PrintAfter); -} - -void PrintASCIIByte(uint8_t Byte) { - if (Byte == '\\') - Printf("\\\\"); - else if (Byte == '"') - Printf("\\\""); - else if (Byte >= 32 && Byte < 127) - Printf("%c", Byte); - else - Printf("\\x%02x", Byte); -} - -void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter) { - for (size_t i = 0; i < Size; i++) - PrintASCIIByte(Data[i]); - Printf("%s", PrintAfter); -} - -void PrintASCII(const Unit &U, const char *PrintAfter) { - PrintASCII(U.data(), U.size(), PrintAfter); -} - -bool ToASCII(uint8_t *Data, size_t Size) { - bool Changed = false; - for (size_t i = 0; i < Size; i++) { - uint8_t &X = Data[i]; - auto NewX = X; - NewX &= 127; - if (!isspace(NewX) && !isprint(NewX)) - NewX = ' '; - Changed |= NewX != X; - X = NewX; - } - return Changed; -} - -bool IsASCII(const Unit &U) { return IsASCII(U.data(), U.size()); } - -bool IsASCII(const uint8_t *Data, size_t Size) { - for (size_t i = 0; i < Size; i++) - if (!(isprint(Data[i]) || isspace(Data[i]))) return false; - return true; -} - -bool ParseOneDictionaryEntry(const std::string &Str, Unit *U) { - U->clear(); - if (Str.empty()) return false; - size_t L = 0, R = Str.size() - 1; // We are parsing the range [L,R]. - // Skip spaces from both sides. - while (L < R && isspace(Str[L])) L++; - while (R > L && isspace(Str[R])) R--; - if (R - L < 2) return false; - // Check the closing " - if (Str[R] != '"') return false; - R--; - // Find the opening " - while (L < R && Str[L] != '"') L++; - if (L >= R) return false; - assert(Str[L] == '\"'); - L++; - assert(L <= R); - for (size_t Pos = L; Pos <= R; Pos++) { - uint8_t V = (uint8_t)Str[Pos]; - if (!isprint(V) && !isspace(V)) return false; - if (V =='\\') { - // Handle '\\' - if (Pos + 1 <= R && (Str[Pos + 1] == '\\' || Str[Pos + 1] == '"')) { - U->push_back(Str[Pos + 1]); - Pos++; - continue; - } - // Handle '\xAB' - if (Pos + 3 <= R && Str[Pos + 1] == 'x' - && isxdigit(Str[Pos + 2]) && isxdigit(Str[Pos + 3])) { - char Hex[] = "0xAA"; - Hex[2] = Str[Pos + 2]; - Hex[3] = Str[Pos + 3]; - U->push_back(strtol(Hex, nullptr, 16)); - Pos += 3; - continue; - } - return false; // Invalid escape. - } else { - // Any other character. - U->push_back(V); - } - } - return true; -} - -bool ParseDictionaryFile(const std::string &Text, std::vector *Units) { - if (Text.empty()) { - Printf("ParseDictionaryFile: file does not exist or is empty\n"); - return false; - } - std::istringstream ISS(Text); - Units->clear(); - Unit U; - int LineNo = 0; - std::string S; - while (std::getline(ISS, S, '\n')) { - LineNo++; - size_t Pos = 0; - while (Pos < S.size() && isspace(S[Pos])) Pos++; // Skip spaces. 
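// Accepted line format (illustrative, AFL-compatible dictionaries):
// everything before the first '"' is ignored, the value sits between
// double quotes, and the only escapes understood are \\ , \" and \xAB
// (two hex digits). For example:
//   kw1="blah"
//   kw2="\xF7\xF8"
// Blank lines and lines starting with '#' are skipped by the checks below.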
- if (Pos == S.size()) continue; // Empty line. - if (S[Pos] == '#') continue; // Comment line. - if (ParseOneDictionaryEntry(S, &U)) { - Units->push_back(U); - } else { - Printf("ParseDictionaryFile: error in line %d\n\t\t%s\n", LineNo, - S.c_str()); - return false; - } - } - return true; -} - -std::string Base64(const Unit &U) { - static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - std::string Res; - size_t i; - for (i = 0; i + 2 < U.size(); i += 3) { - uint32_t x = (U[i] << 16) + (U[i + 1] << 8) + U[i + 2]; - Res += Table[(x >> 18) & 63]; - Res += Table[(x >> 12) & 63]; - Res += Table[(x >> 6) & 63]; - Res += Table[x & 63]; - } - if (i + 1 == U.size()) { - uint32_t x = (U[i] << 16); - Res += Table[(x >> 18) & 63]; - Res += Table[(x >> 12) & 63]; - Res += "=="; - } else if (i + 2 == U.size()) { - uint32_t x = (U[i] << 16) + (U[i + 1] << 8); - Res += Table[(x >> 18) & 63]; - Res += Table[(x >> 12) & 63]; - Res += Table[(x >> 6) & 63]; - Res += "="; - } - return Res; -} - -std::string DescribePC(const char *SymbolizedFMT, uintptr_t PC) { - if (!EF->__sanitizer_symbolize_pc) return ""; - char PcDescr[1024]; - EF->__sanitizer_symbolize_pc(reinterpret_cast(PC), - SymbolizedFMT, PcDescr, sizeof(PcDescr)); - PcDescr[sizeof(PcDescr) - 1] = 0; // Just in case. - return PcDescr; -} - -void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC) { - if (EF->__sanitizer_symbolize_pc) - Printf("%s", DescribePC(SymbolizedFMT, PC).c_str()); - else - Printf(FallbackFMT, PC); -} - -unsigned NumberOfCpuCores() { - unsigned N = std::thread::hardware_concurrency(); - if (!N) { - Printf("WARNING: std::thread::hardware_concurrency not well defined for " - "your platform. Assuming CPU count of 1.\n"); - N = 1; - } - return N; -} - -size_t SimpleFastHash(const uint8_t *Data, size_t Size) { - size_t Res = 0; - for (size_t i = 0; i < Size; i++) - Res = Res * 11 + Data[i]; - return Res; -} - -} // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerUtil.h b/lib/Fuzzer/FuzzerUtil.h deleted file mode 100644 index 9c90040b09cb4..0000000000000 --- a/lib/Fuzzer/FuzzerUtil.h +++ /dev/null @@ -1,84 +0,0 @@ -//===- FuzzerUtil.h - Internal header for the Fuzzer Utils ------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Util functions. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_UTIL_H -#define LLVM_FUZZER_UTIL_H - -#include "FuzzerDefs.h" - -namespace fuzzer { - -void PrintHexArray(const Unit &U, const char *PrintAfter = ""); - -void PrintHexArray(const uint8_t *Data, size_t Size, - const char *PrintAfter = ""); - -void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = ""); - -void PrintASCII(const Unit &U, const char *PrintAfter = ""); - -// Changes U to contain only ASCII (isprint+isspace) characters. -// Returns true iff U has been changed. -bool ToASCII(uint8_t *Data, size_t Size); - -bool IsASCII(const Unit &U); - -bool IsASCII(const uint8_t *Data, size_t Size); - -std::string Base64(const Unit &U); - -void PrintPC(const char *SymbolizedFMT, const char *FallbackFMT, uintptr_t PC); - -std::string DescribePC(const char *SymbolizedFMT, uintptr_t PC); - -unsigned NumberOfCpuCores(); - -// Platform specific functions. 
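SimpleFastHash, defined in FuzzerUtil.cpp above, is a plain multiply-and-add fold (Res = Res * 11 + byte), cheap enough to run on every memmem/strstr argument. A quick self-contained check of the arithmetic (the Toy* name is invented for this sketch):

#include <cassert>
#include <cstddef>
#include <cstdint>

// Same fold as fuzzer::SimpleFastHash.
static size_t ToySimpleFastHash(const uint8_t *Data, size_t Size) {
  size_t Res = 0;
  for (size_t i = 0; i < Size; i++)
    Res = Res * 11 + Data[i];
  return Res;
}

int main() {
  const uint8_t D[] = {1, 2, 3};
  // ((0*11 + 1)*11 + 2)*11 + 3 == 13*11 + 3 == 146
  assert(ToySimpleFastHash(D, sizeof(D)) == 146);
  return 0;
}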
-void SetSignalHandler(const FuzzingOptions& Options); - -void SleepSeconds(int Seconds); - -unsigned long GetPid(); - -size_t GetPeakRSSMb(); - -int ExecuteCommand(const std::string &Command); - -FILE *OpenProcessPipe(const char *Command, const char *Mode); - -const void *SearchMemory(const void *haystack, size_t haystacklen, - const void *needle, size_t needlelen); - -std::string CloneArgsWithoutX(const std::vector &Args, - const char *X1, const char *X2); - -inline std::string CloneArgsWithoutX(const std::vector &Args, - const char *X) { - return CloneArgsWithoutX(Args, X, X); -} - -inline std::pair SplitBefore(std::string X, - std::string S) { - auto Pos = S.find(X); - if (Pos == std::string::npos) - return std::make_pair(S, ""); - return std::make_pair(S.substr(0, Pos), S.substr(Pos)); -} - -std::string DisassembleCmd(const std::string &FileName); - -std::string SearchRegexCmd(const std::string &Regex); - -size_t SimpleFastHash(const uint8_t *Data, size_t Size); - -} // namespace fuzzer - -#endif // LLVM_FUZZER_UTIL_H diff --git a/lib/Fuzzer/FuzzerUtilDarwin.cpp b/lib/Fuzzer/FuzzerUtilDarwin.cpp deleted file mode 100644 index 2df4872a92069..0000000000000 --- a/lib/Fuzzer/FuzzerUtilDarwin.cpp +++ /dev/null @@ -1,161 +0,0 @@ -//===- FuzzerUtilDarwin.cpp - Misc utils ----------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Misc utils for Darwin. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_APPLE - -#include "FuzzerIO.h" -#include -#include -#include -#include -#include -#include - -// There is no header for this on macOS so declare here -extern "C" char **environ; - -namespace fuzzer { - -static std::mutex SignalMutex; -// Global variables used to keep track of how signal handling should be -// restored. They should **not** be accessed without holding `SignalMutex`. -static int ActiveThreadCount = 0; -static struct sigaction OldSigIntAction; -static struct sigaction OldSigQuitAction; -static sigset_t OldBlockedSignalsSet; - -// This is a reimplementation of Libc's `system()`. On Darwin the Libc -// implementation contains a mutex which prevents it from being used -// concurrently. This implementation **can** be used concurrently. It sets the -// signal handlers when the first thread enters and restores them when the last -// thread finishes execution of the function and ensures this is not racey by -// using a mutex. -int ExecuteCommand(const std::string &Command) { - posix_spawnattr_t SpawnAttributes; - if (posix_spawnattr_init(&SpawnAttributes)) - return -1; - // Block and ignore signals of the current process when the first thread - // enters. - { - std::lock_guard Lock(SignalMutex); - if (ActiveThreadCount == 0) { - static struct sigaction IgnoreSignalAction; - sigset_t BlockedSignalsSet; - memset(&IgnoreSignalAction, 0, sizeof(IgnoreSignalAction)); - IgnoreSignalAction.sa_handler = SIG_IGN; - - if (sigaction(SIGINT, &IgnoreSignalAction, &OldSigIntAction) == -1) { - Printf("Failed to ignore SIGINT\n"); - (void)posix_spawnattr_destroy(&SpawnAttributes); - return -1; - } - if (sigaction(SIGQUIT, &IgnoreSignalAction, &OldSigQuitAction) == -1) { - Printf("Failed to ignore SIGQUIT\n"); - // Try our best to restore the signal handlers. 
- (void)sigaction(SIGINT, &OldSigIntAction, NULL); - (void)posix_spawnattr_destroy(&SpawnAttributes); - return -1; - } - - (void)sigemptyset(&BlockedSignalsSet); - (void)sigaddset(&BlockedSignalsSet, SIGCHLD); - if (sigprocmask(SIG_BLOCK, &BlockedSignalsSet, &OldBlockedSignalsSet) == - -1) { - Printf("Failed to block SIGCHLD\n"); - // Try our best to restore the signal handlers. - (void)sigaction(SIGQUIT, &OldSigQuitAction, NULL); - (void)sigaction(SIGINT, &OldSigIntAction, NULL); - (void)posix_spawnattr_destroy(&SpawnAttributes); - return -1; - } - } - ++ActiveThreadCount; - } - - // NOTE: Do not introduce any new `return` statements past this - // point. It is important that `ActiveThreadCount` always be decremented - // when leaving this function. - - // Make sure the child process uses the default handlers for the - // following signals rather than inheriting what the parent has. - sigset_t DefaultSigSet; - (void)sigemptyset(&DefaultSigSet); - (void)sigaddset(&DefaultSigSet, SIGQUIT); - (void)sigaddset(&DefaultSigSet, SIGINT); - (void)posix_spawnattr_setsigdefault(&SpawnAttributes, &DefaultSigSet); - // Make sure the child process doesn't block SIGCHLD - (void)posix_spawnattr_setsigmask(&SpawnAttributes, &OldBlockedSignalsSet); - short SpawnFlags = POSIX_SPAWN_SETSIGDEF | POSIX_SPAWN_SETSIGMASK; - (void)posix_spawnattr_setflags(&SpawnAttributes, SpawnFlags); - - pid_t Pid; - char **Environ = environ; // Read from global - const char *CommandCStr = Command.c_str(); - char *const Argv[] = { - strdup("sh"), - strdup("-c"), - strdup(CommandCStr), - NULL - }; - int ErrorCode = 0, ProcessStatus = 0; - // FIXME: We probably shouldn't hardcode the shell path. - ErrorCode = posix_spawn(&Pid, "/bin/sh", NULL, &SpawnAttributes, - Argv, Environ); - (void)posix_spawnattr_destroy(&SpawnAttributes); - if (!ErrorCode) { - pid_t SavedPid = Pid; - do { - // Repeat until call completes uninterrupted. - Pid = waitpid(SavedPid, &ProcessStatus, /*options=*/0); - } while (Pid == -1 && errno == EINTR); - if (Pid == -1) { - // Fail for some other reason. - ProcessStatus = -1; - } - } else if (ErrorCode == ENOMEM || ErrorCode == EAGAIN) { - // Fork failure. - ProcessStatus = -1; - } else { - // Shell execution failure. - ProcessStatus = W_EXITCODE(127, 0); - } - for (unsigned i = 0, n = sizeof(Argv) / sizeof(Argv[0]); i < n; ++i) - free(Argv[i]); - - // Restore the signal handlers of the current process when the last thread - // using this function finishes. - { - std::lock_guard Lock(SignalMutex); - --ActiveThreadCount; - if (ActiveThreadCount == 0) { - bool FailedRestore = false; - if (sigaction(SIGINT, &OldSigIntAction, NULL) == -1) { - Printf("Failed to restore SIGINT handling\n"); - FailedRestore = true; - } - if (sigaction(SIGQUIT, &OldSigQuitAction, NULL) == -1) { - Printf("Failed to restore SIGQUIT handling\n"); - FailedRestore = true; - } - if (sigprocmask(SIG_BLOCK, &OldBlockedSignalsSet, NULL) == -1) { - Printf("Failed to unblock SIGCHLD\n"); - FailedRestore = true; - } - if (FailedRestore) - ProcessStatus = -1; - } - } - return ProcessStatus; -} - -} // namespace fuzzer - -#endif // LIBFUZZER_APPLE diff --git a/lib/Fuzzer/FuzzerUtilLinux.cpp b/lib/Fuzzer/FuzzerUtilLinux.cpp deleted file mode 100644 index dfe7e6f4e18a3..0000000000000 --- a/lib/Fuzzer/FuzzerUtilLinux.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===- FuzzerUtilLinux.cpp - Misc utils for Linux. 
------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Misc utils for Linux. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_LINUX - -#include - -namespace fuzzer { - -int ExecuteCommand(const std::string &Command) { - return system(Command.c_str()); -} - -} // namespace fuzzer - -#endif // LIBFUZZER_LINUX diff --git a/lib/Fuzzer/FuzzerUtilPosix.cpp b/lib/Fuzzer/FuzzerUtilPosix.cpp deleted file mode 100644 index bc85264ac187d..0000000000000 --- a/lib/Fuzzer/FuzzerUtilPosix.cpp +++ /dev/null @@ -1,144 +0,0 @@ -//===- FuzzerUtilPosix.cpp - Misc utils for Posix. ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Misc utils implementation using Posix API. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_POSIX -#include "FuzzerIO.h" -#include "FuzzerInternal.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fuzzer { - -static void AlarmHandler(int, siginfo_t *, void *) { - Fuzzer::StaticAlarmCallback(); -} - -static void CrashHandler(int, siginfo_t *, void *) { - Fuzzer::StaticCrashSignalCallback(); -} - -static void InterruptHandler(int, siginfo_t *, void *) { - Fuzzer::StaticInterruptCallback(); -} - -static void FileSizeExceedHandler(int, siginfo_t *, void *) { - Fuzzer::StaticFileSizeExceedCallback(); -} - -static void SetSigaction(int signum, - void (*callback)(int, siginfo_t *, void *)) { - struct sigaction sigact = {}; - if (sigaction(signum, nullptr, &sigact)) { - Printf("libFuzzer: sigaction failed with %d\n", errno); - exit(1); - } - if (sigact.sa_flags & SA_SIGINFO) { - if (sigact.sa_sigaction) - return; - } else { - if (sigact.sa_handler != SIG_DFL && sigact.sa_handler != SIG_IGN && - sigact.sa_handler != SIG_ERR) - return; - } - - sigact = {}; - sigact.sa_sigaction = callback; - if (sigaction(signum, &sigact, 0)) { - Printf("libFuzzer: sigaction failed with %d\n", errno); - exit(1); - } -} - -void SetTimer(int Seconds) { - struct itimerval T { - {Seconds, 0}, { Seconds, 0 } - }; - if (setitimer(ITIMER_REAL, &T, nullptr)) { - Printf("libFuzzer: setitimer failed with %d\n", errno); - exit(1); - } - SetSigaction(SIGALRM, AlarmHandler); -} - -void SetSignalHandler(const FuzzingOptions& Options) { - if (Options.UnitTimeoutSec > 0) - SetTimer(Options.UnitTimeoutSec / 2 + 1); - if (Options.HandleInt) - SetSigaction(SIGINT, InterruptHandler); - if (Options.HandleTerm) - SetSigaction(SIGTERM, InterruptHandler); - if (Options.HandleSegv) - SetSigaction(SIGSEGV, CrashHandler); - if (Options.HandleBus) - SetSigaction(SIGBUS, CrashHandler); - if (Options.HandleAbrt) - SetSigaction(SIGABRT, CrashHandler); - if (Options.HandleIll) - SetSigaction(SIGILL, CrashHandler); - if (Options.HandleFpe) - SetSigaction(SIGFPE, CrashHandler); - if (Options.HandleXfsz) - SetSigaction(SIGXFSZ, FileSizeExceedHandler); -} - -void SleepSeconds(int Seconds) { - sleep(Seconds); // Use C API to avoid coverage from 
instrumented libc++. -} - -unsigned long GetPid() { return (unsigned long)getpid(); } - -size_t GetPeakRSSMb() { - struct rusage usage; - if (getrusage(RUSAGE_SELF, &usage)) - return 0; - if (LIBFUZZER_LINUX) { - // ru_maxrss is in KiB - return usage.ru_maxrss >> 10; - } else if (LIBFUZZER_APPLE) { - // ru_maxrss is in bytes - return usage.ru_maxrss >> 20; - } - assert(0 && "GetPeakRSSMb() is not implemented for your platform"); - return 0; -} - -FILE *OpenProcessPipe(const char *Command, const char *Mode) { - return popen(Command, Mode); -} - -const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, - size_t PattLen) { - return memmem(Data, DataLen, Patt, PattLen); -} - -std::string DisassembleCmd(const std::string &FileName) { - return "objdump -d " + FileName; -} - -std::string SearchRegexCmd(const std::string &Regex) { - return "grep '" + Regex + "'"; -} - -} // namespace fuzzer - -#endif // LIBFUZZER_POSIX diff --git a/lib/Fuzzer/FuzzerUtilWindows.cpp b/lib/Fuzzer/FuzzerUtilWindows.cpp deleted file mode 100644 index 25ac976fc2dbb..0000000000000 --- a/lib/Fuzzer/FuzzerUtilWindows.cpp +++ /dev/null @@ -1,193 +0,0 @@ -//===- FuzzerUtilWindows.cpp - Misc utils for Windows. --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Misc utils implementation for Windows. -//===----------------------------------------------------------------------===// -#include "FuzzerDefs.h" -#if LIBFUZZER_WINDOWS -#include "FuzzerIO.h" -#include "FuzzerInternal.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// This must be included after windows.h. 
-#include - -namespace fuzzer { - -static const FuzzingOptions* HandlerOpt = nullptr; - -static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) { - switch (ExceptionInfo->ExceptionRecord->ExceptionCode) { - case EXCEPTION_ACCESS_VIOLATION: - case EXCEPTION_ARRAY_BOUNDS_EXCEEDED: - case EXCEPTION_STACK_OVERFLOW: - if (HandlerOpt->HandleSegv) - Fuzzer::StaticCrashSignalCallback(); - break; - case EXCEPTION_DATATYPE_MISALIGNMENT: - case EXCEPTION_IN_PAGE_ERROR: - if (HandlerOpt->HandleBus) - Fuzzer::StaticCrashSignalCallback(); - break; - case EXCEPTION_ILLEGAL_INSTRUCTION: - case EXCEPTION_PRIV_INSTRUCTION: - if (HandlerOpt->HandleIll) - Fuzzer::StaticCrashSignalCallback(); - break; - case EXCEPTION_FLT_DENORMAL_OPERAND: - case EXCEPTION_FLT_DIVIDE_BY_ZERO: - case EXCEPTION_FLT_INEXACT_RESULT: - case EXCEPTION_FLT_INVALID_OPERATION: - case EXCEPTION_FLT_OVERFLOW: - case EXCEPTION_FLT_STACK_CHECK: - case EXCEPTION_FLT_UNDERFLOW: - case EXCEPTION_INT_DIVIDE_BY_ZERO: - case EXCEPTION_INT_OVERFLOW: - if (HandlerOpt->HandleFpe) - Fuzzer::StaticCrashSignalCallback(); - break; - // TODO: handle (Options.HandleXfsz) - } - return EXCEPTION_CONTINUE_SEARCH; -} - -BOOL WINAPI CtrlHandler(DWORD dwCtrlType) { - switch (dwCtrlType) { - case CTRL_C_EVENT: - if (HandlerOpt->HandleInt) - Fuzzer::StaticInterruptCallback(); - return TRUE; - case CTRL_BREAK_EVENT: - if (HandlerOpt->HandleTerm) - Fuzzer::StaticInterruptCallback(); - return TRUE; - } - return FALSE; -} - -void CALLBACK AlarmHandler(PVOID, BOOLEAN) { - Fuzzer::StaticAlarmCallback(); -} - -class TimerQ { - HANDLE TimerQueue; - public: - TimerQ() : TimerQueue(NULL) {}; - ~TimerQ() { - if (TimerQueue) - DeleteTimerQueueEx(TimerQueue, NULL); - }; - void SetTimer(int Seconds) { - if (!TimerQueue) { - TimerQueue = CreateTimerQueue(); - if (!TimerQueue) { - Printf("libFuzzer: CreateTimerQueue failed.\n"); - exit(1); - } - } - HANDLE Timer; - if (!CreateTimerQueueTimer(&Timer, TimerQueue, AlarmHandler, NULL, - Seconds*1000, Seconds*1000, 0)) { - Printf("libFuzzer: CreateTimerQueueTimer failed.\n"); - exit(1); - } - }; -}; - -static TimerQ Timer; - -static void CrashHandler(int) { Fuzzer::StaticCrashSignalCallback(); } - -void SetSignalHandler(const FuzzingOptions& Options) { - HandlerOpt = &Options; - - if (Options.UnitTimeoutSec > 0) - Timer.SetTimer(Options.UnitTimeoutSec / 2 + 1); - - if (Options.HandleInt || Options.HandleTerm) - if (!SetConsoleCtrlHandler(CtrlHandler, TRUE)) { - DWORD LastError = GetLastError(); - Printf("libFuzzer: SetConsoleCtrlHandler failed (Error code: %lu).\n", - LastError); - exit(1); - } - - if (Options.HandleSegv || Options.HandleBus || Options.HandleIll || - Options.HandleFpe) - SetUnhandledExceptionFilter(ExceptionHandler); - - if (Options.HandleAbrt) - if (SIG_ERR == signal(SIGABRT, CrashHandler)) { - Printf("libFuzzer: signal failed with %d\n", errno); - exit(1); - } -} - -void SleepSeconds(int Seconds) { Sleep(Seconds * 1000); } - -unsigned long GetPid() { return GetCurrentProcessId(); } - -size_t GetPeakRSSMb() { - PROCESS_MEMORY_COUNTERS info; - if (!GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info))) - return 0; - return info.PeakWorkingSetSize >> 20; -} - -FILE *OpenProcessPipe(const char *Command, const char *Mode) { - return _popen(Command, Mode); -} - -int ExecuteCommand(const std::string &Command) { - return system(Command.c_str()); -} - -const void *SearchMemory(const void *Data, size_t DataLen, const void *Patt, - size_t PattLen) { - // TODO: make this implementation more 
efficient. - const char *Cdata = (const char *)Data; - const char *Cpatt = (const char *)Patt; - - if (!Data || !Patt || DataLen == 0 || PattLen == 0 || DataLen < PattLen) - return NULL; - - if (PattLen == 1) - return memchr(Data, *Cpatt, DataLen); - - const char *End = Cdata + DataLen - PattLen + 1; - - for (const char *It = Cdata; It < End; ++It) - if (It[0] == Cpatt[0] && memcmp(It, Cpatt, PattLen) == 0) - return It; - - return NULL; -} - -std::string DisassembleCmd(const std::string &FileName) { - if (ExecuteCommand("dumpbin /summary > nul") == 0) - return "dumpbin /disasm " + FileName; - Printf("libFuzzer: couldn't find tool to disassemble (dumpbin)\n"); - exit(1); -} - -std::string SearchRegexCmd(const std::string &Regex) { - return "findstr /r \"" + Regex + "\""; -} - -} // namespace fuzzer - -#endif // LIBFUZZER_WINDOWS diff --git a/lib/Fuzzer/FuzzerValueBitMap.h b/lib/Fuzzer/FuzzerValueBitMap.h deleted file mode 100644 index 13d7cbd95dd73..0000000000000 --- a/lib/Fuzzer/FuzzerValueBitMap.h +++ /dev/null @@ -1,73 +0,0 @@ -//===- FuzzerValueBitMap.h - INTERNAL - Bit map -----------------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// ValueBitMap. -//===----------------------------------------------------------------------===// - -#ifndef LLVM_FUZZER_VALUE_BIT_MAP_H -#define LLVM_FUZZER_VALUE_BIT_MAP_H - -#include "FuzzerDefs.h" - -namespace fuzzer { - -// A bit map containing kMapSizeInWords bits. -struct ValueBitMap { - static const size_t kMapSizeInBits = 1 << 16; - static const size_t kMapPrimeMod = 65371; // Largest Prime < kMapSizeInBits; - static const size_t kBitsInWord = (sizeof(uintptr_t) * 8); - static const size_t kMapSizeInWords = kMapSizeInBits / kBitsInWord; - public: - - // Clears all bits. - void Reset() { memset(Map, 0, sizeof(Map)); } - - // Computes a hash function of Value and sets the corresponding bit. - // Returns true if the bit was changed from 0 to 1. - ATTRIBUTE_NO_SANITIZE_ALL - inline bool AddValue(uintptr_t Value) { - uintptr_t Idx = Value % kMapSizeInBits; - uintptr_t WordIdx = Idx / kBitsInWord; - uintptr_t BitIdx = Idx % kBitsInWord; - uintptr_t Old = Map[WordIdx]; - uintptr_t New = Old | (1UL << BitIdx); - Map[WordIdx] = New; - return New != Old; - } - - ATTRIBUTE_NO_SANITIZE_ALL - inline bool AddValueModPrime(uintptr_t Value) { - return AddValue(Value % kMapPrimeMod); - } - - inline bool Get(uintptr_t Idx) { - assert(Idx < kMapSizeInBits); - uintptr_t WordIdx = Idx / kBitsInWord; - uintptr_t BitIdx = Idx % kBitsInWord; - return Map[WordIdx] & (1UL << BitIdx); - } - - size_t SizeInBits() const { return kMapSizeInBits; } - - template - ATTRIBUTE_NO_SANITIZE_ALL - void ForEach(Callback CB) const { - for (size_t i = 0; i < kMapSizeInWords; i++) - if (uintptr_t M = Map[i]) - for (size_t j = 0; j < sizeof(M) * 8; j++) - if (M & ((uintptr_t)1 << j)) - CB(i * sizeof(M) * 8 + j); - } - - private: - uintptr_t Map[kMapSizeInWords] __attribute__((aligned(512))); -}; - -} // namespace fuzzer - -#endif // LLVM_FUZZER_VALUE_BIT_MAP_H diff --git a/lib/Fuzzer/README.txt b/lib/Fuzzer/README.txt index 4932d616e5e3c..53ac637638f64 100644 --- a/lib/Fuzzer/README.txt +++ b/lib/Fuzzer/README.txt @@ -1,5 +1 @@ libFuzzer was moved to compiler-rt in https://reviews.llvm.org/D36908. -All future changes should be directed there. 
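The ValueBitMap removed above is a fixed-size hash set over uintptr_t values, backed by a 64K-bit array. A short usage sketch of the deleted header's API (illustrative only, not from the source tree):

    fuzzer::ValueBitMap Map;
    Map.Reset();                           // the backing array is not auto-zeroed
    bool NewBit = Map.AddValue(0x1234);    // true: bit went 0 -> 1
    bool Again  = Map.AddValue(0x1234);    // false: bit was already set
    Map.ForEach([](size_t Idx) {           // visit the index of every set bit
      printf("bit %zu is set\n", Idx);
    });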
- -The copy of sources is temporarily left in this folder for the duration of a -move. diff --git a/lib/Fuzzer/afl/afl_driver.cpp b/lib/Fuzzer/afl/afl_driver.cpp deleted file mode 100644 index 15bceb896e175..0000000000000 --- a/lib/Fuzzer/afl/afl_driver.cpp +++ /dev/null @@ -1,335 +0,0 @@ -//===- afl_driver.cpp - a glue between AFL and libFuzzer --------*- C++ -* ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -//===----------------------------------------------------------------------===// - -/* This file allows to fuzz libFuzzer-style target functions - (LLVMFuzzerTestOneInput) with AFL using AFL's persistent (in-process) mode. - -Usage: -################################################################################ -cat << EOF > test_fuzzer.cc -#include -#include -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - if (size > 0 && data[0] == 'H') - if (size > 1 && data[1] == 'I') - if (size > 2 && data[2] == '!') - __builtin_trap(); - return 0; -} -EOF -# Build your target with -fsanitize-coverage=trace-pc-guard using fresh clang. -clang -g -fsanitize-coverage=trace-pc-guard test_fuzzer.cc -c -# Build afl-llvm-rt.o.c from the AFL distribution. -clang -c -w $AFL_HOME/llvm_mode/afl-llvm-rt.o.c -# Build this file, link it with afl-llvm-rt.o.o and the target code. -clang++ afl_driver.cpp test_fuzzer.o afl-llvm-rt.o.o -# Run AFL: -rm -rf IN OUT; mkdir IN OUT; echo z > IN/z; -$AFL_HOME/afl-fuzz -i IN -o OUT ./a.out -################################################################################ -Environment Variables: -There are a few environment variables that can be set to use features that -afl-fuzz doesn't have. - -AFL_DRIVER_STDERR_DUPLICATE_FILENAME: Setting this *appends* stderr to the file -specified. If the file does not exist, it is created. This is useful for getting -stack traces (when using ASAN for example) or original error messages on hard to -reproduce bugs. - -AFL_DRIVER_EXTRA_STATS_FILENAME: Setting this causes afl_driver to write extra -statistics to the file specified. Currently these are peak_rss_mb -(the peak amount of virtual memory used in MB) and slowest_unit_time_secs. If -the file does not exist it is created. If the file does exist then -afl_driver assumes it was restarted by afl-fuzz and will try to read old -statistics from the file. If that fails then the process will quit. - -*/ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -// Platform detection. Copied from FuzzerInternal.h -#ifdef __linux__ -#define LIBFUZZER_LINUX 1 -#define LIBFUZZER_APPLE 0 -#elif __APPLE__ -#define LIBFUZZER_LINUX 0 -#define LIBFUZZER_APPLE 1 -#else -#error "Support for your platform has not been implemented" -#endif - -// Used to avoid repeating error checking boilerplate. If cond is false, a -// fatal error has occured in the program. In this event print error_message -// to stderr and abort(). Otherwise do nothing. Note that setting -// AFL_DRIVER_STDERR_DUPLICATE_FILENAME may cause error_message to be appended -// to the file as well, if the error occurs after the duplication is performed. -#define CHECK_ERROR(cond, error_message) \ - if (!(cond)) { \ - fprintf(stderr, (error_message)); \ - abort(); \ - } - -// libFuzzer interface is thin, so we don't include any libFuzzer headers. 
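// The two declarations below are the whole contract between this driver and a
// fuzz target: LLVMFuzzerTestOneInput must be defined by the target, while
// LLVMFuzzerInitialize is declared weak so it stays optional -- if the target
// never defines it, the symbol is null and the `if (LLVMFuzzerInitialize)`
// check in main() skips the call. A minimal target using both might look like
// this (illustrative sketch only, not part of the driver):
//
//   #include <stddef.h>
//   #include <stdint.h>
//
//   extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
//     return 0;  // One-time setup, e.g. consuming custom flags from *argv.
//   }
//
//   extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
//     // Feed data/size to the code under test; return 0 on a normal run.
//     return 0;
//   }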
-extern "C" { -int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); -__attribute__((weak)) int LLVMFuzzerInitialize(int *argc, char ***argv); -} - -// Notify AFL about persistent mode. -static volatile char AFL_PERSISTENT[] = "##SIG_AFL_PERSISTENT##"; -extern "C" int __afl_persistent_loop(unsigned int); -static volatile char suppress_warning2 = AFL_PERSISTENT[0]; - -// Notify AFL about deferred forkserver. -static volatile char AFL_DEFER_FORKSVR[] = "##SIG_AFL_DEFER_FORKSRV##"; -extern "C" void __afl_manual_init(); -static volatile char suppress_warning1 = AFL_DEFER_FORKSVR[0]; - -// Input buffer. -static const size_t kMaxAflInputSize = 1 << 20; -static uint8_t AflInputBuf[kMaxAflInputSize]; - -// Variables we need for writing to the extra stats file. -static FILE *extra_stats_file = NULL; -static uint32_t previous_peak_rss = 0; -static time_t slowest_unit_time_secs = 0; -static const int kNumExtraStats = 2; -static const char *kExtraStatsFormatString = "peak_rss_mb : %u\n" - "slowest_unit_time_sec : %u\n"; - -// Copied from FuzzerUtil.cpp. -size_t GetPeakRSSMb() { - struct rusage usage; - if (getrusage(RUSAGE_SELF, &usage)) - return 0; - if (LIBFUZZER_LINUX) { - // ru_maxrss is in KiB - return usage.ru_maxrss >> 10; - } else if (LIBFUZZER_APPLE) { - // ru_maxrss is in bytes - return usage.ru_maxrss >> 20; - } - assert(0 && "GetPeakRSSMb() is not implemented for your platform"); - return 0; -} - -// Based on SetSigaction in FuzzerUtil.cpp -static void SetSigaction(int signum, - void (*callback)(int, siginfo_t *, void *)) { - struct sigaction sigact; - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_sigaction = callback; - if (sigaction(signum, &sigact, 0)) { - fprintf(stderr, "libFuzzer: sigaction failed with %d\n", errno); - exit(1); - } -} - -// Write extra stats to the file specified by the user. If none is specified -// this function will never be called. -static void write_extra_stats() { - uint32_t peak_rss = GetPeakRSSMb(); - - if (peak_rss < previous_peak_rss) - peak_rss = previous_peak_rss; - - int chars_printed = fprintf(extra_stats_file, kExtraStatsFormatString, - peak_rss, slowest_unit_time_secs); - - CHECK_ERROR(chars_printed != 0, "Failed to write extra_stats_file"); - - CHECK_ERROR(fclose(extra_stats_file) == 0, - "Failed to close extra_stats_file"); -} - -// Call write_extra_stats before we exit. -static void crash_handler(int, siginfo_t *, void *) { - // Make sure we don't try calling write_extra_stats again if we crashed while - // trying to call it. - static bool first_crash = true; - CHECK_ERROR(first_crash, - "Crashed in crash signal handler. This is a bug in the fuzzer."); - - first_crash = false; - write_extra_stats(); -} - -// If the user has specified an extra_stats_file through the environment -// variable AFL_DRIVER_EXTRA_STATS_FILENAME, then perform necessary set up -// to write stats to it on exit. If no file is specified, do nothing. Otherwise -// install signal and exit handlers to write to the file when the process exits. -// Then if the file doesn't exist create it and set extra stats to 0. But if it -// does exist then read the initial values of the extra stats from the file -// and check that the file is writable. -static void maybe_initialize_extra_stats() { - // If AFL_DRIVER_EXTRA_STATS_FILENAME isn't set then we have nothing to do. - char *extra_stats_filename = getenv("AFL_DRIVER_EXTRA_STATS_FILENAME"); - if (!extra_stats_filename) - return; - - // Open the file and find the previous peak_rss_mb value. 
- // This is necessary because the fuzzing process is restarted after N - // iterations are completed. So we may need to get this value from a previous - // process to be accurate. - extra_stats_file = fopen(extra_stats_filename, "r"); - - // If extra_stats_file already exists: read old stats from it. - if (extra_stats_file) { - int matches = fscanf(extra_stats_file, kExtraStatsFormatString, - &previous_peak_rss, &slowest_unit_time_secs); - - // Make sure we have read a real extra stats file and that we have used it - // to set slowest_unit_time_secs and previous_peak_rss. - CHECK_ERROR(matches == kNumExtraStats, "Extra stats file is corrupt"); - - CHECK_ERROR(fclose(extra_stats_file) == 0, "Failed to close file"); - - // Now open the file for writing. - extra_stats_file = fopen(extra_stats_filename, "w"); - CHECK_ERROR(extra_stats_file, - "Failed to open extra stats file for writing"); - } else { - // Looks like this is the first time in a fuzzing job this is being called. - extra_stats_file = fopen(extra_stats_filename, "w+"); - CHECK_ERROR(extra_stats_file, "failed to create extra stats file"); - } - - // Make sure that crash_handler gets called on any kind of fatal error. - int crash_signals[] = {SIGSEGV, SIGBUS, SIGABRT, SIGILL, SIGFPE, SIGINT, - SIGTERM}; - - const size_t num_signals = sizeof(crash_signals) / sizeof(crash_signals[0]); - - for (size_t idx = 0; idx < num_signals; idx++) - SetSigaction(crash_signals[idx], crash_handler); - - // Make sure it gets called on other kinds of exits. - atexit(write_extra_stats); -} - -// If the user asks us to duplicate stderr, then do it. -static void maybe_duplicate_stderr() { - char* stderr_duplicate_filename = - getenv("AFL_DRIVER_STDERR_DUPLICATE_FILENAME"); - - if (!stderr_duplicate_filename) - return; - - FILE* stderr_duplicate_stream = - freopen(stderr_duplicate_filename, "a+", stderr); - - if (!stderr_duplicate_stream) { - fprintf( - stderr, - "Failed to duplicate stderr to AFL_DRIVER_STDERR_DUPLICATE_FILENAME"); - abort(); - } -} - -// Define LLVMFuzzerMutate to avoid link failures for targets that use it -// with libFuzzer's LLVMFuzzerCustomMutator. -extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) { - assert(false && "LLVMFuzzerMutate should not be called from afl_driver"); - return 0; -} - -// Execute any files provided as parameters. -int ExecuteFilesOnyByOne(int argc, char **argv) { - for (int i = 1; i < argc; i++) { - std::ifstream in(argv[i]); - in.seekg(0, in.end); - size_t length = in.tellg(); - in.seekg (0, in.beg); - std::cout << "Reading " << length << " bytes from " << argv[i] << std::endl; - // Allocate exactly length bytes so that we reliably catch buffer overflows. - std::vector bytes(length); - in.read(bytes.data(), bytes.size()); - assert(in); - LLVMFuzzerTestOneInput(reinterpret_cast(bytes.data()), - bytes.size()); - std::cout << "Execution successfull" << std::endl; - } - return 0; -} - -int main(int argc, char **argv) { - fprintf(stderr, - "======================= INFO =========================\n" - "This binary is built for AFL-fuzz.\n" - "To run the target function on individual input(s) execute this:\n" - " %s < INPUT_FILE\n" - "or\n" - " %s INPUT_FILE1 [INPUT_FILE2 ... 
]\n" - "To fuzz with afl-fuzz execute this:\n" - " afl-fuzz [afl-flags] %s [-N]\n" - "afl-fuzz will run N iterations before " - "re-spawning the process (default: 1000)\n" - "======================================================\n", - argv[0], argv[0], argv[0]); - if (LLVMFuzzerInitialize) - LLVMFuzzerInitialize(&argc, &argv); - // Do any other expensive one-time initialization here. - - maybe_duplicate_stderr(); - maybe_initialize_extra_stats(); - - __afl_manual_init(); - - int N = 1000; - if (argc == 2 && argv[1][0] == '-') - N = atoi(argv[1] + 1); - else if(argc == 2 && (N = atoi(argv[1])) > 0) - fprintf(stderr, "WARNING: using the deprecated call style `%s %d`\n", - argv[0], N); - else if (argc > 1) - return ExecuteFilesOnyByOne(argc, argv); - - assert(N > 0); - time_t unit_time_secs; - int num_runs = 0; - while (__afl_persistent_loop(N)) { - ssize_t n_read = read(0, AflInputBuf, kMaxAflInputSize); - if (n_read > 0) { - // Copy AflInputBuf into a separate buffer to let asan find buffer - // overflows. Don't use unique_ptr/etc to avoid extra dependencies. - uint8_t *copy = new uint8_t[n_read]; - memcpy(copy, AflInputBuf, n_read); - - struct timeval unit_start_time; - CHECK_ERROR(gettimeofday(&unit_start_time, NULL) == 0, - "Calling gettimeofday failed"); - - num_runs++; - LLVMFuzzerTestOneInput(copy, n_read); - - struct timeval unit_stop_time; - CHECK_ERROR(gettimeofday(&unit_stop_time, NULL) == 0, - "Calling gettimeofday failed"); - - // Update slowest_unit_time_secs if we see a new max. - unit_time_secs = unit_stop_time.tv_sec - unit_start_time.tv_sec; - if (slowest_unit_time_secs < unit_time_secs) - slowest_unit_time_secs = unit_time_secs; - - delete[] copy; - } - } - fprintf(stderr, "%s: successfully executed %d input(s)\n", argv[0], num_runs); -} diff --git a/lib/Fuzzer/build.sh b/lib/Fuzzer/build.sh deleted file mode 100755 index 4556af5daf7db..0000000000000 --- a/lib/Fuzzer/build.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -LIBFUZZER_SRC_DIR=$(dirname $0) -CXX="${CXX:-clang}" -for f in $LIBFUZZER_SRC_DIR/*.cpp; do - $CXX -g -O2 -fno-omit-frame-pointer -std=c++11 $f -c & -done -wait -rm -f libFuzzer.a -ar ru libFuzzer.a Fuzzer*.o -rm -f Fuzzer*.o - diff --git a/lib/Fuzzer/cxx.dict b/lib/Fuzzer/cxx.dict deleted file mode 100644 index 41350f47558b8..0000000000000 --- a/lib/Fuzzer/cxx.dict +++ /dev/null @@ -1,122 +0,0 @@ -"++" -"--" -"<<" -">>" -"+=" -"-=" -"*=" -"/=" -">>=" -"<<=" -"&=" -"|=" -"^=" -"%=" -"!=" -"&&" -"||" -"==" -">=" -"<=" -"->" -"alignas" -"alignof" -"and" -"and_eq" -"asm" -"auto" -"bitand" -"bitor" -"bool" -"break" -"case" -"catch" -"char" -"char16_t" -"char32_t" -"class" -"compl" -"concept" -"const" -"constexpr" -"const_cast" -"continue" -"decltype" -"default" -"delete" -"do" -"double" -"dynamic_cast" -"else" -"enum" -"explicit" -"export" -"extern" -"false" -"float" -"for" -"friend" -"goto" -"if" -"inline" -"int" -"long" -"mutable" -"namespace" -"new" -"noexcept" -"not" -"not_eq" -"nullptr" -"operator" -"or" -"or_eq" -"private" -"protected" -"public" -"register" -"reinterpret_cast" -"requires" -"return" -"short" -"signed" -"sizeof" -"static" -"static_assert" -"static_cast" -"struct" -"switch" -"template" -"this" -"thread_local" -"throw" -"true" -"try" -"typedef" -"typeid" -"typename" -"union" -"unsigned" -"using" -"virtual" -"void" -"volatile" -"wchar_t" -"while" -"xor" -"xor_eq" -"if" -"elif" -"else" -"endif" -"defined" -"ifdef" -"ifndef" -"define" -"undef" -"include" -"line" -"error" -"pragma" -"override" -"final" diff --git 
a/lib/Fuzzer/standalone/StandaloneFuzzTargetMain.c b/lib/Fuzzer/standalone/StandaloneFuzzTargetMain.c deleted file mode 100644 index 0d76ea49e7964..0000000000000 --- a/lib/Fuzzer/standalone/StandaloneFuzzTargetMain.c +++ /dev/null @@ -1,41 +0,0 @@ -/*===- StandaloneFuzzTargetMain.c - standalone main() for fuzz targets. ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This main() function can be linked to a fuzz target (i.e. a library -// that exports LLVMFuzzerTestOneInput() and possibly LLVMFuzzerInitialize()) -// instead of libFuzzer. This main() function will not perform any fuzzing -// but will simply feed all input files one by one to the fuzz target. -// -// Use this file to provide reproducers for bugs when linking against libFuzzer -// or other fuzzing engine is undesirable. -//===----------------------------------------------------------------------===*/ -#include -#include -#include - -extern int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size); -__attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); -int main(int argc, char **argv) { - fprintf(stderr, "StandaloneFuzzTargetMain: running %d inputs\n", argc - 1); - if (LLVMFuzzerInitialize) - LLVMFuzzerInitialize(&argc, &argv); - for (int i = 1; i < argc; i++) { - fprintf(stderr, "Running: %s\n", argv[i]); - FILE *f = fopen(argv[i], "r"); - assert(f); - fseek(f, 0, SEEK_END); - size_t len = ftell(f); - fseek(f, 0, SEEK_SET); - unsigned char *buf = (unsigned char*)malloc(len); - size_t n_read = fread(buf, 1, len, f); - assert(n_read == len); - LLVMFuzzerTestOneInput(buf, len); - free(buf); - fprintf(stderr, "Done: %s: (%zd bytes)\n", argv[i], n_read); - } -} diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index f351aa1d615a0..f6ed6a2116b83 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -373,7 +373,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::HHVM: Out << "hhvmcc"; break; case CallingConv::HHVM_C: Out << "hhvm_ccc"; break; case CallingConv::AMDGPU_VS: Out << "amdgpu_vs"; break; + case CallingConv::AMDGPU_LS: Out << "amdgpu_ls"; break; case CallingConv::AMDGPU_HS: Out << "amdgpu_hs"; break; + case CallingConv::AMDGPU_ES: Out << "amdgpu_es"; break; case CallingConv::AMDGPU_GS: Out << "amdgpu_gs"; break; case CallingConv::AMDGPU_PS: Out << "amdgpu_ps"; break; case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break; diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 54b9761bd03f8..c8f1aaaccee36 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -790,14 +790,12 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const { // AttributeListImpl Definition //===----------------------------------------------------------------------===// -/// Map from AttributeList index to the internal array index. Adding one works: -/// FunctionIndex: ~0U -> 0 -/// ReturnIndex: 0 -> 1 -/// FirstArgIndex: 1.. -> 2.. +/// Map from AttributeList index to the internal array index. Adding one happens +/// to work, but it relies on unsigned integer wrapping. MSVC warns about +/// unsigned wrapping in constexpr functions, so write out the conditional. LLVM +/// folds it to add anyway. 
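// The conditional below implements exactly the mapping the old wrapping trick
// produced. For illustration (hypothetical checks, not present in the source;
// the index values come from the deleted comment above):
//   static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0, "");
//   static_assert(attrIdxToArrayIdx(AttributeList::ReturnIndex)   == 1, "");
//   static_assert(attrIdxToArrayIdx(AttributeList::FirstArgIndex) == 2, "");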
static constexpr unsigned attrIdxToArrayIdx(unsigned Index) { - // MSVC warns about '~0U + 1' wrapping around when this is called on - // FunctionIndex, so cast to int first. - return static_cast(Index) + 1; + return Index == AttributeList::FunctionIndex ? 0 : Index + 1; } AttributeListImpl::AttributeListImpl(LLVMContext &C, diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index 2a69912671e83..07d499bc19337 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Regex.h" #include @@ -2358,15 +2359,26 @@ Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { /// info. Return true if module is modified. bool llvm::UpgradeDebugInfo(Module &M) { unsigned Version = getDebugMetadataVersionFromModule(M); - if (Version == DEBUG_METADATA_VERSION) - return false; - - bool RetCode = StripDebugInfo(M); - if (RetCode) { + if (Version == DEBUG_METADATA_VERSION) { + bool BrokenDebugInfo = false; + if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo)) + report_fatal_error("Broken module found, compilation aborted!"); + if (!BrokenDebugInfo) + // Everything is ok. + return false; + else { + // Diagnose malformed debug info. + DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M); + M.getContext().diagnose(Diag); + } + } + bool Modified = StripDebugInfo(M); + if (Modified && Version != DEBUG_METADATA_VERSION) { + // Diagnose a version mismatch. DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); M.getContext().diagnose(DiagVersion); } - return RetCode; + return Modified; } bool llvm::UpgradeModuleFlags(Module &M) { @@ -2436,6 +2448,35 @@ bool llvm::UpgradeModuleFlags(Module &M) { return Changed; } +void llvm::UpgradeSectionAttributes(Module &M) { + auto TrimSpaces = [](StringRef Section) -> std::string { + SmallVector Components; + Section.split(Components, ','); + + SmallString<32> Buffer; + raw_svector_ostream OS(Buffer); + + for (auto Component : Components) + OS << ',' << Component.trim(); + + return OS.str().substr(1); + }; + + for (auto &GV : M.globals()) { + if (!GV.hasSection()) + continue; + + StringRef Section = GV.getSection(); + + if (!Section.startswith("__DATA, __objc_catlist")) + continue; + + // __DATA, __objc_catlist, regular, no_dead_strip + // __DATA,__objc_catlist,regular,no_dead_strip + GV.setSection(TrimSpaces(Section)); + } +} + static bool isOldLoopArgument(Metadata *MD) { auto *T = dyn_cast_or_null(MD); if (!T) diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 60dd20e4659f0..044cc1ff449e8 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -629,6 +629,15 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, if (ConstantExpr *CE = dyn_cast(V)) if (CE->getOpcode() == Instruction::GetElementPtr && CE->getOperand(0)->isNullValue()) { + // FIXME: Looks like getFoldedSizeOf(), getFoldedOffsetOf() and + // getFoldedAlignOf() don't handle the case when DestTy is a vector of + // pointers yet. We end up in asserts in CastInst::getCastOpcode (see + // test/Analysis/ConstantFolding/cast-vector.ll). I've only seen this + // happen in one "real" C-code test case, so it does not seem to be an + // important optimization to handle vectors here. For now, simply bail + // out. 
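// An illustrative constant of the problematic shape (a sketch in the spirit
// of cast-vector.ll, not copied from it): a GEP over a null base with a
// vector index yields a vector of pointers, and casting that used to assert:
//   ptrtoint (<2 x i32*> getelementptr (i32, i32* null,
//                                       <2 x i64> <i64 0, i64 1>)) to <2 x i16>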
+ if (DestTy->isVectorTy()) + return nullptr; GEPOperator *GEPO = cast(CE); Type *Ty = GEPO->getSourceElementType(); if (CE->getNumOperands() == 2) { diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index abb83e01e0c21..54c73b01acf4d 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -276,7 +276,8 @@ LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename, dest.close(); if (dest.has_error()) { - *ErrorMessage = strdup("Error printing to file"); + std::string E = "Error printing to file: " + dest.error().message(); + *ErrorMessage = strdup(E.c_str()); return true; } diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 88f5b36dd5836..18979a8d5cf5e 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/DIBuilder.h" +#include "llvm/IR/IRBuilder.h" #include "LLVMContextImpl.h" #include "llvm/ADT/STLExtras.h" #include "llvm/BinaryFormat/Dwarf.h" @@ -771,16 +772,59 @@ DILexicalBlock *DIBuilder::createLexicalBlock(DIScope *Scope, DIFile *File, File, Line, Col); } +Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo, + DIExpression *Expr, const DILocation *DL, + Instruction *InsertBefore) { + return insertDeclare(Storage, VarInfo, Expr, DL, InsertBefore->getParent(), + InsertBefore); +} + +Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo, + DIExpression *Expr, const DILocation *DL, + BasicBlock *InsertAtEnd) { + // If this block already has a terminator then insert this intrinsic before + // the terminator. Otherwise, put it at the end of the block. + Instruction *InsertBefore = InsertAtEnd->getTerminator(); + return insertDeclare(Storage, VarInfo, Expr, DL, InsertAtEnd, InsertBefore); +} + +Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, + DILocalVariable *VarInfo, + DIExpression *Expr, + const DILocation *DL, + Instruction *InsertBefore) { + return insertDbgValueIntrinsic( + V, VarInfo, Expr, DL, InsertBefore ? InsertBefore->getParent() : nullptr, + InsertBefore); +} + +Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, + DILocalVariable *VarInfo, + DIExpression *Expr, + const DILocation *DL, + BasicBlock *InsertAtEnd) { + return insertDbgValueIntrinsic(V, VarInfo, Expr, DL, InsertAtEnd, nullptr); +} + +/// Return an IRBuilder for inserting dbg.declare and dbg.value intrinsics. This +/// abstracts over the various ways to specify an insert position. +static IRBuilder<> getIRBForDbgInsertion(const DILocation *DL, + BasicBlock *InsertBB, + Instruction *InsertBefore) { + IRBuilder<> B(DL->getContext()); + if (InsertBefore) + B.SetInsertPoint(InsertBefore); + else if (InsertBB) + B.SetInsertPoint(InsertBB); + B.SetCurrentDebugLocation(DL); + return B; +} + static Value *getDbgIntrinsicValueImpl(LLVMContext &VMContext, Value *V) { assert(V && "no value passed to dbg intrinsic"); return MetadataAsValue::get(VMContext, ValueAsMetadata::get(V)); } -static Instruction *withDebugLoc(Instruction *I, const DILocation *DL) { - I->setDebugLoc(const_cast(DL)); - return I; -} - static Function *getDeclareIntrin(Module &M) { return Intrinsic::getDeclaration(&M, UseDbgAddr ? 
Intrinsic::dbg_addr : Intrinsic::dbg_declare); @@ -788,7 +832,7 @@ static Function *getDeclareIntrin(Module &M) { Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo, DIExpression *Expr, const DILocation *DL, - Instruction *InsertBefore) { + BasicBlock *InsertBB, Instruction *InsertBefore) { assert(VarInfo && "empty or invalid DILocalVariable* passed to dbg.declare"); assert(DL && "Expected debug loc"); assert(DL->getScope()->getSubprogram() == @@ -802,60 +846,14 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo, Value *Args[] = {getDbgIntrinsicValueImpl(VMContext, Storage), MetadataAsValue::get(VMContext, VarInfo), MetadataAsValue::get(VMContext, Expr)}; - return withDebugLoc(CallInst::Create(DeclareFn, Args, "", InsertBefore), DL); -} -Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo, - DIExpression *Expr, const DILocation *DL, - BasicBlock *InsertAtEnd) { - assert(VarInfo && "empty or invalid DILocalVariable* passed to dbg.declare"); - assert(DL && "Expected debug loc"); - assert(DL->getScope()->getSubprogram() == - VarInfo->getScope()->getSubprogram() && - "Expected matching subprograms"); - if (!DeclareFn) - DeclareFn = getDeclareIntrin(M); - - trackIfUnresolved(VarInfo); - trackIfUnresolved(Expr); - Value *Args[] = {getDbgIntrinsicValueImpl(VMContext, Storage), - MetadataAsValue::get(VMContext, VarInfo), - MetadataAsValue::get(VMContext, Expr)}; - - // If this block already has a terminator then insert this intrinsic - // before the terminator. - if (TerminatorInst *T = InsertAtEnd->getTerminator()) - return withDebugLoc(CallInst::Create(DeclareFn, Args, "", T), DL); - return withDebugLoc(CallInst::Create(DeclareFn, Args, "", InsertAtEnd), DL); -} - -Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, - DILocalVariable *VarInfo, - DIExpression *Expr, - const DILocation *DL, - Instruction *InsertBefore) { - assert(V && "no value passed to dbg.value"); - assert(VarInfo && "empty or invalid DILocalVariable* passed to dbg.value"); - assert(DL && "Expected debug loc"); - assert(DL->getScope()->getSubprogram() == - VarInfo->getScope()->getSubprogram() && - "Expected matching subprograms"); - if (!ValueFn) - ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - - trackIfUnresolved(VarInfo); - trackIfUnresolved(Expr); - Value *Args[] = {getDbgIntrinsicValueImpl(VMContext, V), - MetadataAsValue::get(VMContext, VarInfo), - MetadataAsValue::get(VMContext, Expr)}; - return withDebugLoc(CallInst::Create(ValueFn, Args, "", InsertBefore), DL); + IRBuilder<> B = getIRBForDbgInsertion(DL, InsertBB, InsertBefore); + return B.CreateCall(DeclareFn, Args); } -Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, - DILocalVariable *VarInfo, - DIExpression *Expr, - const DILocation *DL, - BasicBlock *InsertAtEnd) { +Instruction *DIBuilder::insertDbgValueIntrinsic( + Value *V, DILocalVariable *VarInfo, DIExpression *Expr, + const DILocation *DL, BasicBlock *InsertBB, Instruction *InsertBefore) { assert(V && "no value passed to dbg.value"); assert(VarInfo && "empty or invalid DILocalVariable* passed to dbg.value"); assert(DL && "Expected debug loc"); @@ -871,7 +869,8 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, MetadataAsValue::get(VMContext, VarInfo), MetadataAsValue::get(VMContext, Expr)}; - return withDebugLoc(CallInst::Create(ValueFn, Args, "", InsertAtEnd), DL); + IRBuilder<> B = getIRBForDbgInsertion(DL, InsertBB, InsertBefore); + return B.CreateCall(ValueFn, Args); } void 
DIBuilder::replaceVTableHolder(DICompositeType *&T, diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 1dc6c5bdd51f4..ae044b3d28728 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -290,7 +290,7 @@ static MDNode *stripDebugLocFromLoopID(MDNode *N) { bool llvm::stripDebugInfo(Function &F) { bool Changed = false; - if (F.getSubprogram()) { + if (F.getMetadata(LLVMContext::MD_dbg)) { Changed = true; F.setSubprogram(nullptr); } @@ -669,3 +669,26 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) { return Val->getZExtValue(); return 0; } + +void Instruction::applyMergedLocation(const DILocation *LocA, + const DILocation *LocB) { + if (LocA && LocB && (LocA == LocB || !LocA->canDiscriminate(*LocB))) { + setDebugLoc(LocA); + return; + } + if (!LocA || !LocB || !isa(this)) { + setDebugLoc(nullptr); + return; + } + SmallPtrSet InlinedLocationsA; + for (DILocation *L = LocA->getInlinedAt(); L; L = L->getInlinedAt()) + InlinedLocationsA.insert(L); + const DILocation *Result = LocB; + for (DILocation *L = LocB->getInlinedAt(); L; L = L->getInlinedAt()) { + Result = L; + if (InlinedLocationsA.count(L)) + break; + } + setDebugLoc(DILocation::get( + Result->getContext(), 0, 0, Result->getScope(), Result->getInlinedAt())); +} diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index b033f4d545379..946df1a836ce8 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -341,3 +341,83 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const { OS << Arg.Val; return OS.str(); } + +namespace llvm { +namespace yaml { + +void MappingTraits::mapping( + IO &io, DiagnosticInfoOptimizationBase *&OptDiag) { + assert(io.outputting() && "input not yet implemented"); + + if (io.mapTag("!Passed", + (OptDiag->getKind() == DK_OptimizationRemark || + OptDiag->getKind() == DK_MachineOptimizationRemark))) + ; + else if (io.mapTag( + "!Missed", + (OptDiag->getKind() == DK_OptimizationRemarkMissed || + OptDiag->getKind() == DK_MachineOptimizationRemarkMissed))) + ; + else if (io.mapTag( + "!Analysis", + (OptDiag->getKind() == DK_OptimizationRemarkAnalysis || + OptDiag->getKind() == DK_MachineOptimizationRemarkAnalysis))) + ; + else if (io.mapTag("!AnalysisFPCommute", + OptDiag->getKind() == + DK_OptimizationRemarkAnalysisFPCommute)) + ; + else if (io.mapTag("!AnalysisAliasing", + OptDiag->getKind() == + DK_OptimizationRemarkAnalysisAliasing)) + ; + else if (io.mapTag("!Failure", OptDiag->getKind() == DK_OptimizationFailure)) + ; + else + llvm_unreachable("Unknown remark type"); + + // These are read-only for now. + DiagnosticLocation DL = OptDiag->getLocation(); + StringRef FN = + GlobalValue::dropLLVMManglingEscape(OptDiag->getFunction().getName()); + + StringRef PassName(OptDiag->PassName); + io.mapRequired("Pass", PassName); + io.mapRequired("Name", OptDiag->RemarkName); + if (!io.outputting() || DL.isValid()) + io.mapOptional("DebugLoc", DL); + io.mapRequired("Function", FN); + io.mapOptional("Hotness", OptDiag->Hotness); + io.mapOptional("Args", OptDiag->Args); +} + +template <> struct MappingTraits { + static void mapping(IO &io, DiagnosticLocation &DL) { + assert(io.outputting() && "input not yet implemented"); + + StringRef File = DL.getFilename(); + unsigned Line = DL.getLine(); + unsigned Col = DL.getColumn(); + + io.mapRequired("File", File); + io.mapRequired("Line", Line); + io.mapRequired("Column", Col); + } + + static const bool flow = true; +}; + +// Implement this as a mapping for now to get proper quotation for the value. 
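// For example, a remark argument with Key "Callee" and Val "foo" comes out as
// a one-entry YAML mapping under Args (illustrative output, assuming the
// default yaml::Output quoting):
//   Args:
//     - Callee: foo
//       DebugLoc: { File: a.c, Line: 3, Column: 10 }  // only when Loc is valid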
+template <> struct MappingTraits { + static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) { + assert(io.outputting() && "input not yet implemented"); + io.mapRequired(A.Key.data(), A.Val); + if (A.Loc.isValid()) + io.mapOptional("DebugLoc", A.Loc); + } +}; + +} // end namespace yaml +} // end namespace llvm + +LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument) diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 85a019856c017..d47f63a9b157e 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -649,7 +649,10 @@ enum IIT_Info { IIT_VEC_OF_ANYPTRS_TO_ELT = 34, IIT_I128 = 35, IIT_V512 = 36, - IIT_V1024 = 37 + IIT_V1024 = 37, + IIT_STRUCT6 = 38, + IIT_STRUCT7 = 39, + IIT_STRUCT8 = 40 }; static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, @@ -798,6 +801,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, case IIT_EMPTYSTRUCT: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0)); return; + case IIT_STRUCT8: ++StructElts; LLVM_FALLTHROUGH; + case IIT_STRUCT7: ++StructElts; LLVM_FALLTHROUGH; + case IIT_STRUCT6: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT5: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT4: ++StructElts; LLVM_FALLTHROUGH; case IIT_STRUCT3: ++StructElts; LLVM_FALLTHROUGH; @@ -874,11 +880,10 @@ static Type *DecodeFixedType(ArrayRef &Infos, return PointerType::get(DecodeFixedType(Infos, Tys, Context), D.Pointer_AddressSpace); case IITDescriptor::Struct: { - Type *Elts[5]; - assert(D.Struct_NumElements <= 5 && "Can't handle this yet"); + SmallVector Elts; for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i) - Elts[i] = DecodeFixedType(Infos, Tys, Context); - return StructType::get(Context, makeArrayRef(Elts,D.Struct_NumElements)); + Elts.push_back(DecodeFixedType(Infos, Tys, Context)); + return StructType::get(Context, Elts); } case IITDescriptor::Argument: return Tys[D.getArgumentNumber()]; diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index ad22efdf0effb..8667d7aab5838 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -163,6 +163,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, return true; // Note that operand #n has a matching input. scInfo.MatchingInput = ConstraintsSoFar.size(); + assert(scInfo.MatchingInput >= 0); } else { if (ConstraintsSoFar[N].hasMatchingInput() && (size_t)ConstraintsSoFar[N].MatchingInput != @@ -170,6 +171,7 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, return true; // Note that operand #n has a matching input. ConstraintsSoFar[N].MatchingInput = ConstraintsSoFar.size(); + assert(ConstraintsSoFar[N].MatchingInput >= 0); } } else if (*I == '|') { multipleAlternativeIndex++; diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 2c49564e328bd..490fcbce74398 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -2299,7 +2299,7 @@ bool CastInst::isLosslessCast() const { bool CastInst::isNoopCast(Instruction::CastOps Opcode, Type *SrcTy, Type *DestTy, - Type *IntPtrTy) { + const DataLayout &DL) { switch (Opcode) { default: llvm_unreachable("Invalid CastOp"); case Instruction::Trunc: @@ -2317,30 +2317,16 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode, case Instruction::BitCast: return true; // BitCast never modifies bits. 
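// For the pointer cases below, "no-op" now depends on DataLayout rather than
// a caller-supplied IntPtrTy: e.g. with 64-bit pointers in the relevant
// address space, ptrtoint i8* to i64 changes no bits, while ptrtoint i8* to
// i32 truncates and therefore is not a no-op (illustrative example).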
case Instruction::PtrToInt: - return IntPtrTy->getScalarSizeInBits() == + return DL.getIntPtrType(SrcTy)->getScalarSizeInBits() == DestTy->getScalarSizeInBits(); case Instruction::IntToPtr: - return IntPtrTy->getScalarSizeInBits() == + return DL.getIntPtrType(DestTy)->getScalarSizeInBits() == SrcTy->getScalarSizeInBits(); } } -/// @brief Determine if a cast is a no-op. -bool CastInst::isNoopCast(Type *IntPtrTy) const { - return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy); -} - bool CastInst::isNoopCast(const DataLayout &DL) const { - Type *PtrOpTy = nullptr; - if (getOpcode() == Instruction::PtrToInt) - PtrOpTy = getOperand(0)->getType(); - else if (getOpcode() == Instruction::IntToPtr) - PtrOpTy = getType(); - - Type *IntPtrTy = - PtrOpTy ? DL.getIntPtrType(PtrOpTy) : DL.getIntPtrType(getContext(), 0); - - return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy); + return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), DL); } /// This function determines if a pair of casts can be eliminated and what @@ -2891,12 +2877,15 @@ bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) { bool CastInst::isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL) { + // ptrtoint and inttoptr are not allowed on non-integral pointers if (auto *PtrTy = dyn_cast(SrcTy)) if (auto *IntTy = dyn_cast(DestTy)) - return IntTy->getBitWidth() == DL.getPointerTypeSizeInBits(PtrTy); + return (IntTy->getBitWidth() == DL.getPointerTypeSizeInBits(PtrTy) && + !DL.isNonIntegralPointerType(PtrTy)); if (auto *PtrTy = dyn_cast(DestTy)) if (auto *IntTy = dyn_cast(SrcTy)) - return IntTy->getBitWidth() == DL.getPointerTypeSizeInBits(PtrTy); + return (IntTy->getBitWidth() == DL.getPointerTypeSizeInBits(PtrTy) && + !DL.isNonIntegralPointerType(PtrTy)); return isBitCastable(SrcTy, DestTy); } diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 6569695c9963b..a94da5452b87c 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -59,6 +59,7 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { {MD_section_prefix, "section_prefix"}, {MD_absolute_symbol, "absolute_symbol"}, {MD_associated, "associated"}, + {MD_callees, "callees"}, }; for (auto &MDKind : MDKinds) { @@ -199,8 +200,12 @@ static bool isDiagnosticEnabled(const DiagnosticInfo &DI) { // pattern, passed via one of the -pass-remarks* flags, matches the name of // the pass that is emitting the diagnostic. If there is no match, ignore the // diagnostic and return. + // + // Also noisy remarks are only enabled if we have hotness information to sort + // them. if (auto *Remark = dyn_cast(&DI)) - return Remark->isEnabled(); + return Remark->isEnabled() && + (!Remark->isVerbose() || Remark->getHotness()); return true; } @@ -221,6 +226,14 @@ LLVMContext::getDiagnosticMessagePrefix(DiagnosticSeverity Severity) { } void LLVMContext::diagnose(const DiagnosticInfo &DI) { + if (auto *OptDiagBase = dyn_cast(&DI)) { + yaml::Output *Out = getDiagnosticsOutputFile(); + if (Out) { + // For remarks the << operator takes a reference to a pointer. + auto *P = const_cast(OptDiagBase); + *Out << P; + } + } // If there is a report handler, use it. 
if (pImpl->DiagHandler && (!pImpl->RespectDiagnosticFilters || isDiagnosticEnabled(DI)) && diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp index 84bad3185914d..54783e884e990 100644 --- a/lib/IR/MDBuilder.cpp +++ b/lib/IR/MDBuilder.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" using namespace llvm; @@ -95,6 +96,13 @@ MDNode *MDBuilder::createRange(Constant *Lo, Constant *Hi) { return MDNode::get(Context, {createConstant(Lo), createConstant(Hi)}); } +MDNode *MDBuilder::createCallees(ArrayRef Callees) { + SmallVector Ops; + for (Function *F : Callees) + Ops.push_back(createConstant(F)); + return MDNode::get(Context, Ops); +} + MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) { // To ensure uniqueness the root node is self-referential. auto Dummy = MDNode::getTemporary(Context, None); diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index ac02ff76c8436..a148ab65fc830 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -1431,7 +1431,6 @@ void GlobalObject::setMetadata(StringRef Kind, MDNode *N) { MDNode *GlobalObject::getMetadata(unsigned KindID) const { SmallVector MDs; getMetadata(KindID, MDs); - assert(MDs.size() <= 1 && "Expected at most one metadata attachment"); if (MDs.empty()) return nullptr; return MDs[0]; diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 57559356f4d31..377f26f2565ae 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -4593,6 +4593,11 @@ void Verifier::verifyFnArgs(const DbgInfoIntrinsic &I) { } void Verifier::verifyCompileUnits() { + // When more than one Module is imported into the same context, such as during + // an LTO build before linking the modules, ODR type uniquing may cause types + // to point to a different CU. This check does not make sense in this case. + if (M.getContext().isODRUniquingDebugTypes()) + return; auto *CUs = M.getNamedMetadata("llvm.dbg.cu"); SmallPtrSet Listed; if (CUs) @@ -4684,19 +4689,8 @@ struct VerifierLegacyPass : public FunctionPass { HasErrors |= !V->verify(F); HasErrors |= !V->verify(); - if (FatalErrors) { - if (HasErrors) - report_fatal_error("Broken module found, compilation aborted!"); - assert(!V->hasBrokenDebugInfo() && "Module contains invalid debug info"); - } - - // Strip broken debug info. - if (V->hasBrokenDebugInfo()) { - DiagnosticInfoIgnoringInvalidDebugMetadata DiagInvalid(M); - M.getContext().diagnose(DiagInvalid); - if (!StripDebugInfo(M)) - report_fatal_error("Failed to strip malformed debug info"); - } + if (FatalErrors && (HasErrors || V->hasBrokenDebugInfo())) + report_fatal_error("Broken module found, compilation aborted!"); return false; } @@ -4999,19 +4993,9 @@ VerifierAnalysis::Result VerifierAnalysis::run(Function &F, PreservedAnalyses VerifierPass::run(Module &M, ModuleAnalysisManager &AM) { auto Res = AM.getResult(M); - if (FatalErrors) { - if (Res.IRBroken) - report_fatal_error("Broken module found, compilation aborted!"); - assert(!Res.DebugInfoBroken && "Module contains invalid debug info"); - } + if (FatalErrors && (Res.IRBroken || Res.DebugInfoBroken)) + report_fatal_error("Broken module found, compilation aborted!"); - // Strip broken debug info. 
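// (The deleted block below was the verifier's auto-stripping path: broken
// debug info used to be diagnosed and stripped here. After this change,
// UpgradeDebugInfo -- invoked from the IR parser, as shown earlier in this
// patch -- handles diagnosing and stripping, and the verifier pass simply
// treats broken debug info as fatal when FatalErrors is set.)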
- if (Res.DebugInfoBroken) { - DiagnosticInfoIgnoringInvalidDebugMetadata DiagInvalid(M); - M.getContext().diagnose(DiagInvalid); - if (!StripDebugInfo(M)) - report_fatal_error("Failed to strip malformed debug info"); - } return PreservedAnalyses::all(); } diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp index ba587ced71821..c4ba659fd0587 100644 --- a/lib/IRReader/IRReader.cpp +++ b/lib/IRReader/IRReader.cpp @@ -68,7 +68,8 @@ std::unique_ptr llvm::getLazyIRFileModule(StringRef Filename, } std::unique_ptr llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err, - LLVMContext &Context) { + LLVMContext &Context, + bool UpgradeDebugInfo) { NamedRegionTimer T(TimeIRParsingName, TimeIRParsingDescription, TimeIRParsingGroupName, TimeIRParsingGroupDescription, TimePassesIsEnabled); @@ -86,11 +87,12 @@ std::unique_ptr llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err, return std::move(ModuleOrErr.get()); } - return parseAssembly(Buffer, Err, Context); + return parseAssembly(Buffer, Err, Context, nullptr, UpgradeDebugInfo); } std::unique_ptr llvm::parseIRFile(StringRef Filename, SMDiagnostic &Err, - LLVMContext &Context) { + LLVMContext &Context, + bool UpgradeDebugInfo) { ErrorOr> FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = FileOrErr.getError()) { @@ -99,7 +101,8 @@ std::unique_ptr llvm::parseIRFile(StringRef Filename, SMDiagnostic &Err, return nullptr; } - return parseIR(FileOrErr.get()->getMemBufferRef(), Err, Context); + return parseIR(FileOrErr.get()->getMemBufferRef(), Err, Context, + UpgradeDebugInfo); } //===----------------------------------------------------------------------===// diff --git a/lib/LTO/Caching.cpp b/lib/LTO/Caching.cpp index 98360f7e9e902..1708ab4c5c71b 100644 --- a/lib/LTO/Caching.cpp +++ b/lib/LTO/Caching.cpp @@ -66,15 +66,17 @@ Expected lto::localCache(StringRef CacheDirectoryPath, // Open the file first to avoid racing with a cache pruner. ErrorOr> MBOrErr = MemoryBuffer::getFile(TempFilename); + if (!MBOrErr) + report_fatal_error(Twine("Failed to open new cache file ") + + TempFilename + ": " + + MBOrErr.getError().message() + "\n"); // This is atomic on POSIX systems. 
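// (POSIX rename(2) replaces EntryPath in a single step even when it already
// exists, so a concurrent reader or cache pruner observes either the old
// complete entry or the new complete entry, never a partial file -- which is
// why the buffer is opened from TempFilename before the rename, as above.)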
if (auto EC = sys::fs::rename(TempFilename, EntryPath)) report_fatal_error(Twine("Failed to rename temporary file ") + - TempFilename + ": " + EC.message() + "\n"); + TempFilename + " to " + EntryPath + ": " + + EC.message() + "\n"); - if (!MBOrErr) - report_fatal_error(Twine("Failed to open cache file ") + EntryPath + - ": " + MBOrErr.getError().message() + "\n"); AddBuffer(Task, std::move(*MBOrErr), EntryPath); } }; diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 0e6c3edb140f7..ba5c04d5b1a83 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -83,16 +83,6 @@ cl::opt LTODiscardValueNames( #endif cl::Hidden); -cl::opt LTOStripInvalidDebugInfo( - "lto-strip-invalid-debug-info", - cl::desc("Strip invalid debug info metadata during LTO instead of aborting."), -#ifdef NDEBUG - cl::init(true), -#else - cl::init(false), -#endif - cl::Hidden); - cl::opt LTORemarksFilename("lto-pass-remarks-output", cl::desc("Output filename for pass remarks"), @@ -228,7 +218,7 @@ bool LTOCodeGenerator::writeMergedModules(StringRef Path) { ToolOutputFile Out(Path, EC, sys::fs::F_None); if (EC) { std::string ErrMsg = "could not open bitcode file for writing: "; - ErrMsg += Path; + ErrMsg += Path.str() + ": " + EC.message(); emitError(ErrMsg); return false; } @@ -239,7 +229,7 @@ bool LTOCodeGenerator::writeMergedModules(StringRef Path) { if (Out.os().has_error()) { std::string ErrMsg = "could not write bitcode file: "; - ErrMsg += Path; + ErrMsg += Path.str() + ": " + Out.os().error().message(); emitError(ErrMsg); Out.os().clear_error(); return false; @@ -270,7 +260,9 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) { bool genResult = compileOptimized(&objFile.os()); objFile.os().close(); if (objFile.os().has_error()) { - emitError((Twine("could not write object file: ") + Filename).str()); + emitError((Twine("could not write object file: ") + Filename + ": " + + objFile.os().error().message()) + .str()); objFile.os().clear_error(); sys::fs::remove(Twine(Filename)); return false; @@ -495,8 +487,7 @@ void LTOCodeGenerator::verifyMergedModuleOnce() { HasVerifiedInput = true; bool BrokenDebugInfo = false; - if (verifyModule(*MergedModule, &dbgs(), - LTOStripInvalidDebugInfo ? &BrokenDebugInfo : nullptr)) + if (verifyModule(*MergedModule, &dbgs(), &BrokenDebugInfo)) report_fatal_error("Broken module found, compilation aborted!"); if (BrokenDebugInfo) { emitWarning("Invalid debug info found, debug info will be stripped"); diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 3cc8b7d0e7706..6a0fbb664da3a 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -60,7 +60,7 @@ LTOModule::~LTOModule() {} /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM /// bitcode. 
bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) { - ErrorOr BCData = IRObjectFile::findBitcodeInMemBuffer( + Expected BCData = IRObjectFile::findBitcodeInMemBuffer( MemoryBufferRef(StringRef((const char *)Mem, Length), "")); return bool(BCData); } @@ -71,7 +71,7 @@ bool LTOModule::isBitcodeFile(StringRef Path) { if (!BufferOrErr) return false; - ErrorOr BCData = IRObjectFile::findBitcodeInMemBuffer( + Expected BCData = IRObjectFile::findBitcodeInMemBuffer( BufferOrErr.get()->getMemBufferRef()); return bool(BCData); } @@ -87,7 +87,7 @@ bool LTOModule::isThinLTO() { bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, StringRef TriplePrefix) { - ErrorOr BCOrErr = + Expected BCOrErr = IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef()); if (!BCOrErr) return false; @@ -100,7 +100,7 @@ bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, } std::string LTOModule::getProducerString(MemoryBuffer *Buffer) { - ErrorOr BCOrErr = + Expected BCOrErr = IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef()); if (!BCOrErr) return ""; @@ -174,11 +174,11 @@ LTOModule::createInLocalContext(std::unique_ptr Context, static ErrorOr> parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context, bool ShouldBeLazy) { - // Find the buffer. - ErrorOr MBOrErr = + Expected MBOrErr = IRObjectFile::findBitcodeInMemBuffer(Buffer); - if (std::error_code EC = MBOrErr.getError()) { + if (Error E = MBOrErr.takeError()) { + std::error_code EC = errorToErrorCode(std::move(E)); Context.emitError(EC.message()); return EC; } diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp index ffd78dad9228c..c8b3892375f64 100644 --- a/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/lib/LTO/ThinLTOCodeGenerator.cpp @@ -63,7 +63,6 @@ namespace llvm { extern cl::opt LTODiscardValueNames; extern cl::opt LTORemarksFilename; extern cl::opt LTOPassRemarksWithHotness; -extern cl::opt LTOStripInvalidDebugInfo; } namespace { @@ -158,8 +157,7 @@ class ThinLTODiagnosticInfo : public DiagnosticInfo { /// Verify the module and strip broken debug info. static void verifyLoadedModule(Module &TheModule) { bool BrokenDebugInfo = false; - if (verifyModule(TheModule, &dbgs(), - LTOStripInvalidDebugInfo ? 
&BrokenDebugInfo : nullptr)) + if (verifyModule(TheModule, &dbgs(), &BrokenDebugInfo)) report_fatal_error("Broken module found, compilation aborted!"); if (BrokenDebugInfo) { TheModule.getContext().diagnose(ThinLTODiagnosticInfo( diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 562f136a3ce2b..b9e23d106e25b 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -10,6 +10,7 @@ add_llvm_library(LLVMMC MCAsmStreamer.cpp MCAssembler.cpp MCCodeEmitter.cpp + MCCodePadder.cpp MCCodeView.cpp MCContext.cpp MCDwarf.cpp diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index eef2757b93b43..e11eaaa30603c 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -162,9 +162,10 @@ class ELFObjectWriter : public MCObjectWriter { bool ZLibStyle, unsigned Alignment); public: - ELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_pwrite_stream &OS, - bool IsLittleEndian) - : MCObjectWriter(OS, IsLittleEndian), TargetObjectWriter(MOTW) {} + ELFObjectWriter(std::unique_ptr MOTW, + raw_pwrite_stream &OS, bool IsLittleEndian) + : MCObjectWriter(OS, IsLittleEndian), + TargetObjectWriter(std::move(MOTW)) {} ~ELFObjectWriter() override = default; @@ -1386,8 +1387,9 @@ bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( InSet, IsPCRel); } -MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_pwrite_stream &OS, - bool IsLittleEndian) { - return new ELFObjectWriter(MOTW, OS, IsLittleEndian); +std::unique_ptr +llvm::createELFObjectWriter(std::unique_ptr MOTW, + raw_pwrite_stream &OS, bool IsLittleEndian) { + return llvm::make_unique(std::move(MOTW), OS, + IsLittleEndian); } diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp index 3642f37aa855c..b4a4d0a899663 100644 --- a/lib/MC/MCAsmBackend.cpp +++ b/lib/MC/MCAsmBackend.cpp @@ -10,6 +10,7 @@ #include "llvm/MC/MCAsmBackend.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCCodePadder.h" #include "llvm/MC/MCFixupKindInfo.h" #include #include @@ -17,7 +18,10 @@ using namespace llvm; -MCAsmBackend::MCAsmBackend() = default; +MCAsmBackend::MCAsmBackend() : CodePadder(new MCCodePadder()) {} + +MCAsmBackend::MCAsmBackend(std::unique_ptr TargetCodePadder) + : CodePadder(std::move(TargetCodePadder)) {} MCAsmBackend::~MCAsmBackend() = default; @@ -59,3 +63,25 @@ bool MCAsmBackend::fixupNeedsRelaxationAdvanced( return true; return fixupNeedsRelaxation(Fixup, Value, DF, Layout); } + +void MCAsmBackend::handleCodePaddingBasicBlockStart( + MCObjectStreamer *OS, const MCCodePaddingContext &Context) { + CodePadder->handleBasicBlockStart(OS, Context); +} + +void MCAsmBackend::handleCodePaddingBasicBlockEnd( + const MCCodePaddingContext &Context) { + CodePadder->handleBasicBlockEnd(Context); +} + +void MCAsmBackend::handleCodePaddingInstructionBegin(const MCInst &Inst) { + CodePadder->handleInstructionBegin(Inst); +} + +void MCAsmBackend::handleCodePaddingInstructionEnd(const MCInst &Inst) { + CodePadder->handleInstructionEnd(Inst); +} + +bool MCAsmBackend::relaxFragment(MCPaddingFragment *PF, MCAsmLayout &Layout) { + return CodePadder->relaxFragment(PF, Layout); +} \ No newline at end of file diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index b1c928950cbf8..f48ae84950e6a 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -248,6 +248,7 @@ class MCAsmStreamer final : public MCStreamer { void EmitCVStringTableDirective() override; void EmitCVFileChecksumsDirective() override; void 
EmitCVFileChecksumOffsetDirective(unsigned FileNo) override; + void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc L) override; void EmitIdent(StringRef IdentString) override; void EmitCFISections(bool EH, bool Debug) override; @@ -270,20 +271,24 @@ class MCAsmStreamer final : public MCStreamer { void EmitCFIWindowSave() override; void EmitCFIReturnColumn(int64_t Register) override; - void EmitWinCFIStartProc(const MCSymbol *Symbol) override; - void EmitWinCFIEndProc() override; - void EmitWinCFIStartChained() override; - void EmitWinCFIEndChained() override; - void EmitWinCFIPushReg(unsigned Register) override; - void EmitWinCFISetFrame(unsigned Register, unsigned Offset) override; - void EmitWinCFIAllocStack(unsigned Size) override; - void EmitWinCFISaveReg(unsigned Register, unsigned Offset) override; - void EmitWinCFISaveXMM(unsigned Register, unsigned Offset) override; - void EmitWinCFIPushFrame(bool Code) override; - void EmitWinCFIEndProlog() override; - - void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except) override; - void EmitWinEHHandlerData() override; + void EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) override; + void EmitWinCFIEndProc(SMLoc Loc) override; + void EmitWinCFIStartChained(SMLoc Loc) override; + void EmitWinCFIEndChained(SMLoc Loc) override; + void EmitWinCFIPushReg(unsigned Register, SMLoc Loc) override; + void EmitWinCFISetFrame(unsigned Register, unsigned Offset, + SMLoc Loc) override; + void EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) override; + void EmitWinCFISaveReg(unsigned Register, unsigned Offset, + SMLoc Loc) override; + void EmitWinCFISaveXMM(unsigned Register, unsigned Offset, + SMLoc Loc) override; + void EmitWinCFIPushFrame(bool Code, SMLoc Loc) override; + void EmitWinCFIEndProlog(SMLoc Loc) override; + + void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, + SMLoc Loc) override; + void EmitWinEHHandlerData(SMLoc Loc) override; void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, bool PrintSchedInfo) override; @@ -1248,6 +1253,12 @@ void MCAsmStreamer::EmitCVFileChecksumOffsetDirective(unsigned FileNo) { EmitEOL(); } +void MCAsmStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc L) { + OS << "\t.cv_fpo_data\t"; + ProcSym->print(OS, MAI); + EmitEOL(); +} + void MCAsmStreamer::EmitIdent(StringRef IdentString) { assert(MAI->hasIdentDirective() && ".ident directive not supported"); OS << "\t.ident\t"; @@ -1425,38 +1436,38 @@ void MCAsmStreamer::EmitCFIReturnColumn(int64_t Register) { EmitEOL(); } -void MCAsmStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) { - MCStreamer::EmitWinCFIStartProc(Symbol); +void MCAsmStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { + MCStreamer::EmitWinCFIStartProc(Symbol, Loc); OS << ".seh_proc "; Symbol->print(OS, MAI); EmitEOL(); } -void MCAsmStreamer::EmitWinCFIEndProc() { - MCStreamer::EmitWinCFIEndProc(); +void MCAsmStreamer::EmitWinCFIEndProc(SMLoc Loc) { + MCStreamer::EmitWinCFIEndProc(Loc); OS << "\t.seh_endproc"; EmitEOL(); } -void MCAsmStreamer::EmitWinCFIStartChained() { - MCStreamer::EmitWinCFIStartChained(); +void MCAsmStreamer::EmitWinCFIStartChained(SMLoc Loc) { + MCStreamer::EmitWinCFIStartChained(Loc); OS << "\t.seh_startchained"; EmitEOL(); } -void MCAsmStreamer::EmitWinCFIEndChained() { - MCStreamer::EmitWinCFIEndChained(); +void MCAsmStreamer::EmitWinCFIEndChained(SMLoc Loc) { + MCStreamer::EmitWinCFIEndChained(Loc); OS << "\t.seh_endchained"; EmitEOL(); } void MCAsmStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool 
Unwind, - bool Except) { - MCStreamer::EmitWinEHHandler(Sym, Unwind, Except); + bool Except, SMLoc Loc) { + MCStreamer::EmitWinEHHandler(Sym, Unwind, Except, Loc); OS << "\t.seh_handler "; Sym->print(OS, MAI); @@ -1467,8 +1478,8 @@ void MCAsmStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, EmitEOL(); } -void MCAsmStreamer::EmitWinEHHandlerData() { - MCStreamer::EmitWinEHHandlerData(); +void MCAsmStreamer::EmitWinEHHandlerData(SMLoc Loc) { + MCStreamer::EmitWinEHHandlerData(Loc); // Switch sections. Don't call SwitchSection directly, because that will // cause the section switch to be visible in the emitted assembly. @@ -1483,43 +1494,46 @@ void MCAsmStreamer::EmitWinEHHandlerData() { EmitEOL(); } -void MCAsmStreamer::EmitWinCFIPushReg(unsigned Register) { - MCStreamer::EmitWinCFIPushReg(Register); +void MCAsmStreamer::EmitWinCFIPushReg(unsigned Register, SMLoc Loc) { + MCStreamer::EmitWinCFIPushReg(Register, Loc); OS << "\t.seh_pushreg " << Register; EmitEOL(); } -void MCAsmStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset) { - MCStreamer::EmitWinCFISetFrame(Register, Offset); +void MCAsmStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset, + SMLoc Loc) { + MCStreamer::EmitWinCFISetFrame(Register, Offset, Loc); OS << "\t.seh_setframe " << Register << ", " << Offset; EmitEOL(); } -void MCAsmStreamer::EmitWinCFIAllocStack(unsigned Size) { - MCStreamer::EmitWinCFIAllocStack(Size); +void MCAsmStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { + MCStreamer::EmitWinCFIAllocStack(Size, Loc); OS << "\t.seh_stackalloc " << Size; EmitEOL(); } -void MCAsmStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset) { - MCStreamer::EmitWinCFISaveReg(Register, Offset); +void MCAsmStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset, + SMLoc Loc) { + MCStreamer::EmitWinCFISaveReg(Register, Offset, Loc); OS << "\t.seh_savereg " << Register << ", " << Offset; EmitEOL(); } -void MCAsmStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset) { - MCStreamer::EmitWinCFISaveXMM(Register, Offset); +void MCAsmStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset, + SMLoc Loc) { + MCStreamer::EmitWinCFISaveXMM(Register, Offset, Loc); OS << "\t.seh_savexmm " << Register << ", " << Offset; EmitEOL(); } -void MCAsmStreamer::EmitWinCFIPushFrame(bool Code) { - MCStreamer::EmitWinCFIPushFrame(Code); +void MCAsmStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) { + MCStreamer::EmitWinCFIPushFrame(Code, Loc); OS << "\t.seh_pushframe"; if (Code) @@ -1527,8 +1541,8 @@ void MCAsmStreamer::EmitWinCFIPushFrame(bool Code) { EmitEOL(); } -void MCAsmStreamer::EmitWinCFIEndProlog() { - MCStreamer::EmitWinCFIEndProlog(); +void MCAsmStreamer::EmitWinCFIEndProlog(SMLoc Loc) { + MCStreamer::EmitWinCFIEndProlog(Loc); OS << "\t.seh_endprologue"; EmitEOL(); diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index eaf6f19326eb4..29b14414ea2cc 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -68,6 +68,10 @@ STATISTIC(FragmentLayouts, "Number of fragment layouts"); STATISTIC(ObjectBytes, "Number of emitted object file bytes"); STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps"); STATISTIC(RelaxedInstructions, "Number of relaxed instructions"); +STATISTIC(PaddingFragmentsRelaxations, + "Number of Padding Fragments relaxations"); +STATISTIC(PaddingFragmentsBytes, + "Total size of all padding from adding Fragments"); } // end namespace stats } // end anonymous namespace @@ -283,6 +287,9 @@ uint64_t 
MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_LEB: return cast<MCLEBFragment>(F).getContents().size(); + case MCFragment::FT_Padding: + return cast<MCPaddingFragment>(F).getSize(); + case MCFragment::FT_SafeSEH: return 4; @@ -549,6 +556,13 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, break; } + case MCFragment::FT_Padding: { + if (!Asm.getBackend().writeNopData(FragmentSize, OW)) + report_fatal_error("unable to write nop sequence of " + + Twine(FragmentSize) + " bytes"); + break; + } + case MCFragment::FT_SafeSEH: { const MCSafeSEHFragment &SF = cast<MCSafeSEHFragment>(F); OW->write32(SF.getSymbol()->getIndex()); @@ -822,6 +836,19 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout, return true; } +bool MCAssembler::relaxPaddingFragment(MCAsmLayout &Layout, + MCPaddingFragment &PF) { + uint64_t OldSize = PF.getSize(); + if (!getBackend().relaxFragment(&PF, Layout)) + return false; + uint64_t NewSize = PF.getSize(); + + ++stats::PaddingFragmentsRelaxations; + stats::PaddingFragmentsBytes += NewSize; + stats::PaddingFragmentsBytes -= OldSize; + return true; +} + bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { uint64_t OldSize = LF.getContents().size(); int64_t Value; @@ -916,6 +943,9 @@ bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSection &Sec) { case MCFragment::FT_LEB: RelaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(I)); break; + case MCFragment::FT_Padding: + RelaxedFrag = relaxPaddingFragment(Layout, *cast<MCPaddingFragment>(I)); + break; case MCFragment::FT_CVInlineLines: RelaxedFrag = relaxCVInlineLineTable(Layout, *cast<MCCVInlineLineTableFragment>(I)); diff --git a/lib/MC/MCCodePadder.cpp b/lib/MC/MCCodePadder.cpp new file mode 100644 index 0000000000000..57547814e595f --- /dev/null +++ b/lib/MC/MCCodePadder.cpp @@ -0,0 +1,371 @@ +//===- MCCodePadder.cpp - Target MC Code Padder ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCCodePadder.h" +#include "llvm/MC/MCObjectStreamer.h" +#include <algorithm> +#include <limits> +#include <numeric> + +using namespace llvm; + +//--------------------------------------------------------------------------- +// MCCodePadder +// + +MCCodePadder::~MCCodePadder() { + for (auto *Policy : CodePaddingPolicies) + delete Policy; +} + +bool MCCodePadder::addPolicy(MCCodePaddingPolicy *Policy) { + assert(Policy && "Policy must be valid"); + return CodePaddingPolicies.insert(Policy).second; +} + +void MCCodePadder::handleBasicBlockStart(MCObjectStreamer *OS, + const MCCodePaddingContext &Context) { + assert(OS != nullptr && "OS must be valid"); + assert(this->OS == nullptr && "Still handling another basic block"); + this->OS = OS; + + ArePoliciesActive = usePoliciesForBasicBlock(Context); + + bool InsertionPoint = basicBlockRequiresInsertionPoint(Context); + assert((!InsertionPoint || + OS->getCurrentFragment()->getKind() != MCFragment::FT_Align) && + "Cannot insert padding nops right after an alignment fragment as it " + "will ruin the alignment"); + + uint64_t PoliciesMask = MCPaddingFragment::PFK_None; + if (ArePoliciesActive) { + PoliciesMask = std::accumulate( + CodePaddingPolicies.begin(), CodePaddingPolicies.end(), + MCPaddingFragment::PFK_None, + [&Context](uint64_t Mask, + const MCCodePaddingPolicy *Policy) -> uint64_t { + return Policy->basicBlockRequiresPaddingFragment(Context) + ? (Mask | Policy->getKindMask()) + : Mask; + }); + }
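To make the fold above concrete (the mask values are invented for illustration): if two registered policies report that this basic block needs a padding fragment and their getKindMask() values are 0x1 and 0x4, the lambda ORs each mask into the running value, so PoliciesMask ends up as PFK_None | 0x1 | 0x4 == 0x5; a policy whose basicBlockRequiresPaddingFragment(Context) returns false leaves the running mask untouched.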
+ if (InsertionPoint || PoliciesMask != MCPaddingFragment::PFK_None) { + MCPaddingFragment *PaddingFragment = OS->getOrCreatePaddingFragment(); + if (InsertionPoint) + PaddingFragment->setAsInsertionPoint(); + PaddingFragment->setPaddingPoliciesMask( + PaddingFragment->getPaddingPoliciesMask() | PoliciesMask); + } +} + +void MCCodePadder::handleBasicBlockEnd(const MCCodePaddingContext &Context) { + assert(this->OS != nullptr && "Not handling a basic block"); + OS = nullptr; +} + +void MCCodePadder::handleInstructionBegin(const MCInst &Inst) { + if (!OS) + return; // instruction was emitted outside a function + + assert(CurrHandledInstFragment == nullptr && "Can't start handling an " + "instruction while still " + "handling another instruction"); + + bool InsertionPoint = instructionRequiresInsertionPoint(Inst); + assert((!InsertionPoint || + OS->getCurrentFragment()->getKind() != MCFragment::FT_Align) && + "Cannot insert padding nops right after an alignment fragment as it " + "will ruin the alignment"); + + uint64_t PoliciesMask = MCPaddingFragment::PFK_None; + if (ArePoliciesActive) { + PoliciesMask = std::accumulate( + CodePaddingPolicies.begin(), CodePaddingPolicies.end(), + MCPaddingFragment::PFK_None, + [&Inst](uint64_t Mask, const MCCodePaddingPolicy *Policy) -> uint64_t { + return Policy->instructionRequiresPaddingFragment(Inst) + ? (Mask | Policy->getKindMask()) + : Mask; + }); + } + MCFragment *CurrFragment = OS->getCurrentFragment(); + // CurrFragment can be a previously created MCPaddingFragment. If so, let's + // update it with the information we have, such as the instruction that it + // should point to. + bool needToUpdateCurrFragment = + CurrFragment != nullptr && + CurrFragment->getKind() == MCFragment::FT_Padding; + if (InsertionPoint || PoliciesMask != MCPaddingFragment::PFK_None || + needToUpdateCurrFragment) { + // temporarily holding the fragment as CurrHandledInstFragment, to be + // updated after the instruction will be written + CurrHandledInstFragment = OS->getOrCreatePaddingFragment(); + if (InsertionPoint) + CurrHandledInstFragment->setAsInsertionPoint(); + CurrHandledInstFragment->setPaddingPoliciesMask( + CurrHandledInstFragment->getPaddingPoliciesMask() | PoliciesMask); + } +} + +void MCCodePadder::handleInstructionEnd(const MCInst &Inst) { + if (!OS) + return; // instruction was emitted outside a function + if (CurrHandledInstFragment == nullptr) + return; + + MCFragment *InstFragment = OS->getCurrentFragment(); + if (MCDataFragment *InstDataFragment = + dyn_cast_or_null<MCDataFragment>(InstFragment)) + // Inst is a fixed size instruction and was encoded into a MCDataFragment. + // Let the fragment hold it and its size. Its size is the current size of + // the data fragment, as the padding fragment was inserted right before it + // and nothing was written yet except Inst + CurrHandledInstFragment->setInstAndInstSize( + Inst, InstDataFragment->getContents().size()); + else if (MCRelaxableFragment *InstRelaxableFragment = + dyn_cast_or_null<MCRelaxableFragment>(InstFragment)) + // Inst may be relaxed and its size may vary. + // Let the fragment hold the instruction and the MCRelaxableFragment + // that's holding it. + CurrHandledInstFragment->setInstAndInstFragment(Inst, + InstRelaxableFragment); + else + llvm_unreachable("After encoding an instruction current fragment must be " + "either a MCDataFragment or a MCRelaxableFragment"); + + CurrHandledInstFragment = nullptr; +}
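A worked illustration of the jurisdiction scan defined next (fragment names invented): for a section whose fragment list is IP1, P1, P2, IP2, P3, where IP1 and IP2 are insertion-point MCPaddingFragments and every fragment carries some registered policy's kind, getJurisdiction(IP1) collects {IP1, P1, P2} and stops when the forward scan reaches the next insertion point, IP2; P3 then falls under IP2's jurisdiction.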
+ +MCPFRange &MCCodePadder::getJurisdiction(MCPaddingFragment *Fragment, + MCAsmLayout &Layout) { + auto JurisdictionLocation = FragmentToJurisdiction.find(Fragment); + if (JurisdictionLocation != FragmentToJurisdiction.end()) + return JurisdictionLocation->second; + + MCPFRange Jurisdiction; + + // Forward scanning the fragments in this section, starting from the given + // fragments, and adding relevant MCPaddingFragments to the Jurisdiction + for (MCFragment *CurrFragment = Fragment; CurrFragment != nullptr; + CurrFragment = CurrFragment->getNextNode()) { + + MCPaddingFragment *CurrPaddingFragment = + dyn_cast<MCPaddingFragment>(CurrFragment); + if (CurrPaddingFragment == nullptr) + continue; + + if (CurrPaddingFragment != Fragment && + CurrPaddingFragment->isInsertionPoint()) + // Found next insertion point Fragment. From now on it's its jurisdiction. + break; + for (const auto *Policy : CodePaddingPolicies) { + if (CurrPaddingFragment->hasPaddingPolicy(Policy->getKindMask())) { + Jurisdiction.push_back(CurrPaddingFragment); + break; + } + } + } + + auto InsertionResult = + FragmentToJurisdiction.insert(std::make_pair(Fragment, Jurisdiction)); + assert(InsertionResult.second && + "Insertion to FragmentToJurisdiction failed"); + return InsertionResult.first->second; +} + +uint64_t MCCodePadder::getMaxWindowSize(MCPaddingFragment *Fragment, + MCAsmLayout &Layout) { + auto MaxFragmentSizeLocation = FragmentToMaxWindowSize.find(Fragment); + if (MaxFragmentSizeLocation != FragmentToMaxWindowSize.end()) + return MaxFragmentSizeLocation->second; + + MCPFRange &Jurisdiction = getJurisdiction(Fragment, Layout); + uint64_t JurisdictionMask = MCPaddingFragment::PFK_None; + for (const auto *Protege : Jurisdiction) + JurisdictionMask |= Protege->getPaddingPoliciesMask(); + + uint64_t MaxFragmentSize = UINT64_C(0); + for (const auto *Policy : CodePaddingPolicies) + if ((JurisdictionMask & Policy->getKindMask()) != + MCPaddingFragment::PFK_None) + MaxFragmentSize = std::max(MaxFragmentSize, Policy->getWindowSize()); + + auto InsertionResult = + FragmentToMaxWindowSize.insert(std::make_pair(Fragment, MaxFragmentSize)); + assert(InsertionResult.second && + "Insertion to FragmentToMaxWindowSize failed"); + return InsertionResult.first->second; +} + +bool MCCodePadder::relaxFragment(MCPaddingFragment *Fragment, + MCAsmLayout &Layout) { + if (!Fragment->isInsertionPoint()) + return false; + uint64_t OldSize = Fragment->getSize(); + + uint64_t MaxWindowSize = getMaxWindowSize(Fragment, Layout); + if (MaxWindowSize == UINT64_C(0)) + return false; + assert(isPowerOf2_64(MaxWindowSize) && + "MaxWindowSize must be an integer power of 2"); + uint64_t SectionAlignment = Fragment->getParent()->getAlignment(); + assert(isPowerOf2_64(SectionAlignment) && + "SectionAlignment must be an integer power of 2"); + + MCPFRange &Jurisdiction = getJurisdiction(Fragment, Layout); + uint64_t OptimalSize = UINT64_C(0); + double OptimalWeight = std::numeric_limits<double>::max(); + uint64_t MaxFragmentSize = MaxWindowSize - UINT16_C(1); + for (uint64_t Size = UINT64_C(0); Size <= MaxFragmentSize; ++Size) { + Fragment->setSize(Size); + Layout.invalidateFragmentsFrom(Fragment); + double SizeWeight = 0.0; + // The section is guaranteed to be aligned to SectionAlignment, but that + // doesn't guarantee the exact section offset w.r.t. the policies' window + // size. + // As a concrete example, the section could be aligned to 16B, but a + // policy's window size can be 32B. That means that the section's actual + // start address can either be 0mod32 or 16mod32. The said policy will act + // differently for each case, so we need to take both into consideration.
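Running the comment's own numbers through the loop that follows: with SectionAlignment == 16 and MaxWindowSize == 32, the loop tries Offset == 0 and Offset == 16, one per possible section start address modulo the window size, and std::max keeps the worst of the resulting offset weights as SizeWeight, so a candidate padding size only scores well if it works for every start address the section might actually get.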
+ for (uint64_t Offset = UINT64_C(0); Offset < MaxWindowSize; + Offset += SectionAlignment) { + double OffsetWeight = std::accumulate( + CodePaddingPolicies.begin(), CodePaddingPolicies.end(), 0.0, + [&Jurisdiction, &Offset, &Layout]( + double Weight, const MCCodePaddingPolicy *Policy) -> double { + double PolicyWeight = + Policy->computeRangePenaltyWeight(Jurisdiction, Offset, Layout); + assert(PolicyWeight >= 0.0 && "A penalty weight must be positive"); + return Weight + PolicyWeight; + }); + SizeWeight = std::max(SizeWeight, OffsetWeight); + } + if (SizeWeight < OptimalWeight) { + OptimalWeight = SizeWeight; + OptimalSize = Size; + } + if (OptimalWeight == 0.0) + break; + } + + Fragment->setSize(OptimalSize); + Layout.invalidateFragmentsFrom(Fragment); + return OldSize != OptimalSize; +} + +//--------------------------------------------------------------------------- +// MCCodePaddingPolicy +// + +uint64_t MCCodePaddingPolicy::getNextFragmentOffset(const MCFragment *Fragment, + const MCAsmLayout &Layout) { + assert(Fragment != nullptr && "Fragment cannot be null"); + MCFragment const *NextFragment = Fragment->getNextNode(); + return NextFragment == nullptr + ? Layout.getSectionAddressSize(Fragment->getParent()) + : Layout.getFragmentOffset(NextFragment); +} + +uint64_t +MCCodePaddingPolicy::getFragmentInstByte(const MCPaddingFragment *Fragment, + MCAsmLayout &Layout) const { + uint64_t InstByte = getNextFragmentOffset(Fragment, Layout); + if (InstByteIsLastByte) + InstByte += Fragment->getInstSize() - UINT64_C(1); + return InstByte; +} + +uint64_t +MCCodePaddingPolicy::computeWindowEndAddress(const MCPaddingFragment *Fragment, + uint64_t Offset, + MCAsmLayout &Layout) const { + uint64_t InstByte = getFragmentInstByte(Fragment, Layout); + return alignTo(InstByte + UINT64_C(1) + Offset, WindowSize) - Offset; +} + +double MCCodePaddingPolicy::computeRangePenaltyWeight( + const MCPFRange &Range, uint64_t Offset, MCAsmLayout &Layout) const { + + SmallVector<MCPFRange, 8> Windows; + SmallVector<MCPFRange, 8>::iterator CurrWindowLocation = Windows.end(); + for (const MCPaddingFragment *Fragment : Range) { + if (!Fragment->hasPaddingPolicy(getKindMask())) + continue; + uint64_t FragmentWindowEndAddress = + computeWindowEndAddress(Fragment, Offset, Layout); + if (CurrWindowLocation == Windows.end() || + FragmentWindowEndAddress != + computeWindowEndAddress(*CurrWindowLocation->begin(), Offset, + Layout)) { + // next window is starting + Windows.push_back(MCPFRange()); + CurrWindowLocation = Windows.end() - 1; + } + CurrWindowLocation->push_back(Fragment); + } + + if (Windows.empty()) + return 0.0; + + double RangeWeight = 0.0; + SmallVector<MCPFRange, 8>::iterator I = Windows.begin(); + RangeWeight += computeFirstWindowPenaltyWeight(*I, Offset, Layout); + ++I; + RangeWeight += std::accumulate( + I, Windows.end(), 0.0, + [this, &Layout, &Offset](double Weight, MCPFRange &Window) -> double { + return Weight += computeWindowPenaltyWeight(Window, Offset, Layout); + }); + return RangeWeight; +}
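To see how the loop above splits a range into windows, plug small numbers into computeWindowEndAddress (defined earlier in this file): with WindowSize == 16 and Offset == 0 it returns alignTo(InstByte + 1, 16), so fragments whose instruction bytes are 5 and 12 both yield a window end address of 16 and join the same MCPFRange, while a fragment at byte 17 yields 32 and opens a new window.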
+ +double MCCodePaddingPolicy::computeFirstWindowPenaltyWeight( + const MCPFRange &Window, uint64_t Offset, MCAsmLayout &Layout) const { + if (Window.empty()) + return 0.0; + uint64_t WindowEndAddress = + computeWindowEndAddress(*Window.begin(), Offset, Layout); + + MCPFRange FullWindowFirstPart; // will hold all the fragments that are in the + // same window as the fragments in the given + // window but their penalty weight should not + // be added + for (const MCFragment *Fragment = (*Window.begin())->getPrevNode(); + Fragment != nullptr; Fragment = Fragment->getPrevNode()) { + const MCPaddingFragment *PaddingNopFragment = + dyn_cast<MCPaddingFragment>(Fragment); + if (PaddingNopFragment == nullptr || + !PaddingNopFragment->hasPaddingPolicy(getKindMask())) + continue; + if (WindowEndAddress != + computeWindowEndAddress(PaddingNopFragment, Offset, Layout)) + break; + + FullWindowFirstPart.push_back(PaddingNopFragment); + } + + std::reverse(FullWindowFirstPart.begin(), FullWindowFirstPart.end()); + double FullWindowFirstPartWeight = + computeWindowPenaltyWeight(FullWindowFirstPart, Offset, Layout); + + MCPFRange FullWindow( + FullWindowFirstPart); // will hold all the fragments that are in the + // same window as the fragments in the given + // window, whether their weight should be added + // or not + FullWindow.append(Window.begin(), Window.end()); + double FullWindowWeight = + computeWindowPenaltyWeight(FullWindow, Offset, Layout); + + assert(FullWindowWeight >= FullWindowFirstPartWeight && + "More fragments necessarily means bigger weight"); + return FullWindowWeight - FullWindowFirstPartWeight; +} diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index e7bd045c7574c..5c25e902bbe7f 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -486,17 +486,17 @@ MCSectionCOFF *MCContext::getAssociativeCOFFSection(MCSectionCOFF *Sec, "", 0, UniqueID); } -MCSectionWasm *MCContext::getWasmSection(const Twine &Section, unsigned Type, +MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind K, const Twine &Group, unsigned UniqueID, const char *BeginSymName) { MCSymbolWasm *GroupSym = nullptr; if (!Group.isTriviallyEmpty() && !Group.str().empty()) GroupSym = cast<MCSymbolWasm>(getOrCreateSymbol(Group)); - return getWasmSection(Section, Type, GroupSym, UniqueID, BeginSymName); + return getWasmSection(Section, K, GroupSym, UniqueID, BeginSymName); } -MCSectionWasm *MCContext::getWasmSection(const Twine &Section, unsigned Type, +MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind, const MCSymbolWasm *GroupSym, unsigned UniqueID, const char *BeginSymName) { @@ -512,14 +512,12 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, unsigned Type, StringRef CachedName = Entry.first.SectionName; - SectionKind Kind = SectionKind::getText(); - MCSymbol *Begin = nullptr; if (BeginSymName) Begin = createTempSymbol(BeginSymName, false); MCSectionWasm *Result = new (WasmAllocator.Allocate()) - MCSectionWasm(CachedName, Type, Kind, GroupSym, UniqueID, Begin); + MCSectionWasm(CachedName, Kind, GroupSym, UniqueID, Begin); Entry.second = Result; return Result; } diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 50c1f6e79f8a2..366125962a5e9 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -39,6 +39,12 @@ using namespace llvm; +MCELFStreamer::MCELFStreamer(MCContext &Context, + std::unique_ptr<MCAsmBackend> TAB, + raw_pwrite_stream &OS, + std::unique_ptr<MCCodeEmitter> Emitter) + : MCObjectStreamer(Context, std::move(TAB), OS, std::move(Emitter)) {} + bool MCELFStreamer::isBundleLocked() const { return getCurrentSectionOnly()->isBundleLocked(); } @@ -62,12 +68,13 @@ void MCELFStreamer::mergeFragment(MCDataFragment *DF, if (RequiredBundlePadding
> 0) { SmallString<256> Code; raw_svector_ostream VecOS(Code); - MCObjectWriter *OW = Assembler.getBackend().createObjectWriter(VecOS); + { + auto OW = Assembler.getBackend().createObjectWriter(VecOS); - EF->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding)); + EF->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding)); - Assembler.writeFragmentPadding(*EF, FSize, OW); - delete OW; + Assembler.writeFragmentPadding(*EF, FSize, OW.get()); + } DF->getContents().append(Code.begin(), Code.end()); } @@ -638,10 +645,13 @@ void MCELFStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, llvm_unreachable("ELF doesn't support this directive"); } -MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_pwrite_stream &OS, MCCodeEmitter *CE, +MCStreamer *llvm::createELFStreamer(MCContext &Context, + std::unique_ptr<MCAsmBackend> &&MAB, + raw_pwrite_stream &OS, + std::unique_ptr<MCCodeEmitter> &&CE, bool RelaxAll) { - MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE); + MCELFStreamer *S = + new MCELFStreamer(Context, std::move(MAB), OS, std::move(CE)); if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp index 31acca01bedbe..94839de14f8d7 100644 --- a/lib/MC/MCFragment.cpp +++ b/lib/MC/MCFragment.cpp @@ -278,6 +278,9 @@ void MCFragment::destroy() { case FT_LEB: delete cast<MCLEBFragment>(this); return; + case FT_Padding: + delete cast<MCPaddingFragment>(this); + return; case FT_SafeSEH: delete cast<MCSafeSEHFragment>(this); return; @@ -322,6 +325,7 @@ LLVM_DUMP_METHOD void MCFragment::dump() const { case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break; case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break; case MCFragment::FT_LEB: OS << "MCLEBFragment"; break; + case MCFragment::FT_Padding: OS << "MCPaddingFragment"; break; case MCFragment::FT_SafeSEH: OS << "MCSafeSEHFragment"; break; case MCFragment::FT_CVInlineLines: OS << "MCCVInlineLineTableFragment"; break; case MCFragment::FT_CVDefRange: OS << "MCCVDefRangeTableFragment"; break; @@ -419,6 +423,19 @@ LLVM_DUMP_METHOD void MCFragment::dump() const { OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned(); break; } + case MCFragment::FT_Padding: { + const MCPaddingFragment *F = cast<MCPaddingFragment>(this); + OS << "\n "; + OS << " PaddingPoliciesMask:" << F->getPaddingPoliciesMask() + << " IsInsertionPoint:" << F->isInsertionPoint() + << " Size:" << F->getSize(); + OS << "\n "; + OS << " Inst:"; + F->getInst().dump_pretty(OS); + OS << " InstSize:" << F->getInstSize(); + OS << "\n "; + break; + } case MCFragment::FT_SafeSEH: { const MCSafeSEHFragment *F = cast<MCSafeSEHFragment>(this); OS << "\n "; diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 674c7b9bf6197..a5c1b13df7ce2 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -62,10 +62,12 @@ class MCMachOStreamer : public MCObjectStreamer { void EmitDataRegionEnd(); public: - MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool DWARFMustBeAtTheEnd, bool label) - : MCObjectStreamer(Context, MAB, OS, Emitter), LabelSections(label), - DWARFMustBeAtTheEnd(DWARFMustBeAtTheEnd), CreatedADWARFSection(false) {} + MCMachOStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> MAB, + raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> Emitter, + bool DWARFMustBeAtTheEnd, bool label) + : MCObjectStreamer(Context, std::move(MAB), OS, std::move(Emitter)), + LabelSections(label), DWARFMustBeAtTheEnd(DWARFMustBeAtTheEnd), + CreatedADWARFSection(false) {} /// state management void reset() override {
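As a caller-side sketch of the ownership handoff these streamer changes introduce, using the createMachOStreamer signature shown in the next hunk (the factory helpers are hypothetical stand-ins for the usual target hooks):

    // Hypothetical factories; in real code these come from the Target.
    std::unique_ptr<MCAsmBackend> makeBackend();
    std::unique_ptr<MCCodeEmitter> makeEmitter();

    MCStreamer *buildStreamer(MCContext &Ctx, raw_pwrite_stream &OS) {
      // The temporaries bind to the unique_ptr&& parameters and are moved
      // from, so the streamer now owns (and eventually frees) both objects.
      return createMachOStreamer(Ctx, makeBackend(), OS, makeEmitter(),
                                 /*RelaxAll=*/false,
                                 /*DWARFMustBeAtTheEnd=*/true,
                                 /*LabelSections=*/false);
    }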
@@ -483,12 +485,15 @@ void MCMachOStreamer::FinishImpl() { this->MCObjectStreamer::FinishImpl(); } -MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_pwrite_stream &OS, MCCodeEmitter *CE, +MCStreamer *llvm::createMachOStreamer(MCContext &Context, + std::unique_ptr<MCAsmBackend> &&MAB, + raw_pwrite_stream &OS, + std::unique_ptr<MCCodeEmitter> &&CE, bool RelaxAll, bool DWARFMustBeAtTheEnd, bool LabelSections) { - MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE, - DWARFMustBeAtTheEnd, LabelSections); + MCMachOStreamer *S = + new MCMachOStreamer(Context, std::move(MAB), OS, std::move(CE), + DWARFMustBeAtTheEnd, LabelSections); const Triple &TT = Context.getObjectFileInfo()->getTargetTriple(); if (TT.isOSDarwin()) { unsigned Major, Minor, Update; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index c6c5cb3169059..d8077df146986 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -214,6 +214,10 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { Ctx->getMachOSection("__DWARF", "__apple_types", MachO::S_ATTR_DEBUG, SectionKind::getMetadata(), "types_begin"); + DwarfSwiftASTSection = + Ctx->getMachOSection("__DWARF", "__swift_ast", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfAbbrevSection = Ctx->getMachOSection("__DWARF", "__debug_abbrev", MachO::S_ATTR_DEBUG, SectionKind::getMetadata(), "section_abbrev"); @@ -820,24 +824,24 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) { void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) { // TODO: Set the section types and flags. - TextSection = Ctx->getWasmSection(".text", wasm::WASM_SEC_CODE); - DataSection = Ctx->getWasmSection(".data", wasm::WASM_SEC_DATA); + TextSection = Ctx->getWasmSection(".text", SectionKind::getText()); + DataSection = Ctx->getWasmSection(".data", SectionKind::getData()); // TODO: Set the section types and flags.
- DwarfLineSection = Ctx->getWasmSection(".debug_line", wasm::WASM_SEC_DATA); - DwarfStrSection = Ctx->getWasmSection(".debug_str", wasm::WASM_SEC_DATA); - DwarfLocSection = Ctx->getWasmSection(".debug_loc", wasm::WASM_SEC_DATA); - DwarfAbbrevSection = Ctx->getWasmSection(".debug_abbrev", wasm::WASM_SEC_DATA, "section_abbrev"); - DwarfARangesSection = Ctx->getWasmSection(".debug_aranges", wasm::WASM_SEC_DATA); - DwarfRangesSection = Ctx->getWasmSection(".debug_ranges", wasm::WASM_SEC_DATA, "debug_range"); - DwarfMacinfoSection = Ctx->getWasmSection(".debug_macinfo", wasm::WASM_SEC_DATA, "debug_macinfo"); - DwarfAddrSection = Ctx->getWasmSection(".debug_addr", wasm::WASM_SEC_DATA); - DwarfCUIndexSection = Ctx->getWasmSection(".debug_cu_index", wasm::WASM_SEC_DATA); - DwarfTUIndexSection = Ctx->getWasmSection(".debug_tu_index", wasm::WASM_SEC_DATA); - DwarfInfoSection = Ctx->getWasmSection(".debug_info", wasm::WASM_SEC_DATA, "section_info"); - DwarfFrameSection = Ctx->getWasmSection(".debug_frame", wasm::WASM_SEC_DATA); - DwarfPubNamesSection = Ctx->getWasmSection(".debug_pubnames", wasm::WASM_SEC_DATA); - DwarfPubTypesSection = Ctx->getWasmSection(".debug_pubtypes", wasm::WASM_SEC_DATA); + DwarfLineSection = Ctx->getWasmSection(".debug_line", SectionKind::getMetadata()); + DwarfStrSection = Ctx->getWasmSection(".debug_str", SectionKind::getMetadata()); + DwarfLocSection = Ctx->getWasmSection(".debug_loc", SectionKind::getMetadata()); + DwarfAbbrevSection = Ctx->getWasmSection(".debug_abbrev", SectionKind::getMetadata(), "section_abbrev"); + DwarfARangesSection = Ctx->getWasmSection(".debug_aranges", SectionKind::getMetadata()); + DwarfRangesSection = Ctx->getWasmSection(".debug_ranges", SectionKind::getMetadata(), "debug_range"); + DwarfMacinfoSection = Ctx->getWasmSection(".debug_macinfo", SectionKind::getMetadata(), "debug_macinfo"); + DwarfAddrSection = Ctx->getWasmSection(".debug_addr", SectionKind::getMetadata()); + DwarfCUIndexSection = Ctx->getWasmSection(".debug_cu_index", SectionKind::getMetadata()); + DwarfTUIndexSection = Ctx->getWasmSection(".debug_tu_index", SectionKind::getMetadata()); + DwarfInfoSection = Ctx->getWasmSection(".debug_info", SectionKind::getMetadata(), "section_info"); + DwarfFrameSection = Ctx->getWasmSection(".debug_frame", SectionKind::getMetadata()); + DwarfPubNamesSection = Ctx->getWasmSection(".debug_pubnames", SectionKind::getMetadata()); + DwarfPubTypesSection = Ctx->getWasmSection(".debug_pubtypes", SectionKind::getMetadata()); // TODO: Define more sections. 
} diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index e9e3133582c06..f226c2f0a308a 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -25,20 +25,17 @@ #include "llvm/Support/TargetRegistry.h" using namespace llvm; -MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, +MCObjectStreamer::MCObjectStreamer(MCContext &Context, + std::unique_ptr<MCAsmBackend> TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter_) - : MCStreamer(Context), - Assembler(new MCAssembler(Context, TAB, *Emitter_, - *TAB.createObjectWriter(OS))), + std::unique_ptr<MCCodeEmitter> Emitter) + : MCStreamer(Context), ObjectWriter(TAB->createObjectWriter(OS)), + TAB(std::move(TAB)), Emitter(std::move(Emitter)), + Assembler(llvm::make_unique<MCAssembler>(Context, *this->TAB, + *this->Emitter, *ObjectWriter)), EmitEHFrame(true), EmitDebugFrame(false) {} -MCObjectStreamer::~MCObjectStreamer() { - delete &Assembler->getBackend(); - delete &Assembler->getEmitter(); - delete &Assembler->getWriter(); - delete Assembler; -} +MCObjectStreamer::~MCObjectStreamer() {} void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) { if (PendingLabels.empty()) @@ -111,6 +108,16 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() { return F; } +MCPaddingFragment *MCObjectStreamer::getOrCreatePaddingFragment() { + MCPaddingFragment *F = + dyn_cast_or_null<MCPaddingFragment>(getCurrentFragment()); + if (!F) { + F = new MCPaddingFragment(); + insert(F); + } + return F; +} + void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) { Assembler->registerSymbol(Sym); } @@ -147,6 +154,12 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, DF->getContents().resize(DF->getContents().size() + Size, 0); } +MCSymbol *MCObjectStreamer::EmitCFILabel() { + MCSymbol *Label = getContext().createTempSymbol("cfi", true); + EmitLabel(Label); + return Label; +} + void MCObjectStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { // We need to create a local symbol to avoid relocations.
Frame.Begin = getContext().createTempSymbol(); @@ -244,6 +257,13 @@ bool MCObjectStreamer::mayHaveInstructions(MCSection &Sec) const { void MCObjectStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, bool) { + getAssembler().getBackend().handleCodePaddingInstructionBegin(Inst); + EmitInstructionImpl(Inst, STI); + getAssembler().getBackend().handleCodePaddingInstructionEnd(Inst); +} + +void MCObjectStreamer::EmitInstructionImpl(const MCInst &Inst, + const MCSubtargetInfo &STI) { MCStreamer::EmitInstruction(Inst, STI); MCSection *Sec = getCurrentSectionOnly(); @@ -464,6 +484,16 @@ void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset, insert(new MCOrgFragment(*Offset, Value, Loc)); } +void MCObjectStreamer::EmitCodePaddingBasicBlockStart( + const MCCodePaddingContext &Context) { + getAssembler().getBackend().handleCodePaddingBasicBlockStart(this, Context); +} + +void MCObjectStreamer::EmitCodePaddingBasicBlockEnd( + const MCCodePaddingContext &Context) { + getAssembler().getBackend().handleCodePaddingBasicBlockEnd(Context); +} + // Associate DTPRel32 fixup with data and resize data area void MCObjectStreamer::EmitDTPRel32Value(const MCExpr *Value) { MCDataFragment *DF = getOrCreateDataFragment(); diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 2b963607b8374..b83b6d3dcf6a1 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" @@ -68,7 +69,7 @@ int AsmLexer::getNextChar() { /// consumed. AsmToken AsmLexer::LexFloatLiteral() { // Skip the fractional digit sequence. - while (isdigit(*CurPtr)) + while (isDigit(*CurPtr)) ++CurPtr; // Check for exponent; we intentionally accept a slightly wider set of @@ -78,7 +79,7 @@ AsmToken AsmLexer::LexFloatLiteral() { ++CurPtr; if (*CurPtr == '-' || *CurPtr == '+') ++CurPtr; - while (isdigit(*CurPtr)) + while (isDigit(*CurPtr)) ++CurPtr; } @@ -102,7 +103,7 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { ++CurPtr; const char *FracStart = CurPtr; - while (isxdigit(*CurPtr)) + while (isHexDigit(*CurPtr)) ++CurPtr; NoFracDigits = CurPtr == FracStart; @@ -123,7 +124,7 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { // N.b. exponent digits are *not* hex const char *ExpStart = CurPtr; - while (isdigit(*CurPtr)) + while (isDigit(*CurPtr)) ++CurPtr; if (CurPtr == ExpStart) @@ -135,15 +136,15 @@ AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { /// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* static bool IsIdentifierChar(char c, bool AllowAt) { - return isalnum(c) || c == '_' || c == '$' || c == '.' || + return isAlnum(c) || c == '_' || c == '$' || c == '.' || (c == '@' && AllowAt) || c == '?'; } AsmToken AsmLexer::LexIdentifier() { // Check for floating point literals. - if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { + if (CurPtr[-1] == '.' && isDigit(*CurPtr)) { // Disambiguate a .1243foo identifier from a floating literal.
- while (isdigit(*CurPtr)) + while (isDigit(*CurPtr)) ++CurPtr; if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) @@ -244,9 +245,9 @@ static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { const char *FirstHex = nullptr; const char *LookAhead = CurPtr; while (true) { - if (isdigit(*LookAhead)) { + if (isDigit(*LookAhead)) { ++LookAhead; - } else if (isxdigit(*LookAhead)) { + } else if (isHexDigit(*LookAhead)) { if (!FirstHex) FirstHex = LookAhead; ++LookAhead; @@ -282,7 +283,7 @@ AsmToken AsmLexer::LexDigit() { const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ? CurPtr - 1 : nullptr; const char *OldCurPtr = CurPtr; - while (isxdigit(*CurPtr)) { + while (isHexDigit(*CurPtr)) { if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary) FirstNonBinary = CurPtr; ++CurPtr; @@ -346,7 +347,7 @@ AsmToken AsmLexer::LexDigit() { if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) { ++CurPtr; // See if we actually have "0b" as part of something like "jmp 0b\n" - if (!isdigit(CurPtr[0])) { + if (!isDigit(CurPtr[0])) { --CurPtr; StringRef Result(TokStart, CurPtr - TokStart); return AsmToken(AsmToken::Integer, Result, 0); @@ -375,7 +376,7 @@ AsmToken AsmLexer::LexDigit() { if ((*CurPtr == 'x') || (*CurPtr == 'X')) { ++CurPtr; const char *NumStart = CurPtr; - while (isxdigit(CurPtr[0])) + while (isHexDigit(CurPtr[0])) ++CurPtr; // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be @@ -605,8 +606,16 @@ AsmToken AsmLexer::LexToken() { return LexToken(); // Ignore whitespace. else return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart)); + case '\r': { + IsAtStartOfLine = true; + IsAtStartOfStatement = true; + // If this is a CR followed by LF, treat that as one token. 
+ if (CurPtr != CurBuf.end() && *CurPtr == '\n') + ++CurPtr; + return AsmToken(AsmToken::EndOfStatement, + StringRef(TokStart, CurPtr - TokStart)); + } case '\n': - case '\r': IsAtStartOfLine = true; IsAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 16c6d562a2b9e..2259136c6ec4c 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -503,6 +503,7 @@ class AsmParser : public MCAsmParser { DK_CV_STRINGTABLE, DK_CV_FILECHECKSUMS, DK_CV_FILECHECKSUM_OFFSET, + DK_CV_FPO_DATA, DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, @@ -538,6 +539,7 @@ class AsmParser : public MCAsmParser { DK_ERR, DK_ERROR, DK_WARNING, + DK_PRINT, DK_END }; @@ -579,6 +581,7 @@ class AsmParser : public MCAsmParser { bool parseDirectiveCVStringTable(); bool parseDirectiveCVFileChecksums(); bool parseDirectiveCVFileChecksumOffset(); + bool parseDirectiveCVFPOData(); // .cfi directives bool parseDirectiveCFIRegister(SMLoc DirectiveLoc); @@ -682,6 +685,9 @@ class AsmParser : public MCAsmParser { // ".warning" bool parseDirectiveWarning(SMLoc DirectiveLoc); + // .print + bool parseDirectivePrint(SMLoc DirectiveLoc); + void initializeDirectiveKindMap(); }; @@ -2035,6 +2041,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveCVFileChecksums(); case DK_CV_FILECHECKSUM_OFFSET: return parseDirectiveCVFileChecksumOffset(); + case DK_CV_FPO_DATA: + return parseDirectiveCVFPOData(); case DK_CFI_SECTIONS: return parseDirectiveCFISections(); case DK_CFI_STARTPROC: @@ -2130,6 +2138,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, case DK_DS_P: case DK_DS_X: return parseDirectiveDS(IDVal, 12); + case DK_PRINT: + return parseDirectivePrint(IDLoc); } return Error(IDLoc, "unknown directive"); @@ -3611,7 +3621,6 @@ bool AsmParser::parseDirectiveCVInlineSiteId() { /// optional items are .loc sub-directives. 
bool AsmParser::parseDirectiveCVLoc() { SMLoc DirectiveLoc = getTok().getLoc(); - SMLoc Loc; int64_t FunctionId, FileNumber; if (parseCVFunctionId(FunctionId, ".cv_loc") || parseCVFileId(FileNumber, ".cv_loc")) @@ -3786,6 +3795,20 @@ bool AsmParser::parseDirectiveCVFileChecksumOffset() { return false; } +/// parseDirectiveCVFPOData +/// ::= .cv_fpo_data procsym +bool AsmParser::parseDirectiveCVFPOData() { + SMLoc DirLoc = getLexer().getLoc(); + StringRef ProcName; + if (parseIdentifier(ProcName)) + return TokError("expected symbol name"); + if (parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_data' directive"); + MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName); + getStreamer().EmitCVFPOData(ProcSym, DirLoc); + return false; +} + /// parseDirectiveCFISections /// ::= .cfi_sections section [, section] bool AsmParser::parseDirectiveCFISections() { @@ -5169,6 +5192,7 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE; DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS; DirectiveKindMap[".cv_filechecksumoffset"] = DK_CV_FILECHECKSUM_OFFSET; + DirectiveKindMap[".cv_fpo_data"] = DK_CV_FPO_DATA; DirectiveKindMap[".sleb128"] = DK_SLEB128; DirectiveKindMap[".uleb128"] = DK_ULEB128; DirectiveKindMap[".cfi_sections"] = DK_CFI_SECTIONS; @@ -5228,6 +5252,7 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".ds.s"] = DK_DS_S; DirectiveKindMap[".ds.w"] = DK_DS_W; DirectiveKindMap[".ds.x"] = DK_DS_X; + DirectiveKindMap[".print"] = DK_PRINT; } MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { @@ -5456,6 +5481,17 @@ bool AsmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { return false; } +bool AsmParser::parseDirectivePrint(SMLoc DirectiveLoc) { + const AsmToken StrTok = getTok(); + Lex(); + if (StrTok.isNot(AsmToken::String) || StrTok.getString().front() != '"') + return Error(DirectiveLoc, "expected double quoted string after .print"); + if (parseToken(AsmToken::EndOfStatement, "expected end of statement")) + return true; + llvm::outs() << StrTok.getStringContents() << '\n'; + return false; +} + // We are comparing pointers, but the pointers are relative to a single string. // Thus, this should always be deterministic. 
static int rewritesSort(const AsmRewrite *AsmRewriteA, diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index b83d68d4fe206..687e0cc1faa59 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -568,7 +568,7 @@ bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) { return false; } -bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) { +bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc Loc) { StringRef SymbolID; if (getParser().parseIdentifier(SymbolID)) return true; @@ -579,29 +579,29 @@ bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) { MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID); Lex(); - getStreamer().EmitWinCFIStartProc(Symbol); + getStreamer().EmitWinCFIStartProc(Symbol, Loc); return false; } -bool COFFAsmParser::ParseSEHDirectiveEndProc(StringRef, SMLoc) { +bool COFFAsmParser::ParseSEHDirectiveEndProc(StringRef, SMLoc Loc) { Lex(); - getStreamer().EmitWinCFIEndProc(); + getStreamer().EmitWinCFIEndProc(Loc); return false; } -bool COFFAsmParser::ParseSEHDirectiveStartChained(StringRef, SMLoc) { +bool COFFAsmParser::ParseSEHDirectiveStartChained(StringRef, SMLoc Loc) { Lex(); - getStreamer().EmitWinCFIStartChained(); + getStreamer().EmitWinCFIStartChained(Loc); return false; } -bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc) { +bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc Loc) { Lex(); - getStreamer().EmitWinCFIEndChained(); + getStreamer().EmitWinCFIEndChained(Loc); return false; } -bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc) { +bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc Loc) { StringRef SymbolID; if (getParser().parseIdentifier(SymbolID)) return true; @@ -623,17 +623,17 @@ bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc) { MCSymbol *handler = getContext().getOrCreateSymbol(SymbolID); Lex(); - getStreamer().EmitWinEHHandler(handler, unwind, except); + getStreamer().EmitWinEHHandler(handler, unwind, except, Loc); return false; } -bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc) { +bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc Loc) { Lex(); getStreamer().EmitWinEHHandlerData(); return false; } -bool COFFAsmParser::ParseSEHDirectivePushReg(StringRef, SMLoc L) { +bool COFFAsmParser::ParseSEHDirectivePushReg(StringRef, SMLoc Loc) { unsigned Reg = 0; if (ParseSEHRegisterNumber(Reg)) return true; @@ -642,11 +642,11 @@ bool COFFAsmParser::ParseSEHDirectivePushReg(StringRef, SMLoc L) { return TokError("unexpected token in directive"); Lex(); - getStreamer().EmitWinCFIPushReg(Reg); + getStreamer().EmitWinCFIPushReg(Reg, Loc); return false; } -bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc L) { +bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc Loc) { unsigned Reg = 0; int64_t Off; if (ParseSEHRegisterNumber(Reg)) @@ -655,39 +655,31 @@ bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc L) { return TokError("you must specify a stack pointer offset"); Lex(); - SMLoc startLoc = getLexer().getLoc(); if (getParser().parseAbsoluteExpression(Off)) return true; - if (Off & 0x0F) - return Error(startLoc, "offset is not a multiple of 16"); - if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); Lex(); - getStreamer().EmitWinCFISetFrame(Reg, Off); + getStreamer().EmitWinCFISetFrame(Reg, Off, Loc); return false; } -bool 
COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc) { +bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc Loc) { int64_t Size; - SMLoc startLoc = getLexer().getLoc(); if (getParser().parseAbsoluteExpression(Size)) return true; - if (Size & 7) - return Error(startLoc, "size is not a multiple of 8"); - if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); Lex(); - getStreamer().EmitWinCFIAllocStack(Size); + getStreamer().EmitWinCFIAllocStack(Size, Loc); return false; } -bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc L) { +bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc Loc) { unsigned Reg = 0; int64_t Off; if (ParseSEHRegisterNumber(Reg)) @@ -696,25 +688,21 @@ bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc L) { return TokError("you must specify an offset on the stack"); Lex(); - SMLoc startLoc = getLexer().getLoc(); if (getParser().parseAbsoluteExpression(Off)) return true; - if (Off & 7) - return Error(startLoc, "size is not a multiple of 8"); - if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); Lex(); // FIXME: Err on %xmm* registers - getStreamer().EmitWinCFISaveReg(Reg, Off); + getStreamer().EmitWinCFISaveReg(Reg, Off, Loc); return false; } // FIXME: This method is inherently x86-specific. It should really be in the // x86 backend. -bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc L) { +bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc Loc) { unsigned Reg = 0; int64_t Off; if (ParseSEHRegisterNumber(Reg)) @@ -723,23 +711,19 @@ bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc L) { return TokError("you must specify an offset on the stack"); Lex(); - SMLoc startLoc = getLexer().getLoc(); if (getParser().parseAbsoluteExpression(Off)) return true; if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); - if (Off & 0x0F) - return Error(startLoc, "offset is not a multiple of 16"); - Lex(); // FIXME: Err on non-%xmm* registers - getStreamer().EmitWinCFISaveXMM(Reg, Off); + getStreamer().EmitWinCFISaveXMM(Reg, Off, Loc); return false; } -bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc) { +bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc Loc) { bool Code = false; StringRef CodeID; if (getLexer().is(AsmToken::At)) { @@ -756,13 +740,13 @@ bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc) { return TokError("unexpected token in directive"); Lex(); - getStreamer().EmitWinCFIPushFrame(Code); + getStreamer().EmitWinCFIPushFrame(Code, Loc); return false; } -bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc) { +bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc Loc) { Lex(); - getStreamer().EmitWinCFIEndProlog(); + getStreamer().EmitWinCFIEndProlog(Loc); return false; } diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index a407691b0bd17..38720c23ff264 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -247,7 +247,7 @@ bool ELFAsmParser::ParseSectionName(StringRef &SectionName) { return false; } - while (true) { + while (!getParser().hasPendingError()) { SMLoc PrevLoc = getLexer().getLoc(); if (getLexer().is(AsmToken::Comma) || getLexer().is(AsmToken::EndOfStatement)) @@ -488,7 +488,6 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { unsigned Flags = 0; const MCExpr *Subsection = nullptr; bool UseLastGroup = 
false; - StringRef UniqueStr; MCSymbolELF *Associated = nullptr; int64_t UniqueID = ~0; diff --git a/lib/MC/MCParser/MCTargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp index 64ac82a6c66f3..a0c06c9d50189 100644 --- a/lib/MC/MCParser/MCTargetAsmParser.cpp +++ b/lib/MC/MCParser/MCTargetAsmParser.cpp @@ -13,8 +13,9 @@ using namespace llvm; MCTargetAsmParser::MCTargetAsmParser(MCTargetOptions const &MCOptions, - const MCSubtargetInfo &STI) - : MCOptions(MCOptions), STI(&STI) {} + const MCSubtargetInfo &STI, + const MCInstrInfo &MII) + : MCOptions(MCOptions), STI(&STI), MII(MII) {} MCTargetAsmParser::~MCTargetAsmParser() = default; diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 61f65c5f9461f..4067df0eaf57c 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -56,17 +56,12 @@ MCStreamer::MCStreamer(MCContext &Ctx) SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>()); } -MCStreamer::~MCStreamer() { - for (unsigned i = 0; i < getNumWinFrameInfos(); ++i) - delete WinFrameInfos[i]; -} +MCStreamer::~MCStreamer() {} void MCStreamer::reset() { DwarfFrameInfos.clear(); - for (unsigned i = 0; i < getNumWinFrameInfos(); ++i) - delete WinFrameInfos[i]; - WinFrameInfos.clear(); CurrentWinFrameInfo = nullptr; + WinFrameInfos.clear(); SymbolOrdering.clear(); SectionStack.clear(); SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>()); @@ -211,21 +206,18 @@ MCSymbol *MCStreamer::getDwarfLineTableSymbol(unsigned CUID) { return Table.getLabel(); } -MCDwarfFrameInfo *MCStreamer::getCurrentDwarfFrameInfo() { - if (DwarfFrameInfos.empty()) - return nullptr; - return &DwarfFrameInfos.back(); -} - bool MCStreamer::hasUnfinishedDwarfFrameInfo() { - MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); - return CurFrame && !CurFrame->End; + return !DwarfFrameInfos.empty() && !DwarfFrameInfos.back().End; } -void MCStreamer::EnsureValidDwarfFrame() { - MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); - if (!CurFrame || CurFrame->End) - report_fatal_error("No open frame"); +MCDwarfFrameInfo *MCStreamer::getCurrentDwarfFrameInfo() { + if (!hasUnfinishedDwarfFrameInfo()) { + getContext().reportError(SMLoc(), "this directive must appear between " ".cfi_startproc and .cfi_endproc " "directives"); + return nullptr; + } + return &DwarfFrameInfos.back(); } bool MCStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename, @@ -329,7 +321,8 @@ void MCStreamer::EmitCFISections(bool EH, bool Debug) { void MCStreamer::EmitCFIStartProc(bool IsSimple) { if (hasUnfinishedDwarfFrameInfo()) - report_fatal_error("Starting a frame before finishing the previous one!"); + getContext().reportError( + SMLoc(), "starting new .cfi frame before finishing the previous one"); MCDwarfFrameInfo Frame; Frame.IsSimple = IsSimple; @@ -352,247 +345,298 @@ void MCStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { } void MCStreamer::EmitCFIEndProc() { - EnsureValidDwarfFrame(); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; EmitCFIEndProcImpl(*CurFrame); } void MCStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { // Put a dummy non-null value in Frame.End to mark that this frame has been // closed.
- Frame.End = (MCSymbol *) 1; + Frame.End = (MCSymbol *)1; } MCSymbol *MCStreamer::EmitCFILabel() { - MCSymbol *Label = getContext().createTempSymbol("cfi", true); - EmitLabel(Label); - return Label; -} - -MCSymbol *MCStreamer::EmitCFICommon() { - EnsureValidDwarfFrame(); - return EmitCFILabel(); + // Return a dummy non-null value so that label fields appear filled in when + // generating textual assembly. + return (MCSymbol *)1; } void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createDefCfa(Label, Register, Offset); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); CurFrame->CurrentCfaRegister = static_cast(Register); } void MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createDefCfaOffset(Label, Offset); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createAdjustCfaOffset(Label, Adjustment); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFIDefCfaRegister(int64_t Register) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createDefCfaRegister(Label, Register); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); CurFrame->CurrentCfaRegister = static_cast(Register); } void MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createOffset(Label, Register, Offset); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createRelOffset(Label, Register, Offset); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding) { - EnsureValidDwarfFrame(); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Personality = Sym; CurFrame->PersonalityEncoding = Encoding; } void MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { - EnsureValidDwarfFrame(); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Lsda = Sym; CurFrame->LsdaEncoding = Encoding; } void MCStreamer::EmitCFIRememberState() { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createRememberState(Label); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void 
MCStreamer::EmitCFIRestoreState() { // FIXME: Error if there is no matching cfi_remember_state. - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createRestoreState(Label); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFISameValue(int64_t Register) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createSameValue(Label, Register); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFIRestore(int64_t Register) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createRestore(Label, Register); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFIEscape(StringRef Values) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createEscape(Label, Values); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFIGnuArgsSize(int64_t Size) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createGnuArgsSize(Label, Size); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFISignalFrame() { - EnsureValidDwarfFrame(); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->IsSignalFrame = true; } void MCStreamer::EmitCFIUndefined(int64_t Register) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createUndefined(Label, Register); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createRegister(Label, Register1, Register2); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFIWindowSave() { - MCSymbol *Label = EmitCFICommon(); + MCSymbol *Label = EmitCFILabel(); MCCFIInstruction Instruction = MCCFIInstruction::createWindowSave(Label); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->Instructions.push_back(Instruction); } void MCStreamer::EmitCFIReturnColumn(int64_t Register) { - EnsureValidDwarfFrame(); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + if (!CurFrame) + return; CurFrame->RAReg = Register; } -void MCStreamer::EnsureValidWinFrameInfo() { +WinEH::FrameInfo *MCStreamer::EnsureValidWinFrameInfo(SMLoc Loc) { const MCAsmInfo *MAI = Context.getAsmInfo(); - if (!MAI->usesWindowsCFI()) - report_fatal_error(".seh_* directives are not supported on this target"); - if (!CurrentWinFrameInfo || CurrentWinFrameInfo->End) - report_fatal_error("No open Win64 EH frame function!"); + if 
(!MAI->usesWindowsCFI()) { + getContext().reportError( + Loc, ".seh_* directives are not supported on this target"); + return nullptr; + } + if (!CurrentWinFrameInfo || CurrentWinFrameInfo->End) { + getContext().reportError( + Loc, ".seh_ directive must appear within an active frame"); + return nullptr; + } + return CurrentWinFrameInfo; } -void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) { +void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) { const MCAsmInfo *MAI = Context.getAsmInfo(); if (!MAI->usesWindowsCFI()) - report_fatal_error(".seh_* directives are not supported on this target"); + return getContext().reportError( + Loc, ".seh_* directives are not supported on this target"); if (CurrentWinFrameInfo && !CurrentWinFrameInfo->End) - report_fatal_error("Starting a function before ending the previous one!"); + getContext().reportError( + Loc, "Starting a function before ending the previous one!"); MCSymbol *StartProc = EmitCFILabel(); - WinFrameInfos.push_back(new WinEH::FrameInfo(Symbol, StartProc)); - CurrentWinFrameInfo = WinFrameInfos.back(); + WinFrameInfos.emplace_back( + llvm::make_unique<WinEH::FrameInfo>(Symbol, StartProc)); + CurrentWinFrameInfo = WinFrameInfos.back().get(); CurrentWinFrameInfo->TextSection = getCurrentSectionOnly(); } -void MCStreamer::EmitWinCFIEndProc() { - EnsureValidWinFrameInfo(); - if (CurrentWinFrameInfo->ChainedParent) - report_fatal_error("Not all chained regions terminated!"); +void MCStreamer::EmitWinCFIEndProc(SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) + return; + if (CurFrame->ChainedParent) + getContext().reportError(Loc, "Not all chained regions terminated!"); MCSymbol *Label = EmitCFILabel(); - CurrentWinFrameInfo->End = Label; + CurFrame->End = Label; } -void MCStreamer::EmitWinCFIStartChained() { - EnsureValidWinFrameInfo(); +void MCStreamer::EmitWinCFIStartChained(SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) + return; MCSymbol *StartProc = EmitCFILabel(); - WinFrameInfos.push_back(new WinEH::FrameInfo(CurrentWinFrameInfo->Function, - StartProc, CurrentWinFrameInfo)); - CurrentWinFrameInfo = WinFrameInfos.back(); + WinFrameInfos.emplace_back(llvm::make_unique<WinEH::FrameInfo>( + CurFrame->Function, StartProc, CurFrame)); + CurrentWinFrameInfo = WinFrameInfos.back().get(); CurrentWinFrameInfo->TextSection = getCurrentSectionOnly(); } -void MCStreamer::EmitWinCFIEndChained() { - EnsureValidWinFrameInfo(); - if (!CurrentWinFrameInfo->ChainedParent) - report_fatal_error("End of a chained region outside a chained region!"); +void MCStreamer::EmitWinCFIEndChained(SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) + return; + if (!CurFrame->ChainedParent) + return getContext().reportError( + Loc, "End of a chained region outside a chained region!"); MCSymbol *Label = EmitCFILabel(); - CurrentWinFrameInfo->End = Label; - CurrentWinFrameInfo = - const_cast<WinEH::FrameInfo *>(CurrentWinFrameInfo->ChainedParent); + CurFrame->End = Label; + CurrentWinFrameInfo = const_cast<WinEH::FrameInfo *>(CurFrame->ChainedParent); } -void MCStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, - bool Except) { - EnsureValidWinFrameInfo(); - if (CurrentWinFrameInfo->ChainedParent) - report_fatal_error("Chained unwind areas can't have handlers!"); - CurrentWinFrameInfo->ExceptionHandler = Sym; +void MCStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except, + SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) +
return; + if (CurFrame->ChainedParent) + return getContext().reportError( + Loc, "Chained unwind areas can't have handlers!"); + CurFrame->ExceptionHandler = Sym; if (!Except && !Unwind) - report_fatal_error("Don't know what kind of handler this is!"); + getContext().reportError(Loc, "Don't know what kind of handler this is!"); if (Unwind) - CurrentWinFrameInfo->HandlesUnwind = true; + CurFrame->HandlesUnwind = true; if (Except) - CurrentWinFrameInfo->HandlesExceptions = true; + CurFrame->HandlesExceptions = true; } -void MCStreamer::EmitWinEHHandlerData() { - EnsureValidWinFrameInfo(); - if (CurrentWinFrameInfo->ChainedParent) - report_fatal_error("Chained unwind areas can't have handlers!"); +void MCStreamer::EmitWinEHHandlerData(SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) + return; + if (CurFrame->ChainedParent) + getContext().reportError(Loc, "Chained unwind areas can't have handlers!"); } static MCSection *getWinCFISection(MCContext &Context, unsigned *NextWinCFIID, @@ -629,86 +673,110 @@ MCSection *MCStreamer::getAssociatedXDataSection(const MCSection *TextSec) { void MCStreamer::EmitSyntaxDirective() {} -void MCStreamer::EmitWinCFIPushReg(unsigned Register) { - EnsureValidWinFrameInfo(); +void MCStreamer::EmitWinCFIPushReg(unsigned Register, SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) + return; MCSymbol *Label = EmitCFILabel(); WinEH::Instruction Inst = Win64EH::Instruction::PushNonVol(Label, Register); - CurrentWinFrameInfo->Instructions.push_back(Inst); + CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset) { - EnsureValidWinFrameInfo(); - if (CurrentWinFrameInfo->LastFrameInst >= 0) - report_fatal_error("Frame register and offset already specified!"); +void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset, + SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) + return; + if (CurFrame->LastFrameInst >= 0) + return getContext().reportError( + Loc, "frame register and offset can be set at most once"); if (Offset & 0x0F) - report_fatal_error("Misaligned frame pointer offset!"); + return getContext().reportError(Loc, "offset is not a multiple of 16"); if (Offset > 240) - report_fatal_error("Frame offset must be less than or equal to 240!"); + return getContext().reportError( + Loc, "frame offset must be less than or equal to 240"); MCSymbol *Label = EmitCFILabel(); WinEH::Instruction Inst = Win64EH::Instruction::SetFPReg(Label, Register, Offset); - CurrentWinFrameInfo->LastFrameInst = CurrentWinFrameInfo->Instructions.size(); - CurrentWinFrameInfo->Instructions.push_back(Inst); + CurFrame->LastFrameInst = CurFrame->Instructions.size(); + CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFIAllocStack(unsigned Size) { - EnsureValidWinFrameInfo(); +void MCStreamer::EmitWinCFIAllocStack(unsigned Size, SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) + return; if (Size == 0) - report_fatal_error("Allocation size must be non-zero!"); + return getContext().reportError(Loc, + "stack allocation size must be non-zero"); if (Size & 7) - report_fatal_error("Misaligned stack allocation!"); + return getContext().reportError( + Loc, "stack allocation size is not a multiple of 8"); MCSymbol *Label = EmitCFILabel(); WinEH::Instruction Inst = Win64EH::Instruction::Alloc(Label, Size); - CurrentWinFrameInfo->Instructions.push_back(Inst); 
+ CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset) { - EnsureValidWinFrameInfo(); +void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset, + SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) + return; + if (Offset & 7) - report_fatal_error("Misaligned saved register offset!"); + return getContext().reportError( + Loc, "register save offset is not 8 byte aligned"); MCSymbol *Label = EmitCFILabel(); WinEH::Instruction Inst = Win64EH::Instruction::SaveNonVol(Label, Register, Offset); - CurrentWinFrameInfo->Instructions.push_back(Inst); + CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset) { - EnsureValidWinFrameInfo(); +void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset, + SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) + return; if (Offset & 0x0F) - report_fatal_error("Misaligned saved vector register offset!"); + return getContext().reportError(Loc, "offset is not a multiple of 16"); MCSymbol *Label = EmitCFILabel(); WinEH::Instruction Inst = Win64EH::Instruction::SaveXMM(Label, Register, Offset); - CurrentWinFrameInfo->Instructions.push_back(Inst); + CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFIPushFrame(bool Code) { - EnsureValidWinFrameInfo(); - if (!CurrentWinFrameInfo->Instructions.empty()) - report_fatal_error("If present, PushMachFrame must be the first UOP"); +void MCStreamer::EmitWinCFIPushFrame(bool Code, SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) + return; + if (!CurFrame->Instructions.empty()) + return getContext().reportError( + Loc, "If present, PushMachFrame must be the first UOP"); MCSymbol *Label = EmitCFILabel(); WinEH::Instruction Inst = Win64EH::Instruction::PushMachFrame(Label, Code); - CurrentWinFrameInfo->Instructions.push_back(Inst); + CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWinCFIEndProlog() { - EnsureValidWinFrameInfo(); +void MCStreamer::EmitWinCFIEndProlog(SMLoc Loc) { + WinEH::FrameInfo *CurFrame = EnsureValidWinFrameInfo(Loc); + if (!CurFrame) + return; MCSymbol *Label = EmitCFILabel(); - CurrentWinFrameInfo->PrologEnd = Label; + CurFrame->PrologEnd = Label; } void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) { @@ -738,7 +806,9 @@ void MCStreamer::EmitWindowsUnwindTables() { void MCStreamer::Finish() { if (!DwarfFrameInfos.empty() && !DwarfFrameInfos.back().End) - report_fatal_error("Unfinished frame!"); + getContext().reportError(SMLoc(), "Unfinished frame!"); + if (!WinFrameInfos.empty() && !WinFrameInfos.back()->End) + getContext().reportError(SMLoc(), "Unfinished frame!"); MCTargetStreamer *TS = getTargetStreamer(); if (TS) diff --git a/lib/MC/MCWasmStreamer.cpp b/lib/MC/MCWasmStreamer.cpp index be8a5c21610fe..287b7cf7b23f1 100644 --- a/lib/MC/MCWasmStreamer.cpp +++ b/lib/MC/MCWasmStreamer.cpp @@ -99,6 +99,7 @@ bool MCWasmStreamer::EmitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) { case MCSA_Invalid: case MCSA_IndirectSymbol: case MCSA_Hidden: + case MCSA_Protected: return false; case MCSA_Weak: @@ -156,7 +157,7 @@ void MCWasmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, void MCWasmStreamer::EmitIdent(StringRef IdentString) { MCSection *Comment = getAssembler().getContext().getWasmSection( - ".comment", wasm::WASM_SEC_DATA); + ".comment", SectionKind::getMetadata()); PushSection(); 
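At this point EmitIdent redirects output into the .comment section and later restores the caller's section. For reference, a minimal sketch of that save/switch/restore idiom using only the public MCStreamer interface (the helper name and its exact emission are illustrative, not part of this patch):

```cpp
#include "llvm/MC/MCStreamer.h"

// Emit text into an auxiliary section without disturbing the section the
// caller is currently writing to.
static void emitIntoSection(llvm::MCStreamer &S, llvm::MCSection *Aux,
                            llvm::StringRef Text) {
  S.PushSection();      // save the current section on the streamer's stack
  S.SwitchSection(Aux); // redirect subsequent output to the auxiliary section
  S.EmitBytes(Text);
  S.PopSection();       // restore the previously active section
}
```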
SwitchSection(Comment); if (!SeenIdent) { @@ -200,10 +201,13 @@ void MCWasmStreamer::FinishImpl() { this->MCObjectStreamer::FinishImpl(); } -MCStreamer *llvm::createWasmStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_pwrite_stream &OS, MCCodeEmitter *CE, +MCStreamer *llvm::createWasmStreamer(MCContext &Context, + std::unique_ptr<MCAsmBackend> &&MAB, + raw_pwrite_stream &OS, + std::unique_ptr<MCCodeEmitter> &&CE, bool RelaxAll) { - MCWasmStreamer *S = new MCWasmStreamer(Context, MAB, OS, CE); + MCWasmStreamer *S = + new MCWasmStreamer(Context, std::move(MAB), OS, std::move(CE)); if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp index fdc4c10cd6cef..44dd8f1385a0d 100644 --- a/lib/MC/MCWin64EH.cpp +++ b/lib/MC/MCWin64EH.cpp @@ -220,17 +220,17 @@ static void EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { void llvm::Win64EH::UnwindEmitter::Emit(MCStreamer &Streamer) const { // Emit the unwind info structs first. - for (WinEH::FrameInfo *CFI : Streamer.getWinFrameInfos()) { + for (const auto &CFI : Streamer.getWinFrameInfos()) { MCSection *XData = Streamer.getAssociatedXDataSection(CFI->TextSection); Streamer.SwitchSection(XData); - ::EmitUnwindInfo(Streamer, CFI); + ::EmitUnwindInfo(Streamer, CFI.get()); } // Now emit RUNTIME_FUNCTION entries. - for (WinEH::FrameInfo *CFI : Streamer.getWinFrameInfos()) { + for (const auto &CFI : Streamer.getWinFrameInfos()) { MCSection *PData = Streamer.getAssociatedPDataSection(CFI->TextSection); Streamer.SwitchSection(PData); - EmitRuntimeFunction(Streamer, CFI); + EmitRuntimeFunction(Streamer, CFI.get()); } } diff --git a/lib/MC/MCWinCOFFStreamer.cpp b/lib/MC/MCWinCOFFStreamer.cpp index bf341bb1f4511..7e0533b8e0073 100644 --- a/lib/MC/MCWinCOFFStreamer.cpp +++ b/lib/MC/MCWinCOFFStreamer.cpp @@ -41,9 +41,12 @@ using namespace llvm; #define DEBUG_TYPE "WinCOFFStreamer" -MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, - MCCodeEmitter &CE, raw_pwrite_stream &OS) - : MCObjectStreamer(Context, MAB, OS, &CE), CurSymbol(nullptr) {} +MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context, + std::unique_ptr<MCAsmBackend> MAB, + std::unique_ptr<MCCodeEmitter> CE, + raw_pwrite_stream &OS) + : MCObjectStreamer(Context, std::move(MAB), OS, std::move(CE)), + CurSymbol(nullptr) {} void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &STI) { @@ -285,7 +288,7 @@ void MCWinCOFFStreamer::EmitIdent(StringRef IdentString) { llvm_unreachable("not implemented"); } -void MCWinCOFFStreamer::EmitWinEHHandlerData() { +void MCWinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { llvm_unreachable("not implemented"); } diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 62bf0a58fdfa9..7dbb84e166f22 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -994,8 +994,9 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, } } -MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW, - raw_pwrite_stream &OS, - bool IsLittleEndian) { - return new MachObjectWriter(MOTW, OS, IsLittleEndian); +std::unique_ptr<MCObjectWriter> +llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW, + raw_pwrite_stream &OS, bool IsLittleEndian) { + return llvm::make_unique<MachObjectWriter>(std::move(MOTW), OS, + IsLittleEndian); } diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp index 6025a20a9c193..531bc930c89b7 100644 --- a/lib/MC/StringTableBuilder.cpp +++ b/lib/MC/StringTableBuilder.cpp @@ -82,32 +82,34 @@ static int charTailAt(StringPair *P, size_t Pos) { // Three-way
radix quicksort. This is much faster than std::sort with strcmp // because it does not compare characters that we already know the same. -static void multikey_qsort(StringPair **Begin, StringPair **End, int Pos) { +static void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) { tailcall: - if (End - Begin <= 1) + if (Vec.size() <= 1) return; - // Partition items. Items in [Begin, P) are greater than the pivot, - // [P, Q) are the same as the pivot, and [Q, End) are less than the pivot. - int Pivot = charTailAt(*Begin, Pos); - StringPair **P = Begin; - StringPair **Q = End; - for (StringPair **R = Begin + 1; R < Q;) { - int C = charTailAt(*R, Pos); + // Partition items so that items in [0, I) are greater than the pivot, + // [I, J) are the same as the pivot, and [J, Vec.size()) are less than + // the pivot. + int Pivot = charTailAt(Vec[0], Pos); + size_t I = 0; + size_t J = Vec.size(); + for (size_t K = 1; K < J;) { + int C = charTailAt(Vec[K], Pos); if (C > Pivot) - std::swap(*P++, *R++); + std::swap(Vec[I++], Vec[K++]); else if (C < Pivot) - std::swap(*--Q, *R); + std::swap(Vec[--J], Vec[K]); else - R++; + K++; } - multikey_qsort(Begin, P, Pos); - multikey_qsort(Q, End, Pos); + multikeySort(Vec.slice(0, I), Pos); + multikeySort(Vec.slice(J), Pos); + + // multikeySort(Vec.slice(I, J - I), Pos + 1), but with + // tail call optimization. if (Pivot != -1) { - // qsort(P, Q, Pos + 1), but with tail call optimization. - Begin = P; - End = Q; + Vec = Vec.slice(I, J - I); ++Pos; goto tailcall; } @@ -130,12 +132,7 @@ void StringTableBuilder::finalizeStringTable(bool Optimize) { for (StringPair &P : StringIndexMap) Strings.push_back(&P); - if (!Strings.empty()) { - // If we're optimizing, sort by name. If not, sort by previously assigned - // offset. - multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0); - } - + multikeySort(Strings, 0); initSize(); StringRef Previous;
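The multikeySort routine above is a three-way radix quicksort over string tails: strings equal at position Pos land in the middle bucket and recurse at Pos + 1, so characters already known to be equal are never compared again. A self-contained sketch of the same idea on plain std::string values (simplified; it omits the suffix-sharing charTailAt logic and the goto-based tail-call optimization of the real implementation):

```cpp
#include <string>
#include <utility>
#include <vector>

// Returns the character of S at Pos, or -1 past the end.
static int charAt(const std::string &S, size_t Pos) {
  return Pos < S.size() ? (unsigned char)S[Pos] : -1;
}

// Sorts V[Lo, Hi) by the character at Pos, then recurses into the three
// partitions: smaller, equal (at the next position), and larger.
static void multikeySort(std::vector<std::string> &V, size_t Lo, size_t Hi,
                         size_t Pos) {
  if (Hi - Lo <= 1)
    return;
  int Pivot = charAt(V[Lo], Pos);
  size_t I = Lo, J = Hi;
  for (size_t K = Lo + 1; K < J;) {
    int C = charAt(V[K], Pos);
    if (C < Pivot)
      std::swap(V[I++], V[K++]);
    else if (C > Pivot)
      std::swap(V[--J], V[K]);
    else
      ++K;
  }
  multikeySort(V, Lo, I, Pos);      // strictly smaller at Pos
  multikeySort(V, J, Hi, Pos);      // strictly larger at Pos
  if (Pivot != -1)
    multikeySort(V, I, J, Pos + 1); // equal at Pos: move to next character
}
```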
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index d7a9895af0291..44f2ba6ed7d91 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -104,6 +104,8 @@ struct WasmDataSegment { MCSectionWasm *Section; StringRef Name; uint32_t Offset; + uint32_t Alignment; + uint32_t Flags; SmallVector<char, 4> Data; }; @@ -225,8 +227,10 @@ class WasmObjectWriter : public MCObjectWriter { void endSection(SectionBookkeeping &Section); public: - WasmObjectWriter(MCWasmObjectTargetWriter *MOTW, raw_pwrite_stream &OS) - : MCObjectWriter(OS, /*IsLittleEndian=*/true), TargetObjectWriter(MOTW) {} + WasmObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW, + raw_pwrite_stream &OS) + : MCObjectWriter(OS, /*IsLittleEndian=*/true), + TargetObjectWriter(std::move(MOTW)) {} private: ~WasmObjectWriter() override; @@ -282,7 +286,6 @@ class WasmObjectWriter : public MCObjectWriter { void writeDataRelocSection(); void writeLinkingMetaDataSection( ArrayRef<WasmDataSegment> Segments, uint32_t DataSize, - uint32_t DataAlignment, SmallVector<std::pair<StringRef, uint32_t>, 4> SymbolFlags, bool HasStackPointer, uint32_t StackPointerGlobal); @@ -434,10 +437,13 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection); DEBUG(dbgs() << "WasmReloc: " << Rec << "\n"); - if (FixupSection.hasInstructions()) - CodeRelocations.push_back(Rec); - else + if (FixupSection.isWasmData()) DataRelocations.push_back(Rec); + else if (FixupSection.getKind().isText()) + CodeRelocations.push_back(Rec); + else if (!FixupSection.getKind().isMetadata()) + // TODO(sbc): Add support for debug sections. + llvm_unreachable("unexpected section type"); } // Write X as an (unsigned) LEB value at offset Offset in Stream, padded @@ -499,11 +505,11 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) { } static void addData(SmallVectorImpl<char> &DataBytes, - MCSectionWasm &DataSection, uint32_t &DataAlignment) { - DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment())); - DataAlignment = std::max(DataAlignment, DataSection.getAlignment()); + MCSectionWasm &DataSection) { DEBUG(errs() << "addData: " << DataSection.getSectionName() << "\n"); + DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment())); + for (const MCFragment &Frag : DataSection) { if (Frag.hasInstructions()) report_fatal_error("only data supported in data sections"); @@ -914,7 +920,6 @@ void WasmObjectWriter::writeDataRelocSection() { void WasmObjectWriter::writeLinkingMetaDataSection( ArrayRef<WasmDataSegment> Segments, uint32_t DataSize, - uint32_t DataAlignment, SmallVector<std::pair<StringRef, uint32_t>, 4> SymbolFlags, bool HasStackPointer, uint32_t StackPointerGlobal) { SectionBookkeeping Section; @@ -941,17 +946,16 @@ void WasmObjectWriter::writeLinkingMetaDataSection( startSection(SubSection, wasm::WASM_DATA_SIZE); encodeULEB128(DataSize, getStream()); endSection(SubSection); - - startSection(SubSection, wasm::WASM_DATA_ALIGNMENT); - encodeULEB128(DataAlignment, getStream()); - endSection(SubSection); } if (Segments.size()) { - startSection(SubSection, wasm::WASM_SEGMENT_NAMES); + startSection(SubSection, wasm::WASM_SEGMENT_INFO); encodeULEB128(Segments.size(), getStream()); - for (const WasmDataSegment &Segment : Segments) + for (const WasmDataSegment &Segment : Segments) { writeString(Segment.Name); + encodeULEB128(Segment.Alignment, getStream()); + encodeULEB128(Segment.Flags, getStream()); + } endSection(SubSection); } @@ -998,7 +1002,6 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, SmallPtrSet<const MCSymbol *, 4> IsAddressTaken; unsigned NumFuncImports = 0; SmallVector<WasmDataSegment, 4> DataSegments; - uint32_t DataAlignment = 1; uint32_t StackPointerGlobal = 0; uint32_t DataSize = 0; bool HasStackPointer = false; @@ -1060,7 +1063,8 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, // In the special .global_variables section, we've encoded global // variables used by the function. Translate them into the Globals // list. - MCSectionWasm *GlobalVars = Ctx.getWasmSection(".global_variables", wasm::WASM_SEC_DATA); + MCSectionWasm *GlobalVars = + Ctx.getWasmSection(".global_variables", SectionKind::getMetadata()); if (!GlobalVars->getFragmentList().empty()) { if (GlobalVars->getFragmentList().size() != 1) report_fatal_error("only one .global_variables fragment supported"); @@ -1116,7 +1120,8 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, // In the special .stack_pointer section, we've encoded the stack pointer // index.
- MCSectionWasm *StackPtr = Ctx.getWasmSection(".stack_pointer", wasm::WASM_SEC_DATA); + MCSectionWasm *StackPtr = + Ctx.getWasmSection(".stack_pointer", SectionKind::getMetadata()); if (!StackPtr->getFragmentList().empty()) { if (StackPtr->getFragmentList().size() != 1) report_fatal_error("only one .stack_pointer fragment supported"); @@ -1135,7 +1140,7 @@ for (MCSection &Sec : Asm) { auto &Section = static_cast<MCSectionWasm &>(Sec); - if (Section.getType() != wasm::WASM_SEC_DATA) + if (!Section.isWasmData()) continue; DataSize = alignTo(DataSize, Section.getAlignment()); @@ -1144,7 +1149,9 @@ Segment.Name = Section.getSectionName(); Segment.Offset = DataSize; Segment.Section = &Section; - addData(Segment.Data, Section, DataAlignment); + addData(Segment.Data, Section); + Segment.Alignment = Section.getAlignment(); + Segment.Flags = 0; DataSize += Segment.Data.size(); Section.setMemoryOffset(Segment.Offset); } @@ -1308,14 +1315,18 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, writeNameSection(Functions, Imports, NumFuncImports); writeCodeRelocSection(); writeDataRelocSection(); - writeLinkingMetaDataSection(DataSegments, DataSize, DataAlignment, - SymbolFlags, HasStackPointer, StackPointerGlobal); + writeLinkingMetaDataSection(DataSegments, DataSize, SymbolFlags, + HasStackPointer, StackPointerGlobal); // TODO: Translate the .comment section to the output. // TODO: Translate debug sections to the output. } -MCObjectWriter *llvm::createWasmObjectWriter(MCWasmObjectTargetWriter *MOTW, - raw_pwrite_stream &OS) { - return new WasmObjectWriter(MOTW, OS); +std::unique_ptr<MCObjectWriter> +llvm::createWasmObjectWriter(std::unique_ptr<MCWasmObjectTargetWriter> MOTW, + raw_pwrite_stream &OS) { + // FIXME: Can't use make_unique(...) as WasmObjectWriter's + // destructor is private. Is that necessary?
+ return std::unique_ptr<MCObjectWriter>( + new WasmObjectWriter(std::move(MOTW), OS)); } diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 956ae70b38d19..9f1db46939c79 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -145,7 +145,8 @@ class WinCOFFObjectWriter : public MCObjectWriter { bool UseBigObj; - WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_pwrite_stream &OS); + WinCOFFObjectWriter(std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, + raw_pwrite_stream &OS); void reset() override { memset(&Header, 0, sizeof(Header)); @@ -222,9 +223,9 @@ void COFFSymbol::set_name_offset(uint32_t Offset) { //------------------------------------------------------------------------------ // WinCOFFObjectWriter class implementation -WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, - raw_pwrite_stream &OS) - : MCObjectWriter(OS, true), TargetObjectWriter(MOTW) { +WinCOFFObjectWriter::WinCOFFObjectWriter( + std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS) + : MCObjectWriter(OS, true), TargetObjectWriter(std::move(MOTW)) { Header.Machine = TargetObjectWriter->getMachine(); } @@ -1084,8 +1085,7 @@ void MCWinCOFFObjectTargetWriter::anchor() {} //------------------------------------------------------------------------------ // WinCOFFObjectWriter factory function -MCObjectWriter * -llvm::createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, - raw_pwrite_stream &OS) { - return new WinCOFFObjectWriter(MOTW, OS); +std::unique_ptr<MCObjectWriter> llvm::createWinCOFFObjectWriter( + std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS) { + return llvm::make_unique<WinCOFFObjectWriter>(std::move(MOTW), OS); } diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index b36b256d446d7..919e267680205 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -167,11 +167,11 @@ printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name, } static void -printBSDMemberHeader(raw_ostream &Out, StringRef Name, +printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name, const sys::TimePoint<std::chrono::seconds> &ModTime, unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { - uint64_t PosAfterHeader = Out.tell() + 60 + Name.size(); + uint64_t PosAfterHeader = Pos + 60 + Name.size(); // Pad so that even 64 bit object files are aligned. unsigned Pad = OffsetToAlignment(PosAfterHeader, 8); unsigned NameWithPadding = Name.size() + Pad; @@ -179,7 +179,6 @@ printBSDMemberHeader(raw_ostream &Out, StringRef Name, printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, NameWithPadding + Size); Out << Name; - assert(PosAfterHeader == Out.tell()); while (Pad--) Out.write(uint8_t(0)); } @@ -188,21 +187,6 @@ static bool useStringTable(bool Thin, StringRef Name) { return Thin || Name.size() >= 16 || Name.contains('/'); } -static void -printMemberHeader(raw_ostream &Out, object::Archive::Kind Kind, bool Thin, - StringRef Name, - std::vector<unsigned>::iterator &StringMapIndexIter, - const sys::TimePoint<std::chrono::seconds> &ModTime, - unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { - if (isBSDLike(Kind)) - return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); - if (!useStringTable(Thin, Name)) - return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); - Out << '/'; - printWithSpacePadding(Out, *StringMapIndexIter++, 15); - printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); -} -
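The BSD branch above appends long member names directly after the fixed 60-byte ar header and pads them so that the member data stays 8-byte aligned; with the new uint64_t Pos parameter that padding can be computed without querying the output stream. A worked illustration of the arithmetic (OffsetToAlignment comes from llvm/Support/MathExtras.h; the values are made up):

```cpp
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Where does member data start for a BSD-style long-name header at Pos?
uint64_t memberDataStart(uint64_t Pos, llvm::StringRef Name) {
  uint64_t PosAfterHeader = Pos + 60 + Name.size(); // header, then the name
  unsigned Pad = llvm::OffsetToAlignment(PosAfterHeader, 8);
  // e.g. Pos = 8 and Name = "__.SYMDEF" (9 bytes): PosAfterHeader = 77,
  // Pad = 3, so the data begins at 80, a multiple of 8.
  return PosAfterHeader + Pad;
}
```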
// Compute the relative path from From to To. static std::string computeRelativePath(StringRef From, StringRef To) { if (sys::path::is_absolute(From) || sys::path::is_absolute(To)) return To; @@ -232,41 +216,56 @@ static std::string computeRelativePath(StringRef From, StringRef To) { return Relative.str(); } -static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, - ArrayRef<NewArchiveMember> Members, - std::vector<unsigned> &StringMapIndexes, - bool Thin) { - unsigned StartOffset = 0; - for (const NewArchiveMember &M : Members) { - StringRef Path = M.Buf->getBufferIdentifier(); - StringRef Name = M.MemberName; - if (!useStringTable(Thin, Name)) - continue; - if (StartOffset == 0) { - printWithSpacePadding(Out, "//", 58); - Out << "`\n"; - StartOffset = Out.tell(); - } - StringMapIndexes.push_back(Out.tell() - StartOffset); +static void addToStringTable(raw_ostream &Out, StringRef ArcName, + const NewArchiveMember &M, bool Thin) { + StringRef ID = M.Buf->getBufferIdentifier(); + if (Thin) { + if (M.IsNew) + Out << computeRelativePath(ArcName, ID); + else + Out << ID; + } else + Out << M.MemberName; + Out << "/\n"; +} - if (Thin) { - if (M.IsNew) - Out << computeRelativePath(ArcName, Path); - else - Out << M.Buf->getBufferIdentifier(); - } else - Out << Name; +static void printMemberHeader(raw_ostream &Out, uint64_t Pos, + raw_ostream &StringTable, + object::Archive::Kind Kind, bool Thin, + StringRef ArcName, const NewArchiveMember &M, + unsigned Size) { + if (isBSDLike(Kind)) + return printBSDMemberHeader(Out, Pos, M.MemberName, M.ModTime, M.UID, M.GID, + M.Perms, Size); + if (!useStringTable(Thin, M.MemberName)) + return printGNUSmallMemberHeader(Out, M.MemberName, M.ModTime, M.UID, M.GID, + M.Perms, Size); + Out << '/'; + uint64_t NamePos = StringTable.tell(); + addToStringTable(StringTable, ArcName, M, Thin); + printWithSpacePadding(Out, NamePos, 15); + printRestOfMemberHeader(Out, M.ModTime, M.UID, M.GID, M.Perms, Size); +} - Out << "/\n"; - } - if (StartOffset == 0) - return; - if (Out.tell() % 2) - Out << '\n'; - int Pos = Out.tell(); - Out.seek(StartOffset - 12); - printWithSpacePadding(Out, Pos - StartOffset, 10); - Out.seek(Pos); +namespace { +struct MemberData { + std::vector<unsigned> Symbols; + std::string Header; + StringRef Data; + StringRef Padding; +}; +} // namespace + +static MemberData computeStringTable(StringRef Names) { + unsigned Size = Names.size(); + unsigned Pad = OffsetToAlignment(Size, 2); + std::string Header; + raw_string_ostream Out(Header); + printWithSpacePadding(Out, "//", 48); + printWithSpacePadding(Out, Size + Pad, 10); + Out << "`\n"; + Out.flush(); + return {{}, std::move(Header), Names, Pad ? "\n" : ""}; } static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) { @@ -289,97 +288,137 @@ static bool isArchiveSymbol(const object::BasicSymbolRef &S) { return true; } -// Returns the offset of the first reference to a member offset. -static Expected<unsigned> -writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, - ArrayRef<NewArchiveMember> Members, - std::vector<unsigned> &MemberOffsetRefs, bool Deterministic) { - unsigned HeaderStartOffset = 0; - unsigned BodyStartOffset = 0; - SmallString<128> NameBuf; - raw_svector_ostream NameOS(NameBuf); - LLVMContext Context; - for (unsigned MemberNum = 0, N = Members.size(); MemberNum < N; ++MemberNum) { - MemoryBufferRef MemberBuffer = Members[MemberNum].Buf->getMemBufferRef(); - Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr = - object::SymbolicFile::createSymbolicFile( - MemberBuffer, llvm::file_magic::unknown, &Context); - if (!ObjOrErr) { - // FIXME: check only for "not an object file" errors.
- consumeError(ObjOrErr.takeError()); - continue; - } - object::SymbolicFile &Obj = *ObjOrErr.get(); - - if (!HeaderStartOffset) { - HeaderStartOffset = Out.tell(); - if (isBSDLike(Kind)) - printBSDMemberHeader(Out, "__.SYMDEF", now(Deterministic), 0, 0, 0, 0); - else - printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, 0); - BodyStartOffset = Out.tell(); - print32(Out, Kind, 0); // number of entries or bytes - } - - for (const object::BasicSymbolRef &S : Obj.symbols()) { - if (!isArchiveSymbol(S)) - continue; - - unsigned NameOffset = NameOS.tell(); - if (std::error_code EC = S.printName(NameOS)) - return errorCodeToError(EC); - NameOS << '\0'; - MemberOffsetRefs.push_back(MemberNum); - if (isBSDLike(Kind)) - print32(Out, Kind, NameOffset); - print32(Out, Kind, 0); // member offset - } - } +static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, + bool Deterministic, ArrayRef<MemberData> Members, + StringRef StringTable) { + if (StringTable.empty()) + return; - if (HeaderStartOffset == 0) - return 0; + unsigned NumSyms = 0; + for (const MemberData &M : Members) + NumSyms += M.Symbols.size(); - // ld64 prefers the cctools type archive which pads its string table to a - // boundary of sizeof(int32_t). + unsigned Size = 0; + Size += 4; // Number of entries if (isBSDLike(Kind)) - for (unsigned P = OffsetToAlignment(NameOS.tell(), sizeof(int32_t)); P--;) - NameOS << '\0'; - - StringRef StringTable = NameOS.str(); + Size += NumSyms * 8; // Table else + Size += NumSyms * 4; // Table if (isBSDLike(Kind)) - print32(Out, Kind, StringTable.size()); // byte count of the string table - Out << StringTable; - // If there are no symbols, emit an empty symbol table, to satisfy Solaris - // tools, older versions of which expect a symbol table in a non-empty - // archive, regardless of whether there are any symbols in it. - if (StringTable.size() == 0) - print32(Out, Kind, 0); - + Size += 4; // byte count + Size += StringTable.size(); // ld64 expects the members to be 8-byte aligned for 64-bit content and at // least 4-byte aligned for 32-bit content. Opt for the larger encoding // uniformly. // We do this for all bsd formats because it simplifies aligning members. unsigned Alignment = isBSDLike(Kind) ? 8 : 2; - unsigned Pad = OffsetToAlignment(Out.tell(), Alignment); - while (Pad--) - Out.write(uint8_t(0)); + unsigned Pad = OffsetToAlignment(Size, Alignment); + Size += Pad; + + if (isBSDLike(Kind)) + printBSDMemberHeader(Out, Out.tell(), "__.SYMDEF", now(Deterministic), 0, 0, + 0, Size); + else + printGNUSmallMemberHeader(Out, "", now(Deterministic), 0, 0, 0, Size); - // Patch up the size of the symbol table now that we know how big it is. - unsigned Pos = Out.tell(); - const unsigned MemberHeaderSize = 60; - Out.seek(HeaderStartOffset + 48); // offset of the size field. - printWithSpacePadding(Out, Pos - MemberHeaderSize - HeaderStartOffset, 10); + uint64_t Pos = Out.tell() + Size; - // Patch up the number of symbols.
- Out.seek(BodyStartOffset); - unsigned NumSyms = MemberOffsetRefs.size(); if (isBSDLike(Kind)) print32(Out, Kind, NumSyms * 8); else print32(Out, Kind, NumSyms); - Out.seek(Pos); - return BodyStartOffset + 4; + for (const MemberData &M : Members) { + for (unsigned StringOffset : M.Symbols) { + if (isBSDLike(Kind)) + print32(Out, Kind, StringOffset); + print32(Out, Kind, Pos); // member offset + } + Pos += M.Header.size() + M.Data.size() + M.Padding.size(); + } + + if (isBSDLike(Kind)) + print32(Out, Kind, StringTable.size()); // byte count of the string table + Out << StringTable; + + while (Pad--) + Out.write(uint8_t(0)); +} + +static Expected<std::vector<unsigned>> +getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { + std::vector<unsigned> Ret; + LLVMContext Context; + + Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr = + object::SymbolicFile::createSymbolicFile(Buf, llvm::file_magic::unknown, + &Context); + if (!ObjOrErr) { + // FIXME: check only for "not an object file" errors. + consumeError(ObjOrErr.takeError()); + return Ret; + } + + HasObject = true; + object::SymbolicFile &Obj = *ObjOrErr.get(); + for (const object::BasicSymbolRef &S : Obj.symbols()) { + if (!isArchiveSymbol(S)) + continue; + Ret.push_back(SymNames.tell()); + if (auto EC = S.printName(SymNames)) + return errorCodeToError(EC); + SymNames << '\0'; + } + return Ret; +} + +static Expected<std::vector<MemberData>> +computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, + object::Archive::Kind Kind, bool Thin, StringRef ArcName, + ArrayRef<NewArchiveMember> NewMembers) { + static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; + + // This ignores the symbol table, but we only need the value mod 8 and the + // symbol table is aligned to be a multiple of 8 bytes + uint64_t Pos = 0; + + std::vector<MemberData> Ret; + bool HasObject = false; + for (const NewArchiveMember &M : NewMembers) { + std::string Header; + raw_string_ostream Out(Header); + + MemoryBufferRef Buf = M.Buf->getMemBufferRef(); + StringRef Data = Thin ? "" : Buf.getBuffer(); + + // ld64 expects the members to be 8-byte aligned for 64-bit content and at + // least 4-byte aligned for 32-bit content. Opt for the larger encoding + // uniformly. This matches the behaviour with cctools and ensures that ld64 + // is happy with archives that we generate. + unsigned MemberPadding = Kind == object::Archive::K_DARWIN + ? OffsetToAlignment(Data.size(), 8) + : 0; + unsigned TailPadding = OffsetToAlignment(Data.size() + MemberPadding, 2); + StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding); + + printMemberHeader(Out, Pos, StringTable, Kind, Thin, ArcName, M, + Buf.getBufferSize() + MemberPadding); + Out.flush(); + + Expected<std::vector<unsigned>> Symbols = + getSymbols(Buf, SymNames, HasObject); + if (auto E = Symbols.takeError()) + return std::move(E); + + Pos += Header.size() + Data.size() + Padding.size(); + Ret.push_back({std::move(*Symbols), std::move(Header), Data, Padding}); + }
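With each member's header, data, and padding precomputed, final file offsets follow by simple accumulation, which is what lets the rewritten writeSymbolTable above emit correct member offsets in a single forward pass instead of seeking back to patch them. A condensed sketch of that layout walk (field names follow the MemberData struct above; the starting offset and emit helper are hypothetical):

```cpp
// Pos starts at the file offset of the first member, i.e. just past the
// archive magic and the symbol table whose total size is known up front.
uint64_t Pos = FirstMemberOffset; // assumed to be computed beforehand
for (const MemberData &M : Data) {
  for (unsigned StringOffset : M.Symbols)
    emitSymtabEntry(StringOffset, Pos); // hypothetical: name -> member offset
  Pos += M.Header.size() + M.Data.size() + M.Padding.size();
}
```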
+ // If there are no symbols, emit an empty symbol table, to satisfy Solaris + // tools, older versions of which expect a symbol table in a non-empty + // archive, regardless of whether there are any symbols in it. + if (HasObject && SymNames.tell() == 0) + SymNames << '\0' << '\0' << '\0'; + return Ret; } Error llvm::writeArchive(StringRef ArcName, @@ -388,6 +427,21 @@ Error llvm::writeArchive(StringRef ArcName, bool Deterministic, bool Thin, std::unique_ptr<MemoryBuffer> OldArchiveBuf) { assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode"); + + SmallString<0> SymNamesBuf; + raw_svector_ostream SymNames(SymNamesBuf); + SmallString<0> StringTableBuf; + raw_svector_ostream StringTable(StringTableBuf); + + Expected<std::vector<MemberData>> DataOrErr = + computeMemberData(StringTable, SymNames, Kind, Thin, ArcName, NewMembers); + if (Error E = DataOrErr.takeError()) + return E; + std::vector<MemberData> &Data = *DataOrErr; + + if (!StringTableBuf.empty()) + Data.insert(Data.begin(), computeStringTable(StringTableBuf)); + SmallString<128> TmpArchive; int TmpArchiveFD; if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", @@ -401,58 +455,11 @@ Error llvm::writeArchive(StringRef ArcName, else Out << "!\n"; - std::vector<unsigned> MemberOffsetRefs; - - unsigned MemberReferenceOffset = 0; - if (WriteSymtab) { - Expected<unsigned> MemberReferenceOffsetOrErr = writeSymbolTable( - Out, Kind, NewMembers, MemberOffsetRefs, Deterministic); - if (auto E = MemberReferenceOffsetOrErr.takeError()) - return E; - MemberReferenceOffset = MemberReferenceOffsetOrErr.get(); - } - - std::vector<unsigned> StringMapIndexes; - if (!isBSDLike(Kind)) - writeStringTable(Out, ArcName, NewMembers, StringMapIndexes, Thin); - - std::vector<unsigned>::iterator StringMapIndexIter = StringMapIndexes.begin(); - std::vector<unsigned> MemberOffset; - for (const NewArchiveMember &M : NewMembers) { - MemoryBufferRef File = M.Buf->getMemBufferRef(); - unsigned Padding = 0; - - unsigned Pos = Out.tell(); - MemberOffset.push_back(Pos); + if (WriteSymtab) + writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf); - // ld64 expects the members to be 8-byte aligned for 64-bit content and at - // least 4-byte aligned for 32-bit content. Opt for the larger encoding - // uniformly. This matches the behaviour with cctools and ensures that ld64 - // is happy with archives that we generate. - if (Kind == object::Archive::K_DARWIN) - Padding = OffsetToAlignment(M.Buf->getBufferSize(), 8); - - printMemberHeader(Out, Kind, Thin, M.MemberName, StringMapIndexIter, - M.ModTime, M.UID, M.GID, M.Perms, - M.Buf->getBufferSize() + Padding); - - if (!Thin) - Out << File.getBuffer(); - - while (Padding--) - Out << '\n'; - if (Out.tell() % 2) - Out << '\n'; - } - - if (MemberReferenceOffset) { - Out.seek(MemberReferenceOffset); - for (unsigned MemberNum : MemberOffsetRefs) { - if (isBSDLike(Kind)) - Out.seek(Out.tell() + 4); // skip over the string offset - print32(Out, Kind, MemberOffset[MemberNum]); - } - } + for (const MemberData &M : Data) + Out << M.Header << M.Data << M.Padding; Output.keep(); Out.close(); diff --git a/lib/Object/COFFModuleDefinition.cpp b/lib/Object/COFFModuleDefinition.cpp index 510eac8b239ba..6ea6015eabcac 100644 --- a/lib/Object/COFFModuleDefinition.cpp +++ b/lib/Object/COFFModuleDefinition.cpp @@ -57,9 +57,27 @@ struct Token { }; static bool isDecorated(StringRef Sym, bool MingwDef) { - // mingw does not prepend "_". - return (!MingwDef && Sym.startswith("_")) || Sym.startswith("@") || - Sym.startswith("?"); + // In def files, the symbols can either be listed decorated or undecorated. + // + // - For cdecl symbols, only the undecorated form is allowed. + // - For fastcall and vectorcall symbols, both fully decorated or + // undecorated forms can be present.
+ // - For stdcall symbols in non-MinGW environments, the decorated form is + // fully decorated with leading underscore and trailing stack argument + // size - like "_Func@0". + // - In MinGW def files, a decorated stdcall symbol does not include the + // leading underscore though, like "Func@0". + + // This function controls whether a leading underscore should be added to + // the given symbol name or not. For MinGW, treat a stdcall symbol name such + // as "Func@0" as undecorated, i.e. a leading underscore must be added. + // For non-MinGW, look for '@' in the whole string and consider "_Func@0" + // as decorated, i.e. don't add any more leading underscores. + // We can't check for a leading underscore here, since function names + // themselves can start with an underscore, while a second one still needs + // to be added. + return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") || + (!MingwDef && Sym.contains('@')); } static Error createError(const Twine &Err) { diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index f88ebfc9a1d23..06ac6df79ad66 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -1590,12 +1590,12 @@ std::error_code ImportedSymbolRef::getOrdinal(uint16_t &Result) const { return std::error_code(); } -ErrorOr<std::unique_ptr<COFFObjectFile>> +Expected<std::unique_ptr<COFFObjectFile>> ObjectFile::createCOFFObjectFile(MemoryBufferRef Object) { std::error_code EC; std::unique_ptr<COFFObjectFile> Ret(new COFFObjectFile(Object, EC)); if (EC) - return EC; + return errorCodeToError(EC); return std::move(Ret); } @@ -1633,11 +1633,12 @@ std::error_code BaseRelocRef::getRVA(uint32_t &Result) const { return std::error_code(); } -#define RETURN_IF_ERROR(X) \ - if (auto EC = errorToErrorCode(X)) \ - return EC; +#define RETURN_IF_ERROR(E) \ + if (E) \ + return E; -ErrorOr<ArrayRef<UTF16>> ResourceSectionRef::getDirStringAtOffset(uint32_t Offset) { +Expected<ArrayRef<UTF16>> +ResourceSectionRef::getDirStringAtOffset(uint32_t Offset) { BinaryStreamReader Reader = BinaryStreamReader(BBS); Reader.setOffset(Offset); uint16_t Length; @@ -1647,12 +1648,12 @@ ErrorOr<ArrayRef<UTF16>> ResourceSectionRef::getDirStringAtOffset(uint32_t Offse return RawDirString; } -ErrorOr<ArrayRef<UTF16>> +Expected<ArrayRef<UTF16>> ResourceSectionRef::getEntryNameString(const coff_resource_dir_entry &Entry) { return getDirStringAtOffset(Entry.Identifier.getNameOffset()); } -ErrorOr<const coff_resource_dir_table &> +Expected<const coff_resource_dir_table &> ResourceSectionRef::getTableAtOffset(uint32_t Offset) { const coff_resource_dir_table *Table = nullptr; @@ -1663,11 +1664,11 @@ ResourceSectionRef::getTableAtOffset(uint32_t Offset) { return *Table; } -ErrorOr<const coff_resource_dir_table &> +Expected<const coff_resource_dir_table &> ResourceSectionRef::getEntrySubDir(const coff_resource_dir_entry &Entry) { return getTableAtOffset(Entry.Offset.value()); } -ErrorOr<const coff_resource_dir_table &> ResourceSectionRef::getBaseTable() { +Expected<const coff_resource_dir_table &> ResourceSectionRef::getBaseTable() { return getTableAtOffset(0); } diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index 130fb2d9ecc69..2bb1fd568948b 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -9,6 +9,7 @@ #include "llvm/Object/ELF.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/Support/LEB128.h" using namespace llvm; using namespace object; @@ -210,3 +211,92 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) { return "Unknown"; } } + +template <class ELFT> +Expected<std::vector<typename ELFT::Rela>> +ELFFile<ELFT>::android_relas(const Elf_Shdr *Sec) const { + // This function reads relocations in Android's packed relocation format, + // which is based on SLEB128 and delta encoding.
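Before the parser body, a toy illustration of the delta scheme this comment describes: relocation offsets arrive as signed LEB128 deltas against a running value, so monotonically increasing offsets encode in one or two bytes each (a standalone sketch of the principle, not the actual APS2 grammar parsed below):

```cpp
#include "llvm/Support/LEB128.h"
#include <cstdint>
#include <vector>

// Decode Count offsets stored as SLEB128 deltas from a running base value.
std::vector<uint64_t> decodeDeltaOffsets(const uint8_t *Cur,
                                         const uint8_t *End, uint64_t Count) {
  std::vector<uint64_t> Offsets;
  uint64_t Offset = 0;
  for (uint64_t I = 0; I != Count && Cur < End; ++I) {
    unsigned Len;
    Offset += llvm::decodeSLEB128(Cur, &Len); // delta; may be negative
    Cur += Len;
    Offsets.push_back(Offset);
  }
  return Offsets;
}
```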
+ Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec); + if (!ContentsOrErr) + return ContentsOrErr.takeError(); + const uint8_t *Cur = ContentsOrErr->begin(); + const uint8_t *End = ContentsOrErr->end(); + if (ContentsOrErr->size() < 4 || Cur[0] != 'A' || Cur[1] != 'P' || + Cur[2] != 'S' || Cur[3] != '2') + return createError("invalid packed relocation header"); + Cur += 4; + + const char *ErrStr = nullptr; + auto ReadSLEB = [&]() -> int64_t { + if (ErrStr) + return 0; + unsigned Len; + int64_t Result = decodeSLEB128(Cur, &Len, End, &ErrStr); + Cur += Len; + return Result; + }; + + uint64_t NumRelocs = ReadSLEB(); + uint64_t Offset = ReadSLEB(); + uint64_t Addend = 0; + + if (ErrStr) + return createError(ErrStr); + + std::vector<Elf_Rela> Relocs; + Relocs.reserve(NumRelocs); + while (NumRelocs) { + uint64_t NumRelocsInGroup = ReadSLEB(); + if (NumRelocsInGroup > NumRelocs) + return createError("relocation group unexpectedly large"); + NumRelocs -= NumRelocsInGroup; + + uint64_t GroupFlags = ReadSLEB(); + bool GroupedByInfo = GroupFlags & ELF::RELOCATION_GROUPED_BY_INFO_FLAG; + bool GroupedByOffsetDelta = GroupFlags & ELF::RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG; + bool GroupedByAddend = GroupFlags & ELF::RELOCATION_GROUPED_BY_ADDEND_FLAG; + bool GroupHasAddend = GroupFlags & ELF::RELOCATION_GROUP_HAS_ADDEND_FLAG; + + uint64_t GroupOffsetDelta; + if (GroupedByOffsetDelta) + GroupOffsetDelta = ReadSLEB(); + + uint64_t GroupRInfo; + if (GroupedByInfo) + GroupRInfo = ReadSLEB(); + + if (GroupedByAddend && GroupHasAddend) + Addend += ReadSLEB(); + + for (uint64_t I = 0; I != NumRelocsInGroup; ++I) { + Elf_Rela R; + Offset += GroupedByOffsetDelta ? GroupOffsetDelta : ReadSLEB(); + R.r_offset = Offset; + R.r_info = GroupedByInfo ? GroupRInfo : ReadSLEB(); + + if (GroupHasAddend) { + if (!GroupedByAddend) + Addend += ReadSLEB(); + R.r_addend = Addend; + } else { + R.r_addend = 0; + } + + Relocs.push_back(R); + + if (ErrStr) + return createError(ErrStr); + } + + if (ErrStr) + return createError(ErrStr); + } + + return Relocs; +} + +template class llvm::object::ELFFile<ELF32LE>; +template class llvm::object::ELFFile<ELF32BE>; +template class llvm::object::ELFFile<ELF64LE>; +template class llvm::object::ELFFile<ELF64BE>; diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 986eccc36e371..0aad1c89a2d8b 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -37,7 +37,16 @@ using namespace object; ELFObjectFileBase::ELFObjectFileBase(unsigned int Type, MemoryBufferRef Source) : ObjectFile(Type, Source) {} -ErrorOr<std::unique_ptr<ObjectFile>> +template <class ELFT> +static Expected<std::unique_ptr<ELFObjectFile<ELFT>>> +createPtr(MemoryBufferRef Object) { + auto Ret = ELFObjectFile<ELFT>::create(Object); + if (Error E = Ret.takeError()) + return std::move(E); + return make_unique<ELFObjectFile<ELFT>>(std::move(*Ret)); +} + +Expected<std::unique_ptr<ObjectFile>> ObjectFile::createELFObjectFile(MemoryBufferRef Obj) { std::pair<unsigned char, unsigned char> Ident = getElfArchType(Obj.getBuffer()); @@ -45,31 +54,24 @@ ObjectFile::createELFObjectFile(MemoryBufferRef Obj) { 1ULL << countTrailingZeros(uintptr_t(Obj.getBufferStart())); if (MaxAlignment < 2) - return object_error::parse_failed; + return createError("Insufficient alignment"); - std::error_code EC; - std::unique_ptr<ObjectFile> R; if (Ident.first == ELF::ELFCLASS32) { if (Ident.second == ELF::ELFDATA2LSB) - R.reset(new ELFObjectFile<ELFType<support::little, false>>(Obj, EC)); + return createPtr<ELF32LE>(Obj); else if (Ident.second == ELF::ELFDATA2MSB) - R.reset(new ELFObjectFile<ELFType<support::big, false>>(Obj, EC)); + return createPtr<ELF32BE>(Obj); else - return object_error::parse_failed; + return createError("Invalid ELF data"); } else if (Ident.first == ELF::ELFCLASS64) { if
(Ident.second == ELF::ELFDATA2LSB) - R.reset(new ELFObjectFile<ELFType<support::little, true>>(Obj, EC)); + return createPtr<ELF64LE>(Obj); else if (Ident.second == ELF::ELFDATA2MSB) - R.reset(new ELFObjectFile<ELFType<support::big, true>>(Obj, EC)); + return createPtr<ELF64BE>(Obj); else - return object_error::parse_failed; - } else { - return object_error::parse_failed; + return createError("Invalid ELF data"); } - - if (EC) - return EC; - return std::move(R); + return createError("Invalid ELF class"); } SubtargetFeatures ELFObjectFileBase::getMIPSFeatures() const { diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index e7807b0383351..ed6d6b1cb4e36 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -82,20 +82,22 @@ StringRef IRObjectFile::getTargetTriple() const { return Mods[0]->getTargetTriple(); } -ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) { +Expected<MemoryBufferRef> +IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) { for (const SectionRef &Sec : Obj.sections()) { if (Sec.isBitcode()) { StringRef SecContents; if (std::error_code EC = Sec.getContents(SecContents)) - return EC; + return errorCodeToError(EC); return MemoryBufferRef(SecContents, Obj.getFileName()); } } - return object_error::bitcode_section_not_found; + return errorCodeToError(object_error::bitcode_section_not_found); } -ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { +Expected<MemoryBufferRef> +IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { file_magic Type = identify_magic(Object.getBuffer()); switch (Type) { case file_magic::bitcode: @@ -106,19 +108,19 @@ ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Ob Expected<std::unique_ptr<ObjectFile>> ObjFile = ObjectFile::createObjectFile(Object, Type); if (!ObjFile) - return errorToErrorCode(ObjFile.takeError()); + return ObjFile.takeError(); return findBitcodeInObject(*ObjFile->get()); } default: - return object_error::invalid_file_type; + return errorCodeToError(object_error::invalid_file_type); } } Expected<std::unique_ptr<IRObjectFile>> IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) { - ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object); + Expected<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object); if (!BCOrErr) - return errorCodeToError(BCOrErr.getError()); + return BCOrErr.takeError(); Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(*BCOrErr); @@ -142,10 +144,10 @@ IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) { Expected<IRSymtabFile> object::readIRSymtab(MemoryBufferRef MBRef) { IRSymtabFile F; - ErrorOr<MemoryBufferRef> BCOrErr = + Expected<MemoryBufferRef> BCOrErr = IRObjectFile::findBitcodeInMemBuffer(MBRef); if (!BCOrErr) - return errorCodeToError(BCOrErr.getError()); + return BCOrErr.takeError(); Expected<BitcodeFileContents> BFCOrErr = getBitcodeFileContents(*BCOrErr); if (!BFCOrErr) diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 7b4be973a097d..4620fdde81d2f 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -477,8 +477,8 @@ static Error checkDysymtabCommand(const MachOObjectFile &Obj, "the file"); if (Error Err = checkOverlappingElement(Elements, Dysymtab.tocoff, Dysymtab.ntoc * sizeof(struct - MachO::dylib_table_of_contents), - "table of contents")) + MachO::dylib_table_of_contents), + "table of contents")) return Err; if (Dysymtab.modtaboff > FileSize) return malformedError("modtaboff field of LC_DYSYMTAB command " + @@ -537,7 +537,7 @@ static Error checkDysymtabCommand(const MachOObjectFile &Obj, if (Error Err = checkOverlappingElement(Elements, Dysymtab.indirectsymoff, Dysymtab.nindirectsyms * sizeof(uint32_t), - "indirect table")) + "indirect table")) return Err; if
(Dysymtab.extreloff > FileSize) return malformedError("extreloff field of LC_DYSYMTAB command " + @@ -1115,7 +1115,7 @@ static Error checkTwoLevelHintsCommand(const MachOObjectFile &Obj, Twine(LoadCommandIndex) + " extends past the end of " "the file"); uint64_t BigSize = Hints.nhints; - BigSize *= Hints.nhints * sizeof(MachO::twolevel_hint); + BigSize *= sizeof(MachO::twolevel_hint); BigSize += Hints.offset; if (BigSize > FileSize) return malformedError("offset field plus nhints times sizeof(struct " diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 2b80d0bf538fb..652a2b2497efe 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -125,7 +125,7 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) { case file_magic::elf_executable: case file_magic::elf_shared_object: case file_magic::elf_core: - return errorOrToExpected(createELFObjectFile(Object)); + return createELFObjectFile(Object); case file_magic::macho_object: case file_magic::macho_executable: case file_magic::macho_fixed_virtual_memory_shared_lib: @@ -141,7 +141,7 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type) { case file_magic::coff_object: case file_magic::coff_import_library: case file_magic::pecoff_executable: - return errorOrToExpected(createCOFFObjectFile(Object)); + return createCOFFObjectFile(Object); case file_magic::wasm_object: return createWasmObjectFile(Object); } diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp index 1042d29d2350c..2e7f2cc0d1d93 100644 --- a/lib/Object/SymbolicFile.cpp +++ b/lib/Object/SymbolicFile.cpp @@ -80,10 +80,12 @@ SymbolicFile::createSymbolicFile(MemoryBufferRef Object, file_magic Type, if (!Obj || !Context) return std::move(Obj); - ErrorOr<MemoryBufferRef> BCData = + Expected<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInObject(*Obj->get()); - if (!BCData) + if (!BCData) { + consumeError(BCData.takeError()); return std::move(Obj); + } return IRObjectFile::create( MemoryBufferRef(BCData->getBuffer(), Object.getBufferIdentifier()), diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index 1954335eeeb4c..86ce9c2209c26 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -178,14 +178,16 @@ static wasm::WasmTable readTable(const uint8_t *&Ptr) { } static Error readSection(WasmSection &Section, const uint8_t *&Ptr, - const uint8_t *Start) { - // TODO(sbc): Avoid reading past EOF in the case of malformed files.
+ const uint8_t *Start, const uint8_t *Eof) { Section.Offset = Ptr - Start; Section.Type = readVaruint7(Ptr); uint32_t Size = readVaruint32(Ptr); if (Size == 0) return make_error<GenericBinaryError>("Zero length section", object_error::parse_failed); + if (Ptr + Size > Eof) + return make_error<GenericBinaryError>("Section too large", + object_error::parse_failed); Section.Content = ArrayRef<uint8_t>(Ptr, Size); Ptr += Size; return Error::success(); @@ -193,7 +195,6 @@ static Error readSection(WasmSection &Section, const uint8_t *&Ptr, WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) : ObjectFile(Binary::ID_Wasm, Buffer) { - LinkingData.DataAlignment = 0; LinkingData.DataSize = 0; ErrorAsOutParameter ErrAsOutParam(&Err); @@ -222,7 +223,7 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err) WasmSection Sec; while (Ptr < Eof) { - if ((Err = readSection(Sec, Ptr, getPtr(0)))) + if ((Err = readSection(Sec, Ptr, getPtr(0), Eof))) return; if ((Err = parseSection(Sec))) return; @@ -385,16 +386,16 @@ Error WasmObjectFile::parseLinkingSection(const uint8_t *Ptr, case wasm::WASM_DATA_SIZE: LinkingData.DataSize = readVaruint32(Ptr); break; - case wasm::WASM_DATA_ALIGNMENT: - LinkingData.DataAlignment = readVaruint32(Ptr); - break; - case wasm::WASM_SEGMENT_NAMES: { + case wasm::WASM_SEGMENT_INFO: { uint32_t Count = readVaruint32(Ptr); if (Count > DataSegments.size()) return make_error<GenericBinaryError>("Too many segment names", object_error::parse_failed); - for (uint32_t i = 0; i < Count; i++) + for (uint32_t i = 0; i < Count; i++) { DataSegments[i].Data.Name = readString(Ptr); + DataSegments[i].Data.Alignment = readVaruint32(Ptr); + DataSegments[i].Data.Flags = readVaruint32(Ptr); + } break; } case wasm::WASM_STACK_POINTER: @@ -734,6 +735,8 @@ Error WasmObjectFile::parseDataSection(const uint8_t *Ptr, const uint8_t *End) { return Err; uint32_t Size = readVaruint32(Ptr); Segment.Data.Content = ArrayRef<uint8_t>(Ptr, Size); + Segment.Data.Alignment = 0; + Segment.Data.Flags = 0; Segment.SectionOffset = Ptr - Start; Ptr += Size; DataSegments.push_back(Segment); diff --git a/lib/ObjectYAML/ELFYAML.cpp b/lib/ObjectYAML/ELFYAML.cpp index d6123499a222e..b19a57585a30a 100644 --- a/lib/ObjectYAML/ELFYAML.cpp +++ b/lib/ObjectYAML/ELFYAML.cpp @@ -246,7 +246,6 @@ void ScalarEnumerationTraits<ELFYAML::ELF_ELFOSABI>::enumeration( ECase(ELFOSABI_HPUX); ECase(ELFOSABI_NETBSD); ECase(ELFOSABI_GNU); - ECase(ELFOSABI_GNU); ECase(ELFOSABI_HURD); ECase(ELFOSABI_SOLARIS); ECase(ELFOSABI_AIX); @@ -260,10 +259,12 @@ ECase(ELFOSABI_AROS); ECase(ELFOSABI_FENIXOS); ECase(ELFOSABI_CLOUDABI); - ECase(ELFOSABI_C6000_ELFABI); ECase(ELFOSABI_AMDGPU_HSA); - ECase(ELFOSABI_C6000_LINUX); + ECase(ELFOSABI_AMDGPU_PAL); + ECase(ELFOSABI_AMDGPU_MESA3D); ECase(ELFOSABI_ARM); + ECase(ELFOSABI_C6000_ELFABI); + ECase(ELFOSABI_C6000_LINUX); ECase(ELFOSABI_STANDALONE); #undef ECase } @@ -359,7 +360,18 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO, BCase(EF_AVR_ARCH_XMEGA6); BCase(EF_AVR_ARCH_XMEGA7); break; + case ELF::EM_RISCV: + BCase(EF_RISCV_RVC); + BCaseMask(EF_RISCV_FLOAT_ABI_SOFT, EF_RISCV_FLOAT_ABI); + BCaseMask(EF_RISCV_FLOAT_ABI_SINGLE, EF_RISCV_FLOAT_ABI); + BCaseMask(EF_RISCV_FLOAT_ABI_DOUBLE, EF_RISCV_FLOAT_ABI); + BCaseMask(EF_RISCV_FLOAT_ABI_QUAD, EF_RISCV_FLOAT_ABI); + BCase(EF_RISCV_RVE); + break; case ELF::EM_AMDGPU: + BCaseMask(EF_AMDGPU_ARCH_R600, EF_AMDGPU_ARCH); + BCaseMask(EF_AMDGPU_ARCH_GCN, EF_AMDGPU_ARCH); + break; case ELF::EM_X86_64: break; default: @@ -393,6 +405,8 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration( ECase(SHT_GROUP);
ECase(SHT_SYMTAB_SHNDX); ECase(SHT_LOOS); + ECase(SHT_ANDROID_REL); + ECase(SHT_ANDROID_RELA); ECase(SHT_LLVM_ODRTAB); ECase(SHT_GNU_ATTRIBUTES); ECase(SHT_GNU_HASH); @@ -450,6 +464,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO, BCase(SHF_OS_NONCONFORMING); BCase(SHF_GROUP); BCase(SHF_TLS); + BCase(SHF_COMPRESSED); switch (Object->Header.Machine) { case ELF::EM_ARM: BCase(SHF_ARM_PURECODE); diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp index a5c1d13598c02..2f961cf68a04b 100644 --- a/lib/ObjectYAML/WasmYAML.cpp +++ b/lib/ObjectYAML/WasmYAML.cpp @@ -58,9 +58,8 @@ static void sectionMapping(IO &IO, WasmYAML::LinkingSection &Section) { commonSectionMapping(IO, Section); IO.mapRequired("Name", Section.Name); IO.mapRequired("DataSize", Section.DataSize); - IO.mapRequired("DataAlignment", Section.DataAlignment); IO.mapOptional("SymbolInfo", Section.SymbolInfos); - IO.mapOptional("SegmentNames", Section.SegmentNames); + IO.mapOptional("SegmentInfo", Section.SegmentInfos); } static void sectionMapping(IO &IO, WasmYAML::CustomSection &Section) { @@ -266,6 +265,14 @@ void MappingTraits<WasmYAML::NameEntry>::mapping( IO.mapRequired("Name", NameEntry.Name); } +void MappingTraits<WasmYAML::SegmentInfo>::mapping( + IO &IO, WasmYAML::SegmentInfo &SegmentInfo) { + IO.mapRequired("Index", SegmentInfo.Index); + IO.mapRequired("Name", SegmentInfo.Name); + IO.mapRequired("Alignment", SegmentInfo.Alignment); + IO.mapRequired("Flags", SegmentInfo.Flags); +} + void MappingTraits<WasmYAML::LocalDecl>::mapping( IO &IO, WasmYAML::LocalDecl &LocalDecl) { IO.mapRequired("Type", LocalDecl.Type); diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index c277b5b14e750..8796ff56e5e1f 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -41,7 +41,7 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/RegionInfo.h" @@ -63,6 +63,7 @@ #include "llvm/Transforms/GCOVProfiler.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" +#include "llvm/Transforms/IPO/CalledValuePropagation.h" #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/Transforms/IPO/CrossDSOCFI.h" #include "llvm/Transforms/IPO/DeadArgumentElimination.h" @@ -362,6 +363,12 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, invokePeepholeEPCallbacks(FPM, Level); + // For PGO use pipeline, try to optimize memory intrinsics such as memcpy + // using the size value profile. Don't perform this when optimizing for size. + if (PGOOpt && !PGOOpt->ProfileUseFile.empty() && + !isOptimizingForSize(Level)) + FPM.addPass(PGOMemOPSizeOpt()); + FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass());
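The hunk above threads PGOMemOPSizeOpt into the function-simplification pipeline behind a profile-use check. For orientation, a minimal sketch of driving such a function pipeline standalone with the new pass manager (standard new-PM boilerplate; include paths reflect the tree of this era, and F is assumed to be a valid llvm::Function reference):

```cpp
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/PGOInstrumentation.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
using namespace llvm;

void runPipeline(Function &F) {
  FunctionPassManager FPM;
  FPM.addPass(PGOMemOPSizeOpt()); // memory-intrinsic size optimization
  FPM.addPass(TailCallElimPass());
  FPM.addPass(SimplifyCFGPass());

  // Register the analyses the passes may query, then run the pipeline.
  FunctionAnalysisManager FAM;
  PassBuilder PB;
  PB.registerFunctionAnalyses(FAM);
  FPM.run(F, FAM);
}
```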
if (Phase != ThinLTOPhase::PreLink) @@ -573,6 +581,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // years, it should be re-analyzed. MPM.addPass(IPSCCPPass()); + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. + MPM.addPass(CalledValuePropagationPass()); + // Optimize globals to try and fold them into constants. MPM.addPass(GlobalOptPass()); @@ -669,6 +681,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // Optimize globals now that the module is fully simplified. MPM.addPass(GlobalOptPass()); + MPM.addPass(GlobalDCEPass()); // Run partial inlining pass to partially inline functions that have // large bodies. @@ -913,6 +926,10 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. MPM.addPass(IPSCCPPass()); + + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. + MPM.addPass(CalledValuePropagationPass()); } // Now deduce any function attributes based in the current code. diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index bfe3dd782c1e0..20d1220ac3301 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -39,6 +39,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA()) #define MODULE_PASS(NAME, CREATE_PASS) #endif MODULE_PASS("always-inline", AlwaysInlinerPass()) +MODULE_PASS("called-value-propagation", CalledValuePropagationPass()) MODULE_PASS("constmerge", ConstantMergePass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) MODULE_PASS("deadargelim", DeadArgumentEliminationPass()) diff --git a/lib/ProfileData/Coverage/CoverageMapping.cpp b/lib/ProfileData/Coverage/CoverageMapping.cpp index 52f9447aa3e7b..ce9322969971f 100644 --- a/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -296,7 +297,7 @@ namespace { /// An instantiation set is a collection of functions that have the same source /// code, ie, template functions specializations. class FunctionInstantiationSetCollector { - using MapT = DenseMap>; + using MapT = std::map>; MapT InstantiatedFunctions; public: @@ -671,6 +672,59 @@ CoverageData CoverageMapping::getCoverageForExpansion( return ExpansionCoverage; } +LineCoverageStats::LineCoverageStats( + ArrayRef LineSegments, + const CoverageSegment *WrappedSegment, unsigned Line) + : ExecutionCount(0), HasMultipleRegions(false), Mapped(false), Line(Line), + LineSegments(LineSegments), WrappedSegment(WrappedSegment) { + // Find the minimum number of regions which start in this line. 
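The pipeline changes above schedule CalledValuePropagationPass right after IPSCCP in both the simplification and LTO pipelines, and the PassRegistry.def hunk registers it under the name called-value-propagation. A minimal sketch of driving it through the new pass manager (the registration boilerplate is the standard PassBuilder sequence; the parse call's error-handling signature has varied across releases, so treat that part as schematic):

PassBuilder PB;
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
CGSCCAnalysisManager CGAM;
ModuleAnalysisManager MAM;
PB.registerModuleAnalyses(MAM);
PB.registerCGSCCAnalyses(CGAM);
PB.registerFunctionAnalyses(FAM);
PB.registerLoopAnalyses(LAM);
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

ModulePassManager MPM;
// Exactly the name added to PassRegistry.def in this patch.
if (!PB.parsePassPipeline(MPM, "called-value-propagation"))
  report_fatal_error("invalid pipeline");
MPM.run(M, MAM); // M is the Module being optimized

The same spelling is reachable from the command line as opt -passes=called-value-propagation.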
+ unsigned MinRegionCount = 0; + auto isStartOfRegion = [](const CoverageSegment *S) { + return !S->IsGapRegion && S->HasCount && S->IsRegionEntry; + }; + for (unsigned I = 0; I < LineSegments.size() && MinRegionCount < 2; ++I) + if (isStartOfRegion(LineSegments[I])) + ++MinRegionCount; + + bool StartOfSkippedRegion = !LineSegments.empty() && + !LineSegments.front()->HasCount && + LineSegments.front()->IsRegionEntry; + + HasMultipleRegions = MinRegionCount > 1; + Mapped = + !StartOfSkippedRegion && + ((WrappedSegment && WrappedSegment->HasCount) || (MinRegionCount > 0)); + + if (!Mapped) + return; + + // Pick the max count from the non-gap, region entry segments. If there + // aren't any, use the wrapped count. + if (!MinRegionCount) { + ExecutionCount = WrappedSegment->Count; + return; + } + for (const auto *LS : LineSegments) + if (isStartOfRegion(LS)) + ExecutionCount = std::max(ExecutionCount, LS->Count); +} + +LineCoverageIterator &LineCoverageIterator::operator++() { + if (Next == CD.end()) { + Stats = LineCoverageStats(); + Ended = true; + return *this; + } + if (Segments.size()) + WrappedSegment = Segments.back(); + Segments.clear(); + while (Next != CD.end() && Next->Line == Line) + Segments.push_back(&*Next++); + Stats = LineCoverageStats(Segments, WrappedSegment, Line); + ++Line; + return *this; +} + static std::string getCoverageMapErrString(coveragemap_error Err) { switch (Err) { case coveragemap_error::success: diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index 1b39a0695aac6..cdf50c2df0c85 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -733,8 +733,6 @@ Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, } Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { - static unsigned RecordIndex = 0; - ArrayRef Data; Error E = Index->getRecords(Data); diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp index 234fe02ac8a87..1028c35e8c2da 100644 --- a/lib/ProfileData/SampleProfReader.cpp +++ b/lib/ProfileData/SampleProfReader.cpp @@ -759,8 +759,6 @@ setupMemoryBuffer(const Twine &Filename) { /// /// \param Filename The file to open. /// -/// \param Reader The reader to instantiate according to \p Filename's format. -/// /// \param C The LLVM context to use to emit diagnostics. /// /// \returns an error code indicating the status of the created reader. @@ -776,8 +774,6 @@ SampleProfileReader::create(const Twine &Filename, LLVMContext &C) { /// /// \param B The memory buffer to create the reader from (assumes ownership). /// -/// \param Reader The reader to instantiate according to \p Filename's format. -/// /// \param C The LLVM context to use to emit diagnostics. /// /// \returns an error code indicating the status of the created reader. diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp index b9d357ab15ef1..59c4885fcdbe4 100644 --- a/lib/ProfileData/SampleProfWriter.cpp +++ b/lib/ProfileData/SampleProfWriter.cpp @@ -251,8 +251,6 @@ std::error_code SampleProfileWriterBinary::write(const FunctionSamples &S) { /// /// \param Filename The file to create. /// -/// \param Writer The writer to instantiate according to the specified format. -/// /// \param Format Encoding format for the profile file. /// /// \returns an error code indicating the status of the created writer. 
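The LineCoverageStats constructor above implements the per-line count rule: among non-gap segments that enter a region on the line, report the maximum count; if no region starts there, inherit the count of the segment wrapping in from an earlier line. The same rule restated as self-contained C++ (plain structs, not the LLVM coverage types):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Seg { bool IsRegionEntry, IsGapRegion, HasCount; uint64_t Count; };

// OnLine holds the segments starting on this line; Wrapped points at the
// count carried over from a previous line, or is null if there is none.
static uint64_t lineCount(const std::vector<Seg> &OnLine,
                          const uint64_t *Wrapped) {
  uint64_t Max = 0;
  bool AnyEntry = false;
  for (const Seg &S : OnLine)
    if (S.IsRegionEntry && !S.IsGapRegion && S.HasCount) {
      AnyEntry = true;
      Max = std::max(Max, S.Count);
    }
  return AnyEntry ? Max : (Wrapped ? *Wrapped : 0);
}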
@@ -274,8 +272,6 @@ SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) { /// /// \param OS The output stream to store the profile data to. /// -/// \param Writer The writer to instantiate according to the specified format. -/// /// \param Format Encoding format for the profile file. /// /// \returns an error code indicating the status of the created writer. diff --git a/lib/Support/AMDGPUCodeObjectMetadata.cpp b/lib/Support/AMDGPUMetadata.cpp similarity index 74% rename from lib/Support/AMDGPUCodeObjectMetadata.cpp rename to lib/Support/AMDGPUMetadata.cpp index 863093ab7def7..ec2714cfc1c55 100644 --- a/lib/Support/AMDGPUCodeObjectMetadata.cpp +++ b/lib/Support/AMDGPUMetadata.cpp @@ -1,4 +1,4 @@ -//===--- AMDGPUCodeObjectMetadata.cpp ---------------------------*- C++ -*-===// +//===--- AMDGPUMetadata.cpp -------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,17 +8,17 @@ //===----------------------------------------------------------------------===// // /// \file -/// \brief AMDGPU Code Object Metadata definitions and in-memory -/// representations. +/// \brief AMDGPU metadata definitions and in-memory representations. /// // //===----------------------------------------------------------------------===// -#include "llvm/Support/AMDGPUCodeObjectMetadata.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/YAMLTraits.h" using namespace llvm::AMDGPU; -using namespace llvm::AMDGPU::CodeObject; +using namespace llvm::AMDGPU::HSAMD; LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata) LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata) @@ -96,52 +96,58 @@ struct MappingTraits { MD.mWorkGroupSizeHint, std::vector()); YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint, MD.mVecTypeHint, std::string()); + YIO.mapOptional(Kernel::Attrs::Key::RuntimeHandle, MD.mRuntimeHandle, + std::string()); } }; template <> struct MappingTraits { static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) { + YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); + YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize); YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign); YIO.mapRequired(Kernel::Arg::Key::ValueKind, MD.mValueKind); YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType); YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign, uint32_t(0)); - YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, - AccessQualifier::Unknown); YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual, AddressSpaceQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual, + AccessQualifier::Unknown); + YIO.mapOptional(Kernel::Arg::Key::ActualAccQual, MD.mActualAccQual, + AccessQualifier::Unknown); YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false); - YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false); YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false); - YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string()); - YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string()); + YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false); } }; template <> struct MappingTraits { static void mapping(IO &YIO, Kernel::CodeProps::Metadata &MD) { - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentSize, - MD.mKernargSegmentSize, uint64_t(0)); - 
YIO.mapOptional(Kernel::CodeProps::Key::WorkgroupGroupSegmentSize, - MD.mWorkgroupGroupSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemPrivateSegmentSize, - MD.mWorkitemPrivateSegmentSize, uint32_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontNumSGPRs, - MD.mWavefrontNumSGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WorkitemNumVGPRs, - MD.mWorkitemNumVGPRs, uint16_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::KernargSegmentAlign, - MD.mKernargSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::GroupSegmentAlign, - MD.mGroupSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::PrivateSegmentAlign, - MD.mPrivateSegmentAlign, uint8_t(0)); - YIO.mapOptional(Kernel::CodeProps::Key::WavefrontSize, - MD.mWavefrontSize, uint8_t(0)); + YIO.mapRequired(Kernel::CodeProps::Key::KernargSegmentSize, + MD.mKernargSegmentSize); + YIO.mapRequired(Kernel::CodeProps::Key::GroupSegmentFixedSize, + MD.mGroupSegmentFixedSize); + YIO.mapRequired(Kernel::CodeProps::Key::PrivateSegmentFixedSize, + MD.mPrivateSegmentFixedSize); + YIO.mapRequired(Kernel::CodeProps::Key::KernargSegmentAlign, + MD.mKernargSegmentAlign); + YIO.mapRequired(Kernel::CodeProps::Key::WavefrontSize, + MD.mWavefrontSize); + YIO.mapOptional(Kernel::CodeProps::Key::NumSGPRs, + MD.mNumSGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::NumVGPRs, + MD.mNumVGPRs, uint16_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::MaxFlatWorkGroupSize, + MD.mMaxFlatWorkGroupSize, uint32_t(0)); + YIO.mapOptional(Kernel::CodeProps::Key::IsDynamicCallStack, + MD.mIsDynamicCallStack, false); + YIO.mapOptional(Kernel::CodeProps::Key::IsXNACKEnabled, + MD.mIsXNACKEnabled, false); } }; @@ -165,6 +171,7 @@ template <> struct MappingTraits { static void mapping(IO &YIO, Kernel::Metadata &MD) { YIO.mapRequired(Kernel::Key::Name, MD.mName); + YIO.mapRequired(Kernel::Key::SymbolName, MD.mSymbolName); YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string()); YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion, std::vector()); @@ -180,8 +187,8 @@ struct MappingTraits { }; template <> -struct MappingTraits { - static void mapping(IO &YIO, CodeObject::Metadata &MD) { +struct MappingTraits { + static void mapping(IO &YIO, HSAMD::Metadata &MD) { YIO.mapRequired(Key::Version, MD.mVersion); YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector()); if (!MD.mKernels.empty() || !YIO.outputting()) @@ -192,25 +199,35 @@ struct MappingTraits { } // end namespace yaml namespace AMDGPU { -namespace CodeObject { +namespace HSAMD { -/* static */ -std::error_code Metadata::fromYamlString( - std::string YamlString, Metadata &CodeObjectMetadata) { - yaml::Input YamlInput(YamlString); - YamlInput >> CodeObjectMetadata; +std::error_code fromString(std::string String, Metadata &HSAMetadata) { + yaml::Input YamlInput(String); + YamlInput >> HSAMetadata; return YamlInput.error(); } -/* static */ -std::error_code Metadata::toYamlString( - Metadata CodeObjectMetadata, std::string &YamlString) { - raw_string_ostream YamlStream(YamlString); +std::error_code toString(Metadata HSAMetadata, std::string &String) { + raw_string_ostream YamlStream(String); yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits::max()); - YamlOutput << CodeObjectMetadata; + YamlOutput << HSAMetadata; + return std::error_code(); +} + +} // end namespace HSAMD + +namespace PALMD { + +std::error_code toString(const Metadata &PALMetadata, std::string &String) { + raw_string_ostream 
Stream(String); + for (auto I = PALMetadata.begin(), E = PALMetadata.end(); I != E; ++I) { + Stream << Twine(I == PALMetadata.begin() ? " 0x" : ",0x"); + Stream << Twine::utohexstr(*I); + } + Stream.flush(); return std::error_code(); } -} // end namespace CodeObject +} // end namespace PALMD } // end namespace AMDGPU } // end namespace llvm diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index bed40a33704d1..56aaf10ec2cd4 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -30,7 +30,7 @@ elseif( CMAKE_HOST_UNIX ) endif( MSVC OR MINGW ) add_llvm_library(LLVMSupport - AMDGPUCodeObjectMetadata.cpp + AMDGPUMetadata.cpp APFloat.cpp APInt.cpp APSInt.cpp diff --git a/lib/Support/CachePruning.cpp b/lib/Support/CachePruning.cpp index 60d0964f27646..5a9580cf44097 100644 --- a/lib/Support/CachePruning.cpp +++ b/lib/Support/CachePruning.cpp @@ -182,19 +182,9 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { bool ShouldComputeSize = (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0); - // Keep track of space + // Keep track of space. Needs to be kept ordered by size for determinism. std::set> FileSizes; uint64_t TotalSize = 0; - // Helper to add a path to the set of files to consider for size-based - // pruning, sorted by size. - auto AddToFileListForSizePruning = - [&](StringRef Path) { - if (!ShouldComputeSize) - return; - TotalSize += FileStatus.getSize(); - FileSizes.insert( - std::make_pair(FileStatus.getSize(), std::string(Path))); - }; // Walk the entire directory cache, looking for unused files. std::error_code EC; @@ -212,13 +202,14 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { // Look at this file. If we can't stat it, there's nothing interesting // there. - if (sys::fs::status(File->path(), FileStatus)) { + ErrorOr StatusOrErr = File->status(); + if (!StatusOrErr) { DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n"); continue; } // If the file hasn't been used recently enough, delete it - const auto FileAccessTime = FileStatus.getLastAccessedTime(); + const auto FileAccessTime = StatusOrErr->getLastAccessedTime(); auto FileAge = CurrentTime - FileAccessTime; if (FileAge > Policy.Expiration) { DEBUG(dbgs() << "Remove " << File->path() << " (" @@ -228,7 +219,10 @@ bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) { } // Leave it here for now, but add it to the list of size-based pruning. - AddToFileListForSizePruning(File->path()); + if (!ShouldComputeSize) + continue; + TotalSize += StatusOrErr->getSize(); + FileSizes.insert({StatusOrErr->getSize(), std::string(File->path())}); } // Prune for size now if needed diff --git a/lib/Support/Chrono.cpp b/lib/Support/Chrono.cpp index daccaf1fc103d..a39b485bd1387 100644 --- a/lib/Support/Chrono.cpp +++ b/lib/Support/Chrono.cpp @@ -51,4 +51,44 @@ raw_ostream &operator<<(raw_ostream &OS, TimePoint<> TP) { .count())); } +void format_provider>::format(const TimePoint<> &T, raw_ostream &OS, + StringRef Style) { + using namespace std::chrono; + TimePoint Truncated = time_point_cast(T); + auto Fractional = T - Truncated; + struct tm LT = getStructTM(Truncated); + // Handle extensions first. strftime mangles unknown %x on some platforms. + if (Style.empty()) Style = "%Y-%m-%d %H:%M:%S.%N"; + std::string Format; + raw_string_ostream FStream(Format); + for (unsigned I = 0; I < Style.size(); ++I) { + if (Style[I] == '%' && Style.size() > I + 1) switch (Style[I + 1]) { + case 'L': // Milliseconds, from Ruby. 
+ FStream << llvm::format( + "%.3lu", duration_cast(Fractional).count()); + ++I; + continue; + case 'f': // Microseconds, from Python. + FStream << llvm::format( + "%.6lu", duration_cast(Fractional).count()); + ++I; + continue; + case 'N': // Nanoseconds, from date(1). + FStream << llvm::format( + "%.6lu", duration_cast(Fractional).count()); + ++I; + continue; + case '%': // Consume %%, so %%f parses as (%%)f not %(%f) + FStream << "%%"; + ++I; + continue; + } + FStream << Style[I]; + } + FStream.flush(); + char Buffer[256]; // Should be enough for anywhen. + size_t Len = strftime(Buffer, sizeof(Buffer), Format.c_str(), <); + OS << (Len ? Buffer : "BAD-DATE-FORMAT"); +} + } // namespace llvm diff --git a/lib/Support/FormatVariadic.cpp b/lib/Support/FormatVariadic.cpp index de61dae814b57..6dd133e6c50a2 100644 --- a/lib/Support/FormatVariadic.cpp +++ b/lib/Support/FormatVariadic.cpp @@ -91,8 +91,6 @@ formatv_object_base::parseReplacementItem(StringRef Spec) { std::pair formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) { - StringRef Rep; - StringRef Remainder; std::size_t From = 0; while (From < Fmt.size() && From != StringRef::npos) { std::size_t BO = Fmt.find_first_of('{', From); diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index e5bd7e0e44ffa..b1d5b44ebd001 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -339,6 +339,7 @@ enum ProcessorTypes { AMD_BTVER1, AMD_BTVER2, AMDFAM17H, + INTEL_KNM, // Entries below this are not in libgcc/compiler-rt. INTEL_i386, INTEL_i486, @@ -759,6 +760,9 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x57: *Type = INTEL_KNL; // knl break; + case 0x85: + *Type = INTEL_KNM; // knm + break; default: // Unknown family 6 CPU, try to guess. if (Features & (1 << FEATURE_AVX512F)) { @@ -1167,6 +1171,8 @@ StringRef sys::getHostCPUName() { return "goldmont"; case INTEL_KNL: return "knl"; + case INTEL_KNM: + return "knm"; case INTEL_X86_64: return "x86-64"; case INTEL_NOCONA: diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index 3ee3af7731e6b..81f3fd0b887c8 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -201,12 +201,11 @@ LockFileManager::LockFileManager(StringRef FileName) Out.close(); if (Out.has_error()) { - // We failed to write out PID, so make up an excuse, remove the + // We failed to write out PID, so report the error, remove the // unique lock file, and fail. - auto EC = make_error_code(errc::no_space_on_device); std::string S("failed to write to "); S.append(UniqueLockFileName.str()); - setError(EC, S); + setError(Out.error(), S); sys::fs::remove(UniqueLockFileName); return; } diff --git a/lib/Support/MD5.cpp b/lib/Support/MD5.cpp index 545a64cfc7679..a531722792362 100644 --- a/lib/Support/MD5.cpp +++ b/lib/Support/MD5.cpp @@ -230,7 +230,7 @@ void MD5::update(StringRef Str) { } /// \brief Finish the hash and place the resulting hash into \p result. -/// \param result is assumed to be a minimum of 16-bytes in size. +/// \param Result is assumed to be a minimum of 16-bytes in size. void MD5::final(MD5Result &Result) { unsigned long used, free; diff --git a/lib/Support/Parallel.cpp b/lib/Support/Parallel.cpp index ab2cfdebf07d4..010e42916f957 100644 --- a/lib/Support/Parallel.cpp +++ b/lib/Support/Parallel.cpp @@ -9,6 +9,7 @@ #include "llvm/Support/Parallel.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Threading.h" #include #include @@ -70,8 +71,7 @@ Executor *Executor::getDefaultExecutor() { /// in filo order. 
class ThreadPoolExecutor : public Executor { public: - explicit ThreadPoolExecutor( - unsigned ThreadCount = std::thread::hardware_concurrency()) + explicit ThreadPoolExecutor(unsigned ThreadCount = hardware_concurrency()) : Done(ThreadCount) { // Spawn all but one of the threads in another thread as spawning threads // can take a while. diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index f30e8a8b0cb70..9692acb528303 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -952,11 +952,11 @@ ErrorOr md5_contents(const Twine &Path) { return Result; } -bool exists(file_status status) { +bool exists(const basic_file_status &status) { return status_known(status) && status.type() != file_type::file_not_found; } -bool status_known(file_status s) { +bool status_known(const basic_file_status &s) { return s.type() != file_type::status_error; } @@ -967,7 +967,7 @@ file_type get_file_type(const Twine &Path, bool Follow) { return st.type(); } -bool is_directory(file_status status) { +bool is_directory(const basic_file_status &status) { return status.type() == file_type::directory_file; } @@ -979,7 +979,7 @@ std::error_code is_directory(const Twine &path, bool &result) { return std::error_code(); } -bool is_regular_file(file_status status) { +bool is_regular_file(const basic_file_status &status) { return status.type() == file_type::regular_file; } @@ -991,7 +991,7 @@ std::error_code is_regular_file(const Twine &path, bool &result) { return std::error_code(); } -bool is_symlink_file(file_status status) { +bool is_symlink_file(const basic_file_status &status) { return status.type() == file_type::symlink_file; } @@ -1003,7 +1003,7 @@ std::error_code is_symlink_file(const Twine &path, bool &result) { return std::error_code(); } -bool is_other(file_status status) { +bool is_other(const basic_file_status &status) { return exists(status) && !is_regular_file(status) && !is_directory(status); @@ -1017,17 +1017,14 @@ std::error_code is_other(const Twine &Path, bool &Result) { return std::error_code(); } -void directory_entry::replace_filename(const Twine &filename, file_status st) { +void directory_entry::replace_filename(const Twine &filename, + basic_file_status st) { SmallString<128> path = path::parent_path(Path); path::append(path, filename); Path = path.str(); Status = st; } -std::error_code directory_entry::status(file_status &result) const { - return fs::status(Path, result, FollowSymlinks); -} - ErrorOr getPermissions(const Twine &Path) { file_status Status; if (std::error_code EC = status(Path, Status)) diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp index caec993ee1653..1c8cc6e83ad1a 100644 --- a/lib/Support/Process.cpp +++ b/lib/Support/Process.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Process.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include "llvm/Support/FileSystem.h" @@ -26,9 +27,14 @@ using namespace sys; //=== independent code. 
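A usage sketch for the TimePoint format_provider added in Chrono.cpp above. %L, %f, and %N are the sub-second extensions expanded before the remaining style string reaches strftime; the initializing conversion on the first line assumes the host system_clock duration converts to nanoseconds without loss, which holds on common platforms:

#include "llvm/Support/Chrono.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <chrono>
using namespace llvm;

int main() {
  sys::TimePoint<> Now = std::chrono::system_clock::now();
  // An empty style falls back to "%Y-%m-%d %H:%M:%S.%N".
  outs() << formatv("{0}", Now) << '\n';
  // Ruby-style milliseconds via the %L extension.
  outs() << formatv("{0:%H:%M:%S.%L}", Now) << '\n';
}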
//===----------------------------------------------------------------------===// -Optional Process::FindInEnvPath(const std::string& EnvName, - const std::string& FileName) -{ +Optional Process::FindInEnvPath(StringRef EnvName, + StringRef FileName) { + return FindInEnvPath(EnvName, FileName, {}); +} + +Optional Process::FindInEnvPath(StringRef EnvName, + StringRef FileName, + ArrayRef IgnoreList) { assert(!path::is_absolute(FileName)); Optional FoundPath; Optional OptPath = Process::GetEnv(EnvName); @@ -39,10 +45,13 @@ Optional Process::FindInEnvPath(const std::string& EnvName, SmallVector Dirs; SplitString(OptPath.getValue(), Dirs, EnvPathSeparatorStr); - for (const auto &Dir : Dirs) { + for (StringRef Dir : Dirs) { if (Dir.empty()) continue; + if (any_of(IgnoreList, [&](StringRef S) { return fs::equivalent(S, Dir); })) + continue; + SmallString<128> FilePath(Dir); path::append(FilePath, FileName); if (fs::exists(Twine(FilePath))) { diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp index 47e960e243cf5..119bb871d4c0a 100644 --- a/lib/Support/SmallPtrSet.cpp +++ b/lib/Support/SmallPtrSet.cpp @@ -61,6 +61,7 @@ SmallPtrSetImplBase::insert_imp_big(const void *Ptr) { else ++NumNonEmpty; // Track density. *Bucket = Ptr; + incrementEpoch(); return std::make_pair(Bucket, true); } diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index b0609d4fe047c..a8f6208a558c9 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -384,6 +384,11 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors, S.changeColor(raw_ostream::BLACK, true); S << "note: "; break; + case SourceMgr::DK_Remark: + if (ShowColors) + S.changeColor(raw_ostream::BLUE, true); + S << "remark: "; + break; } if (ShowColors) { diff --git a/lib/Support/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp index 25222b04119b4..a659a2afee6ae 100644 --- a/lib/Support/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -28,6 +28,11 @@ namespace llvm { bool SpecialCaseList::Matcher::insert(std::string Regexp, std::string &REError) { + if (Regexp.empty()) { + REError = "Supplied regexp was blank"; + return false; + } + if (Regex::isLiteralERE(Regexp)) { Strings.insert(Regexp); return true; diff --git a/lib/Support/ThreadPool.cpp b/lib/Support/ThreadPool.cpp index 22b7550d49714..f1b5bdf40c32b 100644 --- a/lib/Support/ThreadPool.cpp +++ b/lib/Support/ThreadPool.cpp @@ -14,14 +14,15 @@ #include "llvm/Support/ThreadPool.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #if LLVM_ENABLE_THREADS -// Default to std::thread::hardware_concurrency -ThreadPool::ThreadPool() : ThreadPool(std::thread::hardware_concurrency()) {} +// Default to hardware_concurrency +ThreadPool::ThreadPool() : ThreadPool(hardware_concurrency()) {} ThreadPool::ThreadPool(unsigned ThreadCount) : ActiveThreads(0), EnableFlag(true) { diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp index 6a10b988d4648..473c84808af16 100644 --- a/lib/Support/Threading.cpp +++ b/lib/Support/Threading.cpp @@ -47,6 +47,8 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void *), void *UserData, unsigned llvm::heavyweight_hardware_concurrency() { return 1; } +unsigned llvm::hardware_concurrency() { return 1; } + uint64_t llvm::get_threadid() { return 0; } uint32_t llvm::get_max_thread_name_length() { return 0; } @@ -71,6 +73,18 @@ unsigned llvm::heavyweight_hardware_concurrency() { return NumPhysical; } +unsigned 
llvm::hardware_concurrency() { +#if defined(HAVE_SCHED_GETAFFINITY) && defined(HAVE_CPU_COUNT) + cpu_set_t Set; + if (sched_getaffinity(0, sizeof(Set), &Set)) + return CPU_COUNT(&Set); +#endif + // Guard against std::thread::hardware_concurrency() returning 0. + if (unsigned Val = std::thread::hardware_concurrency()) + return Val; + return 1; +} + // Include the platform-specific parts of this class. #ifdef LLVM_ON_UNIX #include "Unix/Threading.inc" diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 8dc582ab95aae..4f0a30042b763 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -207,6 +207,7 @@ StringRef Triple::getOSTypeName(OSType Kind) { case WatchOS: return "watchos"; case Mesa3D: return "mesa3d"; case Contiki: return "contiki"; + case AMDPAL: return "amdpal"; } llvm_unreachable("Invalid OSType"); @@ -234,6 +235,7 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { case AMDOpenCL: return "amdopencl"; case CoreCLR: return "coreclr"; case OpenCL: return "opencl"; + case Simulator: return "simulator"; } llvm_unreachable("Invalid EnvironmentType!"); @@ -499,6 +501,7 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("watchos", Triple::WatchOS) .StartsWith("mesa3d", Triple::Mesa3D) .StartsWith("contiki", Triple::Contiki) + .StartsWith("amdpal", Triple::AMDPAL) .Default(Triple::UnknownOS); } @@ -523,6 +526,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { .StartsWith("amdopencl", Triple::AMDOpenCL) .StartsWith("coreclr", Triple::CoreCLR) .StartsWith("opencl", Triple::OpenCL) + .StartsWith("simulator", Triple::Simulator) .Default(Triple::UnknownEnvironment); } diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index dd39ef935bf92..cf812d008d3be 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -27,7 +27,7 @@ #if defined(__mips__) # if defined(__OpenBSD__) # include -# else +# elif !defined(__FreeBSD__) # include # endif #endif diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index d0bb6a4fffbc1..781a911ed57cd 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -217,11 +217,11 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) { return ""; } -TimePoint<> file_status::getLastAccessedTime() const { +TimePoint<> basic_file_status::getLastAccessedTime() const { return toTimePoint(fs_st_atime); } -TimePoint<> file_status::getLastModificationTime() const { +TimePoint<> basic_file_status::getLastModificationTime() const { return toTimePoint(fs_st_mtime); } @@ -713,6 +713,13 @@ std::error_code detail::directory_iterator_increment(detail::DirIterState &it) { return std::error_code(); } +ErrorOr directory_entry::status() const { + file_status s; + if (auto EC = fs::status(Path, s, FollowSymlinks)) + return EC; + return s; +} + #if !defined(F_GETPATH) static bool hasProcSelfFD() { // If we have a /proc filesystem mounted, we can quickly establish the @@ -809,12 +816,11 @@ static std::error_code remove_directories_impl(const T &Entry, directory_iterator End; while (Begin != End) { auto &Item = *Begin; - file_status st; - EC = Item.status(st); - if (EC && !IgnoreErrors) - return EC; + ErrorOr st = Item.status(); + if (!st && !IgnoreErrors) + return st.getError(); - if (is_directory(st)) { + if (is_directory(*st)) { EC = remove_directories_impl(Item, IgnoreErrors); if (EC && !IgnoreErrors) return EC; diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index c54bdedbde9bc..31462633ee837 
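One subtlety in the Threading.cpp hunk above: sched_getaffinity(2) follows the usual POSIX convention of returning 0 on success, so its result wants an explicit == 0 check before CPU_COUNT's answer can be trusted. A standalone sketch of the intended fallback chain under that convention (CPU_COUNT is a glibc extension and needs _GNU_SOURCE):

#if defined(__linux__)
#include <sched.h>   // sched_getaffinity, CPU_COUNT (requires _GNU_SOURCE)
#endif
#include <thread>

static unsigned affinityAwareConcurrency() {
#if defined(__linux__)
  cpu_set_t Set;
  if (sched_getaffinity(0, sizeof(Set), &Set) == 0) // 0 means success
    return CPU_COUNT(&Set);
#endif
  // std::thread::hardware_concurrency() may legally return 0.
  if (unsigned Val = std::thread::hardware_concurrency())
    return Val;
  return 1;
}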
100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -168,14 +168,14 @@ ErrorOr disk_space(const Twine &Path) { return SpaceInfo; } -TimePoint<> file_status::getLastAccessedTime() const { +TimePoint<> basic_file_status::getLastAccessedTime() const { FILETIME Time; Time.dwLowDateTime = LastAccessedTimeLow; Time.dwHighDateTime = LastAccessedTimeHigh; return toTimePoint(Time); } -TimePoint<> file_status::getLastModificationTime() const { +TimePoint<> basic_file_status::getLastModificationTime() const { FILETIME Time; Time.dwLowDateTime = LastWriteTimeLow; Time.dwHighDateTime = LastWriteTimeHigh; @@ -259,29 +259,32 @@ std::error_code create_hard_link(const Twine &to, const Twine &from) { std::error_code remove(const Twine &path, bool IgnoreNonExisting) { SmallVector path_utf16; - file_status ST; - if (std::error_code EC = status(path, ST)) { - if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting) - return EC; - return std::error_code(); - } - if (std::error_code ec = widenPath(path, path_utf16)) return ec; - if (ST.type() == file_type::directory_file) { - if (!::RemoveDirectoryW(c_str(path_utf16))) { - std::error_code EC = mapWindowsError(::GetLastError()); - if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting) - return EC; - } - return std::error_code(); - } - if (!::DeleteFileW(c_str(path_utf16))) { + // We don't know whether this is a file or a directory, and remove() can + // accept both. The usual way to delete a file or directory is to use one of + // the DeleteFile or RemoveDirectory functions, but that requires you to know + // which one it is. We could stat() the file to determine that, but that would + // cost us additional system calls, which can be slow in a directory + // containing a large number of files. So instead we call CreateFile directly. + // The important part is the FILE_FLAG_DELETE_ON_CLOSE flag, which causes the + // file to be deleted once it is closed. We also use the flags + // FILE_FLAG_BACKUP_SEMANTICS (which allows us to open directories), and + // FILE_FLAG_OPEN_REPARSE_POINT (don't follow symlinks). + ScopedFileHandle h(::CreateFileW( + c_str(path_utf16), DELETE, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_BACKUP_SEMANTICS | + FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_DELETE_ON_CLOSE, + NULL)); + if (!h) { std::error_code EC = mapWindowsError(::GetLastError()); if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting) return EC; } + return std::error_code(); } @@ -359,65 +362,142 @@ std::error_code is_local(int FD, bool &Result) { return is_local_internal(FinalPath, Result); } -std::error_code rename(const Twine &from, const Twine &to) { - // Convert to utf-16. 
- SmallVector wide_from; - SmallVector wide_to; - if (std::error_code ec = widenPath(from, wide_from)) - return ec; - if (std::error_code ec = widenPath(to, wide_to)) - return ec; +static std::error_code rename_internal(HANDLE FromHandle, const Twine &To, + bool ReplaceIfExists) { + SmallVector ToWide; + if (auto EC = widenPath(To, ToWide)) + return EC; - std::error_code ec = std::error_code(); + std::vector RenameInfoBuf(sizeof(FILE_RENAME_INFO) - sizeof(wchar_t) + + (ToWide.size() * sizeof(wchar_t))); + FILE_RENAME_INFO &RenameInfo = + *reinterpret_cast(RenameInfoBuf.data()); + RenameInfo.ReplaceIfExists = ReplaceIfExists; + RenameInfo.RootDirectory = 0; + RenameInfo.FileNameLength = ToWide.size(); + std::copy(ToWide.begin(), ToWide.end(), &RenameInfo.FileName[0]); + + SetLastError(ERROR_SUCCESS); + if (!SetFileInformationByHandle(FromHandle, FileRenameInfo, &RenameInfo, + RenameInfoBuf.size())) { + unsigned Error = GetLastError(); + if (Error == ERROR_SUCCESS) + Error = ERROR_CALL_NOT_IMPLEMENTED; // Wine doesn't always set error code. + return mapWindowsError(Error); + } - // Retry while we see recoverable errors. - // System scanners (eg. indexer) might open the source file when it is written - // and closed. + return std::error_code(); +} - bool TryReplace = true; +std::error_code rename(const Twine &From, const Twine &To) { + // Convert to utf-16. + SmallVector WideFrom; + SmallVector WideTo; + if (std::error_code EC = widenPath(From, WideFrom)) + return EC; + if (std::error_code EC = widenPath(To, WideTo)) + return EC; - for (int i = 0; i < 2000; i++) { - if (i > 0) - ::Sleep(1); + ScopedFileHandle FromHandle; + // Retry this a few times to defeat badly behaved file system scanners. + for (unsigned Retry = 0; Retry != 200; ++Retry) { + if (Retry != 0) + ::Sleep(10); + FromHandle = + ::CreateFileW(WideFrom.begin(), GENERIC_READ | DELETE, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (FromHandle) + break; + } + if (!FromHandle) + return mapWindowsError(GetLastError()); - if (TryReplace) { - // Try ReplaceFile first, as it is able to associate a new data stream - // with the destination even if the destination file is currently open. - if (::ReplaceFileW(wide_to.data(), wide_from.data(), NULL, 0, NULL, NULL)) + // We normally expect this loop to succeed after a few iterations. If it + // requires more than 200 tries, it's more likely that the failures are due to + // a true error, so stop trying. + for (unsigned Retry = 0; Retry != 200; ++Retry) { + auto EC = rename_internal(FromHandle, To, true); + + if (EC == + std::error_code(ERROR_CALL_NOT_IMPLEMENTED, std::system_category())) { + // Wine doesn't support SetFileInformationByHandle in rename_internal. + // Fall back to MoveFileEx. + if (::MoveFileExW(WideFrom.begin(), WideTo.begin(), + MOVEFILE_REPLACE_EXISTING)) return std::error_code(); + return mapWindowsError(GetLastError()); + } - DWORD ReplaceError = ::GetLastError(); - ec = mapWindowsError(ReplaceError); + if (!EC || EC != errc::permission_denied) + return EC; - // If ReplaceFileW returned ERROR_UNABLE_TO_MOVE_REPLACEMENT or - // ERROR_UNABLE_TO_MOVE_REPLACEMENT_2, retry but only use MoveFileExW(). - if (ReplaceError == ERROR_UNABLE_TO_MOVE_REPLACEMENT || - ReplaceError == ERROR_UNABLE_TO_MOVE_REPLACEMENT_2) { - TryReplace = false; - continue; - } - // If ReplaceFileW returned ERROR_UNABLE_TO_REMOVE_REPLACED, retry - // using ReplaceFileW(). 
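The remove() rewrite above relies on a classic Win32 idiom: opening the path with FILE_FLAG_DELETE_ON_CLOSE marks a file or directory for deletion when the last handle closes, so no preliminary stat is needed to pick between DeleteFileW and RemoveDirectoryW — a saved system call that matters in directories with many entries. A standalone, Windows-only sketch of the idiom (illustrative, not the LLVM implementation):

#include <windows.h>

static bool removeByHandle(const wchar_t *Path) {
  HANDLE H = ::CreateFileW(
      Path, DELETE,
      FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL,
      OPEN_EXISTING,
      FILE_FLAG_BACKUP_SEMANTICS |       // permit opening directories
          FILE_FLAG_OPEN_REPARSE_POINT | // do not follow symlinks
          FILE_FLAG_DELETE_ON_CLOSE,
      NULL);
  if (H == INVALID_HANDLE_VALUE)
    return false;
  return ::CloseHandle(H) != 0; // the deletion happens here
}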
- if (ReplaceError == ERROR_UNABLE_TO_REMOVE_REPLACED) + // The destination file probably exists and is currently open in another + // process, either because the file was opened without FILE_SHARE_DELETE or + // it is mapped into memory (e.g. using MemoryBuffer). Rename it in order to + // move it out of the way of the source file. Use FILE_FLAG_DELETE_ON_CLOSE + // to arrange for the destination file to be deleted when the other process + // closes it. + ScopedFileHandle ToHandle( + ::CreateFileW(WideTo.begin(), GENERIC_READ | DELETE, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL | FILE_FLAG_DELETE_ON_CLOSE, NULL)); + if (!ToHandle) { + auto EC = mapWindowsError(GetLastError()); + // Another process might have raced with us and moved the existing file + // out of the way before we had a chance to open it. If that happens, try + // to rename the source file again. + if (EC == errc::no_such_file_or_directory) continue; - // We get ERROR_FILE_NOT_FOUND if the destination file is missing. - // MoveFileEx can handle this case. - if (ReplaceError != ERROR_ACCESS_DENIED && - ReplaceError != ERROR_FILE_NOT_FOUND && - ReplaceError != ERROR_SHARING_VIOLATION) - break; + return EC; } - if (::MoveFileExW(wide_from.begin(), wide_to.begin(), - MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)) - return std::error_code(); + BY_HANDLE_FILE_INFORMATION FI; + if (!GetFileInformationByHandle(ToHandle, &FI)) + return mapWindowsError(GetLastError()); + + // Try to find a unique new name for the destination file. + for (unsigned UniqueId = 0; UniqueId != 200; ++UniqueId) { + std::string TmpFilename = (To + ".tmp" + utostr(UniqueId)).str(); + if (auto EC = rename_internal(ToHandle, TmpFilename, false)) { + if (EC == errc::file_exists || EC == errc::permission_denied) { + // Again, another process might have raced with us and moved the file + // before we could move it. Check whether this is the case, as it + // might have caused the permission denied error. If that was the + // case, we don't need to move it ourselves. + ScopedFileHandle ToHandle2(::CreateFileW( + WideTo.begin(), 0, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL)); + if (!ToHandle2) { + auto EC = mapWindowsError(GetLastError()); + if (EC == errc::no_such_file_or_directory) + break; + return EC; + } + BY_HANDLE_FILE_INFORMATION FI2; + if (!GetFileInformationByHandle(ToHandle2, &FI2)) + return mapWindowsError(GetLastError()); + if (FI.nFileIndexHigh != FI2.nFileIndexHigh || + FI.nFileIndexLow != FI2.nFileIndexLow || + FI.dwVolumeSerialNumber != FI2.dwVolumeSerialNumber) + break; + continue; + } + return EC; + } + break; + } - DWORD MoveError = ::GetLastError(); - ec = mapWindowsError(MoveError); - if (MoveError != ERROR_ACCESS_DENIED) break; + // Okay, the old destination file has probably been moved out of the way at + // this point, so try to rename the source file again. Still, another + // process might have raced with us to create and open the destination + // file, so we need to keep doing this until we succeed. } - return ec; + // The most likely root cause. + return errc::permission_denied; } std::error_code resize_file(int FD, uint64_t Size) { @@ -505,6 +585,15 @@ static bool isReservedName(StringRef path) { return false; } +static file_type file_type_from_attrs(DWORD Attrs) { + return (Attrs & FILE_ATTRIBUTE_DIRECTORY) ? 
file_type::directory_file + : file_type::regular_file; +} + +static perms perms_from_attrs(DWORD Attrs) { + return (Attrs & FILE_ATTRIBUTE_READONLY) ? (all_read | all_exe) : all_all; +} + static std::error_code getStatus(HANDLE FileHandle, file_status &Result) { if (FileHandle == INVALID_HANDLE_VALUE) goto handle_status_error; @@ -533,22 +622,14 @@ static std::error_code getStatus(HANDLE FileHandle, file_status &Result) { if (!::GetFileInformationByHandle(FileHandle, &Info)) goto handle_status_error; - { - file_type Type = (Info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) - ? file_type::directory_file - : file_type::regular_file; - perms Permissions = (Info.dwFileAttributes & FILE_ATTRIBUTE_READONLY) - ? (all_read | all_exe) - : all_all; - Result = file_status( - Type, Permissions, Info.nNumberOfLinks, - Info.ftLastAccessTime.dwHighDateTime, - Info.ftLastAccessTime.dwLowDateTime, - Info.ftLastWriteTime.dwHighDateTime, Info.ftLastWriteTime.dwLowDateTime, - Info.dwVolumeSerialNumber, Info.nFileSizeHigh, Info.nFileSizeLow, - Info.nFileIndexHigh, Info.nFileIndexLow); - return std::error_code(); - } + Result = file_status( + file_type_from_attrs(Info.dwFileAttributes), + perms_from_attrs(Info.dwFileAttributes), Info.nNumberOfLinks, + Info.ftLastAccessTime.dwHighDateTime, Info.ftLastAccessTime.dwLowDateTime, + Info.ftLastWriteTime.dwHighDateTime, Info.ftLastWriteTime.dwLowDateTime, + Info.dwVolumeSerialNumber, Info.nFileSizeHigh, Info.nFileSizeLow, + Info.nFileIndexHigh, Info.nFileIndexLow); + return std::error_code(); handle_status_error: DWORD LastError = ::GetLastError(); @@ -734,6 +815,16 @@ int mapped_file_region::alignment() { return SysInfo.dwAllocationGranularity; } +static basic_file_status status_from_find_data(WIN32_FIND_DATAW *FindData) { + return basic_file_status(file_type_from_attrs(FindData->dwFileAttributes), + perms_from_attrs(FindData->dwFileAttributes), + FindData->ftLastAccessTime.dwHighDateTime, + FindData->ftLastAccessTime.dwLowDateTime, + FindData->ftLastWriteTime.dwHighDateTime, + FindData->ftLastWriteTime.dwLowDateTime, + FindData->nFileSizeHigh, FindData->nFileSizeLow); +} + std::error_code detail::directory_iterator_construct(detail::DirIterState &it, StringRef path, bool follow_symlinks) { @@ -754,7 +845,9 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it, // Get the first directory entry. 
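Because directory_entry now carries the basic_file_status captured from the WIN32_FIND_DATA above, a traversal can consult sizes and types without a second system call per entry — exactly what the CachePruning change earlier in this patch exploits. A consuming-side sketch (error handling schematic):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/FileSystem.h"
using namespace llvm;

static uint64_t totalRegularFileSize(const Twine &Dir) {
  uint64_t Total = 0;
  std::error_code EC;
  for (sys::fs::directory_iterator I(Dir, EC), E; I != E && !EC;
       I.increment(EC)) {
    // Served from the cached find data on Windows; one stat on Unix.
    ErrorOr<sys::fs::basic_file_status> St = I->status();
    if (St && sys::fs::is_regular_file(*St))
      Total += St->getSize();
  }
  return Total;
}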
WIN32_FIND_DATAW FirstFind; - ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind)); + ScopedFindHandle FindHandle(::FindFirstFileExW( + c_str(path_utf16), FindExInfoBasic, &FirstFind, FindExSearchNameMatch, + NULL, FIND_FIRST_EX_LARGE_FETCH)); if (!FindHandle) return mapWindowsError(::GetLastError()); @@ -781,7 +874,8 @@ std::error_code detail::directory_iterator_construct(detail::DirIterState &it, it.IterationHandle = intptr_t(FindHandle.take()); SmallString<128> directory_entry_path(path); path::append(directory_entry_path, directory_entry_name_utf8); - it.CurrentEntry = directory_entry(directory_entry_path, follow_symlinks); + it.CurrentEntry = directory_entry(directory_entry_path, follow_symlinks, + status_from_find_data(&FirstFind)); return std::error_code(); } @@ -817,10 +911,15 @@ std::error_code detail::directory_iterator_increment(detail::DirIterState &it) { directory_entry_path_utf8)) return ec; - it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8)); + it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8), + status_from_find_data(&FindData)); return std::error_code(); } +ErrorOr directory_entry::status() const { + return Status; +} + static std::error_code realPathFromHandle(HANDLE H, SmallVectorImpl &RealPath) { RealPath.clear(); diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index c66457ca06625..d6b958d18449b 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -517,8 +517,7 @@ raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC, /// FD is the file descriptor that this writes to. If ShouldClose is true, this /// closes the file when the stream is destroyed. raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered) - : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose), - Error(false) { + : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose) { if (FD < 0 ) { ShouldClose = false; return; @@ -552,8 +551,10 @@ raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered) raw_fd_ostream::~raw_fd_ostream() { if (FD >= 0) { flush(); - if (ShouldClose && sys::Process::SafelyCloseFileDescriptor(FD)) - error_detected(); + if (ShouldClose) { + if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD)) + error_detected(EC); + } } #ifdef __MINGW32__ @@ -569,7 +570,8 @@ raw_fd_ostream::~raw_fd_ostream() { // has_error() and clear the error flag with clear_error() before // destructing raw_ostream objects which may have errors. if (has_error()) - report_fatal_error("IO failure on output stream.", /*GenCrashDiag=*/false); + report_fatal_error("IO failure on output stream: " + error().message(), + /*GenCrashDiag=*/false); } void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { @@ -613,7 +615,7 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { continue; // Otherwise it's a non-recoverable error. Note it and quit. 
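With raw_fd_ostream now recording the specific std::error_code (see the raw_ostream.cpp hunk above), callers can surface the real failure instead of a generic message. A usage sketch, assuming the error() accessor used by the destructor above is public:

#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int writeOutput() {
  std::error_code EC;
  raw_fd_ostream OS("out.bin", EC, sys::fs::F_None);
  if (EC) {
    errs() << "cannot open out.bin: " << EC.message() << '\n';
    return 1;
  }
  OS << "data";
  OS.close();
  if (OS.has_error()) {
    errs() << "write failed: " << OS.error().message() << '\n';
    OS.clear_error(); // otherwise the destructor calls report_fatal_error
    return 1;
  }
  return 0;
}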
- error_detected(); + error_detected(std::error_code(errno, std::generic_category())); break; } @@ -629,8 +631,8 @@ void raw_fd_ostream::close() { assert(ShouldClose); ShouldClose = false; flush(); - if (sys::Process::SafelyCloseFileDescriptor(FD)) - error_detected(); + if (auto EC = sys::Process::SafelyCloseFileDescriptor(FD)) + error_detected(EC); FD = -1; } @@ -645,7 +647,7 @@ uint64_t raw_fd_ostream::seek(uint64_t off) { pos = ::lseek(FD, off, SEEK_SET); #endif if (pos == (uint64_t)-1) - error_detected(); + error_detected(std::error_code(errno, std::generic_category())); return pos; } diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index 500632b50cdfc..ce0bce5e3ae31 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -274,14 +274,17 @@ def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", FeaturePerfMon ]>; +// Note that cyclone does not fuse AES instructions, but newer apple chips do +// perform the fusion and cyclone is used by default when targetting apple OSes. def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", "Cyclone", [ FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, FeatureCrypto, FeatureDisableLatencySchedHeuristic, FeatureFPARMv8, - FeatureArithmeticBccFusion, - FeatureArithmeticCbzFusion, + FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeatureSlowMisaligned128Store, diff --git a/lib/Target/AArch64/AArch64CallLowering.cpp b/lib/Target/AArch64/AArch64CallLowering.cpp index 13769a2280088..a56c7508844e1 100644 --- a/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/lib/Target/AArch64/AArch64CallLowering.cpp @@ -70,8 +70,18 @@ struct IncomingArgHandler : public CallLowering::ValueHandler { void assignValueToReg(unsigned ValVReg, unsigned PhysReg, CCValAssign &VA) override { markPhysRegUsed(PhysReg); - MIRBuilder.buildCopy(ValVReg, PhysReg); - // FIXME: assert extension + switch (VA.getLocInfo()) { + default: + MIRBuilder.buildCopy(ValVReg, PhysReg); + break; + case CCValAssign::LocInfo::SExt: + case CCValAssign::LocInfo::ZExt: + case CCValAssign::LocInfo::AExt: { + auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg); + MIRBuilder.buildTrunc(ValVReg, Copy); + break; + } + } } void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size, diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index ff9bf2a7daf98..bec872ae8c099 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1972,10 +1972,41 @@ SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first; } +// Returns true if the given Op is the overflow flag result of an overflow +// intrinsic operation. +static bool isOverflowIntrOpRes(SDValue Op) { + unsigned Opc = Op.getOpcode(); + return (Op.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)); +} + static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { SDValue Sel = Op.getOperand(0); SDValue Other = Op.getOperand(1); + SDLoc dl(Sel); + + // If the operand is an overflow checking operation, invert the condition + // code and kill the Not operation. I.e., transform: + // (xor (overflow_op_bool, 1)) + // --> + // (csel 1, 0, invert(cc), overflow_op_bool) + // ... 
which later gets transformed to just a cset instruction with an + // inverted condition code, rather than a cset + eor sequence. + if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) { + // Only lower legal XALUO ops. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0))) + return SDValue(); + SDValue TVal = DAG.getConstant(1, dl, MVT::i32); + SDValue FVal = DAG.getConstant(0, dl, MVT::i32); + AArch64CC::CondCode CC; + SDValue Value, Overflow; + std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG); + SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32); + return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal, + CCVal, Overflow); + } // If neither operand is a SELECT_CC, give up. if (Sel.getOpcode() != ISD::SELECT_CC) std::swap(Sel, Other); @@ -1994,7 +2025,6 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { SDValue RHS = Sel.getOperand(1); SDValue TVal = Sel.getOperand(2); SDValue FVal = Sel.getOperand(3); - SDLoc dl(Sel); // FIXME: This could be generalized to non-integer comparisons. if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64) @@ -3457,6 +3487,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, AArch64II::MO_GOT) { Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT); Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee); + } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) { + assert(Subtarget->isTargetWindows() && + "Windows is the only supported COFF target"); + Callee = getGOT(G, DAG, AArch64II::MO_DLLIMPORT); } else { const GlobalValue *GV = G->getGlobal(); Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); @@ -3657,11 +3691,12 @@ SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty, // (loadGOT sym) template -SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG, + unsigned Flags) const { DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n"); SDLoc DL(N); EVT Ty = getPointerTy(DAG.getDataLayout()); - SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT); + SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags); // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes instead of using a wrapper node. 
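The isOverflowIntrOpRes/LowerXOR change at the top of this AArch64ISelLowering.cpp hunk matches the DAG produced by negating an overflow flag. A source-level pattern that compiles to roughly that shape with clang (__builtin_add_overflow is a clang/GCC extension that lowers to llvm.sadd.with.overflow here):

// The negation becomes (xor (overflow_op_bool, 1)), which now selects to a
// single CSET with the inverted condition instead of cset + eor.
bool addFits(int A, int B) {
  int Sum;
  return !__builtin_add_overflow(A, B, &Sum);
}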
return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr); @@ -3669,29 +3704,30 @@ SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const { // (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym)) template -SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG) - const { +SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG, + unsigned Flags) const { DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n"); SDLoc DL(N); EVT Ty = getPointerTy(DAG.getDataLayout()); const unsigned char MO_NC = AArch64II::MO_NC; return DAG.getNode( - AArch64ISD::WrapperLarge, DL, Ty, - getTargetNode(N, Ty, DAG, AArch64II::MO_G3), - getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC), - getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC), - getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC)); + AArch64ISD::WrapperLarge, DL, Ty, + getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags), + getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags), + getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags), + getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags)); } // (addlow (adrp %hi(sym)) %lo(sym)) template -SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const { +SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, + unsigned Flags) const { DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n"); SDLoc DL(N); EVT Ty = getPointerTy(DAG.getDataLayout()); - SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE); + SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags); SDValue Lo = getTargetNode(N, Ty, DAG, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags); SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi); return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo); } @@ -3700,6 +3736,9 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); + const AArch64II::TOF TargetFlags = + (GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT + : AArch64II::MO_NO_FLAG); unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); @@ -3708,14 +3747,21 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, // This also catches the large code model case for Darwin. if ((OpFlags & AArch64II::MO_GOT) != 0) { - return getGOT(GN, DAG); + return getGOT(GN, DAG, TargetFlags); } + SDValue Result; if (getTargetMachine().getCodeModel() == CodeModel::Large) { - return getAddrLarge(GN, DAG); + Result = getAddrLarge(GN, DAG, TargetFlags); } else { - return getAddr(GN, DAG); + Result = getAddr(GN, DAG, TargetFlags); } + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDLoc DL(GN); + if (GV->hasDLLImportStorageClass()) + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + return Result; } /// \brief Convert a TLS address reference into the correct sequence of loads @@ -3958,10 +4004,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch // instruction. 
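The LowerCall and LowerGlobalAddress changes just above route dllimport symbols on windows-COFF through their import-address pointers. Illustrative source (MSVC-style attributes); both references below now first load the corresponding __imp_* pointer and then use it:

__declspec(dllimport) extern int ImportedVar;
__declspec(dllimport) int importedFn(int);

int use() {
  return ImportedVar + importedFn(1); // load __imp_*, then access / call
}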
- unsigned Opc = LHS.getOpcode(); - if (LHS.getResNo() == 1 && isOneConstant(RHS) && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { + if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS)) { assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unexpected condition code."); // Only lower legal XALUO ops. @@ -4453,12 +4496,9 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, SDValue FVal = Op->getOperand(2); SDLoc DL(Op); - unsigned Opc = CCVal.getOpcode(); // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select // instruction. - if (CCVal.getResNo() == 1 && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { + if (isOverflowIntrOpRes(CCVal)) { // Only lower legal XALUO ops. if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0))) return SDValue(); diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index f4e08ad165e47..dfeeabf642c58 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -538,10 +538,12 @@ class AArch64TargetLowering : public TargetLowering { unsigned Flag) const; SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const; - template SDValue getGOT(NodeTy *N, SelectionDAG &DAG) const; template - SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG) const; - template SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const; + SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; + template + SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; + template + SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index e19deb4f0d833..80c5092a4eedc 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -500,14 +500,14 @@ let DiagnosticType = "LogicalSecondSource" in { let Name = "LogicalImm64Not"; } } -def logical_imm32 : Operand, PatLeaf<(imm), [{ - return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32); +def logical_imm32 : Operand, IntImmLeaf { let PrintMethod = "printLogicalImm32"; let ParserMatchClass = LogicalImm32Operand; } -def logical_imm64 : Operand, PatLeaf<(imm), [{ - return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 64); +def logical_imm64 : Operand, IntImmLeaf { let PrintMethod = "printLogicalImm64"; let ParserMatchClass = LogicalImm64Operand; @@ -754,8 +754,8 @@ class arith_extended_reg32to64 : Operand, // Floating-point immediate. 
def fpimm16 : Operand, - PatLeaf<(f16 fpimm), [{ - return AArch64_AM::getFP16Imm(N->getValueAPF()) != -1; + FPImmLeafgetValueAPF(); uint32_t enc = AArch64_AM::getFP16Imm(InVal); @@ -765,8 +765,8 @@ def fpimm16 : Operand, let PrintMethod = "printFPImmOperand"; } def fpimm32 : Operand, - PatLeaf<(f32 fpimm), [{ - return AArch64_AM::getFP32Imm(N->getValueAPF()) != -1; + FPImmLeafgetValueAPF(); uint32_t enc = AArch64_AM::getFP32Imm(InVal); @@ -776,8 +776,8 @@ def fpimm32 : Operand, let PrintMethod = "printFPImmOperand"; } def fpimm64 : Operand, - PatLeaf<(f64 fpimm), [{ - return AArch64_AM::getFP64Imm(N->getValueAPF()) != -1; + FPImmLeafgetValueAPF(); uint32_t enc = AArch64_AM::getFP64Imm(InVal); @@ -792,8 +792,8 @@ def fpimm8 : Operand { let PrintMethod = "printFPImmOperand"; } -def fpimm0 : PatLeaf<(fpimm), [{ - return N->isExactlyValue(+0.0); +def fpimm0 : FPImmLeaf; // Vector lane operands @@ -847,10 +847,9 @@ def VectorIndexD : Operand, ImmLeaf, - PatLeaf<(f64 fpimm), [{ - return AArch64_AM::isAdvSIMDModImmType10(N->getValueAPF() - .bitcastToAPInt() - .getZExtValue()); + FPImmLeafgetValueAPF(); uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() @@ -2517,6 +2516,22 @@ def am_indexed32 : ComplexPattern; def am_indexed64 : ComplexPattern; def am_indexed128 : ComplexPattern; +def gi_am_indexed8 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed16 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed32 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed64 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; +def gi_am_indexed128 : + GIComplexOperandMatcher">, + GIComplexPatternEquiv; + class UImm12OffsetOperand : AsmOperandClass { let Name = "UImm12Offset" # Scale; let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">"; @@ -2588,6 +2603,23 @@ multiclass StoreUI sz, bit V, bits<2> opc, RegisterClass regtype, (!cast(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; } +// Same as StoreUI, but takes a RegisterOperand. This is used by GlobalISel to +// substitute zero-registers automatically. +// +// TODO: Roll out zero-register substitution to GPR32/GPR64 and fold this back +// into StoreUI.
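+// Example of the substitution this enables (schematic): for gMIR like
+//   G_STORE %zero(s32), %ptr(p0)
+// GlobalISel can render the constant-zero operand directly as the zero
+// register, giving 'STRWui $wzr, $x0, 0' instead of first materializing 0
+// into a scratch GPR32.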
+multiclass StoreUIz sz, bit V, bits<2> opc, RegisterOperand regtype, + Operand indextype, string asm, list pattern> { + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def ui : BaseLoadStoreUI, + Sched<[WriteST]>; + + def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + def PrefetchOperand : AsmOperandClass { let Name = "Prefetch"; let ParserMethod = "tryParsePrefetch"; @@ -3147,6 +3179,23 @@ def am_unscaled32 : ComplexPattern; def am_unscaled64 : ComplexPattern; def am_unscaled128 :ComplexPattern; +def gi_am_unscaled8 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled16 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled32 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled64 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; +def gi_am_unscaled128 : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + + class BaseLoadStoreUnscale sz, bit V, bits<2> opc, dag oops, dag iops, string asm, list pattern> : I { diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 1d35fb3da2bdc..9fc178292469c 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1038,6 +1038,12 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint( bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, unsigned &SrcReg2, int &CmpMask, int &CmpValue) const { + // The first operand can be a frame index where we'd normally expect a + // register. + assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands"); + if (!MI.getOperand(1).isReg()) + return false; + switch (MI.getOpcode()) { default: break; @@ -4646,13 +4652,24 @@ AArch64InstrInfo::getOutlininingCandidateInfo( FrameID); } -bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const { - // If MF has a red zone, then we ought not to outline from it, since outlined - // functions can modify/read from the stack. - // If MF's address is taken, then we don't want to outline from it either - // since we don't really know what the user is doing with it. - return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone) && - !MF.getFunction()->hasAddressTaken(); +bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const { + const Function *F = MF.getFunction(); + + // If F uses a redzone, then don't outline from it because it might mess up + // the stack. + if (!F->hasFnAttribute(Attribute::NoRedZone)) + return false; + + // If anyone is using the address of this function, don't outline from it. + if (F->hasAddressTaken()) + return false; + + // Can F be deduplicated by the linker? If it can, don't outline from it. 
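+  // Example: 'inline' C++ functions are typically emitted with linkonce_odr
+  // linkage, so each TU carries an interchangeable copy and the linker keeps
+  // only one. Outlining from a single copy would make the copies diverge, so
+  // stay conservative unless the caller opts in via OutlineFromLinkOnceODRs.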
+ if (!OutlineFromLinkOnceODRs && F->hasLinkOnceODRLinkage()) + return false; + + return true; } AArch64GenInstrInfo::MachineOutlinerInstrType diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 9a338b53c7a92..24758e9788860 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -352,7 +352,8 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { bool canOutlineWithoutLRSave(MachineBasicBlock::iterator &CallInsertionPt) const; - bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; + bool isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const override; MachineOutlinerInfo getOutlininingCandidateInfo( std::vector< std::pair> diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index cb562026997ef..eabbc05a0332e 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2249,11 +2249,11 @@ let AddedComplexity = 19 in { //--- // (unsigned immediate) -defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str", - [(store GPR64:$Rt, +defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str", + [(store GPR64z:$Rt, (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; -defm STRW : StoreUI<0b10, 0, 0b00, GPR32, uimm12s4, "str", - [(store GPR32:$Rt, +defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str", + [(store GPR32z:$Rt, (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; defm STRB : StoreUI<0b00, 1, 0b00, FPR8, uimm12s1, "str", [(store FPR8:$Rt, @@ -2269,12 +2269,12 @@ defm STRD : StoreUI<0b11, 1, 0b00, FPR64, uimm12s8, "str", (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; defm STRQ : StoreUI<0b00, 1, 0b10, FPR128, uimm12s16, "str", []>; -defm STRHH : StoreUI<0b01, 0, 0b00, GPR32, uimm12s2, "strh", - [(truncstorei16 GPR32:$Rt, +defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh", + [(truncstorei16 GPR32z:$Rt, (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; -defm STRBB : StoreUI<0b00, 0, 0b00, GPR32, uimm12s1, "strb", - [(truncstorei8 GPR32:$Rt, +defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb", + [(truncstorei8 GPR32z:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 58624f24ec0f4..e6b7dca92669c 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -64,7 +64,33 @@ class AArch64InstructionSelector : public InstructionSelector { bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const; - ComplexRendererFn selectArithImmed(MachineOperand &Root) const; + ComplexRendererFns selectArithImmed(MachineOperand &Root) const; + + ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, + unsigned Size) const; + + ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 1); + } + ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 2); + } + ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 4); + } + ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 8); + } + ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { + return selectAddrModeUnscaled(Root, 16); + } + + 
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, + unsigned Size) const; + template + ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { + return selectAddrModeIndexed(Root, Width / 8); + } const AArch64TargetMachine &TM; const AArch64Subtarget &STI; @@ -705,6 +731,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { << " constant on bank: " << RB << ", expected: FPR\n"); return false; } + + // The case when we have 0.0 is covered by tablegen. Reject it here so we + // can be sure tablegen works correctly and isn't rescued by this code. + if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0)) + return false; } else { // s32 and s64 are covered by tablegen. if (Ty != p0) { @@ -1342,7 +1373,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { /// SelectArithImmed - Select an immediate value that can be represented as /// a 12-bit value shifted left by either 0 or 12. If so, return true with /// Val set to the 12-bit value and Shift set to the shifter operand. -InstructionSelector::ComplexRendererFn +InstructionSelector::ComplexRendererFns AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { MachineInstr &MI = *Root.getParent(); MachineBasicBlock &MBB = *MI.getParent(); @@ -1362,13 +1393,13 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { else if (Root.isReg()) { MachineInstr *Def = MRI.getVRegDef(Root.getReg()); if (Def->getOpcode() != TargetOpcode::G_CONSTANT) - return nullptr; + return None; MachineOperand &Op1 = Def->getOperand(1); if (!Op1.isCImm() || Op1.getCImm()->getBitWidth() > 64) - return nullptr; + return None; Immed = Op1.getCImm()->getZExtValue(); } else - return nullptr; + return None; unsigned ShiftAmt; @@ -1378,10 +1409,116 @@ AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { ShiftAmt = 12; Immed = Immed >> 12; } else - return nullptr; + return None; unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); - return [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed).addImm(ShVal); }; + return {{ + [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, + }}; +} + +/// Select a "register plus unscaled signed 9-bit immediate" address. This +/// should only match when there is an offset that is not valid for a scaled +/// immediate addressing mode. The "Size" argument is the size in bytes of the +/// memory reference, which is needed here to know what is valid for a scaled +/// immediate. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, + unsigned Size) const { + MachineRegisterInfo &MRI = + Root.getParent()->getParent()->getParent()->getRegInfo(); + + if (!Root.isReg()) + return None; + + if (!isBaseWithConstantOffset(Root, MRI)) + return None; + + MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); + if (!RootDef) + return None; + + MachineOperand &OffImm = RootDef->getOperand(2); + if (!OffImm.isReg()) + return None; + MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); + if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) + return None; + int64_t RHSC; + MachineOperand &RHSOp1 = RHS->getOperand(1); + if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) + return None; + RHSC = RHSOp1.getCImm()->getSExtValue(); + + // If the offset is valid as a scaled immediate, don't match here. 
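+  // Worked example for Size == 4: offset 8 is 4-aligned, non-negative and
+  // below 0x1000 << 2, so the scaled form wins and we return None below;
+  // offsets such as 6 (misaligned) or -4 (negative) fail the scaled test and
+  // are matched here as long as they fit the signed 9-bit window [-256, 255].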
+ if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) + return None; + if (RHSC >= -256 && RHSC < 256) { + MachineOperand &Base = RootDef->getOperand(1); + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, + }}; + } + return None; +} + +/// Select a "register plus scaled unsigned 12-bit immediate" address. The +/// "Size" argument is the size in bytes of the memory reference, which +/// determines the scale. +InstructionSelector::ComplexRendererFns +AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, + unsigned Size) const { + MachineRegisterInfo &MRI = + Root.getParent()->getParent()->getParent()->getRegInfo(); + + if (!Root.isReg()) + return None; + + MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); + if (!RootDef) + return None; + + if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, + }}; + } + + if (isBaseWithConstantOffset(Root, MRI)) { + MachineOperand &LHS = RootDef->getOperand(1); + MachineOperand &RHS = RootDef->getOperand(2); + MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); + MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); + if (LHSDef && RHSDef) { + int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); + unsigned Scale = Log2_32(Size); + if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { + if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, + }}; + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, + }}; + } + } + } + + // Before falling back to our general case, check if the unscaled + // instructions can handle this. If so, that's preferable. + if (selectAddrModeUnscaled(Root, Size).hasValue()) + return None; + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, + }}; } namespace llvm { diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 380668d7bd864..2d45be37ca777 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -31,6 +31,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { const LLT s16 = LLT::scalar(16); const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); + const LLT s128 = LLT::scalar(128); const LLT v2s32 = LLT::vector(2, 32); const LLT v4s32 = LLT::vector(4, 32); const LLT v2s64 = LLT::vector(2, 64); @@ -38,7 +39,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { for (auto Ty : {p0, s1, s8, s16, s32, s64}) setAction({G_IMPLICIT_DEF, Ty}, Legal); - for (auto Ty : {s16, s32, s64}) + for (auto Ty : {s16, s32, s64, p0}) setAction({G_PHI, Ty}, Legal); for (auto Ty : {s1, s8}) @@ -229,7 +230,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo() { setAction({G_INTTOPTR, 1, s64}, Legal); // Casts for 32 and 64-bit width type are just copies. - for (auto Ty : {s1, s8, s16, s32, s64}) { + // Same for 128-bit width type, except they are on the FPR bank. 
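+  // (s128 covers, e.g., bitcasts between s128 and v2s64/v4s32; 128-bit
+  // scalars live in the 128-bit Q registers, hence the FPR bank.)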
+ for (auto Ty : {s1, s8, s16, s32, s64, s128}) { setAction({G_BITCAST, 0, Ty}, Legal); setAction({G_BITCAST, 1, Ty}, Legal); } diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index f82b9dbc2c9f7..f1281a1b91249 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -19,10 +19,12 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -33,7 +35,25 @@ AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer) MCSymbol * AArch64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { - return Printer.getSymbol(MO.getGlobal()); + const GlobalValue *GV = MO.getGlobal(); + unsigned TargetFlags = MO.getTargetFlags(); + const Triple &TheTriple = Printer.TM.getTargetTriple(); + if (!TheTriple.isOSBinFormatCOFF()) + return Printer.getSymbol(GV); + + assert(TheTriple.isOSWindows() && + "Windows is the only supported COFF target"); + + bool IsIndirect = (TargetFlags & AArch64II::MO_DLLIMPORT); + if (!IsIndirect) + return Printer.getSymbol(GV); + + SmallString<128> Name; + Name = "__imp_"; + Printer.TM.getNameWithPrefix(Name, GV, + Printer.getObjFileLowering().getMangler()); + + return Ctx.getOrCreateSymbol(Name); } MCSymbol * diff --git a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp index a02321d4a04f5..ec98980fa0b97 100644 --- a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp +++ b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp @@ -201,6 +201,9 @@ bool AArch64RedundantCopyElimination::knownRegValInBlock( // CMP is an alias for SUBS with a dead destination register. case AArch64::SUBSWri: case AArch64::SUBSXri: { + // Sometimes the first operand is a FrameIndex. Bail if that happens. + if (!PredI.getOperand(1).isReg()) + return false; MCPhysReg DstReg = PredI.getOperand(0).getReg(); MCPhysReg SrcReg = PredI.getOperand(1).getReg(); diff --git a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp index 3c505fcec16d0..391e8ed633d7f 100644 --- a/lib/Target/AArch64/AArch64RegisterBankInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterBankInfo.cpp @@ -59,10 +59,9 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI) assert(&AArch64::FPRRegBank == &RBFPR && "The order in RegBanks is messed up"); - const RegisterBank &RBCCR = getRegBank(AArch64::CCRRegBankID); + const RegisterBank &RBCCR = getRegBank(AArch64::CCRegBankID); (void)RBCCR; - assert(&AArch64::CCRRegBank == &RBCCR && - "The order in RegBanks is messed up"); + assert(&AArch64::CCRegBank == &RBCCR && "The order in RegBanks is messed up"); // The GPR register bank is fully defined by all the registers in // GR64all + its subclasses.
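Together with the LowerGlobalAddress change earlier in this patch, the GetGlobalAddressSymbol change above completes the dllimport path: a symbol operand carrying MO_DLLIMPORT is emitted as the import-stub symbol __imp_<name>, and the stub slot is dereferenced once to reach the real address. A schematic sketch of the resulting code, with register choices and mangling simplified (not taken from the patch):

    // C source: extern __declspec(dllimport) int g;  int f(void) { return g; }
    //
    // AArch64, small code model (roughly):
    //   adrp x8, __imp_g              // page of the import-stub slot
    //   ldr  x8, [x8, :lo12:__imp_g]  // extra load added in LowerGlobalAddress
    //   ldr  w0, [x8]                 // ordinary access through the real &g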
@@ -229,7 +228,7 @@ const RegisterBank &AArch64RegisterBankInfo::getRegBankFromRegClass( case AArch64::XSeqPairsClassRegClassID: return getRegBank(AArch64::GPRRegBankID); case AArch64::CCRRegClassID: - return getRegBank(AArch64::CCRRegBankID); + return getRegBank(AArch64::CCRegBankID); default: llvm_unreachable("Register class not supported"); } @@ -415,12 +414,10 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping( const RegisterBankInfo::InstructionMapping & AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const unsigned Opc = MI.getOpcode(); - const MachineFunction &MF = *MI.getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); // Try the default logic for non-generic instructions that are either copies // or already have some operands assigned to banks. - if (!isPreISelGenericOpcode(Opc) || + if ((Opc != TargetOpcode::COPY && !isPreISelGenericOpcode(Opc)) || Opc == TargetOpcode::G_PHI) { const RegisterBankInfo::InstructionMapping &Mapping = getInstrMappingImpl(MI); @@ -428,6 +425,11 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return Mapping; } + const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetSubtargetInfo &STI = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + switch (Opc) { // G_{F|S|U}REM are not listed because they are not legal. // Arithmetic ops. @@ -451,12 +453,39 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_FMUL: case TargetOpcode::G_FDIV: return getSameKindOfOperandsMapping(MI); + case TargetOpcode::COPY: { + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned SrcReg = MI.getOperand(1).getReg(); + // Check if one of the register is not a generic register. + if ((TargetRegisterInfo::isPhysicalRegister(DstReg) || + !MRI.getType(DstReg).isValid()) || + (TargetRegisterInfo::isPhysicalRegister(SrcReg) || + !MRI.getType(SrcReg).isValid())) { + const RegisterBank *DstRB = getRegBank(DstReg, MRI, TRI); + const RegisterBank *SrcRB = getRegBank(SrcReg, MRI, TRI); + if (!DstRB) + DstRB = SrcRB; + else if (!SrcRB) + SrcRB = DstRB; + // If both RB are null that means both registers are generic. + // We shouldn't be here. + assert(DstRB && SrcRB && "Both RegBank were nullptr"); + unsigned Size = getSizeInBits(DstReg, MRI, TRI); + return getInstructionMapping( + DefaultMappingID, copyCost(*DstRB, *SrcRB, Size), + getCopyMapping(DstRB->getID(), SrcRB->getID(), Size), + // We only care about the mapping of the destination. + /*NumOperands*/ 1); + } + // Both registers are generic, use G_BITCAST. + LLVM_FALLTHROUGH; + } case TargetOpcode::G_BITCAST: { LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); unsigned Size = DstTy.getSizeInBits(); - bool DstIsGPR = !DstTy.isVector(); - bool SrcIsGPR = !SrcTy.isVector(); + bool DstIsGPR = !DstTy.isVector() && DstTy.getSizeInBits() <= 64; + bool SrcIsGPR = !SrcTy.isVector() && SrcTy.getSizeInBits() <= 64; const RegisterBank &DstRB = DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank; const RegisterBank &SrcRB = @@ -464,7 +493,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { return getInstructionMapping( DefaultMappingID, copyCost(DstRB, SrcRB, Size), getCopyMapping(DstRB.getID(), SrcRB.getID(), Size), - /*NumOperands*/ 2); + // We only care about the mapping of the destination for COPY. 
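+    // Example: for '%1:gpr(s32) = COPY $w0' the physical-register source has
+    // no operand mapping to record, so the mapping covers the destination
+    // only (NumOperands == 1); a genuine G_BITCAST still maps both sides.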
+ /*NumOperands*/ Opc == TargetOpcode::G_BITCAST ? 2 : 1); } default: break; diff --git a/lib/Target/AArch64/AArch64RegisterBanks.td b/lib/Target/AArch64/AArch64RegisterBanks.td index c2b6c0b04e9b4..eee584708f69a 100644 --- a/lib/Target/AArch64/AArch64RegisterBanks.td +++ b/lib/Target/AArch64/AArch64RegisterBanks.td @@ -17,4 +17,4 @@ def GPRRegBank : RegisterBank<"GPR", [GPR64all]>; def FPRRegBank : RegisterBank<"FPR", [QQQQ]>; /// Conditional register: NZCV. -def CCRRegBank : RegisterBank<"CCR", [CCR]>; +def CCRegBank : RegisterBank<"CC", [CCR]>; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index 7e29ee5e9bafe..ee5d3547aaaec 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -169,6 +169,15 @@ def GPR64sp0 : RegisterOperand { let ParserMatchClass = GPR64spPlus0Operand; } +// GPR32/GPR64 but with zero-register substitution enabled. +// TODO: Roll this out to GPR32/GPR64/GPR32all/GPR64all. +def GPR32z : RegisterOperand { + let GIZeroRegister = WZR; +} +def GPR64z : RegisterOperand { + let GIZeroRegister = XZR; +} + // GPR register classes which include WZR/XZR AND SP/WSP. This is not a // constraint used by any instructions, it is used as a common super-class. def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>; diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index e65b382e85b2f..1762475ac93fa 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -365,7 +365,7 @@ void AArch64PassConfig::addIRPasses() { // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. 
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass()); + addPass(createLateCFGSimplificationPass()); // Run LoopDataPrefetch // diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 3ba7a692039ba..1f06d4065b391 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -139,7 +139,7 @@ class AArch64AsmParser : public MCTargetAsmParser { AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, STI) { + : MCTargetAsmParser(Options, STI, MII) { IsILP32 = Options.getABIName() == "ilp32"; MCAsmParserExtension::Initialize(Parser); MCStreamer &S = getParser().getStreamer(); @@ -3297,7 +3297,8 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, } } -std::string AArch64MnemonicSpellCheck(StringRef S, uint64_t FBS); +static std::string AArch64MnemonicSpellCheck(StringRef S, uint64_t FBS, + unsigned VariantID = 0); bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode, OperandVector &Operands) { @@ -4255,6 +4256,7 @@ extern "C" void LLVMInitializeAArch64AsmParser() { #define GET_REGISTER_MATCHER #define GET_SUBTARGET_FEATURE_NAME #define GET_MATCHER_IMPLEMENTATION +#define GET_MNEMONIC_SPELL_CHECKER #include "AArch64GenAsmMatcher.inc" // Define this matcher function after the auto-generated include so we diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 1e18a5c345c80..62e5d02f60329 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -689,7 +689,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, const MCSubtargetInfo &STI) { unsigned Opcode = MI->getOpcode(); - StringRef Layout, Mnemonic; + StringRef Layout; bool IsTbx; if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 0e42cf422bd5f..7b33b4b5b5427 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -432,7 +432,8 @@ class DarwinAArch64AsmBackend : public AArch64AsmBackend { const MCRegisterInfo &MRI) : AArch64AsmBackend(T, TT, /*IsLittleEndian*/ true), MRI(MRI) {} - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + std::unique_ptr + createObjectWriter(raw_pwrite_stream &OS) const override { return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64, MachO::CPU_SUBTYPE_ARM64_ALL); } @@ -582,7 +583,8 @@ class ELFAArch64AsmBackend : public AArch64AsmBackend { : AArch64AsmBackend(T, TT, IsLittleEndian), OSABI(OSABI), IsILP32(IsILP32) {} - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + std::unique_ptr + createObjectWriter(raw_pwrite_stream &OS) const override { return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian, IsILP32); } }; @@ -595,7 +597,8 @@ class COFFAArch64AsmBackend : public AArch64AsmBackend { COFFAArch64AsmBackend(const Target &T, const Triple &TheTriple) : AArch64AsmBackend(T, TheTriple, /*IsLittleEndian*/ true) {} - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + std::unique_ptr + createObjectWriter(raw_pwrite_stream &OS) const override { return 
createAArch64WinCOFFObjectWriter(OS); } }; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 89c3e5b4c76ec..2d90e67960f8e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include @@ -428,11 +429,10 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, llvm_unreachable("Unimplemented fixup -> relocation"); } -MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI, - bool IsLittleEndian, - bool IsILP32) { - MCELFObjectTargetWriter *MOTW = - new AArch64ELFObjectWriter(OSABI, IsLittleEndian, IsILP32); - return createELFObjectWriter(MOTW, OS, IsLittleEndian); +std::unique_ptr +llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, + bool IsLittleEndian, bool IsILP32) { + auto MOTW = + llvm::make_unique(OSABI, IsLittleEndian, IsILP32); + return createELFObjectWriter(std::move(MOTW), OS, IsLittleEndian); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index a0de3c39562b2..1e8a07aea8ac9 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -86,10 +86,11 @@ class AArch64ELFStreamer : public MCELFStreamer { public: friend class AArch64TargetELFStreamer; - AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_pwrite_stream &OS, MCCodeEmitter *Emitter) - : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), - LastEMS(EMS_None) {} + AArch64ELFStreamer(MCContext &Context, std::unique_ptr TAB, + raw_pwrite_stream &OS, + std::unique_ptr Emitter) + : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)), + MappingSymbolCounter(0), LastEMS(EMS_None) {} void ChangeSection(MCSection *Section, const MCExpr *Subsection) override { // We have to keep track of the mapping symbol state of any sections we @@ -198,10 +199,13 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, return new AArch64TargetAsmStreamer(S, OS); } -MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, +MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, + std::unique_ptr TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll) { - AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); + std::unique_ptr Emitter, + bool RelaxAll) { + AArch64ELFStreamer *S = + new AArch64ELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)); if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h index ef48203c8bc05..19b188aa1c61a 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -18,9 +18,11 @@ namespace llvm { -MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, +MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, + std::unique_ptr TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll); + std::unique_ptr Emitter, + bool RelaxAll); } #endif diff --git 
a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 8618069fb0d11..c3458d625b832 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -16,6 +16,8 @@ #include "AArch64MCAsmInfo.h" #include "AArch64WinCOFFStreamer.h" #include "InstPrinter/AArch64InstPrinter.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -100,25 +102,32 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T, } static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, - MCAsmBackend &TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll) { - return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll); + std::unique_ptr &&TAB, + raw_pwrite_stream &OS, + std::unique_ptr &&Emitter, + bool RelaxAll) { + return createAArch64ELFStreamer(Ctx, std::move(TAB), OS, std::move(Emitter), + RelaxAll); } -static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB, +static MCStreamer *createMachOStreamer(MCContext &Ctx, + std::unique_ptr &&TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll, + std::unique_ptr &&Emitter, + bool RelaxAll, bool DWARFMustBeAtTheEnd) { - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, - DWARFMustBeAtTheEnd, + return createMachOStreamer(Ctx, std::move(TAB), OS, std::move(Emitter), + RelaxAll, DWARFMustBeAtTheEnd, /*LabelSections*/ true); } -static MCStreamer *createWinCOFFStreamer(MCContext &Ctx, MCAsmBackend &TAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll, - bool IncrementalLinkerCompatible) { - return createAArch64WinCOFFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, +static MCStreamer * +createWinCOFFStreamer(MCContext &Ctx, std::unique_ptr &&TAB, + raw_pwrite_stream &OS, + std::unique_ptr &&Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible) { + return createAArch64WinCOFFStreamer(Ctx, std::move(TAB), OS, + std::move(Emitter), RelaxAll, IncrementalLinkerCompatible); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index f7248faa5d0fc..b9e1673b9317d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -16,6 +16,8 @@ #include "llvm/Support/DataTypes.h" +#include + namespace llvm { class formatted_raw_ostream; class MCAsmBackend; @@ -51,16 +53,16 @@ MCAsmBackend *createAArch64beAsmBackend(const Target &T, const Triple &TT, StringRef CPU, const MCTargetOptions &Options); -MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI, - bool IsLittleEndian, - bool IsILP32); +std::unique_ptr +createAArch64ELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, + bool IsLittleEndian, bool IsILP32); -MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS, - uint32_t CPUType, - uint32_t CPUSubtype); +std::unique_ptr +createAArch64MachObjectWriter(raw_pwrite_stream &OS, uint32_t CPUType, + uint32_t CPUSubtype); -MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS); +std::unique_ptr +createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS); MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp 
b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 19b2576f68951..55151c2b8d213 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -430,10 +430,10 @@ void AArch64MachObjectWriter::recordRelocation( Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS, - uint32_t CPUType, - uint32_t CPUSubtype) { +std::unique_ptr +llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS, uint32_t CPUType, + uint32_t CPUSubtype) { return createMachObjectWriter( - new AArch64MachObjectWriter(CPUType, CPUSubtype), OS, + llvm::make_unique(CPUType, CPUSubtype), OS, /*IsLittleEndian=*/true); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp index 31762b9e4cd50..d06c5e8862aec 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/Support/ErrorHandling.h" @@ -96,9 +97,10 @@ bool AArch64WinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { namespace llvm { -MCObjectWriter *createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS) { - MCWinCOFFObjectTargetWriter *MOTW = new AArch64WinCOFFObjectWriter(); - return createWinCOFFObjectWriter(MOTW, OS); +std::unique_ptr +createAArch64WinCOFFObjectWriter(raw_pwrite_stream &OS) { + auto MOTW = llvm::make_unique(); + return createWinCOFFObjectWriter(std::move(MOTW), OS); } } // end namespace llvm diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp index 6c8da27e398ff..9d0f39e5f6ad9 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "AArch64WinCOFFStreamer.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" using namespace llvm; @@ -17,19 +19,20 @@ class AArch64WinCOFFStreamer : public MCWinCOFFStreamer { public: friend class AArch64TargetWinCOFFStreamer; - AArch64WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE, - raw_pwrite_stream &OS) - : MCWinCOFFStreamer(C, AB, CE, OS) {} + AArch64WinCOFFStreamer(MCContext &C, std::unique_ptr AB, + std::unique_ptr CE, + raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), OS) {} }; } // end anonymous namespace namespace llvm { -MCWinCOFFStreamer -*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll, - bool IncrementalLinkerCompatible) { - auto *S = new AArch64WinCOFFStreamer(Context, MAB, *Emitter, OS); +MCWinCOFFStreamer *createAArch64WinCOFFStreamer( + MCContext &Context, std::unique_ptr MAB, + raw_pwrite_stream &OS, std::unique_ptr Emitter, + bool RelaxAll, bool IncrementalLinkerCompatible) { + auto *S = new AArch64WinCOFFStreamer(Context, std::move(MAB), + std::move(Emitter), OS); S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); return S; } diff --git 
a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h index 1b4fcd6804e2b..b67a19e883e96 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.h @@ -33,11 +33,10 @@ class AArch64TargetWinCOFFStreamer : public llvm::AArch64TargetStreamer { namespace llvm { -MCWinCOFFStreamer -*createAArch64WinCOFFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll, - bool IncrementalLinkerCompatible); +MCWinCOFFStreamer *createAArch64WinCOFFStreamer( + MCContext &Context, std::unique_ptr TAB, + raw_pwrite_stream &OS, std::unique_ptr Emitter, + bool RelaxAll, bool IncrementalLinkerCompatible); } // end llvm namespace #endif diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 5d76681cd97b0..c1c799b7b349f 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -517,7 +517,12 @@ namespace AArch64II { /// thread-local symbol. On Darwin, only one type of thread-local access /// exists (pre linker-relaxation), but on ELF the TLSModel used for the /// referee will affect interpretation. - MO_TLS = 0x40 + MO_TLS = 0x40, + + /// MO_DLLIMPORT - On a symbol operand, this represents that the reference + /// to the symbol is for an import stub. This is used for DLL import + /// storage class indication on Windows. + MO_DLLIMPORT = 0x80, }; } // end namespace AArch64II diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index b101ff1985b7e..bc2321601a51b 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -23,6 +23,7 @@ class ModulePass; class Pass; class Target; class TargetMachine; +class TargetOptions; class PassRegistry; class Module; @@ -52,7 +53,7 @@ FunctionPass *createSIDebuggerInsertNopsPass(); FunctionPass *createSIInsertWaitsPass(); FunctionPass *createSIInsertWaitcntsPass(); FunctionPass *createSIFixWWMLivenessPass(); -FunctionPass *createAMDGPUSimplifyLibCallsPass(); +FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &); FunctionPass *createAMDGPUUseNativeCallsPass(); FunctionPass *createAMDGPUCodeGenPreparePass(); FunctionPass *createAMDGPUMachineCFGStructurizerPass(); @@ -201,6 +202,10 @@ void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); Pass *createAMDGPUFunctionInliningPass(); void initializeAMDGPUInlinerPass(PassRegistry&); +ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); +void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); +extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; + Target &getTheAMDGPUTarget(); Target &getTheGCNTarget(); diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 9b077bde61426..deaf8398b92b6 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -109,6 +109,12 @@ def FeatureApertureRegs : SubtargetFeature<"aperture-regs", "Has Memory Aperture Base and Size Registers" >; +def FeatureMadMixInsts : SubtargetFeature<"mad-mix-insts", + "HasMadMixInsts", + "true", + "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions" +>; + // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support // XNACK. 
The current default kernel driver setting is: // - graphics ring: XNACK disabled @@ -181,13 +187,13 @@ def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding", def FeatureCIInsts : SubtargetFeature<"ci-insts", "CIInsts", "true", - "Additional intstructions for CI+" + "Additional instructions for CI+" >; def FeatureGFX9Insts : SubtargetFeature<"gfx9-insts", "GFX9Insts", "true", - "Additional intstructions for GFX9+" + "Additional instructions for GFX9+" >; def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime", @@ -408,6 +414,13 @@ def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature < "Hardware automatically inserts waitcnt before barrier" >; +def FeatureCodeObjectV3 : SubtargetFeature < + "code-object-v3", + "CodeObjectV3", + "true", + "Generate code object version 3" +>; + // Dummy feature used to disable assembler instructions. def FeatureDisable : SubtargetFeature<"", "FeatureDisable","true", @@ -546,19 +559,25 @@ def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0, def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0, [FeatureGFX9, - FeatureLDSBankCount32]>; + FeatureMadMixInsts, + FeatureLDSBankCount32 + ]>; def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1, [FeatureGFX9, + FeatureMadMixInsts, FeatureLDSBankCount32, FeatureXNACK]>; def FeatureISAVersion9_0_2 : SubtargetFeatureISAVersion <9,0,2, [FeatureGFX9, - FeatureLDSBankCount32]>; + FeatureMadMixInsts, + FeatureLDSBankCount32 + ]>; def FeatureISAVersion9_0_3 : SubtargetFeatureISAVersion <9,0,3, [FeatureGFX9, + FeatureMadMixInsts, FeatureLDSBankCount32, FeatureXNACK]>; @@ -721,21 +740,33 @@ def HasDPP : Predicate<"Subtarget->hasDPP()">, def HasIntClamp : Predicate<"Subtarget->hasIntClamp()">, AssemblerPredicate<"FeatureIntClamp">; -def HasMadMix : Predicate<"Subtarget->hasMadMixInsts()">, - AssemblerPredicate<"FeatureGFX9Insts">; +def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">, + AssemblerPredicate<"FeatureMadMixInsts">; + +def EnableLateCFGStructurize : Predicate< + "EnableLateStructurizeCFG">; + +// Exists to help track down where SubtargetPredicate isn't set rather +// than letting tablegen crash with an unhelpful error. 
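+// How this presumably fails fast: a definition that never overrides
+// SubtargetPredicate inherits InvalidPred, whose condition string is not
+// valid C++, so the generated source no longer compiles and the error
+// message quotes "predicate not set on instruction or pattern".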
+def InvalidPred : Predicate<"predicate not set on instruction or pattern">; class PredicateControl { - Predicate SubtargetPredicate; + Predicate SubtargetPredicate = InvalidPred; Predicate SIAssemblerPredicate = isSICI; Predicate VIAssemblerPredicate = isVI; list AssemblerPredicates = []; Predicate AssemblerPredicate = TruePredicate; list OtherPredicates = []; - list Predicates = !listconcat([SubtargetPredicate, AssemblerPredicate], + list Predicates = !listconcat([SubtargetPredicate, + AssemblerPredicate], AssemblerPredicates, OtherPredicates); } +class AMDGPUPat : Pat, + PredicateControl; + + // Include AMDGPU TD files include "R600Schedule.td" include "SISchedule.td" diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index 9527c9f9884d6..392b011e387c5 100644 --- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -129,8 +129,11 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc, switch (F->getCallingConv()) { default: return AAResultBase::pointsToConstantMemory(Loc, OrLocal); - case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_LS: + case CallingConv::AMDGPU_HS: + case CallingConv::AMDGPU_ES: case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_VS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: case CallingConv::AMDGPU_KERNEL: diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0facae0992b0e..5a1d1a55795bc 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -36,11 +36,13 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/AMDGPUMetadata.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; +using namespace llvm::AMDGPU; // TODO: This should get the default rounding mode from the kernel. We just set // the default here, but this could change if the OpenCL rounding mode pragmas @@ -105,28 +107,71 @@ const MCSubtargetInfo* AMDGPUAsmPrinter::getSTI() const { return TM.getMCSubtargetInfo(); } -AMDGPUTargetStreamer& AMDGPUAsmPrinter::getTargetStreamer() const { - return static_cast(*OutStreamer->getTargetStreamer()); +AMDGPUTargetStreamer* AMDGPUAsmPrinter::getTargetStreamer() const { + if (!OutStreamer) + return nullptr; + return static_cast(OutStreamer->getTargetStreamer()); } void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) { - if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + if (TM.getTargetTriple().getArch() != Triple::amdgcn) + return; + + if (TM.getTargetTriple().getOS() != Triple::AMDHSA && + TM.getTargetTriple().getOS() != Triple::AMDPAL) + return; + + if (TM.getTargetTriple().getOS() == Triple::AMDHSA) + HSAMetadataStream.begin(M); + + if (TM.getTargetTriple().getOS() == Triple::AMDPAL) + readPALMetadata(M); + + // Deprecated notes are not emitted for code object v3. + if (IsaInfo::hasCodeObjectV3(getSTI()->getFeatureBits())) return; - AMDGPU::IsaInfo::IsaVersion ISA = - AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits()); + // HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2. 
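+  // (For code object v3 these notes are skipped entirely; see the
+  // hasCodeObjectV3 early-return above.)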
+ if (TM.getTargetTriple().getOS() == Triple::AMDHSA) + getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1); - getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1); - getTargetStreamer().EmitDirectiveHSACodeObjectISA( + // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2. + IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits()); + getTargetStreamer()->EmitDirectiveHSACodeObjectISA( ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU"); - getTargetStreamer().EmitStartOfCodeObjectMetadata(M); } void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { - if (TM.getTargetTriple().getOS() != Triple::AMDHSA) + if (TM.getTargetTriple().getArch() != Triple::amdgcn) + return; + + // Following code requires TargetStreamer to be present. + if (!getTargetStreamer()) return; - getTargetStreamer().EmitEndOfCodeObjectMetadata(); + // Emit ISA Version (NT_AMD_AMDGPU_ISA). + std::string ISAVersionString; + raw_string_ostream ISAVersionStream(ISAVersionString); + IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream); + getTargetStreamer()->EmitISAVersion(ISAVersionStream.str()); + + // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA). + if (TM.getTargetTriple().getOS() == Triple::AMDHSA) { + HSAMetadataStream.end(); + getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata()); + } + + // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA). + if (TM.getTargetTriple().getOS() == Triple::AMDPAL) { + // Copy the PAL metadata from the map where we collected it into a vector, + // then write it as a .note. + PALMD::Metadata PALMetadataVector; + for (auto i : PALMetadataMap) { + PALMetadataVector.push_back(i.first); + PALMetadataVector.push_back(i.second); + } + getTargetStreamer()->EmitPALMetadata(PALMetadataVector); + } } bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough( @@ -154,13 +199,15 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF); OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); - getTargetStreamer().EmitAMDKernelCodeT(KernelCode); + getTargetStreamer()->EmitAMDKernelCodeT(KernelCode); } if (TM.getTargetTriple().getOS() != Triple::AMDHSA) return; - getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction(), - KernelCode); + + HSAMetadataStream.emitKernel(*MF->getFunction(), + getHSACodeProps(*MF, CurrentProgramInfo), + getHSADebugProps(*MF, CurrentProgramInfo)); } void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { @@ -169,7 +216,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { if (MFI->isEntryFunction() && STM.isAmdCodeObjectV2(*MF)) { SmallString<128> SymbolName; getNameWithPrefix(SymbolName, MF->getFunction()), - getTargetStreamer().EmitAMDGPUSymbolType( + getTargetStreamer()->EmitAMDGPUSymbolType( SymbolName, ELF::STT_AMDGPU_HSA_KERNEL); } @@ -190,6 +237,27 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) { return AsmPrinter::doFinalization(M); } +// For the amdpal OS type, read the amdgpu.pal.metadata supplied by the +// frontend into our PALMetadataMap, ready for per-function modification. It +// is a NamedMD containing an MDTuple containing a number of MDNodes each of +// which is an integer value, and each two integer values forms a key=value +// pair that we store as PALMetadataMap[key]=value in the map. 
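+// Hypothetical shape of the input (key values invented for illustration):
+//   !amdgpu.pal.metadata = !{!0}
+//   !0 = !{i32 268435456, i32 42, i32 268435460, i32 7}
+// which yields PALMetadataMap[268435456] = 42 and
+// PALMetadataMap[268435460] = 7.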
+void AMDGPUAsmPrinter::readPALMetadata(Module &M) { + auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata"); + if (!NamedMD || !NamedMD->getNumOperands()) + return; + auto Tuple = dyn_cast(NamedMD->getOperand(0)); + if (!Tuple) + return; + for (unsigned I = 0, E = Tuple->getNumOperands() & -2; I != E; I += 2) { + auto Key = mdconst::dyn_extract(Tuple->getOperand(I)); + auto Val = mdconst::dyn_extract(Tuple->getOperand(I + 1)); + if (!Key || !Val) + continue; + PALMetadataMap[Key->getZExtValue()] = Val->getZExtValue(); + } +} + // Print comments that apply to both callable functions and entry points. void AMDGPUAsmPrinter::emitCommonFunctionComments( uint32_t NumVGPR, @@ -232,6 +300,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { Info = analyzeResourceUsage(MF); } + if (STM.isAmdPalOS()) + EmitPALMetadata(MF, CurrentProgramInfo); if (!STM.isAmdHsaOS()) { EmitProgramInfoSI(MF, CurrentProgramInfo); } @@ -865,10 +935,12 @@ static unsigned getRsrcReg(CallingConv::ID CallConv) { switch (CallConv) { default: LLVM_FALLTHROUGH; case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1; + case CallingConv::AMDGPU_LS: return R_00B528_SPI_SHADER_PGM_RSRC1_LS; case CallingConv::AMDGPU_HS: return R_00B428_SPI_SHADER_PGM_RSRC1_HS; + case CallingConv::AMDGPU_ES: return R_00B328_SPI_SHADER_PGM_RSRC1_ES; case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS; - case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS; case CallingConv::AMDGPU_VS: return R_00B128_SPI_SHADER_PGM_RSRC1_VS; + case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS; } } @@ -895,19 +967,24 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer->EmitIntValue(RsrcReg, 4); OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) | S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4); + unsigned Rsrc2Val = 0; if (STM.isVGPRSpillingEnabled(*MF.getFunction())) { OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4); OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4); + if (TM.getTargetTriple().getOS() == Triple::AMDPAL) + Rsrc2Val = S_00B84C_SCRATCH_EN(CurrentProgramInfo.ScratchBlocks > 0); + } + if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { + OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); + OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4); + OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4); + OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4); + Rsrc2Val |= S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks); + } + if (Rsrc2Val) { + OutStreamer->EmitIntValue(RsrcReg + 4 /*rsrc2*/, 4); + OutStreamer->EmitIntValue(Rsrc2Val, 4); } - } - - if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { - OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); - OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks), 4); - OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); - OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4); - OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4); - OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4); } OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4); @@ -916,6 +993,75 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer->EmitIntValue(MFI->getNumSpilledVGPRs(), 4); } +// This is the equivalent of EmitProgramInfoSI above, but for when the OS type +// is AMDPAL. 
It stores each compute/SPI register setting and other PAL +// metadata items into the PALMetadataMap, combining with any provided by the +// frontend as LLVM metadata. Once all functions are written, PALMetadataMap is +// then written as a single block in the .note section. +void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF, + const SIProgramInfo &CurrentProgramInfo) { + const SIMachineFunctionInfo *MFI = MF.getInfo(); + // Given the calling convention, calculate the register number for rsrc1. In + // principle the register number could change in future hardware, but we know + // it is the same for gfx6-9 (except that LS and ES don't exist on gfx9), so + // we can use the same fixed value that .AMDGPU.config has for Mesa. Note + // that we use a register number rather than a byte offset, so we need to + // divide by 4. + unsigned Rsrc1Reg = getRsrcReg(MF.getFunction()->getCallingConv()) / 4; + unsigned Rsrc2Reg = Rsrc1Reg + 1; + // Also calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used + // with a constant offset to access any non-register shader-specific PAL + // metadata key. + unsigned ScratchSizeKey = PALMD::Key::CS_SCRATCH_SIZE; + switch (MF.getFunction()->getCallingConv()) { + case CallingConv::AMDGPU_PS: + ScratchSizeKey = PALMD::Key::PS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_VS: + ScratchSizeKey = PALMD::Key::VS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_GS: + ScratchSizeKey = PALMD::Key::GS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_ES: + ScratchSizeKey = PALMD::Key::ES_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_HS: + ScratchSizeKey = PALMD::Key::HS_SCRATCH_SIZE; + break; + case CallingConv::AMDGPU_LS: + ScratchSizeKey = PALMD::Key::LS_SCRATCH_SIZE; + break; + } + unsigned NumUsedVgprsKey = ScratchSizeKey + + PALMD::Key::VS_NUM_USED_VGPRS - PALMD::Key::VS_SCRATCH_SIZE; + unsigned NumUsedSgprsKey = ScratchSizeKey + + PALMD::Key::VS_NUM_USED_SGPRS - PALMD::Key::VS_SCRATCH_SIZE; + PALMetadataMap[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU; + PALMetadataMap[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU; + if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) { + PALMetadataMap[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1; + PALMetadataMap[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2; + // ScratchSize is in bytes, 16 aligned. + PALMetadataMap[ScratchSizeKey] |= + alignTo(CurrentProgramInfo.ScratchSize, 16); + } else { + PALMetadataMap[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) | + S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks); + if (CurrentProgramInfo.ScratchBlocks > 0) + PALMetadataMap[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1); + // ScratchSize is in bytes, 16 aligned. 
+ PALMetadataMap[ScratchSizeKey] |= + alignTo(CurrentProgramInfo.ScratchSize, 16); + } + if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { + PALMetadataMap[Rsrc2Reg] |= + S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks); + PALMetadataMap[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable(); + PALMetadataMap[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr(); + } +} + // This is supposed to be log2(Size) static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) { switch (Size) { @@ -1017,6 +1163,53 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, } } +AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps( + const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { + const SISubtarget &STM = MF.getSubtarget(); + const SIMachineFunctionInfo &MFI = *MF.getInfo(); + HSAMD::Kernel::CodeProps::Metadata HSACodeProps; + + HSACodeProps.mKernargSegmentSize = + STM.getKernArgSegmentSize(MF, MFI.getABIArgOffset()); + HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize; + HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize; + HSACodeProps.mKernargSegmentAlign = + std::max(uint32_t(4), MFI.getMaxKernArgAlign()); + HSACodeProps.mWavefrontSize = STM.getWavefrontSize(); + HSACodeProps.mNumSGPRs = CurrentProgramInfo.NumSGPR; + HSACodeProps.mNumVGPRs = CurrentProgramInfo.NumVGPR; + HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize(); + HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack; + HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); + + return HSACodeProps; +} + +AMDGPU::HSAMD::Kernel::DebugProps::Metadata AMDGPUAsmPrinter::getHSADebugProps( + const MachineFunction &MF, + const SIProgramInfo &ProgramInfo) const { + const SISubtarget &STM = MF.getSubtarget(); + HSAMD::Kernel::DebugProps::Metadata HSADebugProps; + + if (!STM.debuggerSupported()) + return HSADebugProps; + + HSADebugProps.mDebuggerABIVersion.push_back(1); + HSADebugProps.mDebuggerABIVersion.push_back(0); + HSADebugProps.mReservedNumVGPRs = ProgramInfo.ReservedVGPRCount; + HSADebugProps.mReservedFirstVGPR = ProgramInfo.ReservedVGPRFirst; + + if (STM.debuggerEmitPrologue()) { + HSADebugProps.mPrivateSegmentBufferSGPR = + ProgramInfo.DebuggerPrivateSegmentBufferSGPR; + HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR = + ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR; + } + + return HSADebugProps; +} + bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 0a58ce06704dd..45b8181846757 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -17,6 +17,7 @@ #include "AMDGPU.h" #include "AMDKernelCodeT.h" +#include "MCTargetDesc/AMDGPUHSAMetadataStreamer.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include @@ -113,9 +114,13 @@ class AMDGPUAsmPrinter final : public AsmPrinter { SIProgramInfo CurrentProgramInfo; DenseMap CallGraphResourceInfo; + AMDGPU::HSAMD::MetadataStreamer HSAMetadataStream; + std::map PALMetadataMap; + uint64_t getFunctionCodeSize(const MachineFunction &MF) const; SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const; + void readPALMetadata(Module &M); void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF); void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo, const 
                         const MachineFunction &MF) const;
@@ -123,10 +128,20 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
                               unsigned &NumSGPR,
                               unsigned &NumVGPR) const;
 
+  AMDGPU::HSAMD::Kernel::CodeProps::Metadata getHSACodeProps(
+      const MachineFunction &MF,
+      const SIProgramInfo &ProgramInfo) const;
+  AMDGPU::HSAMD::Kernel::DebugProps::Metadata getHSADebugProps(
+      const MachineFunction &MF,
+      const SIProgramInfo &ProgramInfo) const;
+
   /// \brief Emit register usage information so that the GPU driver
   /// can correctly setup the GPU state.
   void EmitProgramInfoR600(const MachineFunction &MF);
-  void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
+  void EmitProgramInfoSI(const MachineFunction &MF,
+                         const SIProgramInfo &KernelInfo);
+  void EmitPALMetadata(const MachineFunction &MF,
+                       const SIProgramInfo &KernelInfo);
 
   void emitCommonFunctionComments(uint32_t NumVGPR,
                                   uint32_t NumSGPR,
                                   uint32_t ScratchSize,
@@ -140,7 +155,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
 
   const MCSubtargetInfo* getSTI() const;
 
-  AMDGPUTargetStreamer& getTargetStreamer() const;
+  AMDGPUTargetStreamer* getTargetStreamer() const;
 
   bool doFinalization(Module &M) override;
   bool runOnMachineFunction(MachineFunction &MF) override;
diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 7faf3e123f811..1e4992555dc00 100644
--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -18,6 +18,7 @@
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPURegisterInfo.h"
 #include "AMDGPUSubtarget.h"
+#include "AMDGPUTargetMachine.h"
 #include "SIDefines.h"
 #include "SIISelLowering.h"
 #include "SIInstrInfo.h"
@@ -69,12 +70,14 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   // make the right decision when generating code for different targets.
   const AMDGPUSubtarget *Subtarget;
   AMDGPUAS AMDGPUASI;
+  bool EnableLateStructurizeCFG;
 
 public:
   explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                               CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
     : SelectionDAGISel(*TM, OptLevel) {
     AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
+    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
   }
   ~AMDGPUDAGToDAGISel() override = default;
@@ -786,7 +789,7 @@ void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
 void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
   SDLoc SL(N);
-  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
+  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
   SDValue Ops[8];
 
   SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
@@ -1235,24 +1238,30 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                              SDValue &SOffset,
                                              SDValue &ImmOffset) const {
   SDLoc DL(Constant);
+  const uint32_t Align = 4;
+  const uint32_t MaxImm = alignDown(4095, Align);
   uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
   uint32_t Overflow = 0;
 
-  if (Imm >= 4096) {
-    if (Imm <= 4095 + 64) {
-      // Use an SOffset inline constant for 1..64
-      Overflow = Imm - 4095;
-      Imm = 4095;
+  if (Imm > MaxImm) {
+    if (Imm <= MaxImm + 64) {
+      // Use an SOffset inline constant for 4..64
+      Overflow = Imm - MaxImm;
+      Imm = MaxImm;
     } else {
       // Try to keep the same value in SOffset for adjacent loads, so that
       // the corresponding register contents can be re-used.
       //
-      // Load values with all low-bits set into SOffset, so that a larger
-      // range of values can be covered using s_movk_i32
-      uint32_t High = (Imm + 1) & ~4095;
-      uint32_t Low = (Imm + 1) & 4095;
+      // Load values with all low-bits (except for alignment bits) set into
+      // SOffset, so that a larger range of values can be covered using
+      // s_movk_i32.
+      //
+      // Atomic operations fail to work correctly when individual address
+      // components are unaligned, even if their sum is aligned.
+      uint32_t High = (Imm + Align) & ~4095;
+      uint32_t Low = (Imm + Align) & 4095;
 
       Imm = Low;
-      Overflow = High - 1;
+      Overflow = High - Align;
     }
   }
 
@@ -1636,16 +1645,13 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
     return;
   }
 
-  if (isCBranchSCC(N)) {
-    // This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
-    SelectCode(N);
-    return;
-  }
-
+  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
+  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
+  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
   SDLoc SL(N);
 
-  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
-  CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
+  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
+  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                        N->getOperand(2), // Basic Block
                        VCC.getValue(0));
 }
@@ -1706,7 +1712,7 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
   MachineSDNode *CmpSwap = nullptr;
   if (Subtarget->hasAddr64()) {
-    SDValue SRsrc, VAddr, SOffset, Offset, GLC, SLC;
+    SDValue SRsrc, VAddr, SOffset, Offset, SLC;
 
     if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset,
                           SLC)) {
       unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
@@ -1976,15 +1982,31 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
   assert(Src.getValueType() == MVT::f16);
   Src = stripBitcast(Src);
 
+  // Be careful about folding modifiers if we already have an abs. fneg is
+  // applied last, so we don't want to apply an earlier fneg.
+  if ((Mods & SISrcMods::ABS) == 0) {
+    unsigned ModsTmp;
+    SelectVOP3ModsImpl(Src, Src, ModsTmp);
+
+    if ((ModsTmp & SISrcMods::NEG) != 0)
+      Mods ^= SISrcMods::NEG;
+
+    if ((ModsTmp & SISrcMods::ABS) != 0)
+      Mods |= SISrcMods::ABS;
+  }
+
  // op_sel/op_sel_hi decide the source type and source.
  // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
  // If the source's op_sel is set, it picks the high half of the source
  // register.
 
   Mods |= SISrcMods::OP_SEL_1;
-  if (isExtractHiElt(Src, Src))
+  if (isExtractHiElt(Src, Src)) {
     Mods |= SISrcMods::OP_SEL_0;
 
+    // TODO: Should we try to look for neg/abs here?
+  }
+
   return true;
 }
 
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 5ace79de89ae7..fe2c9337721bb 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -417,8 +417,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
 
   if (Subtarget->hasFFBL())
-    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Legal);
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
 
+  setOperationAction(ISD::CTTZ, MVT::i64, Custom);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
   setOperationAction(ISD::CTLZ, MVT::i64, Custom);
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
 
@@ -476,6 +478,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::CTTZ, VT, Expand);
     setOperationAction(ISD::CTLZ, VT, Expand);
     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+    setOperationAction(ISD::SETCC, VT, Expand);
   }
 
   static const MVT::SimpleValueType FloatVectorTypes[] = {
@@ -508,6 +511,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SELECT_CC, VT, Expand);
     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
     setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+    setOperationAction(ISD::SETCC, VT, Expand);
   }
 
   // This causes using an unrolled select operation rather than expansion with
@@ -823,6 +827,17 @@ bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
   return isZExtFree(Val.getValueType(), VT2);
 }
 
+// v_mad_mix* support a conversion from f16 to f32.
+//
+// There is one special case, when denormals are enabled, that we do not
+// currently handle; otherwise this fold is OK to use.
+bool AMDGPUTargetLowering::isFPExtFoldable(unsigned Opcode,
+                                           EVT DestVT, EVT SrcVT) const {
+  return Opcode == ISD::FMAD && Subtarget->hasMadMixInsts() &&
+         DestVT.getScalarType() == MVT::f32 && !Subtarget->hasFP32Denormals() &&
+         SrcVT.getScalarType() == MVT::f16;
+}
+
 bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
   // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
   // limited number of native 64-bit operations.
Shrinking an operation to fit @@ -848,6 +863,8 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: case CallingConv::AMDGPU_HS: + case CallingConv::AMDGPU_ES: + case CallingConv::AMDGPU_LS: return CC_AMDGPU; case CallingConv::C: case CallingConv::Fast: @@ -869,6 +886,8 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC, case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: case CallingConv::AMDGPU_HS: + case CallingConv::AMDGPU_ES: + case CallingConv::AMDGPU_LS: return RetCC_SI_Shader; case CallingConv::C: case CallingConv::Fast: @@ -1107,9 +1126,11 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - return LowerCTLZ(Op, DAG); + return LowerCTLZ_CTTZ(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); } return Op; @@ -1499,49 +1520,181 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl &Results) const { - assert(Op.getValueType() == MVT::i64); - SDLoc DL(Op); EVT VT = Op.getValueType(); + + assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64"); + EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); - SDValue one = DAG.getConstant(1, DL, HalfVT); - SDValue zero = DAG.getConstant(0, DL, HalfVT); + SDValue One = DAG.getConstant(1, DL, HalfVT); + SDValue Zero = DAG.getConstant(0, DL, HalfVT); //HiLo split SDValue LHS = Op.getOperand(0); - SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero); - SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one); + SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero); + SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, One); SDValue RHS = Op.getOperand(1); - SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero); - SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one); + SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero); + SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, One); - if (VT == MVT::i64 && - DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) && - DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) { + if (DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) && + DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) { SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT), LHS_Lo, RHS_Lo); - SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), zero}); - SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), zero}); + SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), Zero}); + SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), Zero}); Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV)); Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM)); return; } + if (isTypeLegal(MVT::i64)) { + // Compute denominator reciprocal. + unsigned FMAD = Subtarget->hasFP32Denormals() ? 
+ (unsigned)AMDGPUISD::FMAD_FTZ : + (unsigned)ISD::FMAD; + + SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo); + SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi); + SDValue Mad1 = DAG.getNode(FMAD, DL, MVT::f32, Cvt_Hi, + DAG.getConstantFP(APInt(32, 0x4f800000).bitsToFloat(), DL, MVT::f32), + Cvt_Lo); + SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1); + SDValue Mul1 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Rcp, + DAG.getConstantFP(APInt(32, 0x5f7ffffc).bitsToFloat(), DL, MVT::f32)); + SDValue Mul2 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Mul1, + DAG.getConstantFP(APInt(32, 0x2f800000).bitsToFloat(), DL, MVT::f32)); + SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2); + SDValue Mad2 = DAG.getNode(FMAD, DL, MVT::f32, Trunc, + DAG.getConstantFP(APInt(32, 0xcf800000).bitsToFloat(), DL, MVT::f32), + Mul1); + SDValue Rcp_Lo = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Mad2); + SDValue Rcp_Hi = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Trunc); + SDValue Rcp64 = DAG.getBitcast(VT, + DAG.getBuildVector(MVT::v2i32, DL, {Rcp_Lo, Rcp_Hi})); + + SDValue Zero64 = DAG.getConstant(0, DL, VT); + SDValue One64 = DAG.getConstant(1, DL, VT); + SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1); + SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1); + + SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS); + SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64); + SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1); + SDValue Mulhi1_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, + Zero); + SDValue Mulhi1_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, + One); + + SDValue Add1_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Lo, + Mulhi1_Lo, Zero1); + SDValue Add1_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Hi, + Mulhi1_Hi, Add1_Lo.getValue(1)); + SDValue Add1_HiNc = DAG.getNode(ISD::ADD, DL, HalfVT, Rcp_Hi, Mulhi1_Hi); + SDValue Add1 = DAG.getBitcast(VT, + DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi})); + + SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1); + SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2); + SDValue Mulhi2_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, + Zero); + SDValue Mulhi2_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, + One); + + SDValue Add2_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Lo, + Mulhi2_Lo, Zero1); + SDValue Add2_HiC = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_HiNc, + Mulhi2_Hi, Add1_Lo.getValue(1)); + SDValue Add2_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add2_HiC, + Zero, Add2_Lo.getValue(1)); + SDValue Add2 = DAG.getBitcast(VT, + DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi})); + SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2); + + SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3); + + SDValue Mul3_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, Zero); + SDValue Mul3_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, One); + SDValue Sub1_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Lo, + Mul3_Lo, Zero1); + SDValue Sub1_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Hi, + Mul3_Hi, Sub1_Lo.getValue(1)); + SDValue Sub1_Mi = DAG.getNode(ISD::SUB, DL, HalfVT, LHS_Hi, Mul3_Hi); + SDValue Sub1 = DAG.getBitcast(VT, + DAG.getBuildVector(MVT::v2i32, DL, {Sub1_Lo, Sub1_Hi})); + + SDValue MinusOne = DAG.getConstant(0xffffffffu, DL, HalfVT); + SDValue C1 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, MinusOne, Zero, + ISD::SETUGE); + 
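+    // C1/C2/C3 build an unsigned 64-bit "Sub1 >= RHS" test out of 32-bit
+    // selects: compare the high words first, and fall back to the low-word
+    // comparison when the high words are equal.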
+    SDValue C2 = DAG.getSelectCC(DL, Sub1_Lo, RHS_Lo, MinusOne, Zero,
+                                 ISD::SETUGE);
+    SDValue C3 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, C2, C1, ISD::SETEQ);
+
+    // TODO: Here and below portions of the code can be enclosed into if/endif.
+    // Currently control flow is unconditional and we have 4 selects after
+    // potential endif to substitute PHIs.
+
+    // if C3 != 0 ...
+    SDValue Sub2_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Lo,
+                                  RHS_Lo, Zero1);
+    SDValue Sub2_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Mi,
+                                  RHS_Hi, Sub1_Lo.getValue(1));
+    SDValue Sub2_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi,
+                                  Zero, Sub2_Lo.getValue(1));
+    SDValue Sub2 = DAG.getBitcast(VT,
+        DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi}));
+
+    SDValue Add3 = DAG.getNode(ISD::ADD, DL, VT, Mulhi3, One64);
+
+    SDValue C4 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, MinusOne, Zero,
+                                 ISD::SETUGE);
+    SDValue C5 = DAG.getSelectCC(DL, Sub2_Lo, RHS_Lo, MinusOne, Zero,
+                                 ISD::SETUGE);
+    SDValue C6 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, C5, C4, ISD::SETEQ);
+
+    // if (C6 != 0)
+    SDValue Add4 = DAG.getNode(ISD::ADD, DL, VT, Add3, One64);
+
+    SDValue Sub3_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Lo,
+                                  RHS_Lo, Zero1);
+    SDValue Sub3_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi,
+                                  RHS_Hi, Sub2_Lo.getValue(1));
+    SDValue Sub3_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub3_Mi,
+                                  Zero, Sub3_Lo.getValue(1));
+    SDValue Sub3 = DAG.getBitcast(VT,
+        DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi}));
+
+    // endif C6
+    // endif C3
+
+    SDValue Sel1 = DAG.getSelectCC(DL, C6, Zero, Add4, Add3, ISD::SETNE);
+    SDValue Div = DAG.getSelectCC(DL, C3, Zero, Sel1, Mulhi3, ISD::SETNE);
+
+    SDValue Sel2 = DAG.getSelectCC(DL, C6, Zero, Sub3, Sub2, ISD::SETNE);
+    SDValue Rem = DAG.getSelectCC(DL, C3, Zero, Sel2, Sub1, ISD::SETNE);
+
+    Results.push_back(Div);
+    Results.push_back(Rem);
+
+    return;
+  }
+
+  // R600 expansion.
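+  // (A restoring shift-subtract division: each iteration below shifts one
+  // numerator bit into REM and sets the corresponding quotient bit in DIV_Lo
+  // whenever REM >= RHS.)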
// Get Speculative values SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo); SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo); - SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ); - SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, zero}); + SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, Zero, REM_Part, LHS_Hi, ISD::SETEQ); + SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, Zero}); REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM); - SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ); - SDValue DIV_Lo = zero; + SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, Zero, DIV_Part, Zero, ISD::SETEQ); + SDValue DIV_Lo = Zero; const unsigned halfBitWidth = HalfVT.getSizeInBits(); @@ -1550,7 +1703,7 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op, SDValue POS = DAG.getConstant(bitPos, DL, HalfVT); // Get value of high bit SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); - HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); + HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, One); HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit); // Shift @@ -1559,7 +1712,7 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op, REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit); SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT); - SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETUGE); + SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, Zero, ISD::SETUGE); DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT); @@ -2016,13 +2169,33 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); } -SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { +static bool isCtlzOpc(unsigned Opc) { + return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF; +} + +static bool isCttzOpc(unsigned Opc) { + return Opc == ISD::CTTZ || Opc == ISD::CTTZ_ZERO_UNDEF; +} + +SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const { SDLoc SL(Op); SDValue Src = Op.getOperand(0); - bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF; + bool ZeroUndef = Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF || + Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF; + + unsigned ISDOpc, NewOpc; + if (isCtlzOpc(Op.getOpcode())) { + ISDOpc = ISD::CTLZ_ZERO_UNDEF; + NewOpc = AMDGPUISD::FFBH_U32; + } else if (isCttzOpc(Op.getOpcode())) { + ISDOpc = ISD::CTTZ_ZERO_UNDEF; + NewOpc = AMDGPUISD::FFBL_B32; + } else + llvm_unreachable("Unexpected OPCode!!!"); + if (ZeroUndef && Src.getValueType() == MVT::i32) - return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Src); + return DAG.getNode(NewOpc, SL, MVT::i32, Src); SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src); @@ -2035,24 +2208,32 @@ SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); - SDValue Hi0 = DAG.getSetCC(SL, SetCCVT, Hi, Zero, ISD::SETEQ); + SDValue HiOrLo = isCtlzOpc(Op.getOpcode()) ? 
Hi : Lo; + SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, Zero, ISD::SETEQ); - SDValue CtlzLo = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Lo); - SDValue CtlzHi = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Hi); + SDValue OprLo = DAG.getNode(ISDOpc, SL, MVT::i32, Lo); + SDValue OprHi = DAG.getNode(ISDOpc, SL, MVT::i32, Hi); const SDValue Bits32 = DAG.getConstant(32, SL, MVT::i32); - SDValue Add = DAG.getNode(ISD::ADD, SL, MVT::i32, CtlzLo, Bits32); - - // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x)) - SDValue NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0, Add, CtlzHi); + SDValue Add, NewOpr; + if (isCtlzOpc(Op.getOpcode())) { + Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprLo, Bits32); + // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x)) + NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprHi); + } else { + Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprHi, Bits32); + // cttz(x) = lo_32(x) == 0 ? cttz(hi_32(x)) + 32 : cttz(lo_32(x)) + NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprLo); + } if (!ZeroUndef) { // Test if the full 64-bit input is zero. // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32, // which we probably don't want. - SDValue Lo0 = DAG.getSetCC(SL, SetCCVT, Lo, Zero, ISD::SETEQ); - SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0, Hi0); + SDValue LoOrHi = isCtlzOpc(Op.getOpcode()) ? Lo : Hi; + SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, Zero, ISD::SETEQ); + SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0OrHi0, Hi0orLo0); // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction // with the same cycles, otherwise it is slower. @@ -2063,11 +2244,11 @@ SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { // The instruction returns -1 for 0 input, but the defined intrinsic // behavior is to return the number of bits. - NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32, - SrcIsZero, Bits32, NewCtlz); + NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, + SrcIsZero, Bits32, NewOpr); } - return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewCtlz); + return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr); } SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, @@ -2979,13 +3160,10 @@ static bool isNegativeOne(SDValue Val) { return false; } -static bool isCtlzOpc(unsigned Opc) { - return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF; -} - -SDValue AMDGPUTargetLowering::getFFBH_U32(SelectionDAG &DAG, +SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG, SDValue Op, - const SDLoc &DL) const { + const SDLoc &DL, + unsigned Opc) const { EVT VT = Op.getValueType(); EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT); if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() && @@ -2995,11 +3173,11 @@ SDValue AMDGPUTargetLowering::getFFBH_U32(SelectionDAG &DAG, if (VT != MVT::i32) Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op); - SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, DL, MVT::i32, Op); + SDValue FFBX = DAG.getNode(Opc, DL, MVT::i32, Op); if (VT != MVT::i32) - FFBH = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBH); + FFBX = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBX); - return FFBH; + return FFBX; } // The native instructions return -1 on 0 input. Optimize out a select that @@ -3009,7 +3187,7 @@ SDValue AMDGPUTargetLowering::getFFBH_U32(SelectionDAG &DAG, // against the bitwidth. // // TODO: Should probably combine against FFBH_U32 instead of ctlz directly. 
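The combine below leans on the hardware behavior described in the comment above: the native ffbh/ffbl instructions already return -1 on a zero input, so a select that substitutes -1 for the zero case is redundant. A standalone C++ check of that equivalence (the two helpers are scalar models written for illustration, not LLVM APIs):

#include <cassert>
#include <cstdint>

// Scalar models of the AMDGPU nodes: bit counts that yield -1 on zero input.
static int32_t ffbh_u32(uint32_t X) { return X ? __builtin_clz(X) : -1; }
static int32_t ffbl_b32(uint32_t X) { return X ? __builtin_ctz(X) : -1; }

int main() {
  for (uint32_t X : {0u, 1u, 0x80000000u, 0x00f00000u}) {
    // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) --> ffbh_u32 x
    assert((X == 0 ? -1 : (int32_t)__builtin_clz(X)) == ffbh_u32(X));
    // select (setcc x, 0, ne), (cttz_zero_undef x), -1 --> ffbl_b32 x
    assert((X != 0 ? (int32_t)__builtin_ctz(X) : -1) == ffbl_b32(X));
  }
  return 0;
}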
-SDValue AMDGPUTargetLowering::performCtlzCombine(const SDLoc &SL, SDValue Cond,
+SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond,
                                                  SDValue LHS, SDValue RHS,
                                                  DAGCombinerInfo &DCI) const {
   ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
@@ -3020,20 +3198,25 @@ SDValue AMDGPUTargetLowering::performCtlzCombine(const SDLoc &SL, SDValue Cond,
   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
   SDValue CmpLHS = Cond.getOperand(0);
 
   // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
+  // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_b32 x
   if (CCOpcode == ISD::SETEQ &&
-      isCtlzOpc(RHS.getOpcode()) &&
+      (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
       RHS.getOperand(0) == CmpLHS &&
       isNegativeOne(LHS)) {
-    return getFFBH_U32(DAG, CmpLHS, SL);
+    unsigned Opc = isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 :
+                                                AMDGPUISD::FFBH_U32;
+    return getFFBX_U32(DAG, CmpLHS, SL, Opc);
   }
 
   // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
+  // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_b32 x
   if (CCOpcode == ISD::SETNE &&
-      isCtlzOpc(LHS.getOpcode()) &&
+      (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) &&
       LHS.getOperand(0) == CmpLHS &&
       isNegativeOne(RHS)) {
-    return getFFBH_U32(DAG, CmpLHS, SL);
+    unsigned Opc = isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 :
+                                                AMDGPUISD::FFBH_U32;
+    return getFFBX_U32(DAG, CmpLHS, SL, Opc);
   }
 
   return SDValue();
@@ -3166,7 +3349,7 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
   }
 
   // There's no reason to not do this if the condition has other uses.
-  return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
+  return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
 }
 
 static bool isConstantFPZero(SDValue N) {
@@ -3754,6 +3937,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(BFM)
   NODE_NAME_CASE(FFBH_U32)
   NODE_NAME_CASE(FFBH_I32)
+  NODE_NAME_CASE(FFBL_B32)
   NODE_NAME_CASE(MUL_U24)
   NODE_NAME_CASE(MUL_I24)
   NODE_NAME_CASE(MULHI_U24)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index a2af7c3d79043..cdb15186f86e7 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -32,7 +32,7 @@ class AMDGPUTargetLowering : public TargetLowering {
   /// legalized from a smaller type VT. Need to match pre-legalized type because
   /// the generic legalization inserts the add/sub between the select and
   /// compare.
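For context, the 64-bit lowering these combines pair with (LowerCTLZ_CTTZ in the .cpp above) reduces to a simple half-word recipe. A scalar sketch of the nonzero-input case, written for illustration rather than taken from LLVM:

#include <cassert>
#include <cstdint>

// ctlz64(x) = hi32(x) == 0 ? ctlz32(lo32(x)) + 32 : ctlz32(hi32(x))
static unsigned ctlz64(uint64_t X) {
  uint32_t Hi = X >> 32, Lo = (uint32_t)X;
  return Hi == 0 ? __builtin_clz(Lo) + 32 : __builtin_clz(Hi);
}

// cttz64(x) = lo32(x) == 0 ? cttz32(hi32(x)) + 32 : cttz32(lo32(x))
static unsigned cttz64(uint64_t X) {
  uint32_t Hi = X >> 32, Lo = (uint32_t)X;
  return Lo == 0 ? __builtin_ctz(Hi) + 32 : __builtin_ctz(Lo);
}

int main() {
  for (uint64_t X : {1ull, 42ull, 0x100000000ull, 0x8000000000000000ull}) {
    assert(ctlz64(X) == (unsigned)__builtin_clzll(X));
    assert(cttz64(X) == (unsigned)__builtin_ctzll(X));
  }
  return 0;
}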
-  SDValue getFFBH_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL) const;
+  SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, unsigned Opc) const;
 
 public:
   static bool isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op);
@@ -57,7 +57,7 @@ class AMDGPUTargetLowering : public TargetLowering {
   SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
 
-  SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const;
   SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
@@ -88,7 +88,7 @@ class AMDGPUTargetLowering : public TargetLowering {
   SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performMulLoHi24Combine(SDNode *N, DAGCombinerInfo &DCI) const;
-  SDValue performCtlzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
+  SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
                              SDValue RHS, DAGCombinerInfo &DCI) const;
   SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -143,6 +143,7 @@ class AMDGPUTargetLowering : public TargetLowering {
   bool isZExtFree(Type *Src, Type *Dest) const override;
   bool isZExtFree(EVT Src, EVT Dest) const override;
   bool isZExtFree(SDValue Val, EVT VT2) const override;
+  bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const override;
 
   bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
 
@@ -371,6 +372,7 @@ enum NodeType : unsigned {
   BFM, // Insert a range of bits into a 32-bit word.
   FFBH_U32, // ctlz with -1 if input is zero.
   FFBH_I32,
+  FFBL_B32, // cttz with -1 if input is zero.
   MUL_U24,
   MUL_I24,
   MULHI_U24,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index d8b03c6aab483..c024010f3e96e 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -298,6 +298,8 @@ def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
 def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
 def AMDGPUffbh_i32 : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>;
 
+def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;
+
 // Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
 // when performing the multiply. The result is a 32-bit value.
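The def that follows wires up the unsigned variant of these nodes. As a quick standalone illustration of the 24-bit multiply semantics just described (top 8 bits of each operand ignored, full 32-bit result):

#include <cassert>
#include <cstdint>

// Model of mul_u24: mask each operand to 24 bits, multiply, keep 32 bits.
static uint32_t mul_u24(uint32_t A, uint32_t B) {
  return (A & 0xffffff) * (B & 0xffffff);
}

int main() {
  assert(mul_u24(0xff000003u, 5u) == 15u);       // high 8 bits are ignored
  assert(mul_u24(0x00ffffffu, 0x00ffffffu) ==
         (uint32_t)(0xffffffull * 0xffffffull)); // result truncated to 32 bits
  return 0;
}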
 def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td
index 6d388e48b76f5..6498aafc6acf9 100644
--- a/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -167,7 +167,6 @@ def COND_OLE : PatLeaf <
   [{return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;}]
 >;
 
-
 def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
 def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
 
@@ -399,26 +398,14 @@ def mskor_global : PatFrag<(ops node:$val, node:$ptr),
   return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;
 }]>;
 
-multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
-
-  def _32_local : PatFrag <
-    (ops node:$ptr, node:$cmp, node:$swap),
-    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
-      AtomicSDNode *AN = cast<AtomicSDNode>(N);
-      return AN->getMemoryVT() == MVT::i32 &&
-             AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
-  }]>;
-
-  def _64_local : PatFrag<
+class AtomicCmpSwapLocal <SDNode cmp_swap_node> : PatFrag<
     (ops node:$ptr, node:$cmp, node:$swap),
     (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
       AtomicSDNode *AN = cast<AtomicSDNode>(N);
-      return AN->getMemoryVT() == MVT::i64 &&
-             AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
-  }]>;
-}
+      return AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS;
+}]>;
 
-defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
+def atomic_cmp_swap_local : AtomicCmpSwapLocal <atomic_cmp_swap>;
 
 multiclass global_binary_atomic_op<SDNode atomic_op> {
   def "" : PatFrag<
@@ -505,7 +492,7 @@ def FP_HALF : PatLeaf <
 /* -------------------------------------- */
 
 class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
-  : Pat <
+  : AMDGPUPat <
   (fpow f32:$src0, f32:$src1),
   (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
 >;
@@ -516,30 +503,34 @@ class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
 
 /* Extract element pattern */
 class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                        SubRegIndex sub_reg>
-  : Pat<
+  : AMDGPUPat<
   (sub_type (extractelt vec_type:$src, sub_idx)),
   (EXTRACT_SUBREG $src, sub_reg)
->;
+> {
+  let SubtargetPredicate = TruePredicate;
+}
 
 /* Insert element pattern */
 class Insert_Element <ValueType elem_type, ValueType vec_type,
                       int sub_idx, SubRegIndex sub_reg>
-  : Pat <
+  : AMDGPUPat <
   (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
   (INSERT_SUBREG $vec, $elem, sub_reg)
->;
+> {
+  let SubtargetPredicate = TruePredicate;
+}
 
 // XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
 // can handle COPY instructions.
 // bitconvert pattern
-class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
   (dt (bitconvert (st rc:$src0))),
   (dt rc:$src0)
 >;
 
 // XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
 // can handle COPY instructions.
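Several rewrites in the hunk below (BFIPatterns) depend on the identity (y & x) | (z & ~x) == z ^ (x & (y ^ z)), which lets both the ISA-doc form and the SHA-256 Ch form select to a single BFI instruction. A standalone brute-force check of that identity, included purely for illustration:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, ~0u, 0x0f0f0f0fu, 0x12345678u, 0x80000001u};
  for (uint32_t X : Vals)
    for (uint32_t Y : Vals)
      for (uint32_t Z : Vals)
        assert(((Y & X) | (Z & ~X)) == (Z ^ (X & (Y ^ Z))));
  return 0;
}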
-class DwordAddrPat <ValueType vt, RegisterClass rc> : Pat <
+class DwordAddrPat <ValueType vt, RegisterClass rc> : AMDGPUPat <
   (vt (AMDGPUdwordaddr (vt rc:$addr))),
   (vt rc:$addr)
 >;
 
@@ -551,30 +542,30 @@ multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32,
                         RegisterClass RC64> {
   // Definition from ISA doc:
   // (y & x) | (z & ~x)
-  def : Pat <
+  def : AMDGPUPat <
     (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
     (BFI_INT $x, $y, $z)
   >;
 
   // SHA-256 Ch function
   // z ^ (x & (y ^ z))
-  def : Pat <
+  def : AMDGPUPat <
     (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
     (BFI_INT $x, $y, $z)
   >;
 
-  def : Pat <
+  def : AMDGPUPat <
     (fcopysign f32:$src0, f32:$src1),
     (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
   >;
 
-  def : Pat <
+  def : AMDGPUPat <
     (f32 (fcopysign f32:$src0, f64:$src1)),
     (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
              (i32 (EXTRACT_SUBREG $src1, sub1)))
   >;
 
-  def : Pat <
+  def : AMDGPUPat <
     (f64 (fcopysign f64:$src0, f64:$src1)),
     (REG_SEQUENCE RC64,
       (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
@@ -583,7 +574,7 @@ multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32,
 >;
 
-  def : Pat <
+  def : AMDGPUPat <
     (f64 (fcopysign f64:$src0, f32:$src1)),
     (REG_SEQUENCE RC64,
       (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
@@ -596,7 +587,7 @@ multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32,
 // BFI_INT (XOR x, y), z, y
-class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : Pat <
+class SHA256MaPattern <Instruction BFI_INT, Instruction XOR> : AMDGPUPat <
   (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
   (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
 >;
@@ -613,24 +604,24 @@ def IMMPopCount : SDNodeXForm;
 
 multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
-  def : Pat <
+  def : AMDGPUPat <
     (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
     (UBFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
   >;
 
-  def : Pat <
+  def : AMDGPUPat <
     (srl (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
     (UBFE $src, (i32 0), $width)
   >;
 
-  def : Pat <
+  def : AMDGPUPat <
     (sra (shl_oneuse i32:$src, (sub 32, i32:$width)), (sub 32, i32:$width)),
     (SBFE $src, (i32 0), $width)
   >;
 }
 
 // rotr pattern
-class ROTRPattern <Instruction BIT_ALIGN> : Pat <
+class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
   (rotr i32:$src0, i32:$src1),
   (BIT_ALIGN $src0, $src0, $src1)
 >;
@@ -641,7 +632,7 @@ class IntMed3Pat <Instruction med3Inst,
-                  ValueType vt = i32> : Pat<
+                  ValueType vt = i32> : AMDGPUPat<
   (max (min_oneuse vt:$src0, vt:$src1),
        (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)),
   (med3Inst $src0, $src1, $src2)
@@ -661,24 +652,24 @@ def cvt_flr_i32_f32 : PatFrag <
   [{ (void)N; return TM.Options.NoNaNsFPMath; }]
 >;
 
-class IMad24Pat <Instruction Inst, bit HasClamp = 0> : Pat <
+class IMad24Pat <Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
   (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
   !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                 (Inst $src0, $src1, $src2))
 >;
 
-class UMad24Pat <Instruction Inst, bit HasClamp = 0> : Pat <
+class UMad24Pat <Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
   (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
   !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                 (Inst $src0, $src1, $src2))
 >;
 
-class RcpPat <Instruction RcpInst, ValueType vt> : Pat <
+class RcpPat <Instruction RcpInst, ValueType vt> : AMDGPUPat <
   (fdiv FP_ONE, vt:$src),
   (RcpInst $src)
 >;
 
-class RsqPat <Instruction RsqInst, ValueType vt> : Pat <
+class RsqPat <Instruction RsqInst, ValueType vt> : AMDGPUPat <
   (AMDGPUrcp (fsqrt vt:$src)),
   (RsqInst $src)
 >;
diff --git a/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index d3d5c6dc9d932..e7e54750fe667 100644
--- a/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -30,6 +30,7 @@
 #include "llvm/IR/ValueSymbolTable.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
 #include
 #include
 
@@ -168,10 +169,13 @@ namespace {
 
   AMDGPULibCalls Simplifier;
 
+  const TargetOptions Options;
+
   public:
    static char ID; // Pass identification
 
-   AMDGPUSimplifyLibCalls() : FunctionPass(ID) {
+   AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
+     :
FunctionPass(ID), Options(Opt) { initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry()); } @@ -1680,14 +1684,34 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) { } // Public interface to the Simplify LibCalls pass. -FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass() { - return new AMDGPUSimplifyLibCalls(); +FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) { + return new AMDGPUSimplifyLibCalls(Opt); } FunctionPass *llvm::createAMDGPUUseNativeCallsPass() { return new AMDGPUUseNativeCalls(); } +static bool setFastFlags(Function &F, const TargetOptions &Options) { + AttrBuilder B; + + if (Options.UnsafeFPMath || Options.NoInfsFPMath) + B.addAttribute("no-infs-fp-math", "true"); + if (Options.UnsafeFPMath || Options.NoNaNsFPMath) + B.addAttribute("no-nans-fp-math", "true"); + if (Options.UnsafeFPMath) { + B.addAttribute("less-precise-fpmad", "true"); + B.addAttribute("unsafe-fp-math", "true"); + } + + if (!B.hasAttributes()) + return false; + + F.addAttributes(AttributeList::FunctionIndex, B); + + return true; +} + bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) { if (skipFunction(F)) return false; @@ -1699,6 +1723,9 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) { F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';); + if (!EnablePreLink) + Changed |= setFastFlags(F, Options); + for (auto &BB : F) { for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) { // Ignore non-calls. diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 1d8fc70fe3d3d..c15b37f9e9cd8 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -230,7 +230,7 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); Expr->print(Str, MAI); - OutStreamer->emitRawComment(" mask branch " + BBStr); + OutStreamer->emitRawComment(Twine(" mask branch ") + BBStr); } return; diff --git a/lib/Target/AMDGPU/AMDGPUOCL12Adapter.cpp b/lib/Target/AMDGPU/AMDGPUOCL12Adapter.cpp index c5e416d0a8fc0..35c73a11b554a 100644 --- a/lib/Target/AMDGPU/AMDGPUOCL12Adapter.cpp +++ b/lib/Target/AMDGPU/AMDGPUOCL12Adapter.cpp @@ -78,7 +78,8 @@ static bool isNonDefaultAddrSpacePtr(Type *Ty, AMDGPUAS AMDGPUASI) { static bool hasNonDefaultAddrSpaceArg(const Function *F, AMDGPUAS AMDGPUASI) { for (const Argument &AI: F->args()) - if (isNonDefaultAddrSpacePtr(AI.getType(), AMDGPUASI)) + if (!AI.hasStructRetAttr() && + isNonDefaultAddrSpacePtr(AI.getType(), AMDGPUASI)) return true; return false; } @@ -224,6 +225,7 @@ static bool findAndDefineBuiltinCalls(Module &M) { if (!F.empty() || F.use_empty() || !F.getName().startswith("_Z") || !hasNonDefaultAddrSpaceArg(&F, AMDGPUASI)) continue; + // These functions should not be modified. if (F.getName().find("async_work_group", 0) == StringRef::npos && F.getName().find("prefetch", 0) == StringRef::npos) { isModified = true; diff --git a/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp new file mode 100644 index 0000000000000..68a204fca23e3 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp @@ -0,0 +1,98 @@ +//===- AMDGPUOpenCLEnqueuedBlockLowering.cpp - Lower enqueued block -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// \brief This post-linking pass replaces the function pointer of an enqueued
+// block kernel with a global variable (runtime handle) and adds a
+// "runtime-handle" attribute to the enqueued block kernel.
+//
+// In LLVM CodeGen the runtime-handle metadata will be translated to
+// RuntimeHandle metadata in the code object. The runtime allocates a global
+// buffer for each kernel with RuntimeHandle metadata and saves the kernel
+// address required for the AQL packet into the buffer. The __enqueue_kernel
+// function in the device library knows that the invoke function pointer in
+// the block literal is actually a runtime handle, loads the kernel address
+// from it, and puts it into the AQL packet for dispatching.
+//
+// This cannot be done in the FE since the FE cannot create a unique global
+// variable with external linkage across LLVM modules. A global variable with
+// internal linkage does not work since optimization passes will try to
+// replace loads of the global variable with its initialization value.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "amdgpu-lower-enqueued-block"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief Lower enqueued blocks.
+class AMDGPUOpenCLEnqueuedBlockLowering : public ModulePass {
+public:
+  static char ID;
+
+  explicit AMDGPUOpenCLEnqueuedBlockLowering() : ModulePass(ID) {}
+
+private:
+  bool runOnModule(Module &M) override;
+};
+
+} // end anonymous namespace
+
+char AMDGPUOpenCLEnqueuedBlockLowering::ID = 0;
+
+char &llvm::AMDGPUOpenCLEnqueuedBlockLoweringID =
+    AMDGPUOpenCLEnqueuedBlockLowering::ID;
+
+INITIALIZE_PASS(AMDGPUOpenCLEnqueuedBlockLowering, DEBUG_TYPE,
+                "Lower OpenCL enqueued blocks", false, false)
+
+ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() {
+  return new AMDGPUOpenCLEnqueuedBlockLowering();
+}
+
+bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
+  auto &C = M.getContext();
+  auto AS = AMDGPU::getAMDGPUAS(M);
+  bool Changed = false;
+  for (auto &F : M.functions()) {
+    if (F.hasFnAttribute("enqueued-block")) {
+      if (!F.hasOneUse() || !F.user_begin()->hasOneUse() ||
+          !isa<ConstantExpr>(*F.user_begin()) ||
+          !isa<ConstantExpr>(*F.user_begin()->user_begin())) {
+        continue;
+      }
+      auto *BitCast = cast<ConstantExpr>(*F.user_begin());
+      auto *AddrCast = cast<ConstantExpr>(*BitCast->user_begin());
+      auto RuntimeHandle = (F.getName() + "_runtime_handle").str();
+      auto *GV = new GlobalVariable(
+          M, Type::getInt8Ty(C)->getPointerTo(AS.GLOBAL_ADDRESS),
+          /*IsConstant=*/true, GlobalValue::ExternalLinkage,
+          /*Initializer=*/nullptr, RuntimeHandle, /*InsertBefore=*/nullptr,
+          GlobalValue::NotThreadLocal, AS.GLOBAL_ADDRESS,
+          /*IsExternallyInitialized=*/true);
+      DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');
+      auto *NewPtr = ConstantExpr::getPointerCast(GV, AddrCast->getType());
+      AddrCast->replaceAllUsesWith(NewPtr);
+      F.addFnAttr("runtime-handle", RuntimeHandle);
+      F.setLinkage(GlobalValue::ExternalLinkage);
+      Changed = true;
+    }
+  }
+  return Changed;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUPTNote.h b/lib/Target/AMDGPU/AMDGPUPTNote.h
index 71b9ab699b96f..b50a2eb8e9e71 100644
--- a/lib/Target/AMDGPU/AMDGPUPTNote.h
+++ b/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -25,18 +25,22 @@
 const char SectionName[] = ".note";
 
 const char NoteName[] = "AMD";
 
-// TODO: Move this enum to include/llvm/Support so it can be used in tools?
+// TODO: Remove this file once we drop code object v2.
 enum NoteType {
+  NT_AMDGPU_HSA_RESERVED_0 = 0,
   NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
   NT_AMDGPU_HSA_HSAIL = 2,
   NT_AMDGPU_HSA_ISA = 3,
   NT_AMDGPU_HSA_PRODUCER = 4,
   NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
   NT_AMDGPU_HSA_EXTENSION = 6,
-  NT_AMDGPU_HSA_CODE_OBJECT_METADATA = 10,
+  NT_AMDGPU_HSA_RESERVED_7 = 7,
+  NT_AMDGPU_HSA_RESERVED_8 = 8,
+  NT_AMDGPU_HSA_RESERVED_9 = 9,
   NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
   NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
 };
+
 }
 }
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 59f9baf9af04f..c3789742b84f5 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -96,7 +96,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     TargetTriple(TT),
     Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
     IsaVersion(ISAVersion0_0_0),
-    WavefrontSize(64),
+    WavefrontSize(0),
     LocalMemorySize(0),
     LDSBankCount(0),
     MaxPrivateElementSize(0),
@@ -110,6 +110,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     DX10Clamp(false),
     FlatForGlobal(false),
     AutoWaitcntBeforeBarrier(false),
+    CodeObjectV3(false),
     UnalignedScratchAccess(false),
     UnalignedBufferAccess(false),
@@ -137,6 +138,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     Has16BitInsts(false),
     HasIntClamp(false),
     HasVOP3PInsts(false),
+    HasMadMixInsts(false),
     HasMovrel(false),
     HasVGPRIndexMode(false),
     HasScalarStores(false),
@@ -189,14 +191,31 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
   return NumWaves;
 }
 
+std::pair<unsigned, unsigned>
+AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
+  switch (CC) {
+  case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::SPIR_KERNEL:
+    return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
+  case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_LS:
+  case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_ES:
+  case CallingConv::AMDGPU_GS:
+  case CallingConv::AMDGPU_PS:
+    return std::make_pair(1, getWavefrontSize());
+  default:
+    return std::make_pair(1, 16 * getWavefrontSize());
+  }
+}
+
 std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
   const Function &F) const {
+  // FIXME: 1024 if function.
   // Default minimum/maximum flat work group sizes.
   std::pair<unsigned, unsigned> Default =
-    AMDGPU::isCompute(F.getCallingConv()) ?
-      std::pair<unsigned, unsigned>(getWavefrontSize() * 2,
-                                    getWavefrontSize() * 4) :
-      std::pair<unsigned, unsigned>(1, getWavefrontSize());
+    getDefaultFlatWorkGroupSize(F.getCallingConv());
 
   // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
   // starts using "amdgpu-flat-work-group-size" attribute.
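A standalone sketch of the default flat work group size selection added above, assuming the usual 64-wide GCN wavefront (the enum and helper names here are illustrative, not LLVM's):

#include <cassert>
#include <utility>

enum class Conv { Compute, Shader, Other }; // illustrative buckets

static std::pair<unsigned, unsigned> defaultFlatWGSize(Conv CC, unsigned Wave) {
  switch (CC) {
  case Conv::Compute: return {Wave * 2, Wave * 4}; // CS/KERNEL/SPIR_KERNEL
  case Conv::Shader:  return {1, Wave};            // VS/LS/HS/ES/GS/PS
  default:            return {1, 16 * Wave};
  }
}

int main() {
  assert(defaultFlatWGSize(Conv::Compute, 64) == std::make_pair(128u, 256u));
  assert(defaultFlatWGSize(Conv::Shader, 64) == std::make_pair(1u, 64u));
  assert(defaultFlatWGSize(Conv::Other, 64) == std::make_pair(1u, 1024u));
  return 0;
}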
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 7e7a09648ed11..56a5fa634b55c 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -119,6 +119,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
   bool DX10Clamp;
   bool FlatForGlobal;
   bool AutoWaitcntBeforeBarrier;
+  bool CodeObjectV3;
   bool UnalignedScratchAccess;
   bool UnalignedBufferAccess;
   bool HasApertureRegs;
@@ -147,6 +148,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
   bool Has16BitInsts;
   bool HasIntClamp;
   bool HasVOP3PInsts;
+  bool HasMadMixInsts;
   bool HasMovrel;
   bool HasVGPRIndexMode;
   bool HasScalarStores;
@@ -214,6 +216,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
            TargetTriple.getEnvironmentName() == "amdgizcl";
   }
 
+  bool isAmdPalOS() const {
+    return TargetTriple.getOS() == Triple::AMDPAL;
+  }
+
   Generation getGeneration() const {
     return Gen;
   }
@@ -314,7 +320,7 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
   }
 
   bool hasMadMixInsts() const {
-    return getGeneration() >= GFX9;
+    return HasMadMixInsts;
   }
 
   bool hasCARRY() const {
@@ -395,6 +401,10 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
     return AutoWaitcntBeforeBarrier;
   }
 
+  bool hasCodeObjectV3() const {
+    return CodeObjectV3;
+  }
+
   bool hasUnalignedBufferAccess() const {
     return UnalignedBufferAccess;
   }
@@ -578,6 +588,9 @@ class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
                                       FlatWorkGroupSize);
   }
 
+  /// \returns Default range of flat work group sizes for a calling convention.
+  std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
+
   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
   /// for function \p F, or minimum/maximum flat work group sizes explicitly
   /// requested using "amdgpu-flat-work-group-size" attribute attached to
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dd4e663202434..97faece8c8dfd 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -117,10 +117,10 @@ static cl::opt<bool> EnableSIInsertWaitcntsPass(
   cl::init(true));
 
 // Option to run late CFG structurizer
-static cl::opt<bool> LateCFGStructurize(
+static cl::opt<bool, true> LateCFGStructurize(
   "amdgpu-late-structurize",
   cl::desc("Enable late CFG structurization"),
-  cl::init(false),
+  cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
   cl::Hidden);
 
 static cl::opt<bool> EnableAMDGPUFunctionCalls(
@@ -161,6 +161,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUAnnotateUniformValuesPass(*PR);
   initializeAMDGPUArgumentUsageInfoPass(*PR);
   initializeAMDGPULowerIntrinsicsPass(*PR);
+  initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
   initializeAMDGPUPromoteAllocaPass(*PR);
   initializeAMDGPUCodeGenPreparePass(*PR);
   initializeAMDGPURewriteOutArgumentsPass(*PR);
@@ -302,6 +303,8 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
 
 AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
 
+bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
+
 StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
   Attribute GPUAttr = F.getFnAttribute("target-cpu");
   return GPUAttr.hasAttribute(Attribute::None) ?
@@ -378,17 +381,18 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
       PM.add(createAMDGPUAlwaysInlinePass(false));
   });
 
+  const auto &Opt = Options;
   Builder.addExtension(
     PassManagerBuilder::EP_EarlyAsPossible,
-    [AMDGPUAA, LibCallSimplify](const PassManagerBuilder &,
-                                legacy::PassManagerBase &PM) {
+    [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
+                                      legacy::PassManagerBase &PM) {
       if (AMDGPUAA) {
         PM.add(createAMDGPUAAWrapperPass());
         PM.add(createAMDGPUExternalAAWrapperPass());
       }
       PM.add(llvm::createAMDGPUUseNativeCallsPass());
       if (LibCallSimplify)
-        PM.add(llvm::createAMDGPUSimplifyLibCallsPass());
+        PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
   });
 
   Builder.addExtension(
@@ -619,6 +623,9 @@ void AMDGPUPassConfig::addIRPasses() {
   // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
   addPass(createAMDGPUOpenCLImageTypeLoweringPass());
 
+  // Replace OpenCL enqueued block function pointers with global variables.
+  addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());
+
   if (TM.getOptLevel() > CodeGenOpt::None) {
     addPass(createInferAddressSpacesPass());
     addPass(createAMDGPUPromoteAlloca());
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 9a675b15d54ec..5627b4cb412e1 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -41,6 +41,8 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
   StringRef getFeatureString(const Function &F) const;
 
 public:
+  static bool EnableLateStructurizeCFG;
+
   AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
                       StringRef FS, TargetOptions Options,
                       Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index d607fc54e8e42..153a4a8ddb7e3 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -491,7 +491,9 @@ static bool isArgPassedInSGPR(const Argument *A) {
   case CallingConv::SPIR_KERNEL:
     return true;
   case CallingConv::AMDGPU_VS:
+  case CallingConv::AMDGPU_LS:
   case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_ES:
   case CallingConv::AMDGPU_GS:
   case CallingConv::AMDGPU_PS:
   case CallingConv::AMDGPU_CS:
diff --git a/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index f0ebfa3ce776e..6107f3a7dd18e 100644
--- a/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -21,18 +21,26 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-unify-divergent-exit-nodes"
@@ -42,6 +50,7 @@ namespace {
 
 class
AMDGPUUnifyDivergentExitNodes : public FunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid
+
   AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
     initializeAMDGPUUnifyDivergentExitNodesPass(*PassRegistry::getPassRegistry());
   }
@@ -51,9 +60,12 @@ class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
   bool runOnFunction(Function &F) override;
 };
 
-}
+} // end anonymous namespace
 
 char AMDGPUUnifyDivergentExitNodes::ID = 0;
+
+char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
+
 INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
                      "Unify divergent function exit nodes", false, false)
 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
@@ -61,8 +73,6 @@ INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
 INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
                     "Unify divergent function exit nodes", false, false)
 
-char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
-
 void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const {
   // TODO: Preserve dominator tree.
   AU.addRequired<PostDominatorTreeWrapperPass>();
@@ -113,7 +123,6 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
   // Otherwise, we need to insert a new basic block into the function, add PHI
   // nodes (if the function returns values), and convert all of the return
   // instructions into unconditional branches.
-  //
   BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), Name, &F);
 
   PHINode *PN = nullptr;
@@ -129,7 +138,6 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
 
   // Loop over all of the blocks, replacing the return instruction with an
   // unconditional branch.
-  //
   for (BasicBlock *BB : ReturningBlocks) {
     // Add an incoming element to the PHI node for every return instruction that
     // is merging into this new block...
@@ -142,7 +150,7 @@ static BasicBlock *unifyReturnBlockSet(Function &F,
 
   for (BasicBlock *BB : ReturningBlocks) {
     // Cleanup possible branch to unconditional branch to the return.
-    SimplifyCFG(BB, TTI, nullptr, {2});
+    simplifyCFG(BB, TTI, {2});
   }
 
   return NewRetBlock;
@@ -157,7 +165,6 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
 
   // Loop over all of the blocks in a function, tracking all of the blocks that
   // return.
-  //
   SmallVector<BasicBlock *, 4> ReturningBlocks;
   SmallVector<BasicBlock *, 4> UnreachableBlocks;
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 6b5e4da50f53b..806aa420c50fa 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -41,7 +41,7 @@
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/AMDGPUCodeObjectMetadata.h"
+#include "llvm/Support/AMDGPUMetadata.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -807,7 +807,6 @@ class KernelScopeInfo {
 };
 
 class AMDGPUAsmParser : public MCTargetAsmParser {
-  const MCInstrInfo &MII;
   MCAsmParser &Parser;
 
   unsigned ForcedEncodingSize = 0;
@@ -828,11 +827,15 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
   bool ParseDirectiveHSACodeObjectVersion();
   bool ParseDirectiveHSACodeObjectISA();
-  bool ParseDirectiveCodeObjectMetadata();
   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
   bool ParseDirectiveAMDKernelCodeT();
   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
   bool ParseDirectiveAMDGPUHsaKernel();
+
+  bool ParseDirectiveISAVersion();
+  bool ParseDirectiveHSAMetadata();
+  bool ParseDirectivePALMetadata();
+
   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                              RegisterKind RegKind, unsigned Reg1,
                              unsigned RegNum);
@@ -854,7 +857,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                   const MCInstrInfo &MII,
                   const MCTargetOptions &Options)
-      : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser) {
+      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
     MCAsmParserExtension::Initialize(Parser);
 
     if (getFeatureBits().none()) {
@@ -2398,49 +2401,6 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
   return false;
 }
 
-bool AMDGPUAsmParser::ParseDirectiveCodeObjectMetadata() {
-  std::string YamlString;
-  raw_string_ostream YamlStream(YamlString);
-
-  getLexer().setSkipSpace(false);
-
-  bool FoundEnd = false;
-  while (!getLexer().is(AsmToken::Eof)) {
-    while (getLexer().is(AsmToken::Space)) {
-      YamlStream << getLexer().getTok().getString();
-      Lex();
-    }
-
-    if (getLexer().is(AsmToken::Identifier)) {
-      StringRef ID = getLexer().getTok().getIdentifier();
-      if (ID == AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd) {
-        Lex();
-        FoundEnd = true;
-        break;
-      }
-    }
-
-    YamlStream << Parser.parseStringToEndOfStatement()
-               << getContext().getAsmInfo()->getSeparatorString();
-
-    Parser.eatToEndOfStatement();
-  }
-
-  getLexer().setSkipSpace(true);
-
-  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
-    return TokError(
-        "expected directive .end_amdgpu_code_object_metadata not found");
-  }
-
-  YamlStream.flush();
-
-  if (!getTargetStreamer().EmitCodeObjectMetadata(YamlString))
-    return Error(getParser().getTok().getLoc(), "invalid code object metadata");
-
-  return false;
-}
-
 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                                amd_kernel_code_t &Header) {
   SmallString<40> ErrStr;
@@ -2493,6 +2453,103 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
   return false;
 }
 
+bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
+  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
+    return Error(getParser().getTok().getLoc(),
+                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
+                 "architectures");
+  }
+
+ auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); + + std::string ISAVersionStringFromSTI; + raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); + IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); + + if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { + return Error(getParser().getTok().getLoc(), + ".amd_amdgpu_isa directive does not match triple and/or mcpu " + "arguments specified through the command line"); + } + + getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); + Lex(); + + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { + if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { + return Error(getParser().getTok().getLoc(), + (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is " + "not available on non-amdhsa OSes")).str()); + } + + std::string HSAMetadataString; + raw_string_ostream YamlStream(HSAMetadataString); + + getLexer().setSkipSpace(false); + + bool FoundEnd = false; + while (!getLexer().is(AsmToken::Eof)) { + while (getLexer().is(AsmToken::Space)) { + YamlStream << getLexer().getTok().getString(); + Lex(); + } + + if (getLexer().is(AsmToken::Identifier)) { + StringRef ID = getLexer().getTok().getIdentifier(); + if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) { + Lex(); + FoundEnd = true; + break; + } + } + + YamlStream << Parser.parseStringToEndOfStatement() + << getContext().getAsmInfo()->getSeparatorString(); + + Parser.eatToEndOfStatement(); + } + + getLexer().setSkipSpace(true); + + if (getLexer().is(AsmToken::Eof) && !FoundEnd) { + return TokError(Twine("expected directive ") + + Twine(HSAMD::AssemblerDirectiveEnd) + Twine(" not found")); + } + + YamlStream.flush(); + + if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString)) + return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); + + return false; +} + +bool AMDGPUAsmParser::ParseDirectivePALMetadata() { + if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { + return Error(getParser().getTok().getLoc(), + (Twine(PALMD::AssemblerDirective) + Twine(" directive is " + "not available on non-amdpal OSes")).str()); + } + + PALMD::Metadata PALMetadata; + for (;;) { + uint32_t Value; + if (ParseAsAbsoluteExpression(Value)) { + return TokError(Twine("invalid value in ") + + Twine(PALMD::AssemblerDirective)); + } + PALMetadata.push_back(Value); + if (getLexer().isNot(AsmToken::Comma)) + break; + Lex(); + } + getTargetStreamer().EmitPALMetadata(PALMetadata); + return false; +} + bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -2502,15 +2559,21 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".hsa_code_object_isa") return ParseDirectiveHSACodeObjectISA(); - if (IDVal == AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin) - return ParseDirectiveCodeObjectMetadata(); - if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); if (IDVal == ".amdgpu_hsa_kernel") return ParseDirectiveAMDGPUHsaKernel(); + if (IDVal == ".amd_amdgpu_isa") + return ParseDirectiveISAVersion(); + + if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) + return ParseDirectiveHSAMetadata(); + + if (IDVal == PALMD::AssemblerDirective) + return ParseDirectivePALMetadata(); + return true; } diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td index 076ce0f0cc444..6eb39aee893ee 100644 --- a/lib/Target/AMDGPU/BUFInstructions.td +++ b/lib/Target/AMDGPU/BUFInstructions.td @@ -647,8 
+647,6 @@ multiclass MUBUF_Pseudo_Atomics ; @@ -862,8 +860,6 @@ defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_128>; defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>; -} // End let SubtargetPredicate = isGCN - let SubtargetPredicate = isCIVI in { //===----------------------------------------------------------------------===// @@ -882,10 +878,8 @@ def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol", // MUBUF Patterns //===----------------------------------------------------------------------===// -let Predicates = [isGCN] in { - // Offset in an 32-bit VGPR -def : Pat < +def : GCNPat < (SIload_constant v4i32:$sbase, i32:$voff), (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, (i32 0), 0, 0, 0, 0) >; @@ -897,7 +891,7 @@ def : Pat < multiclass MUBUF_LoadIntrinsicPat { - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, 0, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc)), @@ -905,7 +899,7 @@ multiclass MUBUF_LoadIntrinsicPat; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc)), @@ -913,7 +907,7 @@ multiclass MUBUF_LoadIntrinsicPat; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, 0, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc)), @@ -921,7 +915,7 @@ multiclass MUBUF_LoadIntrinsicPat; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc)), @@ -941,7 +935,7 @@ defm : MUBUF_LoadIntrinsicPat; multiclass MUBUF_StoreIntrinsicPat { - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc), @@ -949,7 +943,7 @@ multiclass MUBUF_StoreIntrinsicPat; - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$glc, imm:$slc), @@ -958,7 +952,7 @@ multiclass MUBUF_StoreIntrinsicPat; - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc), @@ -967,7 +961,7 @@ multiclass MUBUF_StoreIntrinsicPat; - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$glc, imm:$slc), @@ -991,7 +985,7 @@ defm : MUBUF_StoreIntrinsicPat { - def : Pat< + def : GCNPat< (name i32:$vdata_in, v4i32:$rsrc, 0, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$slc), @@ -999,7 +993,7 @@ multiclass BufferAtomicPatterns { (as_i16imm $offset), (as_i1imm $slc)) >; - def : Pat< + def : GCNPat< (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), imm:$slc), @@ -1007,7 +1001,7 @@ multiclass BufferAtomicPatterns { (as_i16imm $offset), (as_i1imm $slc)) >; - def : Pat< + def : GCNPat< (name i32:$vdata_in, v4i32:$rsrc, 0, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$slc), @@ -1015,7 +1009,7 @@ multiclass BufferAtomicPatterns { (as_i16imm $offset), (as_i1imm $slc)) >; - def : Pat< + def : GCNPat< (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), imm:$slc), @@ -1037,7 +1031,7 @@ defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; defm : BufferAtomicPatterns; -def : Pat< +def : GCNPat< 
(int_amdgcn_buffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, 0, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), @@ -1049,7 +1043,7 @@ def : Pat< sub0) >; -def : Pat< +def : GCNPat< (int_amdgcn_buffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicOffset i32:$soffset, i16:$offset), @@ -1061,7 +1055,7 @@ def : Pat< sub0) >; -def : Pat< +def : GCNPat< (int_amdgcn_buffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, 0, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), @@ -1073,7 +1067,7 @@ def : Pat< sub0) >; -def : Pat< +def : GCNPat< (int_amdgcn_buffer_atomic_cmpswap i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex, (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset), @@ -1088,7 +1082,7 @@ def : Pat< class MUBUFLoad_PatternADDR64 : Pat < + PatFrag constant_ld> : GCNPat < (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe))), (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe) @@ -1096,19 +1090,19 @@ class MUBUFLoad_PatternADDR64 { - def : Pat < + def : GCNPat < (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc))), (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0) >; - def : Pat < + def : GCNPat < (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))), (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0) >; } -let Predicates = [isSICI] in { +let SubtargetPredicate = isSICI in { def : MUBUFLoad_PatternADDR64 ; def : MUBUFLoad_PatternADDR64 ; def : MUBUFLoad_PatternADDR64 ; @@ -1116,19 +1110,19 @@ def : MUBUFLoad_PatternADDR64 ; defm : MUBUFLoad_Atomic_Pattern ; -} // End Predicates = [isSICI] +} // End SubtargetPredicate = isSICI multiclass MUBUFLoad_Pattern { - def : Pat < + def : GCNPat < (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe))), (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe) >; } -let Predicates = [Has16BitInsts] in { +let OtherPredicates = [Has16BitInsts] in { defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; @@ -1137,18 +1131,18 @@ defm : MUBUFLoad_Pattern ; defm : MUBUFLoad_Pattern ; -} // End Predicates = [Has16BitInsts] +} // End OtherPredicates = [Has16BitInsts] multiclass MUBUFScratchLoadPat { - def : Pat < + def : GCNPat < (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset))), (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; - def : Pat < + def : GCNPat < (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))), (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0) >; @@ -1158,25 +1152,25 @@ multiclass MUBUFScratchLoadPat { - def : Pat < + def : GCNPat < (build_vector vt:$lo, (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)))), (v2i16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $lo)) >; - def : Pat < + def : GCNPat < (build_vector f16:$lo, (f16 (bitconvert (vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)))))), (v2f16 (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $lo)) >; - def : Pat < + def : GCNPat < (build_vector vt:$lo, (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)))), (v2i16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $lo)) >; - def : Pat < + def : GCNPat < (build_vector f16:$lo, (f16 (bitconvert (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)))))), (v2f16 (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 
$lo)) >; @@ -1193,7 +1187,7 @@ defm : MUBUFScratchLoadPat ; defm : MUBUFScratchLoadPat ; -let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { defm : MUBUFScratchLoadPat_Hi16; defm : MUBUFScratchLoadPat_Hi16; defm : MUBUFScratchLoadPat_Hi16; @@ -1206,7 +1200,7 @@ multiclass MUBUF_Load_Dword { - def : Pat < + def : GCNPat < (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset, imm:$offset, 0, 0, imm:$glc, imm:$slc, imm:$tfe)), @@ -1214,7 +1208,7 @@ multiclass MUBUF_Load_Dword ; - def : Pat < + def : GCNPat < (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 1, 0, imm:$glc, imm:$slc, imm:$tfe)), @@ -1222,7 +1216,7 @@ multiclass MUBUF_Load_Dword ; - def : Pat < + def : GCNPat < (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 0, 1, imm:$glc, imm:$slc, imm:$tfe)), @@ -1230,7 +1224,7 @@ multiclass MUBUF_Load_Dword ; - def : Pat < + def : GCNPat < (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset, imm:$offset, 1, 1, imm:$glc, imm:$slc, imm:$tfe)), @@ -1249,27 +1243,27 @@ defm : MUBUF_Load_Dword { // Store follows atomic op convention so address is first - def : Pat < + def : GCNPat < (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc), vt:$val), (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0) >; - def : Pat < + def : GCNPat < (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val), (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0) >; } -let Predicates = [isSICI] in { +let SubtargetPredicate = isSICI in { defm : MUBUFStore_Atomic_Pattern ; defm : MUBUFStore_Atomic_Pattern ; -} // End Predicates = [isSICI] +} // End SubtargetPredicate = isSICI multiclass MUBUFStore_Pattern { - def : Pat < + def : GCNPat < (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)), (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe) @@ -1282,13 +1276,13 @@ defm : MUBUFStore_Pattern ; multiclass MUBUFScratchStorePat { - def : Pat < + def : GCNPat < (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)), (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0) >; - def : Pat < + def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0) @@ -1304,7 +1298,7 @@ defm : MUBUFScratchStorePat ; -let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { // Hiding the extract high pattern in the PatFrag seems to not // automatically increase the complexity.
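For reference, the GCNPat class that these BUF/MTBUF hunks migrate to replaces bare Pat definitions and the removed "let Predicates = [isGCN]" wrappers by carrying the subtarget predicate itself, in the same way the EGPat/EGOrCaymanPat helpers are introduced in the EvergreenInstructions.td hunk further down. The sketch below is an assumed reconstruction, not quoted from this patch; the actual class lives elsewhere in the AMDGPU backend and may differ in detail:

class GCNPat<dag pattern, dag result> : Pat<pattern, result>, PredicateControl {
  // Assumed default: every GCN pattern is gated on the GCN generation, so
  // per-file "let Predicates = [isGCN] in { ... }" blocks become unnecessary;
  // additional guards are layered on via SubtargetPredicate/OtherPredicates.
  let SubtargetPredicate = isGCN;
}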
let AddedComplexity = 1 in { @@ -1323,28 +1317,28 @@ defm : MUBUFScratchStorePat { - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), (!cast(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) >; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), (!cast(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) >; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), (!cast(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i8imm $dfmt), (as_i8imm $nfmt), (as_i1imm $glc), (as_i1imm $slc), 0) >; - def : Pat< + def : GCNPat< (vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc)), (!cast(opcode # _BOTHEN) @@ -1363,7 +1357,7 @@ defm : MTBUF_LoadIntrinsicPat multiclass MTBUF_StoreIntrinsicPat { - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), (!cast(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, @@ -1372,7 +1366,7 @@ multiclass MTBUF_StoreIntrinsicPat; - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), (!cast(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset, @@ -1381,7 +1375,7 @@ multiclass MTBUF_StoreIntrinsicPat; - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), (!cast(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset, @@ -1390,7 +1384,7 @@ multiclass MTBUF_StoreIntrinsicPat; - def : Pat< + def : GCNPat< (name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset, imm:$dfmt, imm:$nfmt, imm:$glc, imm:$slc), (!cast(opcode # _BOTHEN_exact) @@ -1410,8 +1404,6 @@ defm : MTBUF_StoreIntrinsicPat; defm : MTBUF_StoreIntrinsicPat; -} // End let Predicates = [isGCN] - //===----------------------------------------------------------------------===// // Target instructions, move to the appropriate target TD file //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 1f6d5d5d187ef..4030aef85e80b 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -40,6 +40,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUMachineModuleInfo.cpp AMDGPUMacroFusion.cpp AMDGPUMCInstLower.cpp + AMDGPUOpenCLEnqueuedBlockLowering.cpp AMDGPUOpenCLImageTypeLoweringPass.cpp AMDGPUPromoteAlloca.cpp AMDGPURegAsmNames.inc.cpp diff --git a/lib/Target/AMDGPU/CaymanInstructions.td b/lib/Target/AMDGPU/CaymanInstructions.td index dd21946c7c386..0ba5acad680ff 100644 --- a/lib/Target/AMDGPU/CaymanInstructions.td +++ b/lib/Target/AMDGPU/CaymanInstructions.td @@ -18,7 +18,7 @@ def isCayman : Predicate<"Subtarget->hasCaymanISA()">; // Cayman Instructions //===----------------------------------------------------------------------===// -let Predicates = [isCayman] in { +let SubtargetPredicate = isCayman in { def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24", [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, 
i32:$src2))], VecALU @@ -57,20 +57,21 @@ defm DIV_cm : DIV_Common; // RECIP_UINT emulation for Cayman // The multiplication scales from [0,1] to the unsigned integer range -def : Pat < +def : R600Pat < (AMDGPUurecip i32:$src0), (FLT_TO_UINT_eg (MUL_IEEE (RECIP_IEEE_cm (UINT_TO_FLT_eg $src0)), (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) >; - def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { +def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> { let ADDR = 0; let POP_COUNT = 0; let COUNT = 0; } -def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; + +def : R600Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>; class RAT_STORE_DWORD mask> : CF_MEM_RAT_CACHELESS <0x14, 0, mask, @@ -179,44 +180,43 @@ def VTX_READ_128_cm //===----------------------------------------------------------------------===// // VTX Read from parameter memory space //===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id3_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_cm MEMxi:$src_gpr, 3)>; -def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id3_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_cm MEMxi:$src_gpr, 3)>; -def : Pat<(i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_cm MEMxi:$src_gpr, 3)>; -def : Pat<(v2i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v2i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_cm MEMxi:$src_gpr, 3)>; -def : Pat<(v4i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v4i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_128_cm MEMxi:$src_gpr, 3)>; //===----------------------------------------------------------------------===// // VTX Read from constant memory space //===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_cm MEMxi:$src_gpr, 2)>; -def : Pat<(i32:$dst_gpr (vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_cm MEMxi:$src_gpr, 2)>; -def : Pat<(i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_cm MEMxi:$src_gpr, 2)>; -def : Pat<(v2i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v2i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_cm MEMxi:$src_gpr, 2)>; -def : Pat<(v4i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v4i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), (VTX_READ_128_cm MEMxi:$src_gpr, 2)>; //===----------------------------------------------------------------------===// // VTX Read from global memory space //===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_cm MEMxi:$src_gpr, 1)>; -def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_cm MEMxi:$src_gpr, 1)>; -def : Pat<(i32:$dst_gpr 
(vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_cm MEMxi:$src_gpr, 1)>; -def : Pat<(v2i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v2i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_cm MEMxi:$src_gpr, 1)>; -def : Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : R600Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_128_cm MEMxi:$src_gpr, 1)>; -} // End isCayman - +} // End let SubtargetPredicate = isCayman diff --git a/lib/Target/AMDGPU/DSInstructions.td b/lib/Target/AMDGPU/DSInstructions.td index e66bf402178b0..15260d0bae1eb 100644 --- a/lib/Target/AMDGPU/DSInstructions.td +++ b/lib/Target/AMDGPU/DSInstructions.td @@ -537,25 +537,23 @@ def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32", // DS Patterns //===----------------------------------------------------------------------===// -let Predicates = [isGCN] in { - -def : Pat < +def : GCNPat < (int_amdgcn_ds_swizzle i32:$src, imm:$offset16), (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0)) >; -class DSReadPat : Pat < +class DSReadPat : GCNPat < (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))), (inst $ptr, (as_i16imm $offset), (i1 0)) >; multiclass DSReadPat_Hi16 { - def : Pat < + def : GCNPat < (build_vector vt:$lo, (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset)))), (v2i16 (inst $ptr, (as_i16imm $offset), (i1 0), $lo)) >; - def : Pat < + def : GCNPat < (build_vector f16:$lo, (f16 (bitconvert (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset)))))), (v2f16 (inst $ptr, (as_i16imm $offset), (i1 0), $lo)) >; @@ -577,14 +575,14 @@ def : DSReadPat ; } // End AddedComplexity = 100 -def : Pat < +def : GCNPat < (v2i32 (load_local_m0 (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1))), (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0)) >; -let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { let AddedComplexity = 100 in { defm : DSReadPat_Hi16; defm : DSReadPat_Hi16; @@ -592,7 +590,7 @@ defm : DSReadPat_Hi16; } } -class DSWritePat : Pat < +class DSWritePat : GCNPat < (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)), (inst $ptr, $value, (as_i16imm $offset), (i1 0)) >; @@ -603,7 +601,7 @@ def : DSWritePat ; def : DSWritePat ; def : DSWritePat ; -let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { def : DSWritePat ; def : DSWritePat ; } @@ -613,7 +611,7 @@ let AddedComplexity = 100 in { def : DSWritePat ; } // End AddedComplexity = 100 -def : Pat < +def : GCNPat < (store_local_m0 v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0, i8:$offset1)), (DS_WRITE2_B32 $ptr, (i32 (EXTRACT_SUBREG $value, sub0)), @@ -621,49 +619,47 @@ def : Pat < (i1 0)) >; -class DSAtomicRetPat : Pat < +class DSAtomicRetPat : GCNPat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value), (inst $ptr, $value, (as_i16imm $offset), (i1 0)) >; -class DSAtomicCmpXChg : Pat < +class DSAtomicCmpXChg : GCNPat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap), (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0)) >; // 32-bit atomics. 
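The DSAtomicRetPat and DSAtomicCmpXChg helpers above take an instruction, a value type, and a PatFrag, and the def lists below instantiate them once per LDS atomic. The concrete template argument lists are not legible in this patch, so as a hedged illustration only, a typical pair of instantiations would read:

// Hypothetical but representative instantiations (inst, vt, frag); the exact
// argument lists used in this patch are not reproduced here.
def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_local>;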
-def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicCmpXChg; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicCmpXChg; // 64-bit atomics. -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; -def : DSAtomicRetPat; - -def : DSAtomicCmpXChg; - -} // let Predicates = [isGCN] +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; +def : DSAtomicRetPat; + +def : DSAtomicCmpXChg; //===----------------------------------------------------------------------===// // Real instructions diff --git a/lib/Target/AMDGPU/Disassembler/CodeObject.h b/lib/Target/AMDGPU/Disassembler/CodeObject.h index cc215e00aaae3..2cff9f1c85e95 100644 --- a/lib/Target/AMDGPU/Disassembler/CodeObject.h +++ b/lib/Target/AMDGPU/Disassembler/CodeObject.h @@ -223,12 +223,21 @@ class HSACodeObject : public object::ELF64LEObjectFile { void InitMarkers() const; -public: - HSACodeObject(MemoryBufferRef Buffer, std::error_code &EC) - : object::ELF64LEObjectFile(Buffer, EC) { + HSACodeObject(object::ELF64LEObjectFile &&Obj) + : object::ELF64LEObjectFile(std::move(Obj)) { InitMarkers(); } +public: + static Expected> + create(MemoryBufferRef Wrapper) { + auto Obj = object::ELF64LEObjectFile::create(Wrapper); + if (auto E = Obj.takeError()) + return std::move(E); + std::unique_ptr Ret(new HSACodeObject(std::move(*Obj))); + return std::move(Ret); + } + typedef const_varsize_item_iterator note_iterator; note_iterator notes_begin() const; diff --git a/lib/Target/AMDGPU/Disassembler/CodeObjectDisassembler.cpp b/lib/Target/AMDGPU/Disassembler/CodeObjectDisassembler.cpp index 63f9e220a524b..1f85a87ca74d0 100644 --- a/lib/Target/AMDGPU/Disassembler/CodeObjectDisassembler.cpp +++ b/lib/Target/AMDGPU/Disassembler/CodeObjectDisassembler.cpp @@ -275,16 +275,18 @@ std::error_code CodeObjectDisassembler::Disassemble(MemoryBufferRef Buffer, using namespace object; // Create ELF 64-bit low-endian object file - std::error_code EC; - HSACodeObject CodeObject(Buffer, EC); - if (EC) - return EC; + Expected> CodeObjectOrError = + HSACodeObject::create(Buffer); + if (Error E = CodeObjectOrError.takeError()) + return errorToErrorCode(std::move(E)); + + std::unique_ptr CodeObject = std::move(*CodeObjectOrError); - EC = printNotes(&CodeObject); + std::error_code EC = printNotes(CodeObject.get()); if (EC) return EC; - EC = printKernels(&CodeObject, ES); + EC = printKernels(CodeObject.get(), ES); if (EC) return EC; diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td index 52038db7150df..bccad826d18fb 100644 --- a/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/lib/Target/AMDGPU/EvergreenInstructions.td @@ -15,20 +15,28 @@ def isEG : Predicate< 
"Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && " - "Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS && " + "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && " "!Subtarget->hasCaymanISA()" >; def isEGorCayman : Predicate< "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||" - "Subtarget->getGeneration() ==AMDGPUSubtarget::NORTHERN_ISLANDS" + "Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS" >; +class EGPat : AMDGPUPat { + let SubtargetPredicate = isEG; +} + +class EGOrCaymanPat : AMDGPUPat { + let SubtargetPredicate = isEGorCayman; +} + //===----------------------------------------------------------------------===// // Evergreen / Cayman store instructions //===----------------------------------------------------------------------===// -let Predicates = [isEGorCayman] in { +let SubtargetPredicate = isEGorCayman in { class CF_MEM_RAT_CACHELESS rat_inst, bits<4> rat_id, bits<4> mask, dag ins, string name, list pattern> @@ -88,13 +96,13 @@ defm RAT_ATOMIC_XOR : RAT_ATOMIC<16, 48, "ATOMIC_XOR">; defm RAT_ATOMIC_INC_UINT : RAT_ATOMIC<18, 50, "ATOMIC_INC_UINT">; defm RAT_ATOMIC_DEC_UINT : RAT_ATOMIC<19, 51, "ATOMIC_DEC_UINT">; -} // End let Predicates = [isEGorCayman] +} // End SubtargetPredicate = isEGorCayman //===----------------------------------------------------------------------===// // Evergreen Only instructions //===----------------------------------------------------------------------===// -let Predicates = [isEG] in { +let SubtargetPredicate = isEG in { def RECIP_IEEE_eg : RECIP_IEEE_Common<0x86>; defm DIV_eg : DIV_Common; @@ -116,7 +124,8 @@ def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; def : POW_Common ; -def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; +def : EGPat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; +} // End SubtargetPredicate = isEG //===----------------------------------------------------------------------===// // Memory read/write instructions @@ -241,58 +250,56 @@ def VTX_READ_128_eg //===----------------------------------------------------------------------===// // VTX Read from parameter memory space //===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id3_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_eg MEMxi:$src_gpr, 3)>; -def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id3_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_eg MEMxi:$src_gpr, 3)>; -def : Pat<(i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_eg MEMxi:$src_gpr, 3)>; -def : Pat<(v2i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v2i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_eg MEMxi:$src_gpr, 3)>; -def : Pat<(v4i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v4i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)), (VTX_READ_128_eg MEMxi:$src_gpr, 3)>; //===----------------------------------------------------------------------===// // VTX Read from constant memory space //===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_eg MEMxi:$src_gpr, 2)>; -def : Pat<(i32:$dst_gpr 
(vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_eg MEMxi:$src_gpr, 2)>; -def : Pat<(i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_eg MEMxi:$src_gpr, 2)>; -def : Pat<(v2i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v2i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_eg MEMxi:$src_gpr, 2)>; -def : Pat<(v4i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v4i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)), (VTX_READ_128_eg MEMxi:$src_gpr, 2)>; //===----------------------------------------------------------------------===// // VTX Read from global memory space //===----------------------------------------------------------------------===// -def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr)), (VTX_READ_8_eg MEMxi:$src_gpr, 1)>; -def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr)), (VTX_READ_16_eg MEMxi:$src_gpr, 1)>; -def : Pat<(i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_32_eg MEMxi:$src_gpr, 1)>; -def : Pat<(v2i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v2i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_64_eg MEMxi:$src_gpr, 1)>; -def : Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), +def : EGPat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)), (VTX_READ_128_eg MEMxi:$src_gpr, 1)>; -} // End Predicates = [isEG] - //===----------------------------------------------------------------------===// // Evergreen / Cayman Instructions //===----------------------------------------------------------------------===// -let Predicates = [isEGorCayman] in { +let SubtargetPredicate = isEGorCayman in { multiclass AtomicPat { // FIXME: Add _RTN version. We need per WI scratch location to store the old value // EXTRACT_SUBREG here is dummy, we know the node has no uses - def : Pat<(i32 (node_noret i32:$ptr, i32:$data)), + def : EGOrCaymanPat<(i32 (node_noret i32:$ptr, i32:$data)), (EXTRACT_SUBREG (inst_noret (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $data, sub0), $ptr), sub1)>; } @@ -300,7 +307,7 @@ multiclass AtomicIncDecPat { // FIXME: Add _RTN version. 
We need per WI scratch location to store the old value // EXTRACT_SUBREG here is dummy, we know the node has no uses - def : Pat<(i32 (node_noret i32:$ptr, C)), + def : EGOrCaymanPat<(i32 (node_noret i32:$ptr, C)), (EXTRACT_SUBREG (inst_noret (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (MOV_IMM_I32 -1), sub0), $ptr), sub1)>; } @@ -308,7 +315,7 @@ multiclass AtomicIncDecPat; -def : Pat<(i32 (sext_inreg i32:$src, i1)), +def : EGOrCaymanPat<(i32 (sext_inreg i32:$src, i1)), (BFE_INT_eg i32:$src, (i32 ZERO), (i32 ONE_INT))>; -def : Pat<(i32 (sext_inreg i32:$src, i8)), +def : EGOrCaymanPat<(i32 (sext_inreg i32:$src, i8)), (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 8))>; -def : Pat<(i32 (sext_inreg i32:$src, i16)), +def : EGOrCaymanPat<(i32 (sext_inreg i32:$src, i16)), (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>; defm : BFIPatterns ; @@ -442,7 +449,7 @@ def FLT32_TO_FLT16 : R600_1OP_Helper <0xA2, "FLT32_TO_FLT16", AMDGPUfp_to_f16, V def FLT16_TO_FLT32 : R600_1OP_Helper <0xA3, "FLT16_TO_FLT32", f16_to_fp, VecALU>; def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>; def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", AMDGPUffbh_u32, VecALU>; -def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>; +def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", AMDGPUffbl_b32, VecALU>; let hasSideEffects = 1 in { def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", [], VecALU>; @@ -653,7 +660,7 @@ def LDS_WRXCHG_RET : R600_LDS_1A1D_RET <0x2d, "LDS_WRXCHG", [(set i32:$dst, (atomic_swap_local i32:$src0, i32:$src1))] >; def LDS_CMPST_RET : R600_LDS_1A2D_RET <0x30, "LDS_CMPST", - [(set i32:$dst, (atomic_cmp_swap_32_local i32:$src0, i32:$src1, i32:$src2))] + [(set i32:$dst, (atomic_cmp_swap_local i32:$src0, i32:$src1, i32:$src2))] >; def LDS_READ_RET : R600_LDS_1A <0x32, "LDS_READ_RET", [(set (i32 R600_Reg32:$dst), (load_local R600_Reg32:$src0))] @@ -681,9 +688,9 @@ def LDS_USHORT_READ_RET : R600_LDS_1A <0x39, "LDS_USHORT_READ_RET", // XXX: Lowering SELECT_CC will sometimes generate fp_to_[su]int nodes, // which do not need to be truncated since the fp values are 0.0f or 1.0f. // We should look into handling these cases separately. -def : Pat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>; +def : EGOrCaymanPat<(fp_to_sint f32:$src0), (FLT_TO_INT_eg (TRUNC $src0))>; -def : Pat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>; +def : EGOrCaymanPat<(fp_to_uint f32:$src0), (FLT_TO_UINT_eg (TRUNC $src0))>; // SHA-256 Patterns def : SHA256MaPattern ; diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td index 8c32ce232dc31..af0147f69ef0a 100644 --- a/lib/Target/AMDGPU/FLATInstructions.td +++ b/lib/Target/AMDGPU/FLATInstructions.td @@ -625,63 +625,63 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_shor //===----------------------------------------------------------------------===// // Patterns for global loads with no offset. 
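The flat pattern classes below follow the same (instruction, node, value type) convention as the buffer and DS patterns. As a hedged sketch of how FlatLoadPat is conventionally declared and used (the parameter lists are not legible here, so the names are assumptions rather than quotes from this patch):

// Assumed parameter list; the pattern body matches the one shown below.
class FlatLoadPat<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat<
  (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc)
>;
// Typical use:
def : FlatLoadPat<FLAT_LOAD_DWORD, flat_load, i32>;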
-class FlatLoadPat : Pat < +class FlatLoadPat : GCNPat < (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))), (inst $vaddr, $offset, 0, $slc) >; multiclass FlatLoadPat_Hi16 { - def : Pat < + def : GCNPat < (build_vector vt:$elt0, (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)))), (v2i16 (inst $vaddr, $offset, 0, $slc, $elt0)) >; - def : Pat < + def : GCNPat < (build_vector f16:$elt0, (f16 (bitconvert (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)))))), (v2f16 (inst $vaddr, $offset, 0, $slc, $elt0)) >; } multiclass FlatSignedLoadPat_Hi16 { - def : Pat < + def : GCNPat < (build_vector vt:$elt0, (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)))), (v2i16 (inst $vaddr, $offset, 0, $slc, $elt0)) >; - def : Pat < + def : GCNPat < (build_vector f16:$elt0, (f16 (bitconvert (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)))))), (v2f16 (inst $vaddr, $offset, 0, $slc, $elt0)) >; } -class FlatLoadAtomicPat : Pat < +class FlatLoadAtomicPat : GCNPat < (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc))), (inst $vaddr, $offset, 0, $slc) >; -class FlatLoadSignedPat : Pat < +class FlatLoadSignedPat : GCNPat < (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc))), (inst $vaddr, $offset, 0, $slc) >; -class FlatStorePat : Pat < +class FlatStorePat : GCNPat < (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)), (inst $vaddr, $data, $offset, 0, $slc) >; -class FlatStoreSignedPat : Pat < +class FlatStoreSignedPat : GCNPat < (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)), (inst $vaddr, $data, $offset, 0, $slc) >; -class FlatStoreAtomicPat : Pat < +class FlatStoreAtomicPat : GCNPat < // atomic store follows atomic binop convention so the address comes // first. (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), (inst $vaddr, $data, $offset, 0, $slc) >; -class FlatStoreSignedAtomicPat : Pat < +class FlatStoreSignedAtomicPat : GCNPat < // atomic store follows atomic binop convention so the address comes // first. 
(node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data), @@ -689,18 +689,18 @@ class FlatStoreSignedAtomicPat ; class FlatAtomicPat : Pat < + ValueType data_vt = vt> : GCNPat < (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)), (inst $vaddr, $data, $offset, $slc) >; class FlatSignedAtomicPat : Pat < + ValueType data_vt = vt> : GCNPat < (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)), (inst $vaddr, $data, $offset, $slc) >; -let Predicates = [HasFlatAddressSpace] in { +let OtherPredicates = [HasFlatAddressSpace] in { def : FlatLoadPat ; def : FlatLoadPat ; @@ -756,7 +756,7 @@ def : FlatAtomicPat ; def : FlatStorePat ; def : FlatStorePat ; - let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { def : FlatStorePat ; def : FlatStorePat ; @@ -767,9 +767,9 @@ defm : FlatLoadPat_Hi16 ; } } -} // End Predicates = [HasFlatAddressSpace] +} // End OtherPredicates = [HasFlatAddressSpace] -let Predicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { +let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { def : FlatLoadSignedPat ; def : FlatLoadSignedPat ; @@ -794,7 +794,7 @@ def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; - let Predicates = [HasD16LoadStore] in { +let OtherPredicates = [HasD16LoadStore] in { def : FlatStoreSignedPat ; def : FlatStoreSignedPat ; @@ -834,7 +834,7 @@ def : FlatSignedAtomicPat ; def : FlatSignedAtomicPat ; def : FlatSignedAtomicPat ; -} // End Predicates = [HasFlatGlobalInsts] +} // End OtherPredicates = [HasFlatGlobalInsts] //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 63a984a7140e1..778d4a7ba9d01 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/AMDGPUFixupKinds.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -167,14 +168,30 @@ namespace { class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { bool Is64Bit; bool HasRelocationAddend; + uint8_t OSABI = ELF::ELFOSABI_NONE; public: ELFAMDGPUAsmBackend(const Target &T, const Triple &TT) : AMDGPUAsmBackend(T), Is64Bit(TT.getArch() == Triple::amdgcn), - HasRelocationAddend(TT.getOS() == Triple::AMDHSA) { } + HasRelocationAddend(TT.getOS() == Triple::AMDHSA) { + switch (TT.getOS()) { + case Triple::AMDHSA: + OSABI = ELF::ELFOSABI_AMDGPU_HSA; + break; + case Triple::AMDPAL: + OSABI = ELF::ELFOSABI_AMDGPU_PAL; + break; + case Triple::Mesa3D: + OSABI = ELF::ELFOSABI_AMDGPU_MESA3D; + break; + default: + break; + } + } - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { - return createAMDGPUELFObjectWriter(Is64Bit, HasRelocationAddend, OS); + std::unique_ptr + createObjectWriter(raw_pwrite_stream &OS) const override { + return createAMDGPUELFObjectWriter(Is64Bit, OSABI, HasRelocationAddend, OS); } }; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp index 6abe7f3d37d5e..e443b0729606a 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -12,6 +12,7 @@ #include 
"llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" @@ -22,7 +23,7 @@ namespace { class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter { public: - AMDGPUELFObjectWriter(bool Is64Bit, bool HasRelocationAddend); + AMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, bool HasRelocationAddend); protected: unsigned getRelocType(MCContext &Ctx, const MCValue &Target, @@ -33,10 +34,9 @@ class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter { } // end anonymous namespace AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit, + uint8_t OSABI, bool HasRelocationAddend) - : MCELFObjectTargetWriter(Is64Bit, - ELF::ELFOSABI_AMDGPU_HSA, - ELF::EM_AMDGPU, + : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_AMDGPU, HasRelocationAddend) {} unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, @@ -82,10 +82,11 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx, llvm_unreachable("unhandled relocation type"); } -MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, - bool HasRelocationAddend, - raw_pwrite_stream &OS) { - MCELFObjectTargetWriter *MOTW = - new AMDGPUELFObjectWriter(Is64Bit, HasRelocationAddend); - return createELFObjectWriter(MOTW, OS, true); +std::unique_ptr +llvm::createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, + bool HasRelocationAddend, + raw_pwrite_stream &OS) { + auto MOTW = llvm::make_unique(Is64Bit, OSABI, + HasRelocationAddend); + return createELFObjectWriter(std::move(MOTW), OS, true); } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp index 43338a5bebd26..1497edc7a054c 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp @@ -9,13 +9,40 @@ #include "AMDGPUELFStreamer.h" #include "Utils/AMDGPUBaseInfo.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" using namespace llvm; -MCELFStreamer *llvm::createAMDGPUELFStreamer(MCContext &Context, - MCAsmBackend &MAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll) { - return new AMDGPUELFStreamer(Context, MAB, OS, Emitter); +AMDGPUELFStreamer::AMDGPUELFStreamer(const Triple &T, MCContext &Context, + std::unique_ptr MAB, + raw_pwrite_stream &OS, + std::unique_ptr Emitter) + : MCELFStreamer(Context, std::move(MAB), OS, std::move(Emitter)) { + unsigned Arch = ELF::EF_AMDGPU_ARCH_NONE; + switch (T.getArch()) { + case Triple::r600: + Arch = ELF::EF_AMDGPU_ARCH_R600; + break; + case Triple::amdgcn: + Arch = ELF::EF_AMDGPU_ARCH_GCN; + break; + default: + break; + } + + MCAssembler &MCA = getAssembler(); + unsigned EFlags = MCA.getELFHeaderEFlags(); + EFlags &= ~ELF::EF_AMDGPU_ARCH; + EFlags |= Arch; + MCA.setELFHeaderEFlags(EFlags); +} + +MCELFStreamer *llvm::createAMDGPUELFStreamer( + const Triple &T, MCContext &Context, std::unique_ptr MAB, + raw_pwrite_stream &OS, std::unique_ptr Emitter, + bool RelaxAll) { + return new AMDGPUELFStreamer(T, Context, std::move(MAB), OS, + std::move(Emitter)); } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h index 5319b65d65f92..0cc0a4c5cd5d9 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h @@ -25,15 +25,16 @@ class MCSubtargetInfo; 
class AMDGPUELFStreamer : public MCELFStreamer { public: - AMDGPUELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter) - : MCELFStreamer(Context, MAB, OS, Emitter) { } - + AMDGPUELFStreamer(const Triple &T, MCContext &Context, + std::unique_ptr MAB, raw_pwrite_stream &OS, + std::unique_ptr Emitter); }; -MCELFStreamer *createAMDGPUELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll); +MCELFStreamer *createAMDGPUELFStreamer(const Triple &T, MCContext &Context, + std::unique_ptr MAB, + raw_pwrite_stream &OS, + std::unique_ptr Emitter, + bool RelaxAll); } // namespace llvm. #endif diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp similarity index 71% rename from lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp rename to lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp index 4e828a791e09f..dacf5d37aa1eb 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp @@ -1,4 +1,4 @@ -//===--- AMDGPUCodeObjectMetadataStreamer.cpp -------------------*- C++ -*-===// +//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,12 +8,12 @@ //===----------------------------------------------------------------------===// // /// \file -/// \brief AMDGPU Code Object Metadata Streamer. +/// \brief AMDGPU HSA Metadata Streamer. /// // //===----------------------------------------------------------------------===// -#include "AMDGPUCodeObjectMetadataStreamer.h" +#include "AMDGPUHSAMetadataStreamer.h" #include "AMDGPU.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Constants.h" @@ -22,39 +22,40 @@ namespace llvm { -static cl::opt DumpCodeObjectMetadata( - "amdgpu-dump-comd", - cl::desc("Dump AMDGPU Code Object Metadata")); -static cl::opt VerifyCodeObjectMetadata( - "amdgpu-verify-comd", - cl::desc("Verify AMDGPU Code Object Metadata")); +static cl::opt DumpHSAMetadata( + "amdgpu-dump-hsa-metadata", + cl::desc("Dump AMDGPU HSA Metadata")); +static cl::opt VerifyHSAMetadata( + "amdgpu-verify-hsa-metadata", + cl::desc("Verify AMDGPU HSA Metadata")); namespace AMDGPU { -namespace CodeObject { +namespace HSAMD { -void MetadataStreamer::dump(StringRef YamlString) const { - errs() << "AMDGPU Code Object Metadata:\n" << YamlString << '\n'; +void MetadataStreamer::dump(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n'; } -void MetadataStreamer::verify(StringRef YamlString) const { - errs() << "AMDGPU Code Object Metadata Parser Test: "; +void MetadataStreamer::verify(StringRef HSAMetadataString) const { + errs() << "AMDGPU HSA Metadata Parser Test: "; - CodeObject::Metadata FromYamlString; - if (Metadata::fromYamlString(YamlString, FromYamlString)) { + HSAMD::Metadata FromHSAMetadataString; + if (fromString(HSAMetadataString, FromHSAMetadataString)) { errs() << "FAIL\n"; return; } - std::string ToYamlString; - if (Metadata::toYamlString(FromYamlString, ToYamlString)) { + std::string ToHSAMetadataString; + if (toString(FromHSAMetadataString, ToHSAMetadataString)) { errs() << "FAIL\n"; return; } - errs() << (YamlString == ToYamlString ? 
"PASS" : "FAIL") << '\n'; - if (YamlString != ToYamlString) { - errs() << "Original input: " << YamlString << '\n' - << "Produced output: " << ToYamlString << '\n'; + errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL") + << '\n'; + if (HSAMetadataString != ToHSAMetadataString) { + errs() << "Original input: " << HSAMetadataString << '\n' + << "Produced output: " << ToHSAMetadataString << '\n'; } } @@ -196,14 +197,14 @@ std::vector MetadataStreamer::getWorkGroupDimensions( } void MetadataStreamer::emitVersion() { - auto &Version = CodeObjectMetadata.mVersion; + auto &Version = HSAMetadata.mVersion; - Version.push_back(MetadataVersionMajor); - Version.push_back(MetadataVersionMinor); + Version.push_back(VersionMajor); + Version.push_back(VersionMinor); } void MetadataStreamer::emitPrintf(const Module &Mod) { - auto &Printf = CodeObjectMetadata.mPrintf; + auto &Printf = HSAMetadata.mPrintf; auto Node = Mod.getNamedMetadata("llvm.printf.fmts"); if (!Node) @@ -215,7 +216,7 @@ void MetadataStreamer::emitPrintf(const Module &Mod) { } void MetadataStreamer::emitKernelLanguage(const Function &Func) { - auto &Kernel = CodeObjectMetadata.mKernels.back(); + auto &Kernel = HSAMetadata.mKernels.back(); // TODO: What about other languages? auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version"); @@ -233,7 +234,7 @@ void MetadataStreamer::emitKernelLanguage(const Function &Func) { } void MetadataStreamer::emitKernelAttrs(const Function &Func) { - auto &Attrs = CodeObjectMetadata.mKernels.back().mAttrs; + auto &Attrs = HSAMetadata.mKernels.back().mAttrs; if (auto Node = Func.getMetadata("reqd_work_group_size")) Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node); @@ -244,6 +245,10 @@ void MetadataStreamer::emitKernelAttrs(const Function &Func) { cast(Node->getOperand(0))->getType(), mdconst::extract(Node->getOperand(1))->getZExtValue()); } + if (Func.hasFnAttribute("runtime-handle")) { + Attrs.mRuntimeHandle = + Func.getFnAttribute("runtime-handle").getValueAsString().str(); + } } void MetadataStreamer::emitKernelArgs(const Function &Func) { @@ -274,10 +279,15 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) { auto ArgNo = Arg.getArgNo(); const MDNode *Node; - StringRef TypeQual; - Node = Func->getMetadata("kernel_arg_type_qual"); + StringRef Name; + Node = Func->getMetadata("kernel_arg_name"); if (Node && ArgNo < Node->getNumOperands()) - TypeQual = cast(Node->getOperand(ArgNo))->getString(); + Name = cast(Node->getOperand(ArgNo))->getString(); + + StringRef TypeName; + Node = Func->getMetadata("kernel_arg_type"); + if (Node && ArgNo < Node->getNumOperands()) + TypeName = cast(Node->getOperand(ArgNo))->getString(); StringRef BaseTypeName; Node = Func->getMetadata("kernel_arg_base_type"); @@ -294,28 +304,25 @@ void MetadataStreamer::emitKernelArg(const Argument &Arg) { AccQual = cast(Node->getOperand(ArgNo))->getString(); } - StringRef Name; - Node = Func->getMetadata("kernel_arg_name"); - if (Node && ArgNo < Node->getNumOperands()) - Name = cast(Node->getOperand(ArgNo))->getString(); - - StringRef TypeName; - Node = Func->getMetadata("kernel_arg_type"); + StringRef TypeQual; + Node = Func->getMetadata("kernel_arg_type_qual"); if (Node && ArgNo < Node->getNumOperands()) - TypeName = cast(Node->getOperand(ArgNo))->getString(); + TypeQual = cast(Node->getOperand(ArgNo))->getString(); emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(), - getValueKind(Arg.getType(), TypeQual, BaseTypeName), TypeQual, - BaseTypeName, AccQual, Name, TypeName); + 
getValueKind(Arg.getType(), TypeQual, BaseTypeName), Name, + TypeName, BaseTypeName, AccQual, TypeQual); } void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, - ValueKind ValueKind, StringRef TypeQual, - StringRef BaseTypeName, StringRef AccQual, - StringRef Name, StringRef TypeName) { - CodeObjectMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); - auto &Arg = CodeObjectMetadata.mKernels.back().mArgs.back(); + ValueKind ValueKind, StringRef Name, + StringRef TypeName, StringRef BaseTypeName, + StringRef AccQual, StringRef TypeQual) { + HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata()); + auto &Arg = HSAMetadata.mKernels.back().mArgs.back(); + Arg.mName = Name; + Arg.mTypeName = TypeName; Arg.mSize = DL.getTypeAllocSize(Ty); Arg.mAlign = DL.getABITypeAlignment(Ty); Arg.mValueKind = ValueKind; @@ -327,62 +334,25 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy); } - Arg.mAccQual = getAccessQualifier(AccQual); - if (auto PtrTy = dyn_cast(Ty)) Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace()); + Arg.mAccQual = getAccessQualifier(AccQual); + + // TODO: Emit Arg.mActualAccQual. + SmallVector SplitTypeQuals; TypeQual.split(SplitTypeQuals, " ", -1, false); for (StringRef Key : SplitTypeQuals) { auto P = StringSwitch(Key) .Case("const", &Arg.mIsConst) - .Case("pipe", &Arg.mIsPipe) .Case("restrict", &Arg.mIsRestrict) .Case("volatile", &Arg.mIsVolatile) + .Case("pipe", &Arg.mIsPipe) .Default(nullptr); if (P) *P = true; } - - Arg.mName = Name; - Arg.mTypeName = TypeName; -} - -void MetadataStreamer::emitKernelCodeProps( - const amd_kernel_code_t &KernelCode) { - auto &CodeProps = CodeObjectMetadata.mKernels.back().mCodeProps; - - CodeProps.mKernargSegmentSize = KernelCode.kernarg_segment_byte_size; - CodeProps.mWorkgroupGroupSegmentSize = - KernelCode.workgroup_group_segment_byte_size; - CodeProps.mWorkitemPrivateSegmentSize = - KernelCode.workitem_private_segment_byte_size; - CodeProps.mWavefrontNumSGPRs = KernelCode.wavefront_sgpr_count; - CodeProps.mWorkitemNumVGPRs = KernelCode.workitem_vgpr_count; - CodeProps.mKernargSegmentAlign = KernelCode.kernarg_segment_alignment; - CodeProps.mGroupSegmentAlign = KernelCode.group_segment_alignment; - CodeProps.mPrivateSegmentAlign = KernelCode.private_segment_alignment; - CodeProps.mWavefrontSize = KernelCode.wavefront_size; -} - -void MetadataStreamer::emitKernelDebugProps( - const amd_kernel_code_t &KernelCode) { - if (!(KernelCode.code_properties & AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED)) - return; - - auto &DebugProps = CodeObjectMetadata.mKernels.back().mDebugProps; - - // FIXME: Need to pass down debugger ABI version through features. This is ok - // for now because we only have one version. 
- DebugProps.mDebuggerABIVersion.push_back(1); - DebugProps.mDebuggerABIVersion.push_back(0); - DebugProps.mReservedNumVGPRs = KernelCode.reserved_vgpr_count; - DebugProps.mReservedFirstVGPR = KernelCode.reserved_vgpr_first; - DebugProps.mPrivateSegmentBufferSGPR = - KernelCode.debug_private_segment_buffer_sgpr; - DebugProps.mWavefrontPrivateSegmentOffsetSGPR = - KernelCode.debug_wavefront_private_segment_offset_sgpr; } void MetadataStreamer::begin(const Module &Mod) { @@ -391,42 +361,36 @@ void MetadataStreamer::begin(const Module &Mod) { emitPrintf(Mod); } -void MetadataStreamer::emitKernel(const Function &Func, - const amd_kernel_code_t &KernelCode) { +void MetadataStreamer::end() { + std::string HSAMetadataString; + if (toString(HSAMetadata, HSAMetadataString)) + return; + + if (DumpHSAMetadata) + dump(HSAMetadataString); + if (VerifyHSAMetadata) + verify(HSAMetadataString); +} + +void MetadataStreamer::emitKernel( + const Function &Func, + const Kernel::CodeProps::Metadata &CodeProps, + const Kernel::DebugProps::Metadata &DebugProps) { if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL) return; - CodeObjectMetadata.mKernels.push_back(Kernel::Metadata()); - auto &Kernel = CodeObjectMetadata.mKernels.back(); + HSAMetadata.mKernels.push_back(Kernel::Metadata()); + auto &Kernel = HSAMetadata.mKernels.back(); Kernel.mName = Func.getName(); + Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str(); emitKernelLanguage(Func); emitKernelAttrs(Func); emitKernelArgs(Func); - emitKernelCodeProps(KernelCode); - emitKernelDebugProps(KernelCode); -} - -ErrorOr MetadataStreamer::toYamlString() { - std::string YamlString; - if (auto Error = Metadata::toYamlString(CodeObjectMetadata, YamlString)) - return Error; - - if (DumpCodeObjectMetadata) - dump(YamlString); - if (VerifyCodeObjectMetadata) - verify(YamlString); - - return YamlString; -} - -ErrorOr MetadataStreamer::toYamlString(StringRef YamlString) { - if (auto Error = Metadata::fromYamlString(YamlString, CodeObjectMetadata)) - return Error; - - return toYamlString(); + HSAMetadata.mKernels.back().mCodeProps = CodeProps; + HSAMetadata.mKernels.back().mDebugProps = DebugProps; } -} // end namespace CodeObject +} // end namespace HSAMD } // end namespace AMDGPU } // end namespace llvm diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h similarity index 58% rename from lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h rename to lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h index c6681431d74d4..bd6515521a742 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.h @@ -1,4 +1,4 @@ -//===--- AMDGPUCodeObjectMetadataStreamer.h ---------------------*- C++ -*-===// +//===--- AMDGPUHSAMetadataStreamer.h ----------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -8,19 +8,18 @@ //===----------------------------------------------------------------------===// // /// \file -/// \brief AMDGPU Code Object Metadata Streamer. +/// \brief AMDGPU HSA Metadata Streamer. 
/// // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H -#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H #include "AMDGPU.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/AMDGPUCodeObjectMetadata.h" -#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/AMDGPUMetadata.h" namespace llvm { @@ -32,16 +31,16 @@ class Module; class Type; namespace AMDGPU { -namespace CodeObject { +namespace HSAMD { class MetadataStreamer final { private: - Metadata CodeObjectMetadata; + Metadata HSAMetadata; AMDGPUAS AMDGPUASI; - void dump(StringRef YamlString) const; + void dump(StringRef HSAMetadataString) const; - void verify(StringRef YamlString) const; + void verify(StringRef HSAMetadataString) const; AccessQualifier getAccessQualifier(StringRef AccQual) const; @@ -69,31 +68,29 @@ class MetadataStreamer final { void emitKernelArg(const Argument &Arg); void emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind, - StringRef TypeQual = "", StringRef BaseTypeName = "", - StringRef AccQual = "", StringRef Name = "", - StringRef TypeName = ""); - - void emitKernelCodeProps(const amd_kernel_code_t &KernelCode); - - void emitKernelDebugProps(const amd_kernel_code_t &KernelCode); + StringRef Name = "", StringRef TypeName = "", + StringRef BaseTypeName = "", StringRef AccQual = "", + StringRef TypeQual = ""); public: MetadataStreamer() = default; ~MetadataStreamer() = default; - void begin(const Module &Mod); + const Metadata &getHSAMetadata() const { + return HSAMetadata; + } - void end() {} - - void emitKernel(const Function &Func, const amd_kernel_code_t &KernelCode); + void begin(const Module &Mod); - ErrorOr<std::string> toYamlString(); + void end(); - ErrorOr<std::string> toYamlString(StringRef YamlString); + void emitKernel(const Function &Func, + const Kernel::CodeProps::Metadata &CodeProps, + const Kernel::DebugProps::Metadata &DebugProps); }; -} // end namespace CodeObject +} // end namespace HSAMD } // end namespace AMDGPU } // end namespace llvm -#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H +#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 2968d834a5eb3..2b321c04fb309 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -18,6 +18,8 @@ #include "AMDGPUTargetStreamer.h" #include "InstPrinter/AMDGPUInstPrinter.h" #include "SIDefines.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -78,12 +80,12 @@ static MCTargetStreamer * createAMDGPUObjectTargetStreamer( } static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, - MCAsmBackend &MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll) { - if (T.getOS() == Triple::AMDHSA) - return createAMDGPUELFStreamer(Context, MAB, OS, Emitter, RelaxAll); - - return createELFStreamer(Context, MAB, OS, Emitter, RelaxAll); + std::unique_ptr<MCAsmBackend> &&MAB, + raw_pwrite_stream &OS, + std::unique_ptr<MCCodeEmitter> &&Emitter, + bool RelaxAll) { + return
createAMDGPUELFStreamer(T, Context, std::move(MAB), OS, + std::move(Emitter), RelaxAll); } extern "C" void LLVMInitializeAMDGPUTargetMC() { diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h index f80b5f3a6dba2..56bcff487174f 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h @@ -18,6 +18,8 @@ #include "llvm/Support/DataTypes.h" +#include <memory> + namespace llvm { class MCAsmBackend; class MCCodeEmitter; @@ -47,9 +49,9 @@ MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU, const MCTargetOptions &Options); -MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit, - bool HasRelocationAddend, - raw_pwrite_stream &OS); +std::unique_ptr<MCObjectWriter> +createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI, + bool HasRelocationAddend, raw_pwrite_stream &OS); } // End llvm namespace #define GET_REGINFO_ENUM diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 2a0032fc9adcd..d897956daccf4 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -39,21 +39,12 @@ using namespace llvm::AMDGPU; // AMDGPUTargetStreamer //===----------------------------------------------------------------------===// -AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S) - : MCTargetStreamer(S) {} - -void AMDGPUTargetStreamer::EmitStartOfCodeObjectMetadata(const Module &Mod) { - CodeObjectMetadataStreamer.begin(Mod); -} - -void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata( - const Function &Func, const amd_kernel_code_t &KernelCode) { - CodeObjectMetadataStreamer.emitKernel(Func, KernelCode); -} +bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) { + HSAMD::Metadata HSAMetadata; + if (HSAMD::fromString(HSAMetadataString, HSAMetadata)) + return false; -void AMDGPUTargetStreamer::EmitEndOfCodeObjectMetadata() { - CodeObjectMetadataStreamer.end(); - EmitCodeObjectMetadata(CodeObjectMetadataStreamer.toYamlString().get()); + return EmitHSAMetadata(HSAMetadata); } //===----------------------------------------------------------------------===// @@ -100,15 +91,30 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, } } -bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(StringRef YamlString) { - auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); - if (!VerifiedYamlString) +bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) { + OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n"; + return true; +} + +bool AMDGPUTargetAsmStreamer::EmitHSAMetadata( + const AMDGPU::HSAMD::Metadata &HSAMetadata) { + std::string HSAMetadataString; + if (HSAMD::toString(HSAMetadata, HSAMetadataString)) return false; - OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin << '\n'; - OS << VerifiedYamlString.get(); - OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd << '\n'; + OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n'; + OS << HSAMetadataString << '\n'; + OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n'; + return true; +} +bool AMDGPUTargetAsmStreamer::EmitPALMetadata( + const PALMD::Metadata &PALMetadata) { + std::string PALMetadataString; + if (PALMD::toString(PALMetadata, PALMetadataString)) + return false; + + OS << '\t' << PALMD::AssemblerDirective << 
PALMetadataString << '\n'; return true; } @@ -124,7 +130,7 @@ MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() { } void AMDGPUTargetELFStreamer::EmitAMDGPUNote( - const MCExpr *DescSZ, ElfNote::NoteType Type, + const MCExpr *DescSZ, unsigned NoteType, function_ref EmitDesc) { auto &S = getStreamer(); auto &Context = S.getContext(); @@ -136,7 +142,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUNote( ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC)); S.EmitIntValue(NameSZ, 4); // namesz S.EmitValue(DescSZ, 4); // descz - S.EmitIntValue(Type, 4); // type + S.EmitIntValue(NoteType, 4); // type S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name S.EmitValueToAlignment(4, 0, 1, 0); // padding 0 EmitDesc(S); // desc @@ -204,9 +210,32 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL); } -bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) { - auto VerifiedYamlString = CodeObjectMetadataStreamer.toYamlString(YamlString); - if (!VerifiedYamlString) +bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) { + // Create two labels to mark the beginning and end of the desc field + // and a MCExpr to calculate the size of the desc field. + auto &Context = getContext(); + auto *DescBegin = Context.createTempSymbol(); + auto *DescEnd = Context.createTempSymbol(); + auto *DescSZ = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(DescEnd, Context), + MCSymbolRefExpr::create(DescBegin, Context), Context); + + EmitAMDGPUNote( + DescSZ, + ELF::NT_AMD_AMDGPU_ISA, + [&](MCELFStreamer &OS) { + OS.EmitLabel(DescBegin); + OS.EmitBytes(IsaVersionString); + OS.EmitLabel(DescEnd); + } + ); + return true; +} + +bool AMDGPUTargetELFStreamer::EmitHSAMetadata( + const AMDGPU::HSAMD::Metadata &HSAMetadata) { + std::string HSAMetadataString; + if (HSAMD::toString(HSAMetadata, HSAMetadataString)) return false; // Create two labels to mark the beginning and end of the desc field @@ -220,13 +249,25 @@ bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(StringRef YamlString) { EmitAMDGPUNote( DescSZ, - ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_METADATA, + ELF::NT_AMD_AMDGPU_HSA_METADATA, [&](MCELFStreamer &OS) { OS.EmitLabel(DescBegin); - OS.EmitBytes(VerifiedYamlString.get()); + OS.EmitBytes(HSAMetadataString); OS.EmitLabel(DescEnd); } ); + return true; +} +bool AMDGPUTargetELFStreamer::EmitPALMetadata( + const PALMD::Metadata &PALMetadata) { + EmitAMDGPUNote( + MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()), + ELF::NT_AMD_AMDGPU_PAL_METADATA, + [&](MCELFStreamer &OS){ + for (auto I : PALMetadata) + OS.EmitIntValue(I, sizeof(uint32_t)); + } + ); return true; } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index 968128e94d0b2..0919b754480df 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -10,9 +10,10 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H -#include "AMDGPUCodeObjectMetadataStreamer.h" #include "AMDKernelCodeT.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/AMDGPUMetadata.h" namespace llvm { #include "AMDGPUPTNote.h" @@ -27,11 +28,11 @@ class Type; class AMDGPUTargetStreamer : public MCTargetStreamer { protected: - AMDGPU::CodeObject::MetadataStreamer CodeObjectMetadataStreamer; 
MCContext &getContext() const { return Streamer.getContext(); } public: - AMDGPUTargetStreamer(MCStreamer &S); + AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} + virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major, uint32_t Minor) = 0; @@ -44,15 +45,17 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0; - virtual void EmitStartOfCodeObjectMetadata(const Module &Mod); + /// \returns True on success, false on failure. + virtual bool EmitISAVersion(StringRef IsaVersionString) = 0; - virtual void EmitKernelCodeObjectMetadata( - const Function &Func, const amd_kernel_code_t &KernelCode); + /// \returns True on success, false on failure. + virtual bool EmitHSAMetadata(StringRef HSAMetadataString); - virtual void EmitEndOfCodeObjectMetadata(); + /// \returns True on success, false on failure. + virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0; /// \returns True on success, false on failure. - virtual bool EmitCodeObjectMetadata(StringRef YamlString) = 0; + virtual bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) = 0; }; class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { @@ -71,14 +74,19 @@ class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer { void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; /// \returns True on success, false on failure. - bool EmitCodeObjectMetadata(StringRef YamlString) override; + bool EmitISAVersion(StringRef IsaVersionString) override; + + /// \returns True on success, false on failure. + bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; + + /// \returns True on success, false on failure. + bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override; }; class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { MCStreamer &Streamer; - void EmitAMDGPUNote(const MCExpr *DescSize, - AMDGPU::ElfNote::NoteType Type, + void EmitAMDGPUNote(const MCExpr *DescSize, unsigned NoteType, function_ref EmitDesc); public: @@ -98,7 +106,13 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer { void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override; /// \returns True on success, false on failure. - bool EmitCodeObjectMetadata(StringRef YamlString) override; + bool EmitISAVersion(StringRef IsaVersionString) override; + + /// \returns True on success, false on failure. + bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override; + + /// \returns True on success, false on failure. 
+ bool EmitPALMetadata(const AMDGPU::PALMD::Metadata &PALMetadata) override; }; } diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt index 09e3efad10af1..f9cb4678dc511 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt @@ -1,8 +1,8 @@ add_llvm_library(LLVMAMDGPUDesc AMDGPUAsmBackend.cpp - AMDGPUCodeObjectMetadataStreamer.cpp AMDGPUELFObjectWriter.cpp AMDGPUELFStreamer.cpp + AMDGPUHSAMetadataStreamer.cpp AMDGPUMCAsmInfo.cpp AMDGPUMCCodeEmitter.cpp AMDGPUMCTargetDesc.cpp diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td index 06e2c11b01935..99a018d2e245f 100644 --- a/lib/Target/AMDGPU/MIMGInstructions.td +++ b/lib/Target/AMDGPU/MIMGInstructions.td @@ -349,7 +349,7 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o" /********** ======================= **********/ // Image + sampler -class SampleRawPattern : Pat < +class SampleRawPattern : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm, i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe), (opcode $addr, $rsrc, $sampler, @@ -371,7 +371,7 @@ multiclass SampleRawPatterns { // 2. Handle v4i32 rsrc type (Register Class for the instruction to be SReg_128). // 3. Add A16 support when we pass address of half type. multiclass AMDGCNSamplePattern { - def : Pat< + def : GCNPat< (dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc, i1:$slc, i1:$lwe, i1:$da)), (opcode $addr, $rsrc, $sampler, @@ -396,7 +396,7 @@ multiclass AMDGCNSamplePatterns { } // Image only -class ImagePattern : Pat < +class ImagePattern : GCNPat < (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$unorm, imm:$r128, imm:$da, imm:$glc, imm:$slc, imm:$tfe, imm:$lwe), (opcode $addr, $rsrc, @@ -411,7 +411,7 @@ multiclass ImagePatterns { } multiclass ImageLoadPattern { - def : Pat < + def : GCNPat < (dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe, i1:$da)), (opcode $addr, $rsrc, @@ -434,7 +434,7 @@ multiclass ImageLoadPatterns { } multiclass ImageStorePattern { - def : Pat < + def : GCNPat < (name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe, i1:$da), (opcode $data, $addr, $rsrc, @@ -456,7 +456,7 @@ multiclass ImageStorePatterns { defm : ImageStoreDataPatterns(opcode # _V4), v4f32>; } -class ImageAtomicPattern : Pat < +class ImageAtomicPattern : GCNPat < (name i32:$vdata, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc), (opcode $vdata, $addr, $rsrc, 1, 1, 1, (as_i1imm $slc), (as_i1imm $r128), 0, 0, (as_i1imm $da)) >; @@ -467,7 +467,7 @@ multiclass ImageAtomicPatterns { def : ImageAtomicPattern(opcode # _V4), v4i32>; } -class ImageAtomicCmpSwapPattern : Pat < +class ImageAtomicCmpSwapPattern : GCNPat < (int_amdgcn_image_atomic_cmpswap i32:$vsrc, i32:$vcmp, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc), (EXTRACT_SUBREG @@ -584,34 +584,34 @@ defm : ImageAtomicPatterns; defm : ImageAtomicPatterns; /* SIsample for simple 1D texture lookup */ -def : Pat < +def : GCNPat < (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm), (IMAGE_SAMPLE_V4_V1 $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0) >; -class SamplePattern : Pat < +class SamplePattern : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm), (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0) >; -class SampleRectPattern : Pat < +class SampleRectPattern : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT), 
(opcode $addr, $rsrc, $sampler, 0xf, 1, 0, 0, 0, 0, 0, 0) >; -class SampleArrayPattern : Pat < +class SampleArrayPattern : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY), (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1) >; class SampleShadowPattern : Pat < + ValueType vt> : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW), (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0) >; class SampleShadowArrayPattern : Pat < + ValueType vt> : GCNPat < (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY), (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1) >; diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td index 1f8f5a7e8d421..c00383a176b45 100644 --- a/lib/Target/AMDGPU/Processors.td +++ b/lib/Target/AMDGPU/Processors.td @@ -13,7 +13,7 @@ class Proc Featur // The code produced for "generic" is only useful for tests and cannot // reasonably be expected to execute on any particular target. def : ProcessorModel<"generic", NoSchedModel, [ - FeatureGCN + FeatureGCN, FeatureWavefrontSize64 ]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/R600InstrFormats.td b/lib/Target/AMDGPU/R600InstrFormats.td index 68fcc545916a3..61106ed42e64f 100644 --- a/lib/Target/AMDGPU/R600InstrFormats.td +++ b/lib/Target/AMDGPU/R600InstrFormats.td @@ -11,9 +11,18 @@ // //===----------------------------------------------------------------------===// +def isR600 : Predicate<"Subtarget->getGeneration() <= R600Subtarget::R700">; + +def isR600toCayman : Predicate< + "Subtarget->getGeneration() <= R600Subtarget::NORTHERN_ISLANDS">; + +class R600Pat : AMDGPUPat { + let SubtargetPredicate = isR600toCayman; +} + class InstR600 pattern, - InstrItinClass itin> - : AMDGPUInst { + InstrItinClass itin = NoItinerary> + : AMDGPUInst , PredicateControl { field bits<64> Inst; bit Trig = 0; @@ -31,6 +40,7 @@ class InstR600 pattern, bit IsExport = 0; bit LDS_1A2D = 0; + let SubtargetPredicate = isR600toCayman; let Namespace = "AMDGPU"; let OutOperandList = outs; let InOperandList = ins; diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index c5da5e4042004..15dcf650d9afe 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -1186,10 +1186,8 @@ int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const { } const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass(); - for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(), - LE = MRI.livein_end(); - LI != LE; ++LI) { - unsigned Reg = LI->first; + for (std::pair LI : MRI.liveins()) { + unsigned Reg = LI.first; if (TargetRegisterInfo::isVirtualRegister(Reg) || !IndirectRC->contains(Reg)) continue; diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 63a35b6dc595d..f422f441af4f7 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -15,6 +15,13 @@ include "R600Intrinsics.td" include "R600InstrFormats.td" +// FIXME: Should not be arbitrarily split from other R600 inst classes. 
+class R600WrapperInst pattern = []> : + AMDGPUInst, PredicateControl { + let SubtargetPredicate = isR600toCayman; +} + + class InstR600ISA pattern = []> : InstR600 { @@ -346,12 +353,6 @@ def vtx_id2_az_extloadi8 : LoadVtxId2 ; def vtx_id2_az_extloadi16 : LoadVtxId2 ; def vtx_id2_load : LoadVtxId2 ; -def isR600 : Predicate<"Subtarget->getGeneration() <= R600Subtarget::R700">; - -def isR600toCayman - : Predicate< - "Subtarget->getGeneration() <= R600Subtarget::NORTHERN_ISLANDS">; - //===----------------------------------------------------------------------===// // R600 SDNodes //===----------------------------------------------------------------------===// @@ -393,7 +394,7 @@ def TEXTURE_FETCH_Type : SDTypeProfile<1, 19, [SDTCisFP<0>]>; def TEXTURE_FETCH: SDNode<"AMDGPUISD::TEXTURE_FETCH", TEXTURE_FETCH_Type, []>; multiclass TexPattern TextureOp, Instruction inst, ValueType vt = v4f32> { -def : Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR, +def : R600Pat<(TEXTURE_FETCH (i32 TextureOp), vt:$SRC_GPR, (i32 imm:$srcx), (i32 imm:$srcy), (i32 imm:$srcz), (i32 imm:$srcw), (i32 imm:$offsetx), (i32 imm:$offsety), (i32 imm:$offsetz), (i32 imm:$DST_SEL_X), (i32 imm:$DST_SEL_Y), (i32 imm:$DST_SEL_Z), @@ -479,7 +480,7 @@ class ExportBufWord1 { } multiclass ExportPattern cf_inst> { - def : Pat<(R600_EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), + def : R600Pat<(R600_EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), (ExportInst R600_Reg128:$src, imm:$type, imm:$base, imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0) @@ -490,22 +491,22 @@ multiclass ExportPattern cf_inst> { multiclass SteamOutputExportPattern buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> { // Stream0 - def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), + def : R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 0), (i32 imm:$mask)), (ExportInst R600_Reg128:$src, 0, imm:$arraybase, 4095, imm:$mask, buf0inst, 0)>; // Stream1 - def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), + def : R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 1), (i32 imm:$mask)), (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf1inst, 0)>; // Stream2 - def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), + def : R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 2), (i32 imm:$mask)), (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf2inst, 0)>; // Stream3 - def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), + def : R600Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src), (i32 imm:$arraybase), (i32 3), (i32 imm:$mask)), (ExportInst $src, 0, imm:$arraybase, 4095, imm:$mask, buf3inst, 0)>; @@ -549,7 +550,7 @@ class ExportBufInst : InstR600ISA<( def KCACHE : InstFlag<"printKCache">; -class ALU_CLAUSE inst, string OpName> : AMDGPUInst <(outs), +class ALU_CLAUSE inst, string OpName> : R600WrapperInst <(outs), (ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, KCACHE:$KCACHE_MODE0, KCACHE:$KCACHE_MODE1, i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, @@ -578,7 +579,7 @@ class CF_WORD0_R600 { let Word0 = ADDR; } -class CF_CLAUSE_R600 inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +class CF_CLAUSE_R600 inst, dag ins, string AsmPrint> : R600WrapperInst <(outs), ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { field 
bits<64> Inst; bits<4> CNT; @@ -598,7 +599,7 @@ ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 { let Inst{63-32} = Word1; } -class CF_CLAUSE_EG inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +class CF_CLAUSE_EG inst, dag ins, string AsmPrint> : R600WrapperInst <(outs), ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG { field bits<64> Inst; @@ -621,7 +622,7 @@ def CF_ALU_CONTINUE : ALU_CLAUSE<13, "ALU_CONTINUE">; def CF_ALU_BREAK : ALU_CLAUSE<14, "ALU_BREAK">; def CF_ALU_ELSE_AFTER : ALU_CLAUSE<15, "ALU_ELSE_AFTER">; -def FETCH_CLAUSE : AMDGPUInst <(outs), +def FETCH_CLAUSE : R600WrapperInst <(outs), (ins i32imm:$addr), "Fetch clause starting at $addr:", [] > { field bits<8> Inst; bits<8> num; @@ -629,7 +630,7 @@ def FETCH_CLAUSE : AMDGPUInst <(outs), let isCodeGenOnly = 1; } -def ALU_CLAUSE : AMDGPUInst <(outs), +def ALU_CLAUSE : R600WrapperInst <(outs), (ins i32imm:$addr), "ALU clause starting at $addr:", [] > { field bits<8> Inst; bits<8> num; @@ -637,7 +638,7 @@ def ALU_CLAUSE : AMDGPUInst <(outs), let isCodeGenOnly = 1; } -def LITERALS : AMDGPUInst <(outs), +def LITERALS : R600WrapperInst <(outs), (ins LITERAL:$literal1, LITERAL:$literal2), "$literal1, $literal2", [] > { let isCodeGenOnly = 1; @@ -649,12 +650,10 @@ def LITERALS : AMDGPUInst <(outs), let Inst{63-32} = literal2; } -def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > { +def PAD : R600WrapperInst <(outs), (ins), "PAD", [] > { field bits<64> Inst; } -let Predicates = [isR600toCayman] in { - //===----------------------------------------------------------------------===// // Common Instructions R600, R700, Evergreen, Cayman //===----------------------------------------------------------------------===// @@ -784,7 +783,7 @@ def MOV : R600_1OP <0x19, "MOV", []>; // Most DUMMY_CHAINs should be eliminated during legalization, but undef // values can sneak in some to selection. let isPseudo = 1, isCodeGenOnly = 1 in { -def DUMMY_CHAIN : AMDGPUInst < +def DUMMY_CHAIN : R600WrapperInst < (outs), (ins), "DUMMY_CHAIN", @@ -795,7 +794,7 @@ def DUMMY_CHAIN : AMDGPUInst < let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { -class MOV_IMM : AMDGPUInst < +class MOV_IMM : R600WrapperInst < (outs R600_Reg32:$dst), (ins immType:$imm), "", @@ -805,20 +804,20 @@ class MOV_IMM : AMDGPUInst < } // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 def MOV_IMM_I32 : MOV_IMM; -def : Pat < +def : R600Pat < (imm:$val), (MOV_IMM_I32 imm:$val) >; def MOV_IMM_GLOBAL_ADDR : MOV_IMM; -def : Pat < +def : R600Pat < (AMDGPUconstdata_ptr tglobaladdr:$addr), (MOV_IMM_GLOBAL_ADDR tglobaladdr:$addr) >; def MOV_IMM_F32 : MOV_IMM; -def : Pat < +def : R600Pat < (fpimm:$val), (MOV_IMM_F32 fpimm:$val) >; @@ -1201,7 +1200,7 @@ def FNEG_R600 : FNEG; // FIXME: Should be predicated on unsafe fp math. multiclass DIV_Common { -def : Pat< +def : R600Pat< (fdiv f32:$src0, f32:$src1), (MUL_IEEE $src0, (recip_ieee $src1)) >; @@ -1248,7 +1247,7 @@ let Predicates = [isR600] in { defm DIV_r600 : DIV_Common; def : POW_Common ; - def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; + def : R600Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_r600 $src))>; def : RsqPat; def R600_ExportSwz : ExportSwzInst { @@ -1336,11 +1335,11 @@ defm R600_ : RegisterLoadStore ; // Hardcode channel to 0 // NOTE: LSHR is not available here. 
LSHR is per family instruction -def : Pat < +def : R600Pat < (i32 (load_private ADDRIndirect:$addr) ), (R600_RegisterLoad FRAMEri:$addr, (i32 0)) >; -def : Pat < +def : R600Pat < (store_private i32:$val, ADDRIndirect:$addr), (R600_RegisterStore i32:$val, FRAMEri:$addr, (i32 0)) >; @@ -1691,7 +1690,7 @@ def R600_INSERT_ELT_V2 : InsertVertical ; def R600_INSERT_ELT_V4 : InsertVertical ; class ExtractVerticalPat : Pat < + ValueType scalar_ty> : R600Pat < (scalar_ty (extractelt vec_ty:$vec, i32:$index)), (inst $vec, $index) >; @@ -1702,7 +1701,7 @@ def : ExtractVerticalPat ; def : ExtractVerticalPat ; class InsertVerticalPat : Pat < + ValueType scalar_ty> : R600Pat < (vec_ty (insertelt vec_ty:$vec, scalar_ty:$value, i32:$index)), (inst $vec, $value, $index) >; @@ -1716,9 +1715,11 @@ def : InsertVerticalPat ; // ISel Patterns //===----------------------------------------------------------------------===// +let SubtargetPredicate = isR600toCayman in { + // CND*_INT Patterns for f32 True / False values -class CND_INT_f32 : Pat < +class CND_INT_f32 : R600Pat < (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc), (cnd $src0, $src1, $src2) >; @@ -1728,18 +1729,18 @@ def : CND_INT_f32 ; def : CND_INT_f32 ; //CNDGE_INT extra pattern -def : Pat < +def : R600Pat < (selectcc i32:$src0, -1, i32:$src1, i32:$src2, COND_SGT), (CNDGE_INT $src0, $src1, $src2) >; // KIL Patterns -def KILP : Pat < +def KILP : R600Pat < (int_AMDGPU_kilp), (MASK_WRITE (KILLGT (f32 ONE), (f32 ZERO))) >; -def KIL : Pat < +def KIL : R600Pat < (int_AMDGPU_kill f32:$src0), (MASK_WRITE (KILLGT (f32 ZERO), $src0)) >; @@ -1788,7 +1789,7 @@ def : BitConvert ; // DWORDADDR pattern def : DwordAddrPat ; -} // End isR600toCayman Predicate +} // End SubtargetPredicate = isR600toCayman def getLDSNoRetOp : InstrMapping { let FilterClass = "R600_LDS_1A1D"; diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h index 98cf255d710d9..5a767882c955a 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -375,7 +375,9 @@ enum SDWA9EncValues{ #define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8) #define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128 #define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228 +#define R_00B328_SPI_SHADER_PGM_RSRC1_ES 0x00B328 #define R_00B428_SPI_SHADER_PGM_RSRC1_HS 0x00B428 +#define R_00B528_SPI_SHADER_PGM_RSRC1_LS 0x00B528 #define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 #define S_00B028_VGPRS(x) (((x) & 0x3F) << 0) #define S_00B028_SGPRS(x) (((x) & 0x0F) << 6) diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index 9fc38aeefaa21..0fa6712527fa2 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -628,7 +628,7 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, MachineOperand *NonInlineUse = nullptr; int NonInlineUseOpNo = -1; - MachineRegisterInfo::use_iterator NextUse, NextInstUse; + MachineRegisterInfo::use_iterator NextUse; for (MachineRegisterInfo::use_iterator Use = MRI->use_begin(Dst.getReg()), E = MRI->use_end(); Use != E; Use = NextUse) { @@ -723,6 +723,8 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI, } } +// Clamp patterns are canonically selected to v_max_* instructions, so only +// handle them. 
const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { unsigned Op = MI.getOpcode(); switch (Op) { @@ -737,6 +739,7 @@ const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const { const MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0); const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1); if (!Src0->isReg() || !Src1->isReg() || + Src0->getReg() != Src1->getReg() || Src0->getSubReg() != Src1->getSubReg() || Src0->getSubReg() != AMDGPU::NoSubRegister) return nullptr; diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index ff6fed88e37fb..37f5665be5074 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -219,7 +219,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was // specified. const SISubtarget &ST = MF.getSubtarget(); - auto AMDGPUASI = ST.getAMDGPUAS(); if (ST.debuggerEmitPrologue()) emitDebuggerPrologue(MF, MBB); @@ -356,7 +355,65 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, .addReg(PreloadedPrivateBufferReg, RegState::Kill); } - if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) { + if (ResourceRegUsed) + emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I, + PreloadedPrivateBufferReg, ScratchRsrcReg); +} + +// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set. +void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST, + MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI, + MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg, + unsigned ScratchRsrcReg) const { + + const SIInstrInfo *TII = ST.getInstrInfo(); + const SIRegisterInfo *TRI = &TII->getRegisterInfo(); + DebugLoc DL; + auto AMDGPUASI = ST.getAMDGPUAS(); + + if (ST.isAmdPalOS()) { + // The pointer to the GIT is formed from the offset passed in and either + // the amdgpu-git-ptr-high function attribute or the top part of the PC + unsigned RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + + const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); + + if (MFI->getGITPtrHigh() != 0xffffffff) { + BuildMI(MBB, I, DL, SMovB32, RsrcHi) + .addImm(MFI->getGITPtrHigh()) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } else { + const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64); + BuildMI(MBB, I, DL, GetPC64, Rsrc01); + } + BuildMI(MBB, I, DL, SMovB32, RsrcLo) + .addReg(AMDGPU::SGPR0) // Low address passed in + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + // We now have the GIT ptr - now get the scratch descriptor from the entry + // at offset 0. 
+ PointerType *PtrTy = + PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), + AMDGPUAS::CONSTANT_ADDRESS); + MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); + const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM); + auto MMO = MF.getMachineMemOperand(PtrInfo, + MachineMemOperand::MOLoad | + MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable, + 0, 0); + BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) + .addReg(Rsrc01) + .addImm(0) // offset + .addImm(0) // glc + .addReg(ScratchRsrcReg, RegState::ImplicitDefine) + .addMemOperand(MMO); + return; + } + if (ST.isMesaGfxShader(MF) + || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) { assert(!ST.isAmdCodeObjectV2(MF)); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h index cc1c85ff6bf35..df6f1632a3167 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.h +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -69,6 +69,12 @@ class SIFrameLowering final : public AMDGPUFrameLowering { /// \brief Emits debugger prologue. void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const; + // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set. + void emitEntryFunctionScratchSetup(const SISubtarget &ST, MachineFunction &MF, + MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI, + MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg, + unsigned ScratchRsrcReg) const; + public: bool hasFP(const MachineFunction &MF) const override; bool hasSP(const MachineFunction &MF) const; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 4458321effaac..70a46dbbd4b5a 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -469,6 +469,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i16, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f16, Custom); + setOperationAction(ISD::ANY_EXTEND, MVT::v2i32, Expand); setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand); setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand); @@ -1493,14 +1494,31 @@ SDValue SITargetLowering::LowerFormalArguments( // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled. // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be // enabled too. - if (CallConv == CallingConv::AMDGPU_PS && - ((Info->getPSInputAddr() & 0x7F) == 0 || - ((Info->getPSInputAddr() & 0xF) == 0 && - Info->isPSInputAllocated(11)))) { - CCInfo.AllocateReg(AMDGPU::VGPR0); - CCInfo.AllocateReg(AMDGPU::VGPR1); - Info->markPSInputAllocated(0); - Info->markPSInputEnabled(0); + if (CallConv == CallingConv::AMDGPU_PS) { + if ((Info->getPSInputAddr() & 0x7F) == 0 || + ((Info->getPSInputAddr() & 0xF) == 0 && + Info->isPSInputAllocated(11))) { + CCInfo.AllocateReg(AMDGPU::VGPR0); + CCInfo.AllocateReg(AMDGPU::VGPR1); + Info->markPSInputAllocated(0); + Info->markPSInputEnabled(0); + } + if (Subtarget->isAmdPalOS()) { + // For isAmdPalOS, the user does not enable some bits after compilation + // based on run-time states; the register values being generated here are + // the final ones set in hardware. Therefore we need to apply the + // workaround to PSInputAddr and PSInputEnable together. 
(The case where + // a bit is set in PSInputAddr but not PSInputEnable is where the + // frontend set up an input arg for a particular interpolation mode, but + // nothing uses that input arg. Really we should have an earlier pass + // that removes such an arg.) + unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable(); + if ((PsInputBits & 0x7F) == 0 || + ((PsInputBits & 0xF) == 0 && + (PsInputBits >> 11 & 1))) + Info->markPSInputEnabled( + countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined)); + } } assert(!Info->hasDispatchPtr() && @@ -2432,7 +2450,7 @@ MachineBasicBlock *SITargetLowering::splitKillBlock(MachineInstr &MI, if (SplitPoint == BB->end()) { // Don't bother with a new block. - MI.setDesc(TII->get(AMDGPU::SI_KILL_TERMINATOR)); + MI.setDesc(TII->getKillTerminatorFromPseudo(MI.getOpcode())); return BB; } @@ -2446,7 +2464,7 @@ MachineBasicBlock *SITargetLowering::splitKillBlock(MachineInstr &MI, SplitBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(SplitBB); - MI.setDesc(TII->get(AMDGPU::SI_KILL_TERMINATOR)); + MI.setDesc(TII->getKillTerminatorFromPseudo(MI.getOpcode())); return SplitBB; } @@ -3000,7 +3018,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( case AMDGPU::SI_INDIRECT_DST_V8: case AMDGPU::SI_INDIRECT_DST_V16: return emitIndirectDst(MI, *BB, *getSubtarget()); - case AMDGPU::SI_KILL: + case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO: + case AMDGPU::SI_KILL_I1_PSEUDO: return splitKillBlock(MI, BB); case AMDGPU::V_CNDMASK_B64_PSEUDO: { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); @@ -3013,15 +3032,18 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned SrcCondCopy = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy) + .addReg(SrcCond); BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo) .addReg(Src0, 0, AMDGPU::sub0) .addReg(Src1, 0, AMDGPU::sub0) - .addReg(SrcCond); + .addReg(SrcCondCopy); BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi) .addReg(Src0, 0, AMDGPU::sub1) .addReg(Src1, 0, AMDGPU::sub1) - .addReg(SrcCond); + .addReg(SrcCondCopy); BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst) .addReg(DstLo) @@ -3087,6 +3109,10 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( } } +bool SITargetLowering::hasBitPreservingFPLogic(EVT VT) const { + return isTypeLegal(VT.getScalarType()); +} + bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const { // This currently forces unfolding various combinations of fsub into fma with // free fneg'd operands. As long as we have fast FMA (controlled by @@ -6483,8 +6509,7 @@ SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, Node->getOperand(i)), 0)); } - DAG.UpdateNodeOperands(Node, Ops); - return Node; + return DAG.UpdateNodeOperands(Node, Ops); } /// \brief Fold the instructions after selecting them. 
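The amdpal PSInput workaround in LowerFormalArguments above deserves a concrete illustration. Below is a minimal standalone C++ sketch, not LLVM API: the helper names and example bit values are assumed for illustration only. It reproduces the validity test the patch applies to PsInputBits = PSInputAddr & PSInputEnable, and the fallback that force-enables the first requested input, mirroring markPSInputEnabled(countTrailingZeros(PSInputAddr)).

#include <cstdint>
#include <cstdio>

// Hardware constraint re-checked on the combined bits for amdpal:
// at least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled,
// and POS_W_FLOAT (bit 11) requires at least one PERSP_* bit.
static bool psInputMaskValid(uint32_t Bits) {
  if ((Bits & 0x7F) == 0)                      // no PERSP_* and no LINEAR_*
    return false;
  if ((Bits & 0xF) == 0 && ((Bits >> 11) & 1)) // POS_W_FLOAT without PERSP_*
    return false;
  return true;
}

int main() {
  uint32_t PSInputAddr = 0x802;   // assumed: PERSP_CENTER (bit 1) + POS_W_FLOAT (bit 11) requested
  uint32_t PSInputEnable = 0x800; // assumed: only POS_W_FLOAT is actually read
  uint32_t Bits = PSInputAddr & PSInputEnable;
  if (!psInputMaskValid(Bits)) {
    // Force-enable the lowest input present in PSInputAddr
    // (the countTrailingZeros step in the patch).
    unsigned N = 0;
    while (((PSInputAddr >> N) & 1) == 0)
      ++N;
    PSInputEnable |= 1u << N;
  }
  std::printf("PSInputEnable = 0x%x\n", PSInputEnable); // prints 0x802
  return 0;
}

The point of the workaround is that on amdpal the enable mask emitted at compile time is final: no run-time component will repair an invalid all-zero interpolation mask, so the compiler has to patch it itself.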
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index 91380f8c58855..3e1d0a4a1f36a 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -246,6 +246,8 @@ class SITargetLowering final : public AMDGPUTargetLowering { MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; + + bool hasBitPreservingFPLogic(EVT VT) const override; bool enableAggressiveFMAFusion(EVT VT) const override; EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; diff --git a/lib/Target/AMDGPU/SIInsertSkips.cpp b/lib/Target/AMDGPU/SIInsertSkips.cpp index ba346d2fad02c..1b8c9f2771252 100644 --- a/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -132,6 +132,16 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From, I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ) return true; + // V_READFIRSTLANE/V_READLANE destination register may be used as operand + // by some SALU instruction. If exec mask is zero vector instruction + // defining the register that is used by the scalar one is not executed + // and scalar instruction will operate on undefined data. For + // V_READFIRSTLANE/V_READLANE we should avoid predicated execution. + if ((I->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) || + (I->getOpcode() == AMDGPU::V_READLANE_B32)) { + return true; + } + if (I->isInlineAsm()) { const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); const char *AsmStr = I->getOperand(0).getSymbolName(); @@ -190,25 +200,101 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) { void SIInsertSkips::kill(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); - const MachineOperand &Op = MI.getOperand(0); - -#ifndef NDEBUG - CallingConv::ID CallConv = MBB.getParent()->getFunction()->getCallingConv(); - // Kill is only allowed in pixel / geometry shaders. - assert(CallConv == CallingConv::AMDGPU_PS || - CallConv == CallingConv::AMDGPU_GS); -#endif - // Clear this thread from the exec mask if the operand is negative. - if (Op.isImm()) { - // Constant operand: Set exec mask to 0 or do nothing - if (Op.getImm() & 0x80000000) { - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) - .addImm(0); + + switch (MI.getOpcode()) { + case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: { + unsigned Opcode = 0; + + // The opcodes are inverted because the inline immediate has to be + // the first operand, e.g. 
from "x < imm" to "imm > x" + switch (MI.getOperand(2).getImm()) { + case ISD::SETOEQ: + case ISD::SETEQ: + Opcode = AMDGPU::V_CMPX_EQ_F32_e32; + break; + case ISD::SETOGT: + case ISD::SETGT: + Opcode = AMDGPU::V_CMPX_LT_F32_e32; + break; + case ISD::SETOGE: + case ISD::SETGE: + Opcode = AMDGPU::V_CMPX_LE_F32_e32; + break; + case ISD::SETOLT: + case ISD::SETLT: + Opcode = AMDGPU::V_CMPX_GT_F32_e32; + break; + case ISD::SETOLE: + case ISD::SETLE: + Opcode = AMDGPU::V_CMPX_GE_F32_e32; + break; + case ISD::SETONE: + case ISD::SETNE: + Opcode = AMDGPU::V_CMPX_LG_F32_e32; + break; + case ISD::SETO: + Opcode = AMDGPU::V_CMPX_O_F32_e32; + break; + case ISD::SETUO: + Opcode = AMDGPU::V_CMPX_U_F32_e32; + break; + case ISD::SETUEQ: + Opcode = AMDGPU::V_CMPX_NLG_F32_e32; + break; + case ISD::SETUGT: + Opcode = AMDGPU::V_CMPX_NGE_F32_e32; + break; + case ISD::SETUGE: + Opcode = AMDGPU::V_CMPX_NGT_F32_e32; + break; + case ISD::SETULT: + Opcode = AMDGPU::V_CMPX_NLE_F32_e32; + break; + case ISD::SETULE: + Opcode = AMDGPU::V_CMPX_NLT_F32_e32; + break; + case ISD::SETUNE: + Opcode = AMDGPU::V_CMPX_NEQ_F32_e32; + break; + default: + llvm_unreachable("invalid ISD:SET cond code"); } - } else { - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32)) - .addImm(0) + + // TODO: Allow this: + if (!MI.getOperand(0).isReg() || + !TRI->isVGPR(MBB.getParent()->getRegInfo(), + MI.getOperand(0).getReg())) + llvm_unreachable("SI_KILL operand should be a VGPR"); + + BuildMI(MBB, &MI, DL, TII->get(Opcode)) + .add(MI.getOperand(1)) + .add(MI.getOperand(0)); + break; + } + case AMDGPU::SI_KILL_I1_TERMINATOR: { + const MachineOperand &Op = MI.getOperand(0); + int64_t KillVal = MI.getOperand(1).getImm(); + assert(KillVal == 0 || KillVal == -1); + + // Kill all threads if Op0 is an immediate and equal to the Kill value. + if (Op.isImm()) { + int64_t Imm = Op.getImm(); + assert(Imm == 0 || Imm == -1); + + if (Imm == KillVal) + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + .addImm(0); + break; + } + + unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64; + BuildMI(MBB, &MI, DL, TII->get(Opcode), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) .add(Op); + break; + } + default: + llvm_unreachable("invalid opcode, expected SI_KILL_*_TERMINATOR"); } } @@ -301,7 +387,8 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) { } break; - case AMDGPU::SI_KILL_TERMINATOR: + case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: + case AMDGPU::SI_KILL_I1_TERMINATOR: MadeChange = true; kill(MI); diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td index faf14fff5b2ec..250fb9eda2a4c 100644 --- a/lib/Target/AMDGPU/SIInstrFormats.td +++ b/lib/Target/AMDGPU/SIInstrFormats.td @@ -11,9 +11,18 @@ // //===----------------------------------------------------------------------===// +def isGCN : Predicate<"Subtarget->getGeneration() " + ">= SISubtarget::SOUTHERN_ISLANDS">, + AssemblerPredicate<"FeatureGCN">; +def isSI : Predicate<"Subtarget->getGeneration() " + "== SISubtarget::SOUTHERN_ISLANDS">, + AssemblerPredicate<"FeatureSouthernIslands">; + + class InstSI pattern = []> : AMDGPUInst, PredicateControl { + let SubtargetPredicate = isGCN; // Low bits - basic encoding information. 
field bit SALU = 0; diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index b2fbcce66d59d..06de0658a7d43 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -649,15 +649,18 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, "Not a VGPR32 reg"); if (Cond.size() == 1) { + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) + .add(Cond[0]); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) - .add(Cond[0]); + .addReg(SReg); } else if (Cond.size() == 2) { assert(Cond[0].isImm() && "Cond[0] is not an immediate"); switch (Cond[0].getImm()) { case SIInstrInfo::SCC_TRUE: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) .addImm(-1) .addImm(0); @@ -668,7 +671,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, break; } case SIInstrInfo::SCC_FALSE: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_CSELECT_B64), SReg) .addImm(0) .addImm(-1); @@ -681,23 +684,29 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, case SIInstrInfo::VCCNZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) + .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(FalseReg) .addReg(TrueReg) - .add(RegOp); + .addReg(SReg); break; } case SIInstrInfo::VCCZ: { MachineOperand RegOp = Cond[1]; RegOp.setImplicit(false); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg) + .add(RegOp); BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg) .addReg(TrueReg) .addReg(FalseReg) - .add(RegOp); + .addReg(SReg); break; } case SIInstrInfo::EXECNZ: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); @@ -711,7 +720,7 @@ void SIInstrInfo::insertVectorSelect(MachineBasicBlock &MBB, break; } case SIInstrInfo::EXECZ: { - unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); + unsigned SReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); unsigned SReg2 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); BuildMI(MBB, I, DL, get(AMDGPU::S_OR_SAVEEXEC_B64), SReg2) .addImm(0); @@ -4362,6 +4371,18 @@ unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI, return AMDGPU::NoRegister; } +unsigned SIInstrInfo::getInstBundleSize(const MachineInstr &MI) const { + unsigned Size = 0; + MachineBasicBlock::const_instr_iterator I = MI.getIterator(); + MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end(); + while (++I != E && I->isInsideBundle()) { + assert(!I->isBundle() && "No nested bundle!"); + Size += getInstSizeInBytes(*I); + } + + return Size; +} + unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc); @@ -4405,9 +4426,10 @@ 
unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: case TargetOpcode::DBG_VALUE: - case TargetOpcode::BUNDLE: case TargetOpcode::EH_LABEL: return 0; + case TargetOpcode::BUNDLE: + return getInstBundleSize(MI); case TargetOpcode::INLINEASM: { const MachineFunction *MF = MI.getParent()->getParent(); const char *AsmStr = MI.getOperand(0).getSymbolName(); @@ -4569,3 +4591,24 @@ SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB, return BuildMI(MBB, I, DL, get(AMDGPU::V_ADD_I32_e64), DestReg) .addReg(UnusedCarry, RegState::Define | RegState::Dead); } + +bool SIInstrInfo::isKillTerminator(unsigned Opcode) { + switch (Opcode) { + case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR: + case AMDGPU::SI_KILL_I1_TERMINATOR: + return true; + default: + return false; + } +} + +const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) const { + switch (Opcode) { + case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO: + return get(AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR); + case AMDGPU::SI_KILL_I1_PSEUDO: + return get(AMDGPU::SI_KILL_I1_TERMINATOR); + default: + llvm_unreachable("invalid opcode, expected SI_KILL_*_PSEUDO"); + } +} diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 93513e2de159c..f8de0efc5dd7b 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -818,6 +818,7 @@ class SIInstrInfo final : public AMDGPUInstrInfo { unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; + unsigned getInstBundleSize(const MachineInstr &MI) const; unsigned getInstSizeInBytes(const MachineInstr &MI) const override; bool mayAccessFlatAddressSpace(const MachineInstr &MI) const; @@ -856,6 +857,9 @@ class SIInstrInfo final : public AMDGPUInstrInfo { MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DestReg) const; + + static bool isKillTerminator(unsigned Opcode); + const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const; }; namespace AMDGPU { diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index e3bed5eb3db5a..1273f451e18d5 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -232,16 +232,6 @@ def si_setcc_uniform : PatFrag < return true; }]>; -def si_uniform_br : PatFrag < - (ops node:$cond, node:$bb), (brcond node:$cond, node:$bb), [{ - return isUniformBr(N); -}]>; - -def si_uniform_br_scc : PatFrag < - (ops node:$cond, node:$bb), (si_uniform_br node:$cond, node:$bb), [{ - return isCBranchSCC(N); -}]>; - def lshr_rev : PatFrag < (ops node:$src1, node:$src0), (srl $src0, $src1) @@ -264,27 +254,28 @@ multiclass SIAtomicM0Glue2 { [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] >; - def _local : local_binary_atomic_op (NAME#"_glue")>; + def _local_m0 : local_binary_atomic_op (NAME#"_glue")>; } -defm si_atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">; -defm si_atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">; -defm si_atomic_inc : SIAtomicM0Glue2 <"INC", 1>; -defm si_atomic_dec : SIAtomicM0Glue2 <"DEC", 1>; -defm si_atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">; -defm si_atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">; -defm si_atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">; -defm si_atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">; -defm si_atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">; -defm si_atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">; -defm si_atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">; -defm si_atomic_swap : 
SIAtomicM0Glue2 <"SWAP">; +defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">; +defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">; +defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>; +defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>; +defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">; +defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">; +defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">; +defm atomic_load_or : SIAtomicM0Glue2 <"LOAD_OR">; +defm atomic_load_xor : SIAtomicM0Glue2 <"LOAD_XOR">; +defm atomic_load_umin : SIAtomicM0Glue2 <"LOAD_UMIN">; +defm atomic_load_umax : SIAtomicM0Glue2 <"LOAD_UMAX">; +defm atomic_swap : SIAtomicM0Glue2 <"SWAP">; -def si_atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3, +def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue] >; -defm si_atomic_cmp_swap : AtomicCmpSwapLocal ; +def atomic_cmp_swap_local_m0 : AtomicCmpSwapLocal; + def as_i1imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); @@ -306,6 +297,10 @@ def as_i64imm: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64); }]>; +def cond_as_i32imm: SDNodeXFormgetTargetConstant(N->get(), SDLoc(N), MVT::i32); +}]>; + // Copied from the AArch64 backend: def bitcast_fpimm_to_i32 : SDNodeXFormgetTargetConstant( @@ -994,7 +989,7 @@ class getVOP3SrcForVT { VCSrc_f64, VCSrc_b64), !if(!eq(VT.Value, i1.Value), - SCSrc_b64, + SCSrc_i1, !if(isFP, !if(!eq(VT.Value, f16.Value), VCSrc_f16, diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 1ed5e8e09378f..6cee5be9da9bb 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -11,13 +11,6 @@ // that are not yet supported remain commented out. 
//===----------------------------------------------------------------------===// -def isGCN : Predicate<"Subtarget->getGeneration() " - ">= SISubtarget::SOUTHERN_ISLANDS">, - AssemblerPredicate<"FeatureGCN">; -def isSI : Predicate<"Subtarget->getGeneration() " - "== SISubtarget::SOUTHERN_ISLANDS">, - AssemblerPredicate<"FeatureSouthernIslands">; - def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">; def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">; def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">, @@ -25,14 +18,17 @@ def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">, def HasMovrel : Predicate<"Subtarget->hasMovrel()">, AssemblerPredicate<"FeatureMovrel">; +class GCNPat : AMDGPUPat { + let SubtargetPredicate = isGCN; +} + + include "VOPInstructions.td" include "SOPInstructions.td" include "SMInstructions.td" include "FLATInstructions.td" include "BUFInstructions.td" -let SubtargetPredicate = isGCN in { - //===----------------------------------------------------------------------===// // EXP Instructions //===----------------------------------------------------------------------===// @@ -208,12 +204,14 @@ def SI_MASK_BRANCH : VPseudoInstSI < let isTerminator = 1 in { +let OtherPredicates = [EnableLateCFGStructurize] in { def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI < (outs), (ins SReg_64:$vcc, brtarget:$target), [(brcond i1:$vcc, bb:$target)]> { let Size = 12; } +} def SI_IF: CFPseudoInstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target), @@ -277,18 +275,21 @@ def SI_ELSE_BREAK : CFPseudoInstSI < } let Uses = [EXEC], Defs = [EXEC,VCC] in { -def SI_KILL : PseudoInstSI < - (outs), (ins VSrc_b32:$src), - [(AMDGPUkill i32:$src)]> { - let isConvergent = 1; - let usesCustomInserter = 1; -} -def SI_KILL_TERMINATOR : SPseudoInstSI < - (outs), (ins VSrc_b32:$src)> { - let isTerminator = 1; +multiclass PseudoInstKill { + def _PSEUDO : PseudoInstSI <(outs), ins> { + let isConvergent = 1; + let usesCustomInserter = 1; + } + + def _TERMINATOR : SPseudoInstSI <(outs), ins> { + let isTerminator = 1; + } } +defm SI_KILL_I1 : PseudoInstKill <(ins SSrc_b64:$src, i1imm:$killvalue)>; +defm SI_KILL_F32_COND_IMM : PseudoInstKill <(ins VSrc_b32:$src0, i32imm:$src1, i32imm:$cond)>; + def SI_ILLEGAL_COPY : SPseudoInstSI < (outs unknown:$dst), (ins unknown:$src), [], " ; illegal copy $src to $dst">; @@ -526,39 +527,63 @@ def SI_PC_ADD_REL_OFFSET : SPseudoInstSI < let Defs = [SCC]; } -} // End SubtargetPredicate = isGCN - -let Predicates = [isGCN] in { -def : Pat < +def : GCNPat < (AMDGPUinit_exec i64:$src), (SI_INIT_EXEC (as_i64imm $src)) >; -def : Pat < +def : GCNPat < (AMDGPUinit_exec_from_input i32:$input, i32:$shift), (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift)) >; -def : Pat< +def : GCNPat< (AMDGPUtrap timm:$trapid), (S_TRAP $trapid) >; -def : Pat< +def : GCNPat< (AMDGPUelse i64:$src, bb:$target), (SI_ELSE $src, $target, 0) >; -def : Pat < +def : GCNPat < (int_AMDGPU_kilp), - (SI_KILL (i32 0xbf800000)) + (SI_KILL_I1_PSEUDO (i1 0), 0) +>; + +def : Pat < + // -1.0 as i32 (LowerINTRINSIC_VOID converts all other constants to -1.0) + (AMDGPUkill (i32 -1082130432)), + (SI_KILL_I1_PSEUDO (i1 0), 0) +>; + +def : Pat < + (int_amdgcn_kill i1:$src), + (SI_KILL_I1_PSEUDO $src, 0) >; +def : Pat < + (int_amdgcn_kill (i1 (not i1:$src))), + (SI_KILL_I1_PSEUDO $src, -1) +>; + +def : Pat < + (AMDGPUkill i32:$src), + (SI_KILL_F32_COND_IMM_PSEUDO $src, 0, 3) // 3 means SETOGE +>; + +def : Pat < + (int_amdgcn_kill (i1 (setcc f32:$src, 
InlineFPImm:$imm, cond:$cond))), + (SI_KILL_F32_COND_IMM_PSEUDO $src, (bitcast_fpimm_to_i32 $imm), (cond_as_i32imm $cond)) +>; +// TODO: we could add more variants for other types of conditionals + //===----------------------------------------------------------------------===// // VOP1 Patterns //===----------------------------------------------------------------------===// -let Predicates = [UnsafeFPMath] in { +let SubtargetPredicate = isGCN, OtherPredicates = [UnsafeFPMath] in { //def : RcpPat<V_RCP_F32_e32, f32>; //defm : RsqPat<V_RSQ_F32_e32, f32>; @@ -568,70 +593,70 @@ def : RsqPat<V_RSQ_F32_e32, f32>; def : RsqPat<V_RSQ_F64_e64, f64>; // Convert (x - floor(x)) to fract(x) -def : Pat < +def : GCNPat < (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)), (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))), (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) >; // Convert (x + (-floor(x))) to fract(x) -def : Pat < +def : GCNPat < (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)), (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))), (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) >; -} // End Predicates = [UnsafeFPMath] +} // End SubtargetPredicate = isGCN, OtherPredicates = [UnsafeFPMath] // f16_to_fp patterns -def : Pat < +def : GCNPat < (f32 (f16_to_fp i32:$src0)), (V_CVT_F32_F16_e64 SRCMODS.NONE, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; -def : Pat < +def : GCNPat < (f32 (f16_to_fp (and_oneuse i32:$src0, 0x7fff))), (V_CVT_F32_F16_e64 SRCMODS.ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; -def : Pat < +def : GCNPat < (f32 (f16_to_fp (or_oneuse i32:$src0, 0x8000))), (V_CVT_F32_F16_e64 SRCMODS.NEG_ABS, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; -def : Pat < +def : GCNPat < (f32 (f16_to_fp (xor_oneuse i32:$src0, 0x8000))), (V_CVT_F32_F16_e64 SRCMODS.NEG, $src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; -def : Pat < +def : GCNPat < (f64 (fpextend f16:$src)), (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src)) >; // fp_to_fp16 patterns -def : Pat < +def : GCNPat < (i32 (AMDGPUfp_to_f16 (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)))), (V_CVT_F16_F32_e64 $src0_modifiers, f32:$src0, DSTCLAMP.NONE, DSTOMOD.NONE) >; -def : Pat < +def : GCNPat < (i32 (fp_to_sint f16:$src)), (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 $src)) >; -def : Pat < +def : GCNPat < (i32 (fp_to_uint f16:$src)), (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 $src)) >; -def : Pat < +def : GCNPat < (f16 (sint_to_fp i32:$src)), (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 $src)) >; -def : Pat < +def : GCNPat < (f16 (uint_to_fp i32:$src)), (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 $src)) >; @@ -641,7 +666,7 @@ def : Pat < //===----------------------------------------------------------------------===// multiclass FMADPat <ValueType vt, Instruction inst> { - def : Pat < + def : GCNPat < (vt (fmad (VOP3NoMods vt:$src0), (VOP3NoMods vt:$src1), (VOP3NoMods vt:$src2))), @@ -653,7 +678,7 @@ multiclass FMADPat <ValueType vt, Instruction inst> { defm : FMADPat <f16, V_MAC_F16_e64>; defm : FMADPat <f32, V_MAC_F32_e64>; -class FMADModsPat<Instruction inst, SDPatternOperator mad_opr> : Pat< +class FMADModsPat<Instruction inst, SDPatternOperator mad_opr> : GCNPat< (f32 (mad_opr (VOP3Mods f32:$src0, i32:$src0_mod), (VOP3Mods f32:$src1, i32:$src1_mod), (VOP3Mods f32:$src2, i32:$src2_mod))), @@ -664,7 +689,7 @@ class FMADModsPat<Instruction inst, SDPatternOperator mad_opr> : Pat< def : FMADModsPat<V_MAD_F32, AMDGPUfmad_ftz>; multiclass SelectPat <ValueType vt, Instruction inst> { - def : Pat < + def : GCNPat < (vt (select i1:$src0, vt:$src1, vt:$src2)), (inst $src2, $src1, $src0) >; @@ -675,7 +700,7 @@ defm : SelectPat <i32, V_CNDMASK_B32_e64>; defm : SelectPat <f16, V_CNDMASK_B32_e64>; defm : SelectPat <f32, V_CNDMASK_B32_e64>; -def : Pat < +def : GCNPat < (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)), (V_BCNT_U32_B32_e64 $popcnt, $val) >; @@ -748,6 +773,8 @@ foreach Index = 0-15 in { >; } +let SubtargetPredicate = isGCN in { + // FIXME: Why do only some of these type combinations for SReg and // VReg?
// 16-bit bitcast @@ -808,6 +835,8 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; +} // End SubtargetPredicate = isGCN + /********** =================== **********/ /********** Src & Dst modifiers **********/ /********** =================== **********/ @@ -815,7 +844,7 @@ def : BitConvert ; // If denormals are not enabled, it only impacts the compare of the // inputs. The output result is not flushed. -class ClampPat : Pat < +class ClampPat : GCNPat < (vt (AMDGPUclamp (VOP3Mods vt:$src0, i32:$src0_modifiers))), (inst i32:$src0_modifiers, vt:$src0, i32:$src0_modifiers, vt:$src0, DSTCLAMP.ENABLE, DSTOMOD.NONE) @@ -825,7 +854,7 @@ def : ClampPat; def : ClampPat; def : ClampPat; -def : Pat < +def : GCNPat < (v2f16 (AMDGPUclamp (VOP3PMods v2f16:$src0, i32:$src0_modifiers))), (V_PK_MAX_F16 $src0_modifiers, $src0, $src0_modifiers, $src0, DSTCLAMP.ENABLE) @@ -837,13 +866,13 @@ def : Pat < // Prevent expanding both fneg and fabs. -def : Pat < +def : GCNPat < (fneg (fabs f32:$src)), (S_OR_B32 $src, (S_MOV_B32(i32 0x80000000))) // Set sign bit >; // FIXME: Should use S_OR_B32 -def : Pat < +def : GCNPat < (fneg (fabs f64:$src)), (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG f64:$src, sub0)), @@ -853,17 +882,17 @@ def : Pat < sub1) >; -def : Pat < +def : GCNPat < (fabs f32:$src), (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x7fffffff))) >; -def : Pat < +def : GCNPat < (fneg f32:$src), (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x80000000))) >; -def : Pat < +def : GCNPat < (fabs f64:$src), (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG f64:$src, sub0)), @@ -873,7 +902,7 @@ def : Pat < sub1) >; -def : Pat < +def : GCNPat < (fneg f64:$src), (REG_SEQUENCE VReg_64, (i32 (EXTRACT_SUBREG f64:$src, sub0)), @@ -883,18 +912,18 @@ def : Pat < sub1) >; -def : Pat < +def : GCNPat < (fcopysign f16:$src0, f16:$src1), (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1) >; -def : Pat < +def : GCNPat < (fcopysign f32:$src0, f16:$src1), (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), $src0, (V_LSHLREV_B32_e64 (i32 16), $src1)) >; -def : Pat < +def : GCNPat < (fcopysign f64:$src0, f16:$src1), (REG_SEQUENCE SReg_64, (i32 (EXTRACT_SUBREG $src0, sub0)), sub0, @@ -902,39 +931,39 @@ def : Pat < (V_LSHLREV_B32_e64 (i32 16), $src1)), sub1) >; -def : Pat < +def : GCNPat < (fcopysign f16:$src0, f32:$src1), (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, (V_LSHRREV_B32_e64 (i32 16), $src1)) >; -def : Pat < +def : GCNPat < (fcopysign f16:$src0, f64:$src1), (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1))) >; -def : Pat < +def : GCNPat < (fneg f16:$src), (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000))) >; -def : Pat < +def : GCNPat < (fabs f16:$src), (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x00007fff))) >; -def : Pat < +def : GCNPat < (fneg (fabs f16:$src)), (S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit >; -def : Pat < +def : GCNPat < (fneg v2f16:$src), (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), $src) >; -def : Pat < +def : GCNPat < (fabs v2f16:$src), (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), $src) >; @@ -943,7 +972,7 @@ def : Pat < // // fabs is not reported as free because there is modifier for it in // VOP3P instructions, so it is turned into the bit op. 
-def : Pat < +def : GCNPat < (fneg (v2f16 (bitconvert (and_oneuse i32:$src, 0x7fff7fff)))), (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit >; @@ -952,17 +981,17 @@ def : Pat < /********** Immediate Patterns **********/ /********** ================== **********/ -def : Pat < +def : GCNPat < (VGPRImm<(i32 imm)>:$imm), (V_MOV_B32_e32 imm:$imm) >; -def : Pat < +def : GCNPat < (VGPRImm<(f32 fpimm)>:$imm), (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm))) >; -def : Pat < +def : GCNPat < (i32 imm:$imm), (S_MOV_B32 imm:$imm) >; @@ -970,27 +999,27 @@ def : Pat < // FIXME: Workaround for ordering issue with peephole optimizer where // a register class copy interferes with immediate folding. Should // use s_mov_b32, which can be shrunk to s_movk_i32 -def : Pat < +def : GCNPat < (VGPRImm<(f16 fpimm)>:$imm), (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm))) >; -def : Pat < +def : GCNPat < (f32 fpimm:$imm), (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm))) >; -def : Pat < +def : GCNPat < (f16 fpimm:$imm), (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm))) >; -def : Pat < +def : GCNPat < (i32 frameindex:$fi), (V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi))) >; -def : Pat < +def : GCNPat < (i64 InlineImm:$imm), (S_MOV_B64 InlineImm:$imm) >; @@ -998,12 +1027,12 @@ def : Pat < // XXX - Should this use a s_cmp to set SCC? // Set to sign-extended 64-bit value (true = -1, false = 0) -def : Pat < +def : GCNPat < (i1 imm:$imm), (S_MOV_B64 (i64 (as_i64imm $imm))) >; -def : Pat < +def : GCNPat < (f64 InlineFPImm:$imm), (S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm:$imm))) >; @@ -1012,14 +1041,16 @@ def : Pat < /********** Intrinsic Patterns **********/ /********** ================== **********/ +let SubtargetPredicate = isGCN in { def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>; +} -def : Pat < +def : GCNPat < (i32 (sext i1:$src0)), (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0) >; -class Ext32Pat <SDNode ext> : Pat < +class Ext32Pat <SDNode ext> : GCNPat < (i32 (ext i1:$src0)), (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0) >; @@ -1028,7 +1059,7 @@ def : Ext32Pat <zext>; def : Ext32Pat <anyext>; // The multiplication scales from [0,1] to the unsigned integer range -def : Pat < +def : GCNPat < (AMDGPUurecip i32:$src0), (V_CVT_U32_F32_e32 (V_MUL_F32_e32 (i32 CONST.FP_UINT_MAX_PLUS_1), @@ -1039,17 +1070,21 @@ def : Pat < // VOP3 Patterns //===----------------------------------------------------------------------===// +let SubtargetPredicate = isGCN in { + def : IMad24Pat<V_MAD_I32_I24>; def : UMad24Pat<V_MAD_U32_U24>; defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>; def : ROTRPattern <V_ALIGNBIT_B32>; -def : Pat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))), +} + +def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))), (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)), (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>; -def : Pat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))), +def : GCNPat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))), (V_ALIGNBIT_B32 (i32 (EXTRACT_SUBREG (i64 $src0), sub1)), (i32 (EXTRACT_SUBREG (i64 $src0), sub0)), $src1)>; @@ -1059,13 +1094,13 @@ def : Pat<(i32 (trunc (srl i64:$src0, (i32 ShiftAmt32Imm:$src1)))), multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, string VecSize> { // Extract with offset - def : Pat< + def : GCNPat< (eltvt (extractelt vt:$src, (MOVRELOffset i32:$idx, (i32 imm:$offset)))), (!cast<Instruction>("SI_INDIRECT_SRC_"#VecSize) $src, $idx, imm:$offset) >; // Insert with offset - def : Pat< + def : GCNPat< (insertelt vt:$src, eltvt:$val, (MOVRELOffset i32:$idx, (i32 imm:$offset))), (!cast<Instruction>("SI_INDIRECT_DST_"#VecSize) $src, $idx, imm:$offset, $val) >; @@ -1085,14 +1120,14 @@ defm :
SI_INDIRECT_Pattern ; // SAD Patterns //===----------------------------------------------------------------------===// -def : Pat < +def : GCNPat < (add (sub_oneuse (umax i32:$src0, i32:$src1), (umin i32:$src0, i32:$src1)), i32:$src2), (V_SAD_U32 $src0, $src1, $src2, (i1 0)) >; -def : Pat < +def : GCNPat < (add (select_oneuse (i1 (setugt i32:$src0, i32:$src1)), (sub i32:$src0, i32:$src1), (sub i32:$src1, i32:$src0)), @@ -1104,51 +1139,51 @@ def : Pat < // Conversion Patterns //===----------------------------------------------------------------------===// -def : Pat<(i32 (sext_inreg i32:$src, i1)), +def : GCNPat<(i32 (sext_inreg i32:$src, i1)), (S_BFE_I32 i32:$src, (i32 65536))>; // 0 | 1 << 16 // Handle sext_inreg in i64 -def : Pat < +def : GCNPat < (i64 (sext_inreg i64:$src, i1)), (S_BFE_I64 i64:$src, (i32 0x10000)) // 0 | 1 << 16 >; -def : Pat < +def : GCNPat < (i16 (sext_inreg i16:$src, i1)), (S_BFE_I32 $src, (i32 0x00010000)) // 0 | 1 << 16 >; -def : Pat < +def : GCNPat < (i16 (sext_inreg i16:$src, i8)), (S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16 >; -def : Pat < +def : GCNPat < (i64 (sext_inreg i64:$src, i8)), (S_BFE_I64 i64:$src, (i32 0x80000)) // 0 | 8 << 16 >; -def : Pat < +def : GCNPat < (i64 (sext_inreg i64:$src, i16)), (S_BFE_I64 i64:$src, (i32 0x100000)) // 0 | 16 << 16 >; -def : Pat < +def : GCNPat < (i64 (sext_inreg i64:$src, i32)), (S_BFE_I64 i64:$src, (i32 0x200000)) // 0 | 32 << 16 >; -def : Pat < +def : GCNPat < (i64 (zext i32:$src)), (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 (i32 0)), sub1) >; -def : Pat < +def : GCNPat < (i64 (anyext i32:$src)), (REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1) >; -class ZExt_i64_i1_Pat : Pat < +class ZExt_i64_i1_Pat : GCNPat < (i64 (ext i1:$src)), (REG_SEQUENCE VReg_64, (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0, @@ -1161,20 +1196,20 @@ def : ZExt_i64_i1_Pat; // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that // REG_SEQUENCE patterns don't support instructions with multiple outputs. -def : Pat < +def : GCNPat < (i64 (sext i32:$src)), (REG_SEQUENCE SReg_64, $src, sub0, (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, (i32 31)), SReg_32_XM0)), sub1) >; -def : Pat < +def : GCNPat < (i64 (sext i1:$src)), (REG_SEQUENCE VReg_64, (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0, (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1) >; -class FPToI1Pat : Pat < +class FPToI1Pat : GCNPat < (i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))), (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE)) >; @@ -1190,37 +1225,37 @@ def : FPToI1Pat; // 64-bit comparisons. When legalizing SGPR copies, instructions // resulting in the copies from SCC to these instructions will be // moved to the VALU. 
-def : Pat < +def : GCNPat < (i1 (and i1:$src0, i1:$src1)), (S_AND_B64 $src0, $src1) >; -def : Pat < +def : GCNPat < (i1 (or i1:$src0, i1:$src1)), (S_OR_B64 $src0, $src1) >; -def : Pat < +def : GCNPat < (i1 (xor i1:$src0, i1:$src1)), (S_XOR_B64 $src0, $src1) >; -def : Pat < +def : GCNPat < (f32 (sint_to_fp i1:$src)), (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src) >; -def : Pat < +def : GCNPat < (f32 (uint_to_fp i1:$src)), (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src) >; -def : Pat < +def : GCNPat < (f64 (sint_to_fp i1:$src)), (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)) >; -def : Pat < +def : GCNPat < (f64 (uint_to_fp i1:$src)), (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)) >; @@ -1228,103 +1263,87 @@ def : Pat < //===----------------------------------------------------------------------===// // Miscellaneous Patterns //===----------------------------------------------------------------------===// -def : Pat < +def : GCNPat < (i32 (AMDGPUfp16_zext f16:$src)), (COPY $src) >; -def : Pat < +def : GCNPat < (i32 (trunc i64:$a)), (EXTRACT_SUBREG $a, sub0) >; -def : Pat < +def : GCNPat < (i1 (trunc i32:$a)), (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1)) >; -def : Pat < +def : GCNPat < (i1 (trunc i16:$a)), (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1)) >; -def : Pat < +def : GCNPat < (i1 (trunc i64:$a)), (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1)) >; -def : Pat < +def : GCNPat < (i32 (bswap i32:$a)), (V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)), (V_ALIGNBIT_B32 $a, $a, (i32 24)), (V_ALIGNBIT_B32 $a, $a, (i32 8))) >; -multiclass BFMPatterns { - def : Pat < - (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)), - (BFM $a, $b) - >; - - def : Pat < - (vt (add (vt (shl 1, vt:$a)), -1)), - (BFM $a, (MOV (i32 0))) - >; -} - -defm : BFMPatterns ; -// FIXME: defm : BFMPatterns ; -defm : BFEPattern ; - -let Predicates = [NoFP16Denormals] in { -def : Pat< +let OtherPredicates = [NoFP16Denormals] in { +def : GCNPat< (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), $src_mods, $src, 0, 0) >; -def : Pat< +def : GCNPat< (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))), (V_PK_MUL_F16 0, (i32 CONST.V2FP16_ONE), $src_mods, $src, DSTCLAMP.NONE) >; } -let Predicates = [FP16Denormals] in { -def : Pat< +let OtherPredicates = [FP16Denormals] in { +def : GCNPat< (fcanonicalize (f16 (VOP3Mods f16:$src, i32:$src_mods))), (V_MAX_F16_e64 $src_mods, $src, $src_mods, $src, 0, 0) >; -def : Pat< +def : GCNPat< (fcanonicalize (v2f16 (VOP3PMods v2f16:$src, i32:$src_mods))), (V_PK_MAX_F16 $src_mods, $src, $src_mods, $src, DSTCLAMP.NONE) >; } -let Predicates = [NoFP32Denormals] in { -def : Pat< +let OtherPredicates = [NoFP32Denormals] in { +def : GCNPat< (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))), (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), $src_mods, $src, 0, 0) >; } -let Predicates = [FP32Denormals] in { -def : Pat< +let OtherPredicates = [FP32Denormals] in { +def : GCNPat< (fcanonicalize (f32 (VOP3Mods f32:$src, i32:$src_mods))), (V_MAX_F32_e64 $src_mods, $src, $src_mods, $src, 0, 0) >; } -let Predicates = [NoFP64Denormals] in { -def : Pat< +let OtherPredicates = [NoFP64Denormals] in { +def : GCNPat< (fcanonicalize (f64 (VOP3Mods f64:$src, i32:$src_mods))), (V_MUL_F64 0, CONST.FP64_ONE, $src_mods, $src, 0, 0) >; } -let Predicates = [FP64Denormals] in { -def : Pat< +let OtherPredicates = [FP64Denormals] in { +def : GCNPat< (fcanonicalize (f64 (VOP3Mods 
f64:$src, i32:$src_mods))), (V_MAX_F64 $src_mods, $src, $src_mods, $src, 0, 0) >; @@ -1332,7 +1351,7 @@ def : Pat< // Allow integer inputs -class ExpPattern : Pat< +class ExpPattern : GCNPat< (node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)), (Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en) >; @@ -1340,43 +1359,43 @@ class ExpPattern : Pat< def : ExpPattern; def : ExpPattern; -def : Pat < +def : GCNPat < (v2i16 (build_vector i16:$src0, i16:$src1)), (v2i16 (S_PACK_LL_B32_B16 $src0, $src1)) >; // COPY_TO_REGCLASS is workaround tablegen bug from multiple outputs // from S_LSHL_B32's multiple outputs from implicit scc def. -def : Pat < +def : GCNPat < (v2i16 (build_vector (i16 0), i16:$src1)), (v2i16 (COPY_TO_REGCLASS (S_LSHL_B32 i16:$src1, (i16 16)), SReg_32_XM0)) >; // With multiple uses of the shift, this will duplicate the shift and // increase register pressure. -def : Pat < +def : GCNPat < (v2i16 (build_vector i16:$src0, (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))), (v2i16 (S_PACK_LH_B32_B16 i16:$src0, i32:$src1)) >; -def : Pat < +def : GCNPat < (v2i16 (build_vector (i16 (trunc (srl_oneuse i32:$src0, (i32 16)))), (i16 (trunc (srl_oneuse i32:$src1, (i32 16)))))), (v2i16 (S_PACK_HH_B32_B16 $src0, $src1)) >; // TODO: Should source modifiers be matched to v_pack_b32_f16? -def : Pat < +def : GCNPat < (v2f16 (build_vector f16:$src0, f16:$src1)), (v2f16 (S_PACK_LL_B32_B16 $src0, $src1)) >; -// def : Pat < +// def : GCNPat < // (v2f16 (scalar_to_vector f16:$src0)), // (COPY $src0) // >; -// def : Pat < +// def : GCNPat < // (v2i16 (scalar_to_vector i16:$src0)), // (COPY $src0) // >; @@ -1385,7 +1404,7 @@ def : Pat < // Fract Patterns //===----------------------------------------------------------------------===// -let Predicates = [isSI] in { +let SubtargetPredicate = isSI in { // V_FRACT is buggy on SI, so the F32 version is never used and (x-floor(x)) is // used instead. However, SI doesn't have V_FLOOR_F64, so the most efficient @@ -1394,7 +1413,7 @@ let Predicates = [isSI] in { // fract(x) = isnan(x) ? x : min(V_FRACT(x), 0.99999999999999999) // Convert floor(x) to (x - fract(x)) -def : Pat < +def : GCNPat < (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))), (V_ADD_F64 $mods, @@ -1412,7 +1431,7 @@ def : Pat < DSTCLAMP.NONE, DSTOMOD.NONE) >; -} // End Predicates = [isSI] +} // End SubtargetPredicates = isSI //============================================================================// // Miscellaneous Optimization Patterns @@ -1421,20 +1440,41 @@ def : Pat < // Undo sub x, c -> add x, -c canonicalization since c is more likely // an inline immediate than -c. // TODO: Also do for 64-bit. 
-def : Pat< +def : GCNPat< (add i32:$src0, (i32 NegSubInlineConst32:$src1)), (S_SUB_I32 $src0, NegSubInlineConst32:$src1) >; + +multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> { + def : GCNPat < + (vt (shl (vt (add (vt (shl 1, vt:$a)), -1)), vt:$b)), + (BFM $a, $b) + >; + + def : GCNPat < + (vt (add (vt (shl 1, vt:$a)), -1)), + (BFM $a, (MOV (i32 0))) + >; +} + +let SubtargetPredicate = isGCN in { + +defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>; +// FIXME: defm : BFMPatterns <i64, S_BFM_B64, S_MOV_B64>; + +defm : BFEPattern <V_BFE_U32, V_BFE_I32, S_MOV_B32>; def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>; def : IntMed3Pat<V_MED3_I32, smax, smax_oneuse, smin_oneuse>; def : IntMed3Pat<V_MED3_U32, umax, umax_oneuse, umin_oneuse>; +} + // This matches 16 permutations of // max(min(x, y), min(max(x, y), z)) class FPMed3Pat<ValueType vt, - Instruction med3Inst> : Pat< + Instruction med3Inst> : GCNPat< (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods), (VOP3Mods_nnan vt:$src1, i32:$src1_mods)), (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods), @@ -1444,7 +1484,7 @@ class FPMed3Pat; class FP16Med3Pat<ValueType vt, - Instruction med3Inst> : Pat< + Instruction med3Inst> : GCNPat< (fmaxnum (fminnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods), (VOP3Mods_nnan vt:$src1, i32:$src1_mods)), (fminnum_oneuse (fmaxnum_oneuse (VOP3Mods_nnan vt:$src0, i32:$src0_mods), @@ -1457,7 +1497,7 @@ class Int16Med3Pat<Instruction med3Inst, SDPatternOperator max, SDPatternOperator max_oneuse, SDPatternOperator min_oneuse, - ValueType vt = i32> : Pat< + ValueType vt = i32> : GCNPat< (max (min_oneuse vt:$src0, vt:$src1), (min_oneuse (max_oneuse vt:$src0, vt:$src1), vt:$src2)), (med3Inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE) @@ -1465,7 +1505,7 @@ class Int16Med3Pat<Instruction med3Inst, def : FPMed3Pat<f32, V_MED3_F32>; -let Predicates = [isGFX9] in { +let OtherPredicates = [isGFX9] in { def : FP16Med3Pat<f16, V_MED3_F16>; def : Int16Med3Pat<V_MED3_I16, smax, smax_oneuse, smin_oneuse, i16>; def : Int16Med3Pat<V_MED3_U16, umax, umax_oneuse, umin_oneuse, i16>; @@ -1498,6 +1538,7 @@ multiclass NoCarryAlias; @@ -1513,5 +1554,3 @@ def : MnemonicAlias<"v_add_u32", "v_add_i32">; def : MnemonicAlias<"v_sub_u32", "v_sub_i32">; def : MnemonicAlias<"v_subrev_u32", "v_subrev_i32">; } - -} // End isGCN predicate diff --git a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 8b3fdd874385b..026fd9743242c 100644 --- a/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -69,7 +69,7 @@ using namespace llvm; namespace { class SILoadStoreOptimizer : public MachineFunctionPass { - using CombineInfo = struct { + struct CombineInfo { MachineBasicBlock::iterator I; MachineBasicBlock::iterator Paired; unsigned EltSize; diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index 8e19e15997126..29fc5ef50dbc7 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -134,7 +134,8 @@ static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) { char &llvm::SILowerControlFlowID = SILowerControlFlow::ID; -static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) { +static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI, + const SIInstrInfo *TII) { unsigned SaveExecReg = MI.getOperand(0).getReg(); auto U = MRI->use_instr_nodbg_begin(SaveExecReg); @@ -143,7 +144,7 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) { U->getOpcode() != AMDGPU::SI_END_CF) return false; - // Check for SI_KILL_TERMINATOR on path from if to endif. + // Check for SI_KILL_*_TERMINATOR on path from if to endif. // if there is any such terminator simplifications are not safe.
auto SMBB = MI.getParent(); auto EMBB = U->getParent(); @@ -157,7 +158,7 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) { if (MBB == EMBB || !Visited.insert(MBB).second) continue; for(auto &Term : MBB->terminators()) - if (Term.getOpcode() == AMDGPU::SI_KILL_TERMINATOR) + if (TII->isKillTerminator(Term.getOpcode())) return false; Worklist.append(MBB->succ_begin(), MBB->succ_end()); @@ -184,7 +185,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) { // If there is only one use of save exec register and that use is SI_END_CF, // we can optimize SI_IF by returning the full saved exec mask instead of // just cleared bits. - bool SimpleIf = isSimpleIf(MI, MRI); + bool SimpleIf = isSimpleIf(MI, MRI, TII); // Add an implicit def of exec to discourage scheduling VALU after this which // will interfere with trying to form s_and_saveexec_b64 later. diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index ba616ada0c9ce..3880d052bf895 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -121,11 +121,14 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { } } + unsigned int TmpSrc = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass); + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::COPY), TmpSrc) + .add(Src); BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64)) .add(Dst) .addImm(0) .addImm(-1) - .add(Src); + .addReg(TmpSrc); MI.eraseFromParent(); } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) && SrcRC == &AMDGPU::VReg_1RegClass) { diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index ebb83fea1fde9..0a92cd176541d 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -48,7 +48,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDY(false), WorkItemIDZ(false), ImplicitBufferPtr(false), - ImplicitArgPtr(false) { + ImplicitArgPtr(false), + GITPtrHigh(0xffffffff) { const SISubtarget &ST = MF.getSubtarget(); const Function *F = MF.getFunction(); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F); @@ -160,6 +161,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch")) FlatScratchInit = true; } + + Attribute A = F->getFnAttribute("amdgpu-git-ptr-high"); + StringRef S = A.getValueAsString(); + if (!S.empty()) + S.consumeInteger(0, GITPtrHigh); } unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 242b41a590852..ade909cc84e3a 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -185,6 +185,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { // user arguments. This is an offset from the KernargSegmentPtr. bool ImplicitArgPtr : 1; + // The hard-wired high half of the address of the global information table + // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since + // current hardware only allows a 16 bit value. 
+ unsigned GITPtrHigh; + MCPhysReg getNextUserSGPR() const { assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); return AMDGPU::SGPR0 + NumUserSGPRs; @@ -406,6 +411,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { return ArgInfo.getPreloadedValue(Value).first->getRegister(); } + unsigned getGITPtrHigh() const { + return GITPtrHigh; + } + unsigned getNumUserSGPRs() const { return NumUserSGPRs; } diff --git a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 46e58a2ca5f74..4c991c7c21a5b 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -205,6 +205,9 @@ static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) { } bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + const SISubtarget &ST = MF.getSubtarget(); const SIRegisterInfo *TRI = ST.getRegisterInfo(); const SIInstrInfo *TII = ST.getInstrInfo(); diff --git a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 24b7fe0f991d5..939062817a1e0 100644 --- a/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -830,7 +830,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI, const SISubtarget bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { const SISubtarget &ST = MF.getSubtarget(); - if (!ST.hasSDWA()) + if (!ST.hasSDWA() || skipFunction(*MF.getFunction())) return false; MRI = &MF.getRegInfo(); diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index 7c73f92eed279..a367bd7e129cf 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1474,7 +1474,8 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const { + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const { unsigned SrcSize = getRegSizeInBits(*SrcRC); unsigned DstSize = getRegSizeInBits(*DstRC); unsigned NewSize = getRegSizeInBits(*NewRC); diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index 65655b79c2141..bf814b6974a82 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -22,6 +22,7 @@ namespace llvm { +class LiveIntervals; class MachineRegisterInfo; class SISubtarget; class SIMachineFunctionInfo; @@ -212,7 +213,8 @@ class SIRegisterInfo final : public AMDGPURegisterInfo { unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const override; + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td index d685326c9b5e7..5062a626d9418 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/lib/Target/AMDGPU/SIRegisterInfo.td @@ -483,6 +483,8 @@ defm SSrc : RegImmOperand<"SReg", "SSrc">; defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ; +def SCSrc_i1 : RegisterOperand; + //===----------------------------------------------------------------------===// // VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/SMInstructions.td 
b/lib/Target/AMDGPU/SMInstructions.td index 73dd8b7daa4ea..131cd2f990fc5 100644 --- a/lib/Target/AMDGPU/SMInstructions.td +++ b/lib/Target/AMDGPU/SMInstructions.td @@ -241,25 +241,23 @@ def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">; def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">; def SMRDBufferSgpr : ComplexPattern<i32, 1, "SelectSMRDBufferSgpr">; -let Predicates = [isGCN] in { - multiclass SMRD_Pattern <string Instr, ValueType vt> { // 1. IMM offset - def : Pat < + def : GCNPat < (smrd_load (SMRDImm i64:$sbase, i32:$offset)), (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0)) >; // 2. SGPR offset - def : Pat < + def : GCNPat < (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0)) >; } -let Predicates = [isSICI] in { -def : Pat < +let OtherPredicates = [isSICI] in { +def : GCNPat < (i64 (readcyclecounter)), (S_MEMTIME) >; @@ -277,29 +275,27 @@ defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>; defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>; // 1. Offset as an immediate -def SM_LOAD_PATTERN : Pat < // name this pattern to reuse AddedComplexity on CI +def SM_LOAD_PATTERN : GCNPat < // name this pattern to reuse AddedComplexity on CI (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)), (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset, 0) >; // 2. Offset loaded in an 32bit SGPR -def : Pat < +def : GCNPat < (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)), (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset, 0) >; } // End let AddedComplexity = 100 -} // let Predicates = [isGCN] - -let Predicates = [isVI] in { +let OtherPredicates = [isVI] in { -def : Pat < +def : GCNPat < (i64 (readcyclecounter)), (S_MEMREALTIME) >; -} // let Predicates = [isVI] +} // let OtherPredicates = [isVI] //===----------------------------------------------------------------------===// @@ -508,10 +504,10 @@ def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>; let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in { -class SMRD_Pattern_ci <string Instr, ValueType vt> : Pat < +class SMRD_Pattern_ci <string Instr, ValueType vt> : GCNPat < (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> { - let Predicates = [isCIOnly]; + let OtherPredicates = [isCIOnly]; } def : SMRD_Pattern_ci <"S_LOAD_DWORD", i32>; @@ -520,10 +516,10 @@ def : SMRD_Pattern_ci <"S_LOAD_DWORDX4", v4i32>; def : SMRD_Pattern_ci <"S_LOAD_DWORDX8", v8i32>; def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>; -def : Pat < +def : GCNPat < (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)), (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset, 0)> { - let Predicates = [isCI]; // should this be isCIOnly? + let OtherPredicates = [isCI]; // should this be isCIOnly?
} } // End let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td index 041fec52efe7e..02a95a4b6f249 100644 --- a/lib/Target/AMDGPU/SOPInstructions.td +++ b/lib/Target/AMDGPU/SOPInstructions.td @@ -139,7 +139,9 @@ let Defs = [SCC] in { [(set i64:$sdst, (not i64:$src0))] >; def S_WQM_B32 : SOP1_32 <"s_wqm_b32">; - def S_WQM_B64 : SOP1_64 <"s_wqm_b64">; + def S_WQM_B64 : SOP1_64 <"s_wqm_b64", + [(set i1:$sdst, (int_amdgcn_wqm_vote i1:$src0))] + >; } // End Defs = [SCC] @@ -159,10 +161,11 @@ def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64">; def S_FF0_I32_B32 : SOP1_32 <"s_ff0_i32_b32">; def S_FF0_I32_B64 : SOP1_32_64 <"s_ff0_i32_b64">; +def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64">; + def S_FF1_I32_B32 : SOP1_32 <"s_ff1_i32_b32", - [(set i32:$sdst, (cttz_zero_undef i32:$src0))] + [(set i32:$sdst, (AMDGPUffbl_b32 i32:$src0))] >; -def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64">; def S_FLBIT_I32_B32 : SOP1_32 <"s_flbit_i32_b32", [(set i32:$sdst, (AMDGPUffbh_u32 i32:$src0))] @@ -817,8 +820,7 @@ def S_CBRANCH_SCC0 : SOPP < >; def S_CBRANCH_SCC1 : SOPP < 0x00000005, (ins sopp_brtarget:$simm16), - "s_cbranch_scc1 $simm16", - [(si_uniform_br_scc SCC, bb:$simm16)] + "s_cbranch_scc1 $simm16" >; } // End Uses = [SCC] @@ -948,12 +950,10 @@ def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16), } } -let Predicates = [isGCN] in { - //===----------------------------------------------------------------------===// // S_GETREG_B32 Intrinsic Pattern. //===----------------------------------------------------------------------===// -def : Pat < +def : GCNPat < (int_amdgcn_s_getreg imm:$simm16), (S_GETREG_B32 (as_i16imm $simm16)) >; @@ -962,25 +962,25 @@ def : Pat < // SOP1 Patterns //===----------------------------------------------------------------------===// -def : Pat < +def : GCNPat < (i64 (ctpop i64:$src)), (i64 (REG_SEQUENCE SReg_64, (i32 (COPY_TO_REGCLASS (S_BCNT1_I32_B64 $src), SReg_32)), sub0, (S_MOV_B32 (i32 0)), sub1)) >; -def : Pat < +def : GCNPat < (i32 (smax i32:$x, (i32 (ineg i32:$x)))), (S_ABS_I32 $x) >; -def : Pat < +def : GCNPat < (i16 imm:$imm), (S_MOV_B32 imm:$imm) >; // Same as a 32-bit inreg -def : Pat< +def : GCNPat< (i32 (sext i16:$src)), (S_SEXT_I32_I16 $src) >; @@ -992,7 +992,7 @@ def : Pat< // V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector // case, the sgpr-copies pass will fix this to use the vector version. -def : Pat < +def : GCNPat < (i32 (addc i32:$src0, i32:$src1)), (S_ADD_U32 $src0, $src1) >; @@ -1000,20 +1000,20 @@ def : Pat < // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that // REG_SEQUENCE patterns don't support instructions with multiple // outputs. 
-def : Pat< +def : GCNPat< (i64 (zext i16:$src)), (REG_SEQUENCE SReg_64, (i32 (COPY_TO_REGCLASS (S_AND_B32 $src, (S_MOV_B32 (i32 0xffff))), SGPR_32)), sub0, (S_MOV_B32 (i32 0)), sub1) >; -def : Pat < +def : GCNPat < (i64 (sext i16:$src)), (REG_SEQUENCE SReg_64, (i32 (S_SEXT_I32_I16 $src)), sub0, (i32 (COPY_TO_REGCLASS (S_ASHR_I32 (i32 (S_SEXT_I32_I16 $src)), (S_MOV_B32 (i32 31))), SGPR_32)), sub1) >; -def : Pat< +def : GCNPat< (i32 (zext i16:$src)), (S_AND_B32 (S_MOV_B32 (i32 0xffff)), $src) >; @@ -1024,13 +1024,11 @@ def : Pat< // SOPP Patterns //===----------------------------------------------------------------------===// -def : Pat < +def : GCNPat < (int_amdgcn_s_waitcnt i32:$simm16), (S_WAITCNT (as_i16imm $simm16)) >; -} // End isGCN predicate - //===----------------------------------------------------------------------===// // Real target instructions, move this to the appropriate subtarget TD file diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 03c9f7f4f399d..018cb5d0c3658 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -147,6 +147,25 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) { return {7, 0, 0}; } +void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) { + auto TargetTriple = STI->getTargetTriple(); + auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits()); + + Stream << TargetTriple.getArchName() << '-' + << TargetTriple.getVendorName() << '-' + << TargetTriple.getOSName() << '-' + << TargetTriple.getEnvironmentName() << '-' + << "gfx" + << ISAVersion.Major + << ISAVersion.Minor + << ISAVersion.Stepping; + Stream.flush(); +} + +bool hasCodeObjectV3(const FeatureBitset &Features) { + return Features.test(FeatureCodeObjectV3); +} + unsigned getWavefrontSize(const FeatureBitset &Features) { if (Features.test(FeatureWavefrontSize16)) return 16; @@ -486,7 +505,9 @@ unsigned getInitialPSInputAddr(const Function &F) { bool isShader(CallingConv::ID cc) { switch(cc) { case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_LS: case CallingConv::AMDGPU_HS: + case CallingConv::AMDGPU_ES: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: @@ -508,7 +529,9 @@ bool isEntryFunctionCC(CallingConv::ID CC) { case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_ES: case CallingConv::AMDGPU_HS: + case CallingConv::AMDGPU_LS: return true; default: return false; @@ -744,7 +767,9 @@ bool isArgPassedInSGPR(const Argument *A) { case CallingConv::SPIR_KERNEL: return true; case CallingConv::AMDGPU_VS: + case CallingConv::AMDGPU_LS: case CallingConv::AMDGPU_HS: + case CallingConv::AMDGPU_ES: case CallingConv::AMDGPU_GS: case CallingConv::AMDGPU_PS: case CallingConv::AMDGPU_CS: diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index ea9dcdf8f3c2a..60a7af837fb11 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -19,6 +19,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include +#include #include namespace llvm { @@ -54,6 +55,13 @@ struct IsaVersion { /// \returns Isa version for given subtarget \p Features. IsaVersion getIsaVersion(const FeatureBitset &Features); +/// \brief Streams isa version string for given subtarget \p STI into \p Stream. 
+void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream); + +/// \returns True if given subtarget \p Features support code object version 3, +/// false otherwise. +bool hasCodeObjectV3(const FeatureBitset &Features); + /// \returns Wavefront size for given subtarget \p Features. unsigned getWavefrontSize(const FeatureBitset &Features); diff --git a/lib/Target/AMDGPU/VOP1Instructions.td b/lib/Target/AMDGPU/VOP1Instructions.td index 4520f474d6692..ff2bd24544002 100644 --- a/lib/Target/AMDGPU/VOP1Instructions.td +++ b/lib/Target/AMDGPU/VOP1Instructions.td @@ -361,14 +361,14 @@ defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; } -let Predicates = [Has16BitInsts] in { +let OtherPredicates = [Has16BitInsts] in { -def : Pat< +def : GCNPat< (f32 (f16_to_fp i16:$src)), (V_CVT_F32_F16_e32 $src) >; -def : Pat< +def : GCNPat< (i16 (AMDGPUfp_to_f16 f32:$src)), (V_CVT_F16_F32_e32 $src) >; @@ -653,9 +653,9 @@ def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo; def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo; def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo; -let Predicates = [isVI] in { +let OtherPredicates = [isVI] in { -def : Pat < +def : GCNPat < (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, imm:$bound_ctrl)), (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl), @@ -663,7 +663,7 @@ def : Pat < (as_i1imm $bound_ctrl)) >; -def : Pat < +def : GCNPat < (i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask, imm:$bound_ctrl)), (V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl), @@ -671,26 +671,26 @@ def : Pat < (as_i1imm $bound_ctrl)) >; -def : Pat< +def : GCNPat< (i32 (anyext i16:$src)), (COPY $src) >; -def : Pat< +def : GCNPat< (i64 (anyext i16:$src)), (REG_SEQUENCE VReg_64, (i32 (COPY $src)), sub0, (V_MOV_B32_e32 (i32 0)), sub1) >; -def : Pat< +def : GCNPat< (i16 (trunc i32:$src)), (COPY $src) >; -def : Pat < +def : GCNPat < (i16 (trunc i64:$src)), (EXTRACT_SUBREG $src, sub0) >; -} // End Predicates = [isVI] +} // End OtherPredicates = [isVI] diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index 2db0669310c61..e0ef8ce3c7703 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -408,12 +408,12 @@ defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT; -def : Pat< +def : GCNPat< (AMDGPUsube i32:$src0, i32:$src1, i1:$src2), (V_SUBB_U32_e64 $src0, $src1, $src2) >; @@ -469,17 +469,17 @@ defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; // Note: 16-bit instructions produce a 0 result in the high 16-bits. 
multiclass Arithmetic_i16_Pats { -def : Pat< +def : GCNPat< (op i16:$src0, i16:$src1), (inst $src0, $src1) >; -def : Pat< +def : GCNPat< (i32 (zext (op i16:$src0, i16:$src1))), (inst $src0, $src1) >; -def : Pat< +def : GCNPat< (i64 (zext (op i16:$src0, i16:$src1))), (REG_SEQUENCE VReg_64, (inst $src0, $src1), sub0, @@ -490,18 +490,18 @@ def : Pat< multiclass Bits_OpsRev_i16_Pats { -def : Pat< +def : GCNPat< (op i16:$src0, i16:$src1), (inst $src1, $src0) >; -def : Pat< +def : GCNPat< (i32 (zext (op i16:$src0, i16:$src1))), (inst $src1, $src0) >; -def : Pat< +def : GCNPat< (i64 (zext (op i16:$src0, i16:$src1))), (REG_SEQUENCE VReg_64, (inst $src1, $src0), sub0, @@ -509,7 +509,7 @@ def : Pat< >; } -class ZExt_i16_i1_Pat : Pat < +class ZExt_i16_i1_Pat : GCNPat < (i16 (ext i1:$src)), (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src) >; @@ -524,17 +524,17 @@ defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; -def : Pat < +def : GCNPat < (and i16:$src0, i16:$src1), (V_AND_B32_e64 $src0, $src1) >; -def : Pat < +def : GCNPat < (or i16:$src0, i16:$src1), (V_OR_B32_e64 $src0, $src1) >; -def : Pat < +def : GCNPat < (xor i16:$src0, i16:$src1), (V_XOR_B32_e64 $src0, $src1) >; @@ -546,7 +546,7 @@ defm : Bits_OpsRev_i16_Pats; def : ZExt_i16_i1_Pat; def : ZExt_i16_i1_Pat; -def : Pat < +def : GCNPat < (i16 (sext i1:$src)), (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src) >; @@ -554,7 +554,7 @@ def : Pat < // Undo sub x, c -> add x, -c canonicalization since c is more likely // an inline immediate than -c. // TODO: Also do for 64-bit. -def : Pat< +def : GCNPat< (add i16:$src0, (i16 NegSubInlineConst16:$src1)), (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1) >; diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td index 736c6a5c44986..aa041aab51c8d 100644 --- a/lib/Target/AMDGPU/VOP3Instructions.td +++ b/lib/Target/AMDGPU/VOP3Instructions.td @@ -450,17 +450,17 @@ let Predicates = [Has16BitInsts] in { multiclass Ternary_i16_Pats { -def : Pat< +def : GCNPat < (op2 (op1 i16:$src0, i16:$src1), i16:$src2), (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)) >; -def : Pat< +def : GCNPat< (i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))), (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)) >; -def : Pat< +def : GCNPat< (i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))), (REG_SEQUENCE VReg_64, (inst i16:$src0, i16:$src1, i16:$src2, (i1 0)), sub0, @@ -528,7 +528,7 @@ class getClampRes { ret1)); } -class IntClampPat : Pat< +class IntClampPat : GCNPat< getClampPat.ret, getClampRes.ret >; diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td index 313792f37048b..eeee8b36c1753 100644 --- a/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/lib/Target/AMDGPU/VOP3PInstructions.td @@ -68,6 +68,8 @@ def V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile, ashr_rev>; def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile, lshr_rev>; + +let SubtargetPredicate = HasMadMixInsts in { // These are VOP3a-like opcodes which accept no omod. // Size of src arguments (16/32) is controlled by op_sel. // For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi. 
@@ -82,9 +84,7 @@ def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile; -def : Pat < +def : GCNPat < (build_vector f16:$elt0, (AMDGPUclamp (fpround (fmad (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)), @@ -122,7 +122,7 @@ def : Pat < $elt0)) >; -def : Pat < +def : GCNPat < (AMDGPUclamp (build_vector (fpround (fmad (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)), (f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)), @@ -141,7 +141,7 @@ def : Pat < (i32 (IMPLICIT_DEF))))) >; -} // End Predicates = [HasMadMix] +} // End SubtargetPredicate = [HasMadMixInsts] multiclass VOP3P_Real_vi op> { def _vi : VOP3P_Real(NAME), SIEncodingFamily.VI>, diff --git a/lib/Target/AMDGPU/VOPCInstructions.td b/lib/Target/AMDGPU/VOPCInstructions.td index b636fc9be431b..146870e215313 100644 --- a/lib/Target/AMDGPU/VOPCInstructions.td +++ b/lib/Target/AMDGPU/VOPCInstructions.td @@ -607,9 +607,7 @@ defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">; // V_ICMPIntrinsic Pattern. //===----------------------------------------------------------------------===// -let Predicates = [isGCN] in { - -class ICMP_Pattern : Pat < +class ICMP_Pattern : GCNPat < (AMDGPUsetcc vt:$src0, vt:$src1, cond), (inst $src0, $src1) >; @@ -636,7 +634,7 @@ def : ICMP_Pattern ; def : ICMP_Pattern ; def : ICMP_Pattern ; -class FCMP_Pattern : Pat < +class FCMP_Pattern : GCNPat < (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)), (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)), (inst $src0_modifiers, $src0, $src1_modifiers, $src1, @@ -671,8 +669,6 @@ def : FCMP_Pattern ; def : FCMP_Pattern ; def : FCMP_Pattern ; -} // End Predicates = [isGCN] - //===----------------------------------------------------------------------===// // Target //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 25b9802f5414c..c1a3f639461d1 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -1022,6 +1022,10 @@ def ARMAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def ARMAsmParser : AsmParser { + bit ReportMultipleNearMisses = 1; +} + def ARMAsmParserVariant : AsmParserVariant { int Variant = 0; string Name = "ARM"; @@ -1032,5 +1036,6 @@ def ARM : Target { // Pull in Instruction Info. let InstructionSet = ARMInstrInfo; let AssemblyWriters = [ARMAsmWriter]; + let AssemblyParsers = [ARMAsmParser]; let AssemblyParserVariants = [ARMAsmParserVariant]; } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 13335a84f6d9b..2147c1cfcf8b5 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1204,6 +1204,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); ARMTargetStreamer &ATS = static_cast(TS); + const MachineFunction &MF = *MI->getParent()->getParent(); + const ARMSubtarget &STI = MF.getSubtarget(); + unsigned FramePtr = STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11; + // If we just ended a constant pool, mark it as such. if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); @@ -1884,13 +1888,33 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) - .addReg(ARM::R7) - .addReg(SrcReg) - .addImm(0) - // Predicate. 
- .addImm(ARMCC::AL) - .addReg(0)); + if (STI.isTargetDarwin() || STI.isTargetWindows()) { + // These platforms always use the same frame register + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) + .addReg(FramePtr) + .addReg(SrcReg) + .addImm(0) + // Predicate. + .addImm(ARMCC::AL) + .addReg(0)); + } else { + // If the calling code might use either R7 or R11 as + // frame pointer register, restore it into both. + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) + .addReg(ARM::R7) + .addReg(SrcReg) + .addImm(0) + // Predicate. + .addImm(ARMCC::AL) + .addReg(0)); + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) + .addReg(ARM::R11) + .addReg(SrcReg) + .addImm(0) + // Predicate. + .addImm(ARMCC::AL) + .addReg(0)); + } assert(Subtarget->hasV4TOps()); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX) @@ -1934,13 +1958,33 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) - .addReg(ARM::R7) - .addReg(SrcReg) - .addImm(0) - // Predicate. - .addImm(ARMCC::AL) - .addReg(0)); + if (STI.isTargetDarwin() || STI.isTargetWindows()) { + // These platforms always use the same frame register + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) + .addReg(FramePtr) + .addReg(SrcReg) + .addImm(0) + // Predicate. + .addImm(ARMCC::AL) + .addReg(0)); + } else { + // If the calling code might use either R7 or R11 as + // frame pointer register, restore it into both. + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) + .addReg(ARM::R7) + .addReg(SrcReg) + .addImm(0) + // Predicate. + .addImm(ARMCC::AL) + .addReg(0)); + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) + .addReg(ARM::R11) + .addReg(SrcReg) + .addImm(0) + // Predicate. + .addImm(ARMCC::AL) + .addReg(0)); + } EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBX) .addReg(ScratchReg) diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 1726926811227..63b14ee98d707 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -391,16 +391,12 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const ARMFunctionInfo *AFI = MF.getInfo(); const ARMFrameLowering *TFI = getFrameLowering(MF); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, - // 2. This is a Thumb1 function (it's not useful, so we don't bother), or - // 3. There are VLAs in the function and the base pointer is disabled. + // 2. There are VLAs in the function and the base pointer is disabled. if (!TargetRegisterInfo::canRealignStack(MF)) return false; - if (AFI->isThumb1OnlyFunction()) - return false; // Stack realignment requires a frame pointer. If we already started // register allocation with frame pointer elimination, it is too late now. 
if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget()))) @@ -807,7 +803,8 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const { + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const { auto MBB = MI->getParent(); auto MF = MBB->getParent(); const MachineRegisterInfo &MRI = MF->getRegInfo(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 2e91d9d4be246..a8e947184ea05 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -27,6 +27,8 @@ namespace llvm { +class LiveIntervals; + /// Register allocation hints. namespace ARMRI { @@ -204,7 +206,8 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo { unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const override; + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index 6dc0e86255500..e1323cd9427ee 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -343,13 +343,26 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { assert(VA.isRegLoc() && "Value shouldn't be assigned to reg"); assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?"); - assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size"); - assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size"); + auto ValSize = VA.getValVT().getSizeInBits(); + auto LocSize = VA.getLocVT().getSizeInBits(); + + assert(ValSize <= 64 && "Unsupported value size"); + assert(LocSize <= 64 && "Unsupported location size"); - // The necessary extensions are handled on the other side of the ABI - // boundary. markPhysRegUsed(PhysReg); - MIRBuilder.buildCopy(ValVReg, PhysReg); + if (ValSize == LocSize) { + MIRBuilder.buildCopy(ValVReg, PhysReg); + } else { + assert(ValSize < LocSize && "Extensions not supported"); + + // We cannot create a truncating copy, nor a trunc of a physical register. + // Therefore, we need to copy the content of the physical register into a + // virtual one and then truncate that. + auto PhysRegToVReg = + MRI.createGenericVirtualRegister(LLT::scalar(LocSize)); + MIRBuilder.buildCopy(PhysRegToVReg, PhysReg); + MIRBuilder.buildTrunc(ValVReg, PhysRegToVReg); + } } unsigned assignCustomValue(const ARMCallLowering::ArgInfo &Arg, @@ -480,19 +493,26 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); const auto &TLI = *getTLI(); const auto &DL = MF.getDataLayout(); - const auto &STI = MF.getSubtarget(); + const auto &STI = MF.getSubtarget(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - if (MF.getSubtarget().genLongCalls()) + if (STI.genLongCalls()) return false; auto CallSeqStart = MIRBuilder.buildInstr(ARM::ADJCALLSTACKDOWN); // Create the call instruction so we can add the implicit uses of arg // registers, but don't insert it yet. - auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask( - TRI->getCallPreservedMask(MF, CallConv)); + bool isDirect = !Callee.isReg(); + auto CallOpcode = + isDirect ? ARM::BL + : STI.hasV5TOps() + ? ARM::BLX + : STI.hasV4TOps() ? 
ARM::BX_CALL : ARM::BMOVPCRX_CALL; + auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode) + .add(Callee) + .addRegMask(TRI->getCallPreservedMask(MF, CallConv)); if (Callee.isReg()) { auto CalleeReg = Callee.getReg(); if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 65ca2ad504a82..ce4add974d6ac 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1053,7 +1053,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, unsigned LastReg = 0; bool DeleteRet = false; for (; i != 0; --i) { - unsigned Reg = CSI[i-1].getReg(); + CalleeSavedInfo &Info = CSI[i-1]; + unsigned Reg = Info.getReg(); if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // The aligned reloads from area DPRCS2 are not inserted here. @@ -1066,6 +1067,9 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, Reg = ARM::PC; DeleteRet = true; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; + // We 'restore' LR into PC so it is not live out of the return block: + // Clear Restored bit. + Info.setRestored(false); } else LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; // Fold the return instruction into the LDM. @@ -1099,13 +1103,6 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, MIB.copyImplicitOps(*MI); MI->eraseFromParent(); } - // If LR is not restored, mark it in CSI. - for (CalleeSavedInfo &I : CSI) { - if (I.getReg() != ARM::LR) - continue; - I.setRestored(false); - break; - } } MI = MIB; } else if (Regs.size() == 1) { @@ -1613,14 +1610,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, if (AFI->getArgRegsSaveSize() > 0) SavedRegs.set(ARM::LR); - // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know - // for sure what the stack size will be, but for this, an estimate is good - // enough. If there anything changes it, it'll be a spill, which implies - // we've used all the registers and so R4 is already used, so not marking - // it here will be OK. + // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function + // requires stack alignment. We don't know for sure what the stack size + // will be, but for this, an estimate is good enough. If there anything + // changes it, it'll be a spill, which implies we've used all the registers + // and so R4 is already used, so not marking it here will be OK. // FIXME: It will be better just to find spare register here. - unsigned StackSize = MFI.estimateStackSize(MF); - if (MFI.hasVarSizedObjects() || StackSize > 508) + if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) || + MFI.estimateStackSize(MF) > 508) SavedRegs.set(ARM::R4); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 333a077e6402d..3e80c1e8359ef 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3857,6 +3857,12 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, break; } } + } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) && + (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) { + // In ARM and Thumb-2, the compare instructions can shift their second + // operand. 
+ CC = ISD::getSetCCSwappedOperands(CC); + std::swap(LHS, RHS); } ARMCC::CondCodes CondCode = IntCCToARMCC(CC); @@ -7781,6 +7787,7 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, } SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + DEBUG(dbgs() << "Lowering node: "; Op.dump()); switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); @@ -10350,95 +10357,17 @@ static SDValue PerformORCombineToSMULWBT(SDNode *OR, return SDValue(OR, 0); } -/// PerformORCombine - Target-specific dag combine xforms for ISD::OR -static SDValue PerformORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const ARMSubtarget *Subtarget) { - // Attempt to use immediate-form VORR - BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); - SDLoc dl(N); - EVT VT = N->getValueType(0); - SelectionDAG &DAG = DCI.DAG; - - if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); - - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (BVN && Subtarget->hasNEON() && - BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - if (SplatBitSize <= 64) { - EVT VorrVT; - SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), SplatBitSize, - DAG, dl, VorrVT, VT.is128BitVector(), - OtherModImm); - if (Val.getNode()) { - SDValue Input = - DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); - SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); - } - } - } - - if (!Subtarget->isThumb1Only()) { - // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) - if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) - return Result; - if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget)) - return Result; - } - - // The code below optimizes (or (and X, Y), Z). - // The AND operand needs to have a single user to make these optimizations - // profitable. - SDValue N0 = N->getOperand(0); - if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) - return SDValue(); - SDValue N1 = N->getOperand(1); - - // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. - if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && - DAG.getTargetLoweringInfo().isTypeLegal(VT)) { - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - - APInt SplatBits0, SplatBits1; - BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(1)); - BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); - // Ensure that the second operand of both ands are constants - if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - // Ensure that the bit width of the constants are the same and that - // the splat arguments are logical inverses as per the pattern we - // are trying to simplify. - if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && - SplatBits0 == ~SplatBits1) { - // Canonicalize the vector type to make instruction selection - // simpler. - EVT CanonicalVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32; - SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, - N0->getOperand(1), - N0->getOperand(0), - N1->getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); - } - } - } - - // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when - // reasonable. - +static SDValue PerformORCombineToBFI(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // BFI is only available on V6T2+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val @@ -10480,9 +10409,10 @@ static SDValue PerformORCombine(SDNode *N, DAG.getConstant(Val, DL, MVT::i32), DAG.getConstant(Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner that N is + // now dead. + return SDValue(N, 0); } } else if (N1.getOpcode() == ISD::AND) { // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask @@ -10506,9 +10436,10 @@ static SDValue PerformORCombine(SDNode *N, DAG.getConstant(amt, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, DAG.getConstant(Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner that N is + // now dead. + return SDValue(N, 0); } else if (ARM::isBitFieldInvertedMask(~Mask) && (~Mask == Mask2)) { // The pack halfword instruction works better for masks that fit it, @@ -10522,9 +10453,10 @@ static SDValue PerformORCombine(SDNode *N, DAG.getConstant(lsb, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, DAG.getConstant(Mask2, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner that N is + // now dead. + return SDValue(N, 0); } } @@ -10542,10 +10474,102 @@ static SDValue PerformORCombine(SDNode *N, Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), DAG.getConstant(~Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); + // Return value from the original node to inform the combiner that N is + // now dead.
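
The repeated change from `return SDValue();` to `return SDValue(N, 0);` after CombineTo encodes a small contract: a null result tells the combiner that nothing happened, while the original node's own value tells it N has been rewritten and is now dead. A toy model of that contract, with illustrative types standing in for the SelectionDAG ones:

#include <cassert>

struct Node { bool Dead = false; };
struct Value { Node *N = nullptr; };  // default-constructed == "no combine"

// Stand-in for a combine that may rewrite N in place, as DCI.CombineTo does.
static Value tryCombine(Node &N, bool Fires) {
  if (!Fires)
    return Value{};   // nothing matched; the caller keeps looking
  N.Dead = true;      // N was replaced by new nodes
  return Value{&N};   // hand back N itself so the caller marks it handled
}

int main() {
  Node N;
  assert(tryCombine(N, false).N == nullptr && !N.Dead);
  assert(tryCombine(N, true).N == &N && N.Dead);
}
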
+ return SDValue(N, 0); + } + + return SDValue(); +} + +/// PerformORCombine - Target-specific dag combine xforms for ISD::OR +static SDValue PerformORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Attempt to use immediate-form VORR + BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); + SDLoc dl(N); + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN && Subtarget->hasNEON() && + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (SplatBitSize <= 64) { + EVT VorrVT; + SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, + DAG, dl, VorrVT, VT.is128BitVector(), + OtherModImm); + if (Val.getNode()) { + SDValue Input = + DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); + SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); + } + } + } + + if (!Subtarget->isThumb1Only()) { + // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) + if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) + return Result; + if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget)) + return Result; + } + + // The code below optimizes (or (and X, Y), Z). + // The AND operand needs to have a single user to make these optimizations + // profitable. + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) + return SDValue(); + SDValue N1 = N->getOperand(1); + + // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. + if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && + DAG.getTargetLoweringInfo().isTypeLegal(VT)) { + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + APInt SplatBits0, SplatBits1; + BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(1)); + BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); + // Ensure that the second operand of both ands are constants + if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + // Ensure that the bit width of the constants are the same and that + // the splat arguments are logical inverses as per the pattern we + // are trying to simplify. + if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && + SplatBits0 == ~SplatBits1) { + // Canonicalize the vector type to make instruction selection + // simpler. + EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, + N0->getOperand(1), + N0->getOperand(0), + N1->getOperand(0)); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + } + } } + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when + // reasonable. + if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget)) + return Res; + return SDValue(); } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 1bbe7f0d275ed..f7c6c32eb4dc0 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -69,6 +69,7 @@ def NVExtFrm : Format<39>; def NVMulSLFrm : Format<40>; def NVTBLFrm : Format<41>; def DPSoRegImmFrm : Format<42>; +def N3RegCplxFrm : Format<43>; // Misc flags. 
@@ -2513,6 +2514,80 @@ multiclass NEONDTAnyInstAlias : AsmPseudoInst, Requires<[HasNEON]>; +// Extension of NEON 3-vector data processing instructions in coprocessor 8 +// encoding space, introduced in ARMv8.3-A. +class N3VCP8 op24_23, bits<2> op21_20, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let DecoderNamespace = "VFPV8"; + // These have the same encodings in ARM and Thumb2 + let PostEncoderMethod = ""; + + let Inst{31-25} = 0b1111110; + let Inst{24-23} = op24_23; + let Inst{22} = Vd{4}; + let Inst{21-20} = op21_20; + let Inst{19-16} = Vn{3-0}; + let Inst{15-12} = Vd{3-0}; + let Inst{11-8} = 0b1000; + let Inst{7} = Vn{4}; + let Inst{6} = op6; + let Inst{5} = Vm{4}; + let Inst{4} = op4; + let Inst{3-0} = Vm{3-0}; +} + +// Extension of NEON 2-vector-and-scalar data processing instructions in +// coprocessor 8 encoding space, introduced in ARMv8.3-A. +class N3VLaneCP8 op21_20, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let DecoderNamespace = "VFPV8"; + // These have the same encodings in ARM and Thumb2 + let PostEncoderMethod = ""; + + let Inst{31-24} = 0b11111110; + let Inst{23} = op23; + let Inst{22} = Vd{4}; + let Inst{21-20} = op21_20; + let Inst{19-16} = Vn{3-0}; + let Inst{15-12} = Vd{3-0}; + let Inst{11-8} = 0b1000; + let Inst{7} = Vn{4}; + let Inst{6} = op6; + // Bit 5 set by sub-classes + let Inst{4} = op4; + let Inst{3-0} = Vm{3-0}; +} + +// Operand types for complex instructions +class ComplexRotationOperand + : AsmOperandClass { + let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">"; + let DiagnosticString = "complex rotation must be " # Diag; + let Name = "ComplexRotation" # Type; +} +def complexrotateop : Operand { + let ParserMatchClass = ComplexRotationOperand<90, 0, "Even", "0, 90, 180 or 270">; + let PrintMethod = "printComplexRotationOp<90, 0>"; +} +def complexrotateopodd : Operand { + let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd", "90 or 270">; + let PrintMethod = "printComplexRotationOp<180, 90>"; +} + // Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. def : TokenAlias<".s8", ".i8">; def : TokenAlias<".u8", ".i8">; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index ba9c9729d7534..c031f6ff7acf8 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -460,12 +460,13 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; - let DiagnosticType = "ImmRange" # Low # "_" # High; + let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; } class ImmAsmOperandMinusOne : AsmOperandClass { let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; let DiagnosticType = "ImmRange" # Low # "_" # High; + let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; } // Operands that are part of a memory addressing mode. @@ -754,7 +755,6 @@ def imm1_31 : Operand, ImmLeaf 0 && Imm < 32; }]> { /// imm0_15 predicate - Immediate in the range [0,15]. 
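The `let Inst{...}` assignments in the N3VCP8 class above pin down the whole 32-bit word; a hand-rolled packer following those field positions (a sketch for illustration, not the tablegen-generated encoder):

#include <cassert>
#include <cstdint>

// Pack an N3VCP8-format instruction word from its fields. Vd/Vn/Vm are 5-bit
// NEON register encodings.
static uint32_t encodeN3VCP8(unsigned op24_23, unsigned op21_20, unsigned Vd,
                             unsigned Vn, unsigned Vm, bool op6, bool op4) {
  uint32_t Inst = 0;
  Inst |= 0b1111110u << 25;          // Inst{31-25}
  Inst |= (op24_23 & 0x3u) << 23;    // Inst{24-23}
  Inst |= ((Vd >> 4) & 1u) << 22;    // Inst{22}    = Vd{4}
  Inst |= (op21_20 & 0x3u) << 20;    // Inst{21-20}
  Inst |= (Vn & 0xFu) << 16;         // Inst{19-16} = Vn{3-0}
  Inst |= (Vd & 0xFu) << 12;         // Inst{15-12} = Vd{3-0}
  Inst |= 0b1000u << 8;              // Inst{11-8}
  Inst |= ((Vn >> 4) & 1u) << 7;     // Inst{7}     = Vn{4}
  Inst |= (unsigned)op6 << 6;        // Inst{6}
  Inst |= ((Vm >> 4) & 1u) << 5;     // Inst{5}     = Vm{4}
  Inst |= (unsigned)op4 << 4;        // Inst{4}
  Inst |= Vm & 0xFu;                 // Inst{3-0}   = Vm{3-0}
  return Inst;
}

int main() {
  // All-zero fields leave only the fixed opcode bits set.
  assert(encodeN3VCP8(0, 0, 0, 0, 0, false, false) == 0xFC000800u);
}
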
def Imm0_15AsmOperand: ImmAsmOperand<0,15> { let Name = "Imm0_15"; - let DiagnosticType = "ImmRange0_15"; } def imm0_15 : Operand, ImmLeaf= 0 && Imm < 16; @@ -789,7 +789,6 @@ def imm0_63 : Operand, ImmLeaf { let Name = "Imm0_239"; - let DiagnosticType = "ImmRange0_239"; } def imm0_239 : Operand, ImmLeaf= 0 && Imm < 240; }]> { let ParserMatchClass = Imm0_239AsmOperand; @@ -823,6 +822,7 @@ def imm0_65535_neg : Operand, ImmLeaf { @@ -836,7 +836,10 @@ def imm256_65535_expr : Operand { } /// imm24b - True if the 32-bit immediate is encodable in 24 bits. -def Imm24bitAsmOperand: ImmAsmOperand<0,0xffffff> { let Name = "Imm24bit"; } +def Imm24bitAsmOperand: ImmAsmOperand<0,0xffffff> { + let Name = "Imm24bit"; + let DiagnosticString = "operand must be an immediate in the range [0,0xffffff]"; +} def imm24b : Operand, ImmLeaf= 0 && Imm <= 0xffffff; }]> { @@ -1129,7 +1132,7 @@ class AddrMode6Align : MemOperand, // VLD/VST instructions and checking the alignment is not specified. def AddrMode6AlignNoneAsmOperand : AsmOperandClass { let Name = "AlignedMemoryNone"; - let DiagnosticType = "AlignedMemoryRequiresNone"; + let DiagnosticString = "alignment must be omitted"; } def addrmode6alignNone : AddrMode6Align { // The alignment specifier can only be omitted. @@ -1140,7 +1143,7 @@ def addrmode6alignNone : AddrMode6Align { // VLD/VST instructions and checking the alignment value. def AddrMode6Align16AsmOperand : AsmOperandClass { let Name = "AlignedMemory16"; - let DiagnosticType = "AlignedMemoryRequires16"; + let DiagnosticString = "alignment must be 16 or omitted"; } def addrmode6align16 : AddrMode6Align { // The alignment specifier can only be 16 or omitted. @@ -1151,7 +1154,7 @@ def addrmode6align16 : AddrMode6Align { // VLD/VST instructions and checking the alignment value. def AddrMode6Align32AsmOperand : AsmOperandClass { let Name = "AlignedMemory32"; - let DiagnosticType = "AlignedMemoryRequires32"; + let DiagnosticString = "alignment must be 32 or omitted"; } def addrmode6align32 : AddrMode6Align { // The alignment specifier can only be 32 or omitted. @@ -1162,7 +1165,7 @@ def addrmode6align32 : AddrMode6Align { // VLD/VST instructions and checking the alignment value. def AddrMode6Align64AsmOperand : AsmOperandClass { let Name = "AlignedMemory64"; - let DiagnosticType = "AlignedMemoryRequires64"; + let DiagnosticString = "alignment must be 64 or omitted"; } def addrmode6align64 : AddrMode6Align { // The alignment specifier can only be 64 or omitted. @@ -1173,7 +1176,7 @@ def addrmode6align64 : AddrMode6Align { // for VLD/VST instructions and checking the alignment value. def AddrMode6Align64or128AsmOperand : AsmOperandClass { let Name = "AlignedMemory64or128"; - let DiagnosticType = "AlignedMemoryRequires64or128"; + let DiagnosticString = "alignment must be 64, 128 or omitted"; } def addrmode6align64or128 : AddrMode6Align { // The alignment specifier can only be 64, 128 or omitted. @@ -1184,7 +1187,7 @@ def addrmode6align64or128 : AddrMode6Align { // encoding for VLD/VST instructions and checking the alignment value. def AddrMode6Align64or128or256AsmOperand : AsmOperandClass { let Name = "AlignedMemory64or128or256"; - let DiagnosticType = "AlignedMemoryRequires64or128or256"; + let DiagnosticString = "alignment must be 64, 128, 256 or omitted"; } def addrmode6align64or128or256 : AddrMode6Align { // The alignment specifier can only be 64, 128, 256 or omitted. @@ -1215,7 +1218,7 @@ class AddrMode6DupAlign : MemOperand, // VLD-dup instruction and checking the alignment is not specified. 
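Each `isImmediate<Low,High>` PredicateMethod generated from ImmAsmOperand boils down to a templated range check in the asm parser; its shape, sketched standalone (the real method lives on ARMOperand):

#include <cassert>
#include <cstdint>

template <int64_t Low, int64_t High> static bool isImmediate(int64_t V) {
  return V >= Low && V <= High;
}

int main() {
  assert(isImmediate<0, 15>(15));
  // 16 fails the predicate, which is what now produces the
  // "operand must be an immediate in the range [0,15]" diagnostic string.
  assert(!isImmediate<0, 15>(16));
}
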
def AddrMode6dupAlignNoneAsmOperand : AsmOperandClass { let Name = "DupAlignedMemoryNone"; - let DiagnosticType = "DupAlignedMemoryRequiresNone"; + let DiagnosticString = "alignment must be omitted"; } def addrmode6dupalignNone : AddrMode6DupAlign { // The alignment specifier can only be omitted. @@ -1226,7 +1229,7 @@ def addrmode6dupalignNone : AddrMode6DupAlign { // instruction and checking the alignment value. def AddrMode6dupAlign16AsmOperand : AsmOperandClass { let Name = "DupAlignedMemory16"; - let DiagnosticType = "DupAlignedMemoryRequires16"; + let DiagnosticString = "alignment must be 16 or omitted"; } def addrmode6dupalign16 : AddrMode6DupAlign { // The alignment specifier can only be 16 or omitted. @@ -1237,7 +1240,7 @@ def addrmode6dupalign16 : AddrMode6DupAlign { // instruction and checking the alignment value. def AddrMode6dupAlign32AsmOperand : AsmOperandClass { let Name = "DupAlignedMemory32"; - let DiagnosticType = "DupAlignedMemoryRequires32"; + let DiagnosticString = "alignment must be 32 or omitted"; } def addrmode6dupalign32 : AddrMode6DupAlign { // The alignment specifier can only be 32 or omitted. @@ -1248,7 +1251,7 @@ def addrmode6dupalign32 : AddrMode6DupAlign { // instructions and checking the alignment value. def AddrMode6dupAlign64AsmOperand : AsmOperandClass { let Name = "DupAlignedMemory64"; - let DiagnosticType = "DupAlignedMemoryRequires64"; + let DiagnosticString = "alignment must be 64 or omitted"; } def addrmode6dupalign64 : AddrMode6DupAlign { // The alignment specifier can only be 64 or omitted. @@ -1259,7 +1262,7 @@ def addrmode6dupalign64 : AddrMode6DupAlign { // for VLD instructions and checking the alignment value. def AddrMode6dupAlign64or128AsmOperand : AsmOperandClass { let Name = "DupAlignedMemory64or128"; - let DiagnosticType = "DupAlignedMemoryRequires64or128"; + let DiagnosticString = "alignment must be 64, 128 or omitted"; } def addrmode6dupalign64or128 : AddrMode6DupAlign { // The alignment specifier can only be 64, 128 or omitted. diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 495d44f96b8f8..cd67dded5853f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -108,6 +108,7 @@ def nImmSplatI64 : Operand { def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; } def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; } def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; } +def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; } def VectorIndex8 : Operand, ImmLeaf { @@ -129,6 +130,13 @@ def VectorIndex32 : Operand, ImmLeaf, ImmLeaf { + let ParserMatchClass = VectorIndex64Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} // Register list of one D register. 
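A 64-bit element leaves exactly one addressable lane per register, so the new VectorIndex64 operand above only ever accepts index 0. The predicate family, condensed into a single hypothetical template (the parser actually defines one isVectorIndexN method per width):

#include <cassert>
#include <cstdint>

template <unsigned NumLanes> static bool isVectorIndex(int64_t Val) {
  return Val >= 0 && Val < NumLanes;
}

int main() {
  assert(isVectorIndex<1>(0));   // VectorIndex64: lane 0 only
  assert(!isVectorIndex<1>(1));
  assert(isVectorIndex<2>(1));   // VectorIndex32: lanes 0 and 1
}
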
def VecListOneDAsmOperand : AsmOperandClass { @@ -4724,6 +4732,131 @@ def VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR>; } // HasDotProd +// ARMv8.3 complex operations +class BaseN3VCP8ComplexTied pattern> + : N3VCP8<{?,?}, {op21,s}, q, op4, oops, + iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{ + bits<2> rot; + let Inst{24-23} = rot; +} + +class BaseN3VCP8ComplexOdd pattern> + : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, + iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> { + bits<1> rot; + let Inst{24} = rot; +} + +class BaseN3VCP8ComplexTiedLane32 pattern> + : N3VLaneCP8 { + bits<2> rot; + bit lane; + + let Inst{21-20} = rot; + let Inst{5} = lane; +} + +class BaseN3VCP8ComplexTiedLane64 pattern> + : N3VLaneCP8 { + bits<2> rot; + bit lane; + + let Inst{21-20} = rot; + let Inst{5} = Vm{4}; + // This is needed because the lane operand does not have any bits in the + // encoding (it only has one possible value), so we need to manually set it + // to it's default value. + let DecoderMethod = "DecodeNEONComplexLane64Instruction"; +} + +multiclass N3VCP8ComplexTied { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16 : BaseN3VCP8ComplexTied; + def v8f16 : BaseN3VCP8ComplexTied; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32 : BaseN3VCP8ComplexTied; + def v4f32 : BaseN3VCP8ComplexTied; + } +} + +multiclass N3VCP8ComplexOdd { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16 : BaseN3VCP8ComplexOdd; + def v8f16 : BaseN3VCP8ComplexOdd; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32 : BaseN3VCP8ComplexOdd; + def v4f32 : BaseN3VCP8ComplexOdd; + } +} + +// These instructions index by pairs of lanes, so the VectorIndexes are twice +// as wide as the data types. +multiclass N3VCP8ComplexTiedLane { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16_indexed : BaseN3VCP8ComplexTiedLane32; + def v8f16_indexed : BaseN3VCP8ComplexTiedLane32; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32_indexed : BaseN3VCP8ComplexTiedLane64; + def v4f32_indexed : BaseN3VCP8ComplexTiedLane64; + } +} + +defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>; +defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>; +defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>; + // Vector Subtract Operations. // VSUB : Vector Subtract (integer and floating-point) diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 2ae23fe1c052b..d6b9a21c2b833 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -338,7 +338,7 @@ def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val", } def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end", - []>, T1Encoding<0b101101>, Requires<[IsNotMClass]>, Deprecated { + []>, T1Encoding<0b101101>, Requires<[IsThumb, IsNotMClass]>, Deprecated { bits<1> end; // A8.6.156 let Inst{9-5} = 0b10010; @@ -1671,13 +1671,6 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def : InstAlias<"nop", (tMOVr R8, R8, 14, 0), 0>, Requires<[IsThumb, IsThumb1Only]>; -// For round-trip assembly/disassembly, we have to handle a CPS instruction -// without any iflags. That's not, strictly speaking, valid syntax, but it's -// a useful extension and assembles to defined behaviour (the insn does -// nothing). 
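VCMLA accepts the even rotations {0, 90, 180, 270} and VCADD the odd ones {90, 270}; the operand conversions appearing later in this patch (addComplexRotationEven/OddOperands and printComplexRotationOp<Angle, Remainder>) are simple affine maps, checked round-trip here as a sketch:

#include <cassert>
#include <cstdint>

static unsigned encodeEven(uint64_t Deg) { return Deg / 90; }        // 0..3
static unsigned encodeOdd(uint64_t Deg) { return (Deg - 90) / 180; } // 0..1
static uint64_t decode(unsigned Enc, unsigned Angle, unsigned Rem) {
  return uint64_t(Enc) * Angle + Rem;  // the printer's inverse mapping
}

int main() {
  for (uint64_t D : {0, 90, 180, 270})
    assert(decode(encodeEven(D), 90, 0) == D);   // complexrotateop
  for (uint64_t D : {90, 270})
    assert(decode(encodeOdd(D), 180, 90) == D);  // complexrotateopodd
}
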
-def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; -def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; - // "neg" is and alias for "rsb rd, rn, #0" def : tInstAlias<"neg${s}${p} $Rd, $Rm", (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 00517aeb03298..d8cfdd9180efb 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -59,6 +59,7 @@ class ARMInstructionSelector : public InstructionSelector { bool selectGlobal(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const; bool selectSelect(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const; + bool selectShift(unsigned ShiftOpc, MachineInstrBuilder &MIB) const; // Check if the types match and both operands have the expected size and // register bank. @@ -640,6 +641,14 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, return true; } +bool ARMInstructionSelector::selectShift(unsigned ShiftOpc, + MachineInstrBuilder &MIB) const { + MIB->setDesc(TII.get(ARM::MOVsr)); + MIB.addImm(ShiftOpc); + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + bool ARMInstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -766,6 +775,13 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { ARM::FPRRegBankID, Size); return selectCmp(Helper, MIB, MRI); } + case G_LSHR: + return selectShift(ARM_AM::ShiftOpc::lsr, MIB); + case G_ASHR: + return selectShift(ARM_AM::ShiftOpc::asr, MIB); + case G_SHL: { + return selectShift(ARM_AM::ShiftOpc::lsl, MIB); + } case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 8185f8acc9213..695e0f6326d07 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -80,6 +80,9 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, 1, Ty}, Legal); } + for (unsigned Op : {G_ASHR, G_LSHR, G_SHL}) + setAction({Op, s32}, Legal); + setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s32}, Legal); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 47e4956224668..4aa7e1503427e 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1909,6 +1909,17 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { MO.setReg(ARM::PC); PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI); MBB.erase(MBBI); + // We now restore LR into PC so it is not live-out of the return block + // anymore: Clear the CSI Restored bit. 
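The three generic shifts handled in the instruction-selector hunk above all select to the same register-shifted move (ARM::MOVsr), distinguished only by the ARM_AM shift-kind immediate; a sketch of that mapping and of the semantic distinction it preserves (illustrative enums, not the real opcode values):

#include <cassert>
#include <cstdint>

enum class GShift { Shl, LShr, AShr };
enum class ShiftKind { Lsl, Lsr, Asr };

static ShiftKind shiftKindFor(GShift Op) {
  switch (Op) {
  case GShift::Shl:  return ShiftKind::Lsl;
  case GShift::LShr: return ShiftKind::Lsr;
  case GShift::AShr: return ShiftKind::Asr;
  }
  return ShiftKind::Lsl;  // unreachable, keeps compilers quiet
}

int main() {
  assert(shiftKindFor(GShift::AShr) == ShiftKind::Asr);
  // The lsr/asr distinction the mapping preserves: zero-fill vs sign-fill.
  uint32_t U = 0x80000004u;
  assert((U >> 2) == 0x20000001u);
  assert((int32_t(U) >> 2) == int32_t(0xE0000001u));
}
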
+ MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo(); + // CSI should be fixed after PrologEpilog Insertion + assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid"); + for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { + if (Info.getReg() == ARM::LR) { + Info.setRestored(false); + break; + } + } return true; } } diff --git a/lib/Target/ARM/ARMMacroFusion.cpp b/lib/Target/ARM/ARMMacroFusion.cpp index 1b6e97c28d453..a34ed2cb5a25d 100644 --- a/lib/Target/ARM/ARMMacroFusion.cpp +++ b/lib/Target/ARM/ARMMacroFusion.cpp @@ -31,7 +31,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, // Assume wildcards for unspecified instrs. unsigned FirstOpcode = FirstMI ? FirstMI->getOpcode() - : static_cast(ARM::INSTRUCTION_LIST_END); + : static_cast(ARM::INSTRUCTION_LIST_END); unsigned SecondOpcode = SecondMI.getOpcode(); if (ST.hasFuseAES()) diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index 2400e1af246b8..c01cc064e1a57 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -218,6 +218,9 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_AND: case G_OR: case G_XOR: + case G_LSHR: + case G_ASHR: + case G_SHL: case G_SDIV: case G_UDIV: case G_SEXT: diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 137539134dfbf..14526b777c70a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -213,6 +213,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), let AltOrderSelect = [{ return 1 + MF.getSubtarget().isThumb1Only(); }]; + let DiagnosticString = "operand must be a register in range [r0, r15]"; } // GPRs without the PC. Some ARM instructions do not allow the PC in @@ -223,6 +224,7 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { let AltOrderSelect = [{ return 1 + MF.getSubtarget().isThumb1Only(); }]; + let DiagnosticString = "operand must be a register in range [r0, r14]"; } // GPRs without the PC but with APSR. Some instructions allow accessing the @@ -233,6 +235,7 @@ def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV) let AltOrderSelect = [{ return 1 + MF.getSubtarget().isThumb1Only(); }]; + let DiagnosticString = "operand must be a register in range [r0, r14] or apsr_nzcv"; } // GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the @@ -240,7 +243,9 @@ def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV) // FIXME: It would be better to not use this at all and refactor the // instructions to not have SP an an explicit argument. That makes // frame index resolution a bit trickier, though. -def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>; +def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)> { + let DiagnosticString = "operand must be a register sp"; +} // restricted GPR register class. Many Thumb2 instructions allow the full // register range for operands, but have undefined behaviours when PC @@ -251,18 +256,23 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> { let AltOrderSelect = [{ return 1 + MF.getSubtarget().isThumb1Only(); }]; + let DiagnosticType = "rGPR"; } // Thumb registers are R0-R7 normally. Some instructions can still use // the general GPR register class above (MOV, e.g.) 
-def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>; +def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)> { + let DiagnosticString = "operand must be a register in range [r0, r7]"; +} // Thumb registers R0-R7 and the PC. Some instructions like TBB or THH allow // the PC to be used as a destination operand as well. def tGPRwithpc : RegisterClass<"ARM", [i32], 32, (add tGPR, PC)>; // The high registers in thumb mode, R8-R15. -def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>; +def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)> { + let DiagnosticString = "operand must be a register in range [r8, r15]"; +} // For tail calls, we can't use callee-saved registers, as they are restored // to the saved value before the tail call, which would clobber a call address. @@ -294,11 +304,14 @@ def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> { let AltOrderSelect = [{ return 1 + MF.getSubtarget().useStride4VFPs(MF); }]; + let DiagnosticString = "operand must be a register in range [s0, s31]"; } // Subset of SPR which can be used as a source of NEON scalars for 16-bit // operations -def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>; +def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)> { + let DiagnosticString = "operand must be a register in range [s0, s15]"; +} // Scalar double precision floating point / generic 64-bit vector register // class. @@ -313,17 +326,22 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 6 let AltOrderSelect = [{ return 1 + MF.getSubtarget().useStride4VFPs(MF); }]; + let DiagnosticType = "DPR"; } // Subset of DPR that are accessible with VFP2 (and so that also have // 32-bit SPR subregs). def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64, - (trunc DPR, 16)>; + (trunc DPR, 16)> { + let DiagnosticString = "operand must be a register in range [d0, d15]"; +} // Subset of DPR which can be used as a source of NEON scalars for 16-bit // operations def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64, - (trunc DPR, 8)>; + (trunc DPR, 8)> { + let DiagnosticString = "operand must be a register in range [d0, d7]"; +} // Generic 128-bit vector register class. def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128, @@ -331,15 +349,20 @@ def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16] // Allocate non-VFP2 aliases Q8-Q15 first. let AltOrders = [(rotl QPR, 8)]; let AltOrderSelect = [{ return 1; }]; + let DiagnosticString = "operand must be a register in range [q0, q15]"; } // Subset of QPR that have 32-bit SPR subregs. def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (trunc QPR, 8)>; + 128, (trunc QPR, 8)> { + let DiagnosticString = "operand must be a register in range [q0, q7]"; +} // Subset of QPR that have DPR_8 and SPR_8 subregs. def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (trunc QPR, 4)>; + 128, (trunc QPR, 4)> { + let DiagnosticString = "operand must be a register in range [q0, q3]"; +} // Pseudo-registers representing odd-even pairs of D registers. The even-odd // pairs are already represented by the Q registers. 
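Two of the register classes above get a DiagnosticType rather than a fixed DiagnosticString because the right wording depends on subtarget features: rGPR gains sp on ARMv8, and DPR spans d0-d15 on D16 FPUs but d0-d31 otherwise. A condensed sketch of the selection done later in this patch by getCustomOperandDiag:

#include <cstdio>

static const char *rGPRDiag(bool HasV8Ops) {
  return HasV8Ops ? "operand must be a register in range [r0, r14]"
                  : "operand must be a register in range [r0, r12] or r14";
}

static const char *dprDiag(bool HasD16) {
  return HasD16 ? "operand must be a register in range [d0, d15]"
                : "operand must be a register in range [d0, d31]";
}

int main() {
  std::printf("%s\n%s\n", rGPRDiag(false), dprDiag(true));
}
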
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e440957269f6c..a8546ec40a6ba 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -150,7 +150,9 @@ void ARMSubtarget::initializeEnvironment() { // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this // directly from it, but we can try to make sure they're consistent when both // available. - UseSjLjEH = isTargetDarwin() && !isTargetWatchABI(); + UseSjLjEH = (isTargetDarwin() && !isTargetWatchABI() && + Options.ExceptionModel == ExceptionHandling::None) || + Options.ExceptionModel == ExceptionHandling::SjLj; assert((!TM.getMCAsmInfo() || (TM.getMCAsmInfo()->getExceptionHandlingType() == ExceptionHandling::SjLj) == UseSjLjEH) && diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 9a191e4224775..39b8df401aa57 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -310,7 +310,14 @@ namespace { class ARMPassConfig : public TargetPassConfig { public: ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + if (TM.getOptLevel() != CodeGenOpt::None) { + ARMGenSubtargetInfo STI(TM.getTargetTriple(), TM.getTargetCPU(), + TM.getTargetFeatureString()); + if (STI.hasFeature(ARM::FeatureUseMISched)) + substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); + } + } ARMBaseTargetMachine &getARMTargetMachine() const { return getTM(); diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 4395a319864ce..ca80d6f53f4c7 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -586,34 +586,53 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, if (!ST->isMClass()) return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP); - // Only enable on Thumb-2 targets for simple loops. - if (!ST->isThumb2() || L->getNumBlocks() != 1) - return; - // Disable loop unrolling for Oz and Os. UP.OptSizeThreshold = 0; UP.PartialOptSizeThreshold = 0; - BasicBlock *BB = L->getLoopLatch(); - if (BB->getParent()->optForSize()) + if (L->getHeader()->getParent()->optForSize()) + return; + + // Only enable on Thumb-2 targets. + if (!ST->isThumb2()) + return; + + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + DEBUG(dbgs() << "Loop has:\n" + << "Blocks: " << L->getNumBlocks() << "\n" + << "Exit blocks: " << ExitingBlocks.size() << "\n"); + + // Only allow another exit other than the latch. This acts as an early exit + // as it mirrors the profitability calculation of the runtime unroller. + if (ExitingBlocks.size() > 2) + return; + + // Limit the CFG of the loop body for targets with a branch predictor. + // Allowing 4 blocks permits if-then-else diamonds in the body. + if (ST->hasBranchPredictor() && L->getNumBlocks() > 4) return; // Scan the loop: don't unroll loops with calls as this could prevent // inlining. 
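Taken together, the gates just added form a small profitability filter that runs before any cost is computed; restated as one predicate over an illustrative summary struct (not the Loop/Subtarget API):

#include <cassert>

struct LoopShape {
  bool OptForSize = false;
  bool IsThumb2 = true;
  bool HasBranchPredictor = true;
  unsigned NumBlocks = 1;
  unsigned NumExitingBlocks = 1;
};

static bool worthConsideringForUnroll(const LoopShape &L) {
  if (L.OptForSize || !L.IsThumb2)
    return false;
  if (L.NumExitingBlocks > 2)      // at most the latch plus one early exit
    return false;
  if (L.HasBranchPredictor && L.NumBlocks > 4)
    return false;                  // allow up to an if-then-else diamond
  return true;
}

int main() {
  assert(worthConsideringForUnroll({}));
  assert(!worthConsideringForUnroll({false, true, true, 5, 1}));  // too many blocks
  assert(!worthConsideringForUnroll({false, true, true, 2, 3}));  // too many exits
}
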
unsigned Cost = 0; - for (auto &I : *BB) { - if (isa(I) || isa(I)) { - ImmutableCallSite CS(&I); - if (const Function *F = CS.getCalledFunction()) { - if (!isLoweredToCall(F)) - continue; + for (auto *BB : L->getBlocks()) { + for (auto &I : *BB) { + if (isa(I) || isa(I)) { + ImmutableCallSite CS(&I); + if (const Function *F = CS.getCalledFunction()) { + if (!isLoweredToCall(F)) + continue; + } + return; } - return; + SmallVector Operands(I.value_op_begin(), + I.value_op_end()); + Cost += getUserCost(&I, Operands); } - SmallVector Operands(I.value_op_begin(), - I.value_op_end()); - Cost += getUserCost(&I, Operands); } + DEBUG(dbgs() << "Cost of loop: " << Cost << "\n"); + UP.Partial = true; UP.Runtime = true; UP.UnrollRemainder = true; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 287ed20988fad..5ad7f72f6dabb 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -63,6 +64,8 @@ #include #include +#define DEBUG_TYPE "asm-parser" + using namespace llvm; namespace { @@ -162,7 +165,6 @@ class UnwindContext { }; class ARMAsmParser : public MCTargetAsmParser { - const MCInstrInfo &MII; const MCRegisterInfo *MRI; UnwindContext UC; @@ -558,6 +560,7 @@ class ARMAsmParser : public MCTargetAsmParser { bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands); bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands); bool isITBlockTerminator(MCInst &Inst) const; + void fixupGNULDRDAlias(StringRef Mnemonic, OperandVector &Operands); public: enum ARMMatchResultTy { @@ -574,7 +577,7 @@ class ARMAsmParser : public MCTargetAsmParser { ARMAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, STI), MII(MII), UC(Parser) { + : MCTargetAsmParser(Options, STI, MII), UC(Parser) { MCAsmParserExtension::Initialize(Parser); // Cache the MCRegisterInfo. 
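The asm-parser hunks that follow rework error reporting around a list of near-misses instead of a single ErrorInfo index. The eventual policy (ReportNearMisses, at the end of this file's diff) degrades gracefully with the number of surviving candidates: none gives a generic error, one is reported directly, several produce a header error plus one note each. That dispatch, sketched over plain strings:

#include <cstdio>
#include <string>
#include <vector>

static void report(const std::vector<std::string> &Misses) {
  if (Misses.empty()) {
    std::puts("error: invalid instruction");
  } else if (Misses.size() == 1) {
    std::printf("error: %s\n", Misses[0].c_str());
  } else {
    std::puts("error: invalid instruction, any one of the following would fix this:");
    for (const std::string &M : Misses)
      std::printf("note: %s\n", M.c_str());
  }
}

int main() {
  report({});
  report({"instruction variant requires Thumb2"});
  report({"operand must be a register in range [r0, r7]",
          "operand must be an immediate in the range [0,7]"});
}
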
@@ -608,8 +611,23 @@ class ARMAsmParser : public MCTargetAsmParser { uint64_t &ErrorInfo, bool MatchingInlineAsm) override; unsigned MatchInstruction(OperandVector &Operands, MCInst &Inst, - uint64_t &ErrorInfo, bool MatchingInlineAsm, - bool &EmitInITBlock, MCStreamer &Out); + SmallVectorImpl &NearMisses, + bool MatchingInlineAsm, bool &EmitInITBlock, + MCStreamer &Out); + + struct NearMissMessage { + SMLoc Loc; + SmallString<128> Message; + }; + + const char *getCustomOperandDiag(ARMMatchResultTy MatchError); + + void FilterNearMisses(SmallVectorImpl &NearMissesIn, + SmallVectorImpl &NearMissesOut, + SMLoc IDLoc, OperandVector &Operands); + void ReportNearMisses(SmallVectorImpl &NearMisses, SMLoc IDLoc, + OperandVector &Operands); + void onLabelParsed(MCSymbol *Symbol) override; }; @@ -1756,6 +1774,10 @@ class ARMOperand : public MCParsedAsmOperand { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 2; } + bool isVectorIndex64() const { + if (Kind != k_VectorIndex) return false; + return VectorIndex.Val < 1; + } bool isNEONi8splat() const { if (!isImm()) return false; @@ -1885,6 +1907,17 @@ class ARMOperand : public MCParsedAsmOperand { return true; } + template + bool isComplexRotation() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + uint64_t Value = CE->getValue(); + + return (Value % Angle == Remainder && Value <= 270); + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. if (!Expr) @@ -2628,6 +2661,11 @@ class ARMOperand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createImm(getVectorIndex())); } + void addVectorIndex64Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(getVectorIndex())); + } + void addNEONi8splatOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. @@ -2740,6 +2778,18 @@ class ARMOperand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createImm(Imm | 0x1e00)); } + void addComplexRotationEvenOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast(getImm()); + Inst.addOperand(MCOperand::createImm(CE->getValue() / 90)); + } + + void addComplexRotationOddOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast(getImm()); + Inst.addOperand(MCOperand::createImm((CE->getValue() - 90) / 180)); + } + void print(raw_ostream &OS) const override; static std::unique_ptr CreateITMask(unsigned Mask, SMLoc S) { @@ -3355,13 +3405,13 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { /// parse for a specific register type. 
bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) { MCAsmParser &Parser = getParser(); - const AsmToken &RegTok = Parser.getTok(); + SMLoc RegStartLoc = Parser.getTok().getLoc(); + SMLoc RegEndLoc = Parser.getTok().getEndLoc(); int RegNo = tryParseRegister(); if (RegNo == -1) return true; - Operands.push_back(ARMOperand::CreateReg(RegNo, RegTok.getLoc(), - RegTok.getEndLoc())); + Operands.push_back(ARMOperand::CreateReg(RegNo, RegStartLoc, RegEndLoc)); const AsmToken &ExclaimTok = Parser.getTok(); if (ExclaimTok.is(AsmToken::Exclaim)) { @@ -5047,7 +5097,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, SMLoc Loc = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) - return true; + return Error(Loc, "illegal shift operator"); StringRef ShiftName = Tok.getString(); if (ShiftName == "lsl" || ShiftName == "LSL" || ShiftName == "asl" || ShiftName == "ASL") @@ -5432,7 +5482,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" || Mnemonic == "bxns" || Mnemonic == "blxns" || - Mnemonic == "vudot" || Mnemonic == "vsdot") + Mnemonic == "vudot" || Mnemonic == "vsdot" || + Mnemonic == "vcmla" || Mnemonic == "vcadd") return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -5521,7 +5572,8 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || (FullInst.startswith("vmull") && FullInst.endswith(".p64")) || Mnemonic == "vmovx" || Mnemonic == "vins" || - Mnemonic == "vudot" || Mnemonic == "vsdot") { + Mnemonic == "vudot" || Mnemonic == "vsdot" || + Mnemonic == "vcmla" || Mnemonic == "vcadd") { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { @@ -5810,6 +5862,52 @@ static bool RequiresVFPRegListValidation(StringRef Inst, return false; } +// The GNU assembler has aliases of ldrd and strd with the second register +// omitted. We don't have a way to do that in tablegen, so fix it up here. +// +// We have to be careful to not emit an invalid Rt2 here, because the rest of +// the assembly parser could then generate confusing diagnostics referring to +// it. If we do find anything that prevents us from doing the transformation, +// we bail out, and let the assembly parser report an error on the instruction +// as it is written. +void ARMAsmParser::fixupGNULDRDAlias(StringRef Mnemonic, + OperandVector &Operands) { + if (Mnemonic != "ldrd" && Mnemonic != "strd") + return; + if (Operands.size() < 4) + return; + + ARMOperand &Op2 = static_cast(*Operands[2]); + ARMOperand &Op3 = static_cast(*Operands[3]); + + if (!Op2.isReg()) + return; + if (!Op3.isMem()) + return; + + const MCRegisterClass &GPR = MRI->getRegClass(ARM::GPRRegClassID); + if (!GPR.contains(Op2.getReg())) + return; + + unsigned RtEncoding = MRI->getEncodingValue(Op2.getReg()); + if (!isThumb() && (RtEncoding & 1)) { + // In ARM mode, the registers must be from an aligned pair; this + // restriction does not apply in Thumb mode.
+ return; + } + if (Op2.getReg() == ARM::PC) + return; + unsigned PairedReg = GPR.getRegister(RtEncoding + 1); + if (!PairedReg || PairedReg == ARM::PC || + (PairedReg == ARM::SP && !hasV8Ops())) + return; + + Operands.insert( + Operands.begin() + 3, + ARMOperand::CreateReg(PairedReg, Op2.getStartLoc(), Op2.getEndLoc())); + return; +} + /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { @@ -6052,25 +6150,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } - // GNU Assembler extension (compatibility) - if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { - ARMOperand &Op2 = static_cast(*Operands[2]); - ARMOperand &Op3 = static_cast(*Operands[3]); - if (Op3.isMem()) { - assert(Op2.isReg() && "expected register argument"); - - unsigned SuperReg = MRI->getMatchingSuperReg( - Op2.getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID)); - - assert(SuperReg && "expected register pair"); - - unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1); - - Operands.insert( - Operands.begin() + 3, - ARMOperand::CreateReg(PairedReg, Op2.getStartLoc(), Op2.getEndLoc())); - } - } + // GNU Assembler extension (compatibility). + fixupGNULDRDAlias(Mnemonic, Operands); // FIXME: As said above, this is all a pretty gross hack. This instruction // does not fit with other "subs" and tblgen. @@ -8875,19 +8956,19 @@ bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const { } unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, - uint64_t &ErrorInfo, + SmallVectorImpl &NearMisses, bool MatchingInlineAsm, bool &EmitInITBlock, MCStreamer &Out) { // If we can't use an implicit IT block here, just match as normal. if (inExplicitITBlock() || !isThumbTwo() || !useImplicitITThumb()) - return MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + return MatchInstructionImpl(Operands, Inst, &NearMisses, MatchingInlineAsm); // Try to match the instruction in an extension of the current IT block (if // there is one). if (inImplicitITBlock()) { extendImplicitITBlock(ITState.Cond); - if (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm) == + if (MatchInstructionImpl(Operands, Inst, nullptr, MatchingInlineAsm) == Match_Success) { // The match succeded, but we still have to check that the instruction is // valid in this implicit IT block. @@ -8913,7 +8994,7 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, // Finish the current IT block, and try to match outside any IT block. flushPendingInstructions(Out); unsigned PlainMatchResult = - MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + MatchInstructionImpl(Operands, Inst, &NearMisses, MatchingInlineAsm); if (PlainMatchResult == Match_Success) { const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); if (MCID.isPredicable()) { @@ -8940,7 +9021,7 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, // condition, so we create an IT block with a dummy condition, and fix it up // once we know the actual condition. 
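fixupGNULDRDAlias only ever fills in an Rt2 that the matcher could legally accept; its pairing rule, condensed into one helper (register encodings 0-15; an illustrative sketch, not the MC API):

#include <cassert>

// Returns the implied second register's encoding, or -1 when the alias must
// be left alone so the matcher reports the error on the code as written.
static int impliedRt2(unsigned RtEnc, bool IsThumb, bool HasV8Ops) {
  if (!IsThumb && (RtEnc & 1))
    return -1;       // ARM mode requires an even/odd aligned pair
  if (RtEnc == 15)
    return -1;       // pc can never be Rt
  unsigned Rt2 = RtEnc + 1;
  if (Rt2 == 15)
    return -1;       // never pair into pc
  if (Rt2 == 13 && !HasV8Ops)
    return -1;       // sp as Rt2 only became valid with ARMv8
  return int(Rt2);
}

int main() {
  assert(impliedRt2(0, false, false) == 1);   // ldrd r0, [...] -> r0, r1
  assert(impliedRt2(1, false, false) == -1);  // odd Rt rejected in ARM mode
  assert(impliedRt2(1, true, false) == 2);    // but allowed in Thumb
  assert(impliedRt2(12, true, false) == -1);  // pairing into sp needs v8
}
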
startImplicitITBlock(); - if (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm) == + if (MatchInstructionImpl(Operands, Inst, nullptr, MatchingInlineAsm) == Match_Success) { const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); if (MCID.isPredicable()) { @@ -8959,7 +9040,8 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, return PlainMatchResult; } -std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS); +static std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS, + unsigned VariantID = 0); static const char *getSubtargetFeatureName(uint64_t Val); bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -8970,16 +9052,10 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, unsigned MatchResult; bool PendConditionalInstruction = false; - MatchResult = MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm, + SmallVector NearMisses; + MatchResult = MatchInstruction(Operands, Inst, NearMisses, MatchingInlineAsm, PendConditionalInstruction, Out); - SMLoc ErrorLoc; - if (ErrorInfo < Operands.size()) { - ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - } - switch (MatchResult) { case Match_Success: // Context sensitive operand constraints aren't handled by the matcher, @@ -9027,33 +9103,9 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, Out.EmitInstruction(Inst, getSTI()); } return false; - case Match_MissingFeature: { - assert(ErrorInfo && "Unknown missing feature!"); - // Special case the error message for the very common case where only - // a single subtarget feature is missing (Thumb vs. ARM, e.g.). - std::string Msg = "instruction requires:"; - uint64_t Mask = 1; - for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { - if (ErrorInfo & Mask) { - Msg += " "; - Msg += getSubtargetFeatureName(ErrorInfo & Mask); - } - Mask <<= 1; - } - return Error(IDLoc, Msg); - } - case Match_InvalidOperand: { - SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0ULL) { - if (ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); - - ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - } - - return Error(ErrorLoc, "invalid operand for instruction"); - } + case Match_NearMisses: + ReportNearMisses(NearMisses, IDLoc, Operands); + return true; case Match_MnemonicFail: { uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); std::string Suggestion = ARMMnemonicSpellCheck( @@ -9061,100 +9113,6 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "invalid instruction" + Suggestion, ((ARMOperand &)*Operands[0]).getLocRange()); } - case Match_RequiresNotITBlock: - return Error(IDLoc, "flag setting instruction only valid outside IT block"); - case Match_RequiresITBlock: - return Error(IDLoc, "instruction only valid inside IT block"); - case Match_RequiresV6: - return Error(IDLoc, "instruction variant requires ARMv6 or later"); - case Match_RequiresThumb2: - return Error(IDLoc, "instruction variant requires Thumb2"); - case Match_RequiresV8: - return Error(IDLoc, "instruction variant requires ARMv8 or later"); - case Match_RequiresFlagSetting: - return Error(IDLoc, "no flag-preserving variant of this instruction available"); - case Match_ImmRange0_1: - return Error(ErrorLoc, "immediate operand must be in the range [0,1]"); - case Match_ImmRange0_3: - return Error(ErrorLoc, 
"immediate operand must be in the range [0,3]"); - case Match_ImmRange0_7: - return Error(ErrorLoc, "immediate operand must be in the range [0,7]"); - case Match_ImmRange0_15: - return Error(ErrorLoc, "immediate operand must be in the range [0,15]"); - case Match_ImmRange0_31: - return Error(ErrorLoc, "immediate operand must be in the range [0,31]"); - case Match_ImmRange0_32: - return Error(ErrorLoc, "immediate operand must be in the range [0,32]"); - case Match_ImmRange0_63: - return Error(ErrorLoc, "immediate operand must be in the range [0,63]"); - case Match_ImmRange0_239: - return Error(ErrorLoc, "immediate operand must be in the range [0,239]"); - case Match_ImmRange0_255: - return Error(ErrorLoc, "immediate operand must be in the range [0,255]"); - case Match_ImmRange0_4095: - return Error(ErrorLoc, "immediate operand must be in the range [0,4095]"); - case Match_ImmRange0_65535: - return Error(ErrorLoc, "immediate operand must be in the range [0,65535]"); - case Match_ImmRange1_7: - return Error(ErrorLoc, "immediate operand must be in the range [1,7]"); - case Match_ImmRange1_8: - return Error(ErrorLoc, "immediate operand must be in the range [1,8]"); - case Match_ImmRange1_15: - return Error(ErrorLoc, "immediate operand must be in the range [1,15]"); - case Match_ImmRange1_16: - return Error(ErrorLoc, "immediate operand must be in the range [1,16]"); - case Match_ImmRange1_31: - return Error(ErrorLoc, "immediate operand must be in the range [1,31]"); - case Match_ImmRange1_32: - return Error(ErrorLoc, "immediate operand must be in the range [1,32]"); - case Match_ImmRange1_64: - return Error(ErrorLoc, "immediate operand must be in the range [1,64]"); - case Match_ImmRange8_8: - return Error(ErrorLoc, "immediate operand must be 8."); - case Match_ImmRange16_16: - return Error(ErrorLoc, "immediate operand must be 16."); - case Match_ImmRange32_32: - return Error(ErrorLoc, "immediate operand must be 32."); - case Match_ImmRange256_65535: - return Error(ErrorLoc, "immediate operand must be in the range [255,65535]"); - case Match_ImmRange0_16777215: - return Error(ErrorLoc, "immediate operand must be in the range [0,0xffffff]"); - case Match_AlignedMemoryRequiresNone: - case Match_DupAlignedMemoryRequiresNone: - case Match_AlignedMemoryRequires16: - case Match_DupAlignedMemoryRequires16: - case Match_AlignedMemoryRequires32: - case Match_DupAlignedMemoryRequires32: - case Match_AlignedMemoryRequires64: - case Match_DupAlignedMemoryRequires64: - case Match_AlignedMemoryRequires64or128: - case Match_DupAlignedMemoryRequires64or128: - case Match_AlignedMemoryRequires64or128or256: - { - SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getAlignmentLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - switch (MatchResult) { - default: - llvm_unreachable("Missing Match_Aligned type"); - case Match_AlignedMemoryRequiresNone: - case Match_DupAlignedMemoryRequiresNone: - return Error(ErrorLoc, "alignment must be omitted"); - case Match_AlignedMemoryRequires16: - case Match_DupAlignedMemoryRequires16: - return Error(ErrorLoc, "alignment must be 16 or omitted"); - case Match_AlignedMemoryRequires32: - case Match_DupAlignedMemoryRequires32: - return Error(ErrorLoc, "alignment must be 32 or omitted"); - case Match_AlignedMemoryRequires64: - case Match_DupAlignedMemoryRequires64: - return Error(ErrorLoc, "alignment must be 64 or omitted"); - case Match_AlignedMemoryRequires64or128: - case Match_DupAlignedMemoryRequires64or128: - return Error(ErrorLoc, "alignment must be 64, 128 or omitted"); - 
case Match_AlignedMemoryRequires64or128or256: - return Error(ErrorLoc, "alignment must be 64, 128, 256 or omitted"); - } - } } llvm_unreachable("Implement any new match types added!"); @@ -10163,8 +10121,206 @@ extern "C" void LLVMInitializeARMAsmParser() { #define GET_REGISTER_MATCHER #define GET_SUBTARGET_FEATURE_NAME #define GET_MATCHER_IMPLEMENTATION +#define GET_MNEMONIC_SPELL_CHECKER #include "ARMGenAsmMatcher.inc" +// Some diagnostics need to vary with subtarget features, so they are handled +// here. For example, the DPR class has either 16 or 32 registers, depending +// on the FPU available. +const char * +ARMAsmParser::getCustomOperandDiag(ARMMatchResultTy MatchError) { + switch (MatchError) { + // rGPR contains sp starting with ARMv8. + case Match_rGPR: + return hasV8Ops() ? "operand must be a register in range [r0, r14]" + : "operand must be a register in range [r0, r12] or r14"; + // DPR contains 16 registers for some FPUs, and 32 for others. + case Match_DPR: + return hasD16() ? "operand must be a register in range [d0, d15]" + : "operand must be a register in range [d0, d31]"; + + // For all other diags, use the static string from tablegen. + default: + return getMatchKindDiag(MatchError); + } +} + +// Process the list of near-misses, throwing away ones we don't want to report +// to the user, and converting the rest to a source location and string that +// should be reported. +void +ARMAsmParser::FilterNearMisses(SmallVectorImpl &NearMissesIn, + SmallVectorImpl &NearMissesOut, + SMLoc IDLoc, OperandVector &Operands) { + // TODO: If operand didn't match, sub in a dummy one and run target + // predicate, so that we can avoid reporting near-misses that are invalid? + // TODO: Many operand types don't have SuperClasses set, so we report + // redundant ones. + // TODO: Some operands are superclasses of registers (e.g. + // MCK_RegShiftedImm), we don't have any way to represent that currently. + // TODO: This is not all ARM-specific, can some of it be factored out? + + // Record some information about near-misses that we have already seen, so + // that we can avoid reporting redundant ones. For example, if there are + // variants of an instruction that take 8- and 16-bit immediates, we want + // to only report the widest one. + std::multimap OperandMissesSeen; + SmallSet FeatureMissesSeen; + + // Process the near-misses in reverse order, so that we see more general ones + // first, and so can avoid emitting more specific ones. + for (NearMissInfo &I : reverse(NearMissesIn)) { + switch (I.getKind()) { + case NearMissInfo::NearMissOperand: { + SMLoc OperandLoc = + ((ARMOperand &)*Operands[I.getOperandIndex()]).getStartLoc(); + const char *OperandDiag = + getCustomOperandDiag((ARMMatchResultTy)I.getOperandError()); + + // If we have already emitted a message for a superclass, don't also report + // the sub-class. We consider all operand classes that we don't have a + // specialised diagnostic for to be equal for the purpose of this check, + // so that we don't report the generic error multiple times on the same + // operand. + unsigned DupCheckMatchClass = OperandDiag ?
+      unsigned DupCheckMatchClass = OperandDiag ? I.getOperandClass() : ~0U;
+      auto PrevReports = OperandMissesSeen.equal_range(I.getOperandIndex());
+      if (std::any_of(PrevReports.first, PrevReports.second,
+                      [DupCheckMatchClass](
+                          const std::pair<unsigned, unsigned> Pair) {
+            if (DupCheckMatchClass == ~0U || Pair.second == ~0U)
+              return Pair.second == DupCheckMatchClass;
+            else
+              return isSubclass((MatchClassKind)DupCheckMatchClass,
+                                (MatchClassKind)Pair.second);
+          }))
+        break;
+      OperandMissesSeen.insert(
+          std::make_pair(I.getOperandIndex(), DupCheckMatchClass));
+
+      NearMissMessage Message;
+      Message.Loc = OperandLoc;
+      if (OperandDiag) {
+        Message.Message = OperandDiag;
+      } else if (I.getOperandClass() == InvalidMatchClass) {
+        Message.Message = "too many operands for instruction";
+      } else {
+        Message.Message = "invalid operand for instruction";
+        DEBUG(dbgs() << "Missing diagnostic string for operand class " <<
              getMatchClassName((MatchClassKind)I.getOperandClass())
              << I.getOperandClass() << ", error " << I.getOperandError()
              << ", opcode " << MII.getName(I.getOpcode()) << "\n");
+      }
+      NearMissesOut.emplace_back(Message);
+      break;
+    }
+    case NearMissInfo::NearMissFeature: {
+      uint64_t MissingFeatures = I.getFeatures();
+      // Don't report the same set of features twice.
+      if (FeatureMissesSeen.count(MissingFeatures))
+        break;
+      FeatureMissesSeen.insert(MissingFeatures);
+
+      // Special case: don't report a feature set which includes arm-mode for
+      // targets that don't have ARM mode.
+      if ((MissingFeatures & Feature_IsARM) && !hasARM())
+        break;
+      // Don't report any near-misses that both require switching instruction
+      // set, and adding other subtarget features.
+      if (isThumb() && (MissingFeatures & Feature_IsARM) &&
+          (MissingFeatures & ~Feature_IsARM))
+        break;
+      if (!isThumb() && (MissingFeatures & Feature_IsThumb) &&
+          (MissingFeatures & ~Feature_IsThumb))
+        break;
+      if (!isThumb() && (MissingFeatures & Feature_IsThumb2) &&
+          (MissingFeatures & ~(Feature_IsThumb2 | Feature_IsThumb)))
+        break;
+
+      NearMissMessage Message;
+      Message.Loc = IDLoc;
+      raw_svector_ostream OS(Message.Message);
+
+      OS << "instruction requires:";
+      uint64_t Mask = 1;
+      for (unsigned MaskPos = 0; MaskPos < (sizeof(MissingFeatures) * 8 - 1);
+           ++MaskPos) {
+        if (MissingFeatures & Mask) {
+          OS << " " << getSubtargetFeatureName(MissingFeatures & Mask);
+        }
+        Mask <<= 1;
+      }
+      NearMissesOut.emplace_back(Message);
+
+      break;
+    }
+    case NearMissInfo::NearMissPredicate: {
+      NearMissMessage Message;
+      Message.Loc = IDLoc;
+      switch (I.getPredicateError()) {
+      case Match_RequiresNotITBlock:
+        Message.Message = "flag setting instruction only valid outside IT block";
+        break;
+      case Match_RequiresITBlock:
+        Message.Message = "instruction only valid inside IT block";
+        break;
+      case Match_RequiresV6:
+        Message.Message = "instruction variant requires ARMv6 or later";
+        break;
+      case Match_RequiresThumb2:
+        Message.Message = "instruction variant requires Thumb2";
+        break;
+      case Match_RequiresV8:
+        Message.Message = "instruction variant requires ARMv8 or later";
+        break;
+      case Match_RequiresFlagSetting:
+        Message.Message = "no flag-preserving variant of this instruction available";
+        break;
+      case Match_InvalidOperand:
+        Message.Message = "invalid operand for instruction";
+        break;
+      default:
+        llvm_unreachable("Unhandled target predicate error");
+        break;
+      }
+      NearMissesOut.emplace_back(Message);
+      break;
+    }
+    case NearMissInfo::NearMissTooFewOperands: {
+      SMLoc EndLoc = ((ARMOperand &)*Operands.back()).getEndLoc();
+      NearMissesOut.emplace_back(
+          NearMissMessage{ EndLoc, StringRef("too few operands for instruction") });
+      break;
+    }
+    case NearMissInfo::NoNearMiss:
+      // This should never leave the matcher.
+      llvm_unreachable("not a near-miss");
+      break;
+    }
+  }
+}
+
+void ARMAsmParser::ReportNearMisses(SmallVectorImpl<NearMissInfo> &NearMisses,
+                                    SMLoc IDLoc, OperandVector &Operands) {
+  SmallVector<NearMissMessage, 4> Messages;
+  FilterNearMisses(NearMisses, Messages, IDLoc, Operands);
+
+  if (Messages.size() == 0) {
+    // No near-misses were found, so the best we can do is "invalid
+    // instruction".
+    Error(IDLoc, "invalid instruction");
+  } else if (Messages.size() == 1) {
+    // One near miss was found, report it as the sole error.
+    Error(Messages[0].Loc, Messages[0].Message);
+  } else {
+    // More than one near miss, so report a generic "invalid instruction"
+    // error, followed by notes for each of the near-misses.
+    Error(IDLoc, "invalid instruction, any one of the following would fix this:");
+    for (auto &M : Messages) {
+      Note(M.Loc, M.Message);
+    }
+  }
+}
+
 // FIXME: This structure should be moved inside ARMTargetParser
 // when we start to table-generate them, and we can use the ARM
 // flags below, that were generated by table-gen.
@@ -10275,7 +10431,7 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
   case MCK_rGPR:
     if (hasV8Ops() && Op.isReg() && Op.getReg() == ARM::SP)
       return Match_Success;
-    break;
+    return Match_rGPR;
   case MCK_GPRPair:
     if (Op.isReg() &&
         MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg()))
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index e3854989c4fae..a29a2eeccfe87 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -322,6 +322,10 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
                                 uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
+                                                       unsigned Val,
+                                                       uint64_t Address,
+                                                       const void *Decoder);
 static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
                                              uint64_t Address, const void *Decoder);
@@ -5215,6 +5219,39 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
   return S;
 }
+static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst,
+                                                       unsigned Insn,
+                                                       uint64_t Address,
+                                                       const void *Decoder) {
+  unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0);
+  Vd |= (fieldFromInstruction(Insn, 22, 1) << 4);
+  unsigned Vn = (fieldFromInstruction(Insn, 16, 4) << 0);
+  Vn |= (fieldFromInstruction(Insn, 7, 1) << 4);
+  unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0);
+  Vm |= (fieldFromInstruction(Insn, 5, 1) << 4);
+  unsigned q = (fieldFromInstruction(Insn, 6, 1) << 0);
+  unsigned rotate = (fieldFromInstruction(Insn, 20, 2) << 0);
+
+  DecodeStatus S = MCDisassembler::Success;
+
+  auto DestRegDecoder = q ? DecodeQPRRegisterClass : DecodeDPRRegisterClass;
+
+  if (!Check(S, DestRegDecoder(Inst, Vd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DestRegDecoder(Inst, Vd, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DestRegDecoder(Inst, Vn, Address, Decoder)))
+    return MCDisassembler::Fail;
+  if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
+    return MCDisassembler::Fail;
+  // The lane index does not have any bits in the encoding, because it can only
+  // be 0.
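+  // [Editor's note: illustrative sketch, not part of the patch.] The
+  // fieldFromInstruction() calls above pull contiguous bit ranges out of the
+  // 32-bit encoding. Assuming the usual shift-and-mask semantics, each call
+  // behaves like this stand-alone helper:
+  //
+  //   static unsigned field(unsigned Insn, unsigned Start, unsigned Width) {
+  //     return (Insn >> Start) & ((1u << Width) - 1);
+  //   }
+  //
+  // so Vd, for example, is assembled from bits [15:12] with bit 22 as its
+  // high bit, matching the D:Vd register numbering of the NEON encodings.
+  // The rotate field extracted above is printed by the printer added further
+  // below (printComplexRotationOp).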
+  Inst.addOperand(MCOperand::createImm(0));
+  Inst.addOperand(MCOperand::createImm(rotate));
+
+  return S;
+}
+
 static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
                               uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
@@ -5303,8 +5340,14 @@ static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val,
   } else
     Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder));
-  Inst.addOperand(MCOperand::createImm(ARMCC::AL));
-  Inst.addOperand(MCOperand::createReg(0));
+  if (featureBits[ARM::ModeThumb]) {
+    Inst.addOperand(MCOperand::createImm(ARMCC::AL));
+    Inst.addOperand(MCOperand::createReg(0));
+  } else {
+    unsigned pred = fieldFromInstruction(Val, 28, 4);
+    if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+      return MCDisassembler::Fail;
+  }
   return S;
 }
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index be6815af2eb2b..4fc67a4f6eb5e 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1535,3 +1535,12 @@ void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
   printRegName(O, MI->getOperand(OpNum).getReg() + 6);
   O << "}";
 }
+
+template <unsigned Angle, unsigned Remainder>
+void ARMInstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo,
+                                            const MCSubtargetInfo &STI,
+                                            raw_ostream &O) {
+  unsigned Val = MI->getOperand(OpNo).getImm();
+  O << "#" << (Val * Angle) + Remainder;
+}
+
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 86873a3a6ccbb..7dc311229cca8 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -231,6 +231,9 @@ class ARMInstPrinter : public MCInstPrinter {
                                  const MCSubtargetInfo &STI, raw_ostream &O);
   void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
                                  const MCSubtargetInfo &STI, raw_ostream &O);
+  template <unsigned Angle, unsigned Remainder>
+  void printComplexRotationOp(const MCInst *MI, unsigned OpNum,
+                              const MCSubtargetInfo &STI, raw_ostream &O);
 };
 } // end namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 3959eab966a84..f472b21543143 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -38,11 +38,9 @@ namespace ARM_AM {
     add
   };
-  static inline const char *getAddrOpcStr(AddrOpc Op) {
-    return Op == sub ? "-" : "";
-  }
+  inline const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; }
-  static inline const char *getShiftOpcStr(ShiftOpc Op) {
+  inline const char *getShiftOpcStr(ShiftOpc Op) {
     switch (Op) {
     default: llvm_unreachable("Unknown shift opc!");
     case ARM_AM::asr: return "asr";
@@ -53,7 +51,7 @@
     }
   }
-  static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
+  inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
     switch (Op) {
     default: llvm_unreachable("Unknown shift opc!");
     case ARM_AM::asr: return 2;
@@ -71,7 +69,7 @@ namespace ARM_AM {
     db
   };
-  static inline const char *getAMSubModeStr(AMSubMode Mode) {
+  inline const char *getAMSubModeStr(AMSubMode Mode) {
     switch (Mode) {
     default: llvm_unreachable("Unknown addressing sub-mode!");
     case ARM_AM::ia: return "ia";
@@ -83,14 +81,14 @@ namespace ARM_AM {
   /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
/// - static inline unsigned rotr32(unsigned Val, unsigned Amt) { + inline unsigned rotr32(unsigned Val, unsigned Amt) { assert(Amt < 32 && "Invalid rotate amount"); return (Val >> Amt) | (Val << ((32-Amt)&31)); } /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits. /// - static inline unsigned rotl32(unsigned Val, unsigned Amt) { + inline unsigned rotl32(unsigned Val, unsigned Amt) { assert(Amt < 32 && "Invalid rotate amount"); return (Val << Amt) | (Val >> ((32-Amt)&31)); } @@ -109,32 +107,24 @@ namespace ARM_AM { // reg, the second is the shift amount (or reg0 if not present or imm). The // third operand encodes the shift opcode and the imm if a reg isn't present. // - static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) { + inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) { return ShOp | (Imm << 3); } - static inline unsigned getSORegOffset(unsigned Op) { - return Op >> 3; - } - static inline ShiftOpc getSORegShOp(unsigned Op) { - return (ShiftOpc)(Op & 7); - } + inline unsigned getSORegOffset(unsigned Op) { return Op >> 3; } + inline ShiftOpc getSORegShOp(unsigned Op) { return (ShiftOpc)(Op & 7); } /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return /// the 8-bit imm value. - static inline unsigned getSOImmValImm(unsigned Imm) { - return Imm & 0xFF; - } + inline unsigned getSOImmValImm(unsigned Imm) { return Imm & 0xFF; } /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return /// the rotate amount. - static inline unsigned getSOImmValRot(unsigned Imm) { - return (Imm >> 8) * 2; - } + inline unsigned getSOImmValRot(unsigned Imm) { return (Imm >> 8) * 2; } /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand, /// computing the rotate amount to use. If this immediate value cannot be /// handled with a single shifter-op, determine a good rotate amount that will /// take a maximal chunk of bits out of the immediate. - static inline unsigned getSOImmValRotate(unsigned Imm) { + inline unsigned getSOImmValRotate(unsigned Imm) { // 8-bit (or less) immediates are trivially shifter_operands with a rotate // of zero. if ((Imm & ~255U) == 0) return 0; @@ -168,7 +158,7 @@ namespace ARM_AM { /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit /// into an shifter_operand immediate operand, return the 12-bit encoding for /// it. If not, return -1. - static inline int getSOImmVal(unsigned Arg) { + inline int getSOImmVal(unsigned Arg) { // 8-bit (or less) immediates are trivially shifter_operands with a rotate // of zero. if ((Arg & ~255U) == 0) return Arg; @@ -185,7 +175,7 @@ namespace ARM_AM { /// isSOImmTwoPartVal - Return true if the specified value can be obtained by /// or'ing together two SOImmVal's. - static inline bool isSOImmTwoPartVal(unsigned V) { + inline bool isSOImmTwoPartVal(unsigned V) { // If this can be handled with a single shifter_op, bail out. V = rotr32(~255U, getSOImmValRotate(V)) & V; if (V == 0) @@ -198,13 +188,13 @@ namespace ARM_AM { /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal, /// return the first chunk of it. - static inline unsigned getSOImmTwoPartFirst(unsigned V) { + inline unsigned getSOImmTwoPartFirst(unsigned V) { return rotr32(255U, getSOImmValRotate(V)) & V; } /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, /// return the second chunk of it. 
- static inline unsigned getSOImmTwoPartSecond(unsigned V) { + inline unsigned getSOImmTwoPartSecond(unsigned V) { // Mask out the first hunk. V = rotr32(~255U, getSOImmValRotate(V)) & V; @@ -215,7 +205,7 @@ namespace ARM_AM { /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed /// by a left shift. Returns the shift amount to use. - static inline unsigned getThumbImmValShift(unsigned Imm) { + inline unsigned getThumbImmValShift(unsigned Imm) { // 8-bit (or less) immediates are trivially immediate operand with a shift // of zero. if ((Imm & ~255U) == 0) return 0; @@ -226,7 +216,7 @@ namespace ARM_AM { /// isThumbImmShiftedVal - Return true if the specified value can be obtained /// by left shifting a 8-bit immediate. - static inline bool isThumbImmShiftedVal(unsigned V) { + inline bool isThumbImmShiftedVal(unsigned V) { // If this can be handled with V = (~255U << getThumbImmValShift(V)) & V; return V == 0; @@ -234,7 +224,7 @@ namespace ARM_AM { /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed /// by a left shift. Returns the shift amount to use. - static inline unsigned getThumbImm16ValShift(unsigned Imm) { + inline unsigned getThumbImm16ValShift(unsigned Imm) { // 16-bit (or less) immediates are trivially immediate operand with a shift // of zero. if ((Imm & ~65535U) == 0) return 0; @@ -245,7 +235,7 @@ namespace ARM_AM { /// isThumbImm16ShiftedVal - Return true if the specified value can be /// obtained by left shifting a 16-bit immediate. - static inline bool isThumbImm16ShiftedVal(unsigned V) { + inline bool isThumbImm16ShiftedVal(unsigned V) { // If this can be handled with V = (~65535U << getThumbImm16ValShift(V)) & V; return V == 0; @@ -253,7 +243,7 @@ namespace ARM_AM { /// getThumbImmNonShiftedVal - If V is a value that satisfies /// isThumbImmShiftedVal, return the non-shiftd value. - static inline unsigned getThumbImmNonShiftedVal(unsigned V) { + inline unsigned getThumbImmNonShiftedVal(unsigned V) { return V >> getThumbImmValShift(V); } @@ -267,7 +257,7 @@ namespace ARM_AM { /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3 /// Return -1 if none of the above apply. /// See ARM Reference Manual A6.3.2. - static inline int getT2SOImmValSplatVal(unsigned V) { + inline int getT2SOImmValSplatVal(unsigned V) { unsigned u, Vs, Imm; // control = 0 if ((V & 0xffffff00) == 0) @@ -295,7 +285,7 @@ namespace ARM_AM { /// specified value is a rotated 8-bit value. Return -1 if no rotation /// encoding is possible. /// See ARM Reference Manual A6.3.2. - static inline int getT2SOImmValRotateVal(unsigned V) { + inline int getT2SOImmValRotateVal(unsigned V) { unsigned RotAmt = countLeadingZeros(V); if (RotAmt >= 24) return -1; @@ -311,7 +301,7 @@ namespace ARM_AM { /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit /// encoding for it. If not, return -1. /// See ARM Reference Manual A6.3.2. - static inline int getT2SOImmVal(unsigned Arg) { + inline int getT2SOImmVal(unsigned Arg) { // If 'Arg' is an 8-bit splat, then get the encoded value. int Splat = getT2SOImmValSplatVal(Arg); if (Splat != -1) @@ -325,14 +315,14 @@ namespace ARM_AM { return -1; } - static inline unsigned getT2SOImmValRotate(unsigned V) { + inline unsigned getT2SOImmValRotate(unsigned V) { if ((V & ~255U) == 0) return 0; // Use CTZ to compute the rotate amount. 
unsigned RotAmt = countTrailingZeros(V); return (32 - RotAmt) & 31; } - static inline bool isT2SOImmTwoPartVal (unsigned Imm) { + inline bool isT2SOImmTwoPartVal(unsigned Imm) { unsigned V = Imm; // Passing values can be any combination of splat values and shifter // values. If this can be handled with a single shifter or splat, bail @@ -359,7 +349,7 @@ namespace ARM_AM { return false; } - static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) { + inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) { assert (isT2SOImmTwoPartVal(Imm) && "Immedate cannot be encoded as two part immediate!"); // Try a shifter operand as one part @@ -376,7 +366,7 @@ namespace ARM_AM { return Imm & 0x00ff00ffU; } - static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) { + inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) { // Mask out the first hunk Imm ^= getT2SOImmTwoPartFirst(Imm); // Return what's left @@ -404,25 +394,22 @@ namespace ARM_AM { // and code rewriting), this operand will have the form: FI#, reg0, // with no shift amount for the frame offset. // - static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, - unsigned IdxMode = 0) { + inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, + unsigned IdxMode = 0) { assert(Imm12 < (1 << 12) && "Imm too large!"); bool isSub = Opc == sub; return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ; } - static inline unsigned getAM2Offset(unsigned AM2Opc) { + inline unsigned getAM2Offset(unsigned AM2Opc) { return AM2Opc & ((1 << 12)-1); } - static inline AddrOpc getAM2Op(unsigned AM2Opc) { + inline AddrOpc getAM2Op(unsigned AM2Opc) { return ((AM2Opc >> 12) & 1) ? sub : add; } - static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) { + inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) { return (ShiftOpc)((AM2Opc >> 13) & 7); } - static inline unsigned getAM2IdxMode(unsigned AM2Opc) { - return (AM2Opc >> 16); - } - + inline unsigned getAM2IdxMode(unsigned AM2Opc) { return (AM2Opc >> 16); } //===--------------------------------------------------------------------===// // Addressing Mode #3 @@ -439,20 +426,16 @@ namespace ARM_AM { // index mode. /// getAM3Opc - This function encodes the addrmode3 opc field. - static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, - unsigned IdxMode = 0) { + inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, + unsigned IdxMode = 0) { bool isSub = Opc == sub; return ((int)isSub << 8) | Offset | (IdxMode << 9); } - static inline unsigned char getAM3Offset(unsigned AM3Opc) { - return AM3Opc & 0xFF; - } - static inline AddrOpc getAM3Op(unsigned AM3Opc) { + inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; } + inline AddrOpc getAM3Op(unsigned AM3Opc) { return ((AM3Opc >> 8) & 1) ? sub : add; } - static inline unsigned getAM3IdxMode(unsigned AM3Opc) { - return (AM3Opc >> 9); - } + inline unsigned getAM3IdxMode(unsigned AM3Opc) { return (AM3Opc >> 9); } //===--------------------------------------------------------------------===// // Addressing Mode #4 @@ -469,13 +452,11 @@ namespace ARM_AM { // DB - Decrement before // For VFP instructions, only the IA and DB modes are valid. 
- static inline AMSubMode getAM4SubMode(unsigned Mode) { + inline AMSubMode getAM4SubMode(unsigned Mode) { return (AMSubMode)(Mode & 0x7); } - static inline unsigned getAM4ModeImm(AMSubMode SubMode) { - return (int)SubMode; - } + inline unsigned getAM4ModeImm(AMSubMode SubMode) { return (int)SubMode; } //===--------------------------------------------------------------------===// // Addressing Mode #5 @@ -489,14 +470,12 @@ namespace ARM_AM { // operation (add or subtract) in bit 8 and the immediate in bits 0-7. /// getAM5Opc - This function encodes the addrmode5 opc field. - static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { + inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { bool isSub = Opc == sub; return ((int)isSub << 8) | Offset; } - static inline unsigned char getAM5Offset(unsigned AM5Opc) { - return AM5Opc & 0xFF; - } - static inline AddrOpc getAM5Op(unsigned AM5Opc) { + inline unsigned char getAM5Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; } + inline AddrOpc getAM5Op(unsigned AM5Opc) { return ((AM5Opc >> 8) & 1) ? sub : add; } @@ -512,14 +491,14 @@ namespace ARM_AM { // operation (add or subtract) in bit 8 and the immediate in bits 0-7. /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field. - static inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) { + inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) { bool isSub = Opc == sub; return ((int)isSub << 8) | Offset; } - static inline unsigned char getAM5FP16Offset(unsigned AM5Opc) { + inline unsigned char getAM5FP16Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; } - static inline AddrOpc getAM5FP16Op(unsigned AM5Opc) { + inline AddrOpc getAM5FP16Op(unsigned AM5Opc) { return ((AM5Opc >> 8) & 1) ? sub : add; } @@ -548,20 +527,18 @@ namespace ARM_AM { // the "Cmode" field of the instruction. The interfaces below treat the // Op and Cmode values as a single 5-bit value. - static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) { + inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) { return (OpCmode << 8) | Val; } - static inline unsigned getNEONModImmOpCmode(unsigned ModImm) { + inline unsigned getNEONModImmOpCmode(unsigned ModImm) { return (ModImm >> 8) & 0x1f; } - static inline unsigned getNEONModImmVal(unsigned ModImm) { - return ModImm & 0xff; - } + inline unsigned getNEONModImmVal(unsigned ModImm) { return ModImm & 0xff; } /// decodeNEONModImm - Decode a NEON modified immediate value into the /// element value and the element size in bits. (If the element size is /// smaller than the vector, it is splatted into all the elements.) - static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) { + inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) { unsigned OpCmode = getNEONModImmOpCmode(ModImm); unsigned Imm8 = getNEONModImmVal(ModImm); uint64_t Val = 0; @@ -599,7 +576,7 @@ namespace ARM_AM { } // Generic validation for single-byte immediate (0X00, 00X0, etc). - static inline bool isNEONBytesplat(unsigned Value, unsigned Size) { + inline bool isNEONBytesplat(unsigned Value, unsigned Size) { assert(Size >= 1 && Size <= 4 && "Invalid size"); unsigned count = 0; for (unsigned i = 0; i < Size; ++i) { @@ -610,7 +587,7 @@ namespace ARM_AM { } /// Checks if Value is a correct immediate for instructions like VBIC/VORR. 
- static inline bool isNEONi16splat(unsigned Value) { + inline bool isNEONi16splat(unsigned Value) { if (Value > 0xffff) return false; // i16 value with set bits only in one byte X0 or 0X. @@ -618,7 +595,7 @@ namespace ARM_AM { } // Encode NEON 16 bits Splat immediate for instructions like VBIC/VORR - static inline unsigned encodeNEONi16splat(unsigned Value) { + inline unsigned encodeNEONi16splat(unsigned Value) { assert(isNEONi16splat(Value) && "Invalid NEON splat value"); if (Value >= 0x100) Value = (Value >> 8) | 0xa00; @@ -628,13 +605,13 @@ namespace ARM_AM { } /// Checks if Value is a correct immediate for instructions like VBIC/VORR. - static inline bool isNEONi32splat(unsigned Value) { + inline bool isNEONi32splat(unsigned Value) { // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X. return Value == 0 || isNEONBytesplat(Value, 4); } /// Encode NEON 32 bits Splat immediate for instructions like VBIC/VORR. - static inline unsigned encodeNEONi32splat(unsigned Value) { + inline unsigned encodeNEONi32splat(unsigned Value) { assert(isNEONi32splat(Value) && "Invalid NEON splat value"); if (Value >= 0x100 && Value <= 0xff00) Value = (Value >> 8) | 0x200; @@ -648,7 +625,7 @@ namespace ARM_AM { //===--------------------------------------------------------------------===// // Floating-point Immediates // - static inline float getFPImmFloat(unsigned Imm) { + inline float getFPImmFloat(unsigned Imm) { // We expect an 8-bit binary encoding of a floating-point number here. union { uint32_t I; @@ -676,7 +653,7 @@ namespace ARM_AM { /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. - static inline int getFP16Imm(const APInt &Imm) { + inline int getFP16Imm(const APInt &Imm) { uint32_t Sign = Imm.lshr(15).getZExtValue() & 1; int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15 int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits @@ -695,14 +672,14 @@ namespace ARM_AM { return ((int)Sign << 7) | (Exp << 4) | Mantissa; } - static inline int getFP16Imm(const APFloat &FPImm) { + inline int getFP16Imm(const APFloat &FPImm) { return getFP16Imm(FPImm.bitcastToAPInt()); } /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. - static inline int getFP32Imm(const APInt &Imm) { + inline int getFP32Imm(const APInt &Imm) { uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits @@ -723,14 +700,14 @@ namespace ARM_AM { return ((int)Sign << 7) | (Exp << 4) | Mantissa; } - static inline int getFP32Imm(const APFloat &FPImm) { + inline int getFP32Imm(const APFloat &FPImm) { return getFP32Imm(FPImm.bitcastToAPInt()); } /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. 
-  static inline int getFP64Imm(const APInt &Imm) {
+  inline int getFP64Imm(const APInt &Imm) {
     uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
     int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;   // -1022 to 1023
     uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
@@ -751,7 +728,7 @@
     return ((int)Sign << 7) | (Exp << 4) | Mantissa;
   }
-  static inline int getFP64Imm(const APFloat &FPImm) {
+  inline int getFP64Imm(const APFloat &FPImm) {
     return getFP64Imm(FPImm.bitcastToAPInt());
   }
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index bd729fabedf5a..989bd552a3764 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -23,7 +23,8 @@ class ARMAsmBackendDarwin : public ARMAsmBackend {
       : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) {
   }
-  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+  std::unique_ptr<MCObjectWriter>
+  createObjectWriter(raw_pwrite_stream &OS) const override {
     return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
                                      MachO::CPU_TYPE_ARM, Subtype);
   }
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 748f915be17bb..703567d6006f5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -22,7 +22,8 @@ class ARMAsmBackendELF : public ARMAsmBackend {
                    bool IsLittle)
       : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {}
-  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+  std::unique_ptr<MCObjectWriter>
+  createObjectWriter(raw_pwrite_stream &OS) const override {
     return createARMELFObjectWriter(OS, OSABI, isLittle());
   }
 };
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index 2a375be49a830..2c48dcccab1b4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -18,7 +18,8 @@ class ARMAsmBackendWinCOFF : public ARMAsmBackend {
 public:
   ARMAsmBackendWinCOFF(const Target &T, const Triple &TheTriple)
       : ARMAsmBackend(T, TheTriple, true) {}
-  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+  std::unique_ptr<MCObjectWriter>
+  createObjectWriter(raw_pwrite_stream &OS) const override {
     return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
   }
 };
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 31f081b77bd93..17da82b4ca391 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -343,6 +343,7 @@ namespace ARMII {
     NVExtFrm      = 39 << FormShift,
     NVMulSLFrm    = 40 << FormShift,
     NVTBLFrm      = 41 << FormShift,
+    N3RegCplxFrm  = 43 << FormShift,
     //===------------------------------------------------------------------===//
     // Misc flags.
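
Editor's aside: the ARM_AM helpers above implement the classic A32 "modified immediate" rule (an 8-bit value rotated right by an even amount). A minimal stand-alone sketch of that rule follows; the helper name isA32ModifiedImm is hypothetical, not an LLVM API, and getSOImmValRotate computes the rotation directly rather than probing as done here.

#include <cstdint>

// Illustrative only: V is encodable iff some even left-rotation of V fits in
// 8 bits (left-rotating by R undoes a right rotation by R).
static bool isA32ModifiedImm(uint32_t V) {
  for (unsigned R = 0; R < 32; R += 2) {
    uint32_t Rotl = (V << R) | (V >> ((32 - R) & 31)); // rotate left by R
    if ((Rotl & ~0xFFu) == 0)
      return true; // V == rotr32(Rotl, R) with Rotl <= 0xFF
  }
  return false;
}
// e.g. isA32ModifiedImm(0xFF000000) -> true (0xFF ror 8), while 0x00FF00FF
// fails the test and needs the isSOImmTwoPartVal splitting shown above.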
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index dc893472096e9..3cd52fe1e7eb1 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -14,6 +14,7 @@
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -235,9 +236,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
   }
 }
-MCObjectWriter *llvm::createARMELFObjectWriter(raw_pwrite_stream &OS,
-                                               uint8_t OSABI,
-                                               bool IsLittleEndian) {
-  MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI);
-  return createELFObjectWriter(MOTW, OS, IsLittleEndian);
+std::unique_ptr<MCObjectWriter>
+llvm::createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
+                               bool IsLittleEndian) {
+  return createELFObjectWriter(llvm::make_unique<ARMELFObjectWriter>(OSABI), OS,
+                               IsLittleEndian);
 }
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index af11fa74bba84..1f995ddba7d71 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -440,9 +440,11 @@ class ARMELFStreamer : public MCELFStreamer {
 public:
   friend class ARMTargetELFStreamer;
-  ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS,
-                 MCCodeEmitter *Emitter, bool IsThumb)
-      : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb) {
+  ARMELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
+                 raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> Emitter,
+                 bool IsThumb)
+      : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)),
+        IsThumb(IsThumb) {
     EHReset();
   }
@@ -1485,19 +1487,21 @@ MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
   return new ARMTargetStreamer(S);
 }
-MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+MCELFStreamer *createARMELFStreamer(MCContext &Context,
+                                    std::unique_ptr<MCAsmBackend> TAB,
                                     raw_pwrite_stream &OS,
-                                    MCCodeEmitter *Emitter, bool RelaxAll,
-                                    bool IsThumb) {
-  ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
-  // FIXME: This should eventually end up somewhere else where more
-  // intelligent flag decisions can be made. For now we are just maintaining
-  // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
-  S->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
-
-  if (RelaxAll)
-    S->getAssembler().setRelaxAll(true);
-  return S;
+                                    std::unique_ptr<MCCodeEmitter> Emitter,
+                                    bool RelaxAll, bool IsThumb) {
+  ARMELFStreamer *S = new ARMELFStreamer(Context, std::move(TAB), OS,
+                                         std::move(Emitter), IsThumb);
+  // FIXME: This should eventually end up somewhere else where more
+  // intelligent flag decisions can be made. For now we are just maintaining
+  // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default.
+  S->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+
+  if (RelaxAll)
+    S->getAssembler().setRelaxAll(true);
+  return S;
 }
 } // end namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 088f59cda766f..ae5bc723ee5fc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -16,6 +16,8 @@
 #include "ARMMCAsmInfo.h"
 #include "InstPrinter/ARMInstPrinter.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCELFStreamer.h"
 #include "llvm/MC/MCInstrAnalysis.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -198,18 +200,22 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI,
 }
 static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
-                                     MCAsmBackend &MAB, raw_pwrite_stream &OS,
-                                     MCCodeEmitter *Emitter, bool RelaxAll) {
-  return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
-                              (T.getArch() == Triple::thumb ||
-                               T.getArch() == Triple::thumbeb));
+                                     std::unique_ptr<MCAsmBackend> &&MAB,
+                                     raw_pwrite_stream &OS,
+                                     std::unique_ptr<MCCodeEmitter> &&Emitter,
+                                     bool RelaxAll) {
+  return createARMELFStreamer(
+      Ctx, std::move(MAB), OS, std::move(Emitter), false,
+      (T.getArch() == Triple::thumb || T.getArch() == Triple::thumbeb));
 }
-static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB,
-                                          raw_pwrite_stream &OS,
-                                          MCCodeEmitter *Emitter, bool RelaxAll,
-                                          bool DWARFMustBeAtTheEnd) {
-  return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd);
+static MCStreamer *
+createARMMachOStreamer(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&MAB,
+                       raw_pwrite_stream &OS,
+                       std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll,
+                       bool DWARFMustBeAtTheEnd) {
+  return createMachOStreamer(Ctx, std::move(MAB), OS, std::move(Emitter), false,
+                             DWARFMustBeAtTheEnd);
 }
 static MCInstPrinter *createARMMCInstPrinter(const Triple &T,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index ba834201e585a..0fb97e5fee977 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H
 #include "llvm/Support/DataTypes.h"
+#include <memory>
 #include <string>
 namespace llvm {
@@ -92,23 +93,27 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T,
 // Construct a PE/COFF machine code streamer which will generate a PE/COFF
 // object file.
-MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+MCStreamer *createARMWinCOFFStreamer(MCContext &Context,
+                                     std::unique_ptr<MCAsmBackend> &&MAB,
                                      raw_pwrite_stream &OS,
-                                     MCCodeEmitter *Emitter, bool RelaxAll,
+                                     std::unique_ptr<MCCodeEmitter> &&Emitter,
+                                     bool RelaxAll,
                                      bool IncrementalLinkerCompatible);
 /// Construct an ELF Mach-O object writer.
-MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
-                                         bool IsLittleEndian);
+std::unique_ptr<MCObjectWriter> createARMELFObjectWriter(raw_pwrite_stream &OS,
+                                                         uint8_t OSABI,
+                                                         bool IsLittleEndian);
 /// Construct an ARM Mach-O object writer.
-MCObjectWriter *createARMMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
-                                          uint32_t CPUType,
-                                          uint32_t CPUSubtype);
+std::unique_ptr<MCObjectWriter> createARMMachObjectWriter(raw_pwrite_stream &OS,
+                                                          bool Is64Bit,
+                                                          uint32_t CPUType,
+                                                          uint32_t CPUSubtype);
 /// Construct an ARM PE/COFF object writer.
-MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
-                                             bool Is64Bit);
+std::unique_ptr<MCObjectWriter>
+createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit);
 /// Construct ARM Mach-O relocation info.
 MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index f371699e9fc79..521ae5337e7ac 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -484,11 +484,10 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
   Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
 }
-MCObjectWriter *llvm::createARMMachObjectWriter(raw_pwrite_stream &OS,
-                                                bool Is64Bit, uint32_t CPUType,
-                                                uint32_t CPUSubtype) {
-  return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
-                                                        CPUType,
-                                                        CPUSubtype),
-                                OS, /*IsLittleEndian=*/true);
+std::unique_ptr<MCObjectWriter>
+llvm::createARMMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+                                uint32_t CPUType, uint32_t CPUSubtype) {
+  return createMachObjectWriter(
+      llvm::make_unique<ARMMachObjectWriter>(Is64Bit, CPUType, CPUSubtype), OS,
+      /*IsLittleEndian=*/true);
 }
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index f74fb2e20b5a3..5e09b126f43fc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -14,6 +14,7 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/MC/MCWinCOFFObjectWriter.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -90,10 +91,10 @@ bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
 namespace llvm {
-MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
-                                             bool Is64Bit) {
-  MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit);
-  return createWinCOFFObjectWriter(MOTW, OS);
+std::unique_ptr<MCObjectWriter>
+createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit) {
+  auto MOTW = llvm::make_unique<ARMWinCOFFObjectWriter>(Is64Bit);
+  return createWinCOFFObjectWriter(std::move(MOTW), OS);
 }
 } // end namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index 83fa084e60c75..ba9246eafa919 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -8,6 +8,8 @@
 //===----------------------------------------------------------------------===//
 #include "ARMMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCWinCOFFStreamer.h"
 using namespace llvm;
@@ -15,9 +17,9 @@ using namespace llvm;
 namespace {
 class ARMWinCOFFStreamer : public MCWinCOFFStreamer {
 public:
-  ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE,
-                     raw_pwrite_stream &OS)
-      : MCWinCOFFStreamer(C, AB, CE, OS) {}
+  ARMWinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> AB,
+                     std::unique_ptr<MCCodeEmitter> CE, raw_pwrite_stream &OS)
+      : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), OS) {}
   void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
   void EmitThumbFunc(MCSymbol *Symbol) override;
@@ -38,9 +40,11 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
 }
 MCStreamer *llvm::createARMWinCOFFStreamer(
-    MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
-    MCCodeEmitter *Emitter, bool RelaxAll, bool IncrementalLinkerCompatible) {
-  auto *S = new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
+    MCContext &Context, std::unique_ptr<MCAsmBackend> &&MAB,
+    raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> &&Emitter,
+    bool RelaxAll, bool IncrementalLinkerCompatible) {
+  auto *S =
+      new ARMWinCOFFStreamer(Context, std::move(MAB), std::move(Emitter), OS);
   S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible);
   return S;
 }
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 85abe1d3b7334..4f330e3a884be 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -352,10 +352,36 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
   AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
   AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
-  // Thumb1 does not currently support dynamic stack realignment.  Report a
-  // fatal error rather then silently generate bad code.
-  if (RegInfo->needsStackRealignment(MF))
-    report_fatal_error("Dynamic stack realignment not supported for thumb1.");
+  if (RegInfo->needsStackRealignment(MF)) {
+    const unsigned NrBitsToZero = countTrailingZeros(MFI.getMaxAlignment());
+    // Emit the following sequence, using R4 as a temporary, since we cannot use
+    // SP as a source or destination register for the shifts:
+    // mov  r4, sp
+    // lsrs r4, r4, #NrBitsToZero
+    // lsls r4, r4, #NrBitsToZero
+    // mov  sp, r4
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+      .addReg(ARM::SP, RegState::Kill)
+      .add(predOps(ARMCC::AL));
+
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4)
+      .addDef(ARM::CPSR)
+      .addReg(ARM::R4, RegState::Kill)
+      .addImm(NrBitsToZero)
+      .add(predOps(ARMCC::AL));
+
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4)
+      .addDef(ARM::CPSR)
+      .addReg(ARM::R4, RegState::Kill)
+      .addImm(NrBitsToZero)
+      .add(predOps(ARMCC::AL));
+
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+      .addReg(ARM::R4, RegState::Kill)
+      .add(predOps(ARMCC::AL));
+
+    AFI->setShouldRestoreSPFromFP(true);
+  }
   // If we need a base pointer, set it up here. It's whatever the value
   // of the stack pointer is at this point. Any variable size objects
@@ -876,13 +902,15 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   bool NeedsPop = false;
   for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
+    CalleeSavedInfo &Info = CSI[i-1];
+    unsigned Reg = Info.getReg();
     // High registers (excluding lr) have already been dealt with
     if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
       continue;
     if (Reg == ARM::LR) {
+      Info.setRestored(false);
       if (MBB.succ_empty()) {
         // Special epilogue for vararg functions. See emitEpilogue
         if (isVarArg)
diff --git a/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 540e05a929971..d6f85edae47c5 100644
--- a/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -583,8 +583,8 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
   unsigned TmpReg = 0; // 0 for no temporary register
   unsigned SrcReg = MI.getOperand(1).getReg();
   bool SrcIsKill = MI.getOperand(1).isKill();
-  OpLo = AVR::LDRdPtr;
-  OpHi = AVR::LDDRdPtrQ;
+  OpLo = AVR::LDRdPtrPi;
+  OpHi = AVR::LDRdPtr;
   TRI->splitReg(DstReg, DstLoReg, DstHiReg);
   // Use a temporary register if src and dst registers are the same.
@@ -597,6 +597,7 @@
   // Load low byte.
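+  // [Editor's note: illustrative example, not part of the patch.] With the
+  // opcode pair selected above (OpLo = LDRdPtrPi, OpHi = LDRdPtr), a 16-bit
+  // load such as "ldw r25:r24, Z" is now expected to expand to
+  //   ld r24, Z+   ; low byte, Z post-incremented
+  //   ld r25, Z    ; high byte at the incremented address
+  // (register choice hypothetical), matching the "ld Rd, P+ / ld Rd+1, P"
+  // comment updated in AVRInstrInfo.td below, so the high-byte access no
+  // longer needs the 6-bit ldd displacement form.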
   auto MIBLO = buildMI(MBB, MBBI, OpLo)
                    .addReg(CurDstLoReg, RegState::Define)
+                   .addReg(SrcReg, RegState::Define)
                    .addReg(SrcReg);
   // Push low byte onto stack if necessary.
@@ -606,8 +607,7 @@
   // Load high byte.
   auto MIBHI = buildMI(MBB, MBBI, OpHi)
                    .addReg(CurDstHiReg, RegState::Define)
-                   .addReg(SrcReg, getKillRegState(SrcIsKill))
-                   .addImm(1);
+                   .addReg(SrcReg, getKillRegState(SrcIsKill));
   if (TmpReg) {
     // Move the high byte into the final destination.
@@ -699,7 +699,9 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
   OpHi = AVR::LDDRdPtrQ;
   TRI->splitReg(DstReg, DstLoReg, DstHiReg);
-  assert(Imm <= 63 && "Offset is out of range");
+  // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
+  // allowed for the instruction, 62 is the limit here.
+  assert(Imm <= 62 && "Offset is out of range");
   // Use a temporary register if src and dst registers are the same.
   if (DstReg == SrcReg)
@@ -741,7 +743,50 @@
 template <>
 bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
-  llvm_unreachable("wide LPM is unimplemented");
+  MachineInstr &MI = *MBBI;
+  unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+  unsigned DstReg = MI.getOperand(0).getReg();
+  unsigned TmpReg = 0; // 0 for no temporary register
+  unsigned SrcReg = MI.getOperand(1).getReg();
+  bool SrcIsKill = MI.getOperand(1).isKill();
+  OpLo = AVR::LPMRdZPi;
+  OpHi = AVR::LPMRdZ;
+  TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+  // Use a temporary register if src and dst registers are the same.
+  if (DstReg == SrcReg)
+    TmpReg = scavengeGPR8(MI);
+
+  unsigned CurDstLoReg = (DstReg == SrcReg) ? TmpReg : DstLoReg;
+  unsigned CurDstHiReg = (DstReg == SrcReg) ? TmpReg : DstHiReg;
+
+  // Load low byte.
+  auto MIBLO = buildMI(MBB, MBBI, OpLo)
+                   .addReg(CurDstLoReg, RegState::Define)
+                   .addReg(SrcReg);
+
+  // Push low byte onto stack if necessary.
+  if (TmpReg)
+    buildMI(MBB, MBBI, AVR::PUSHRr).addReg(TmpReg);
+
+  // Load high byte.
+  auto MIBHI = buildMI(MBB, MBBI, OpHi)
+                   .addReg(CurDstHiReg, RegState::Define)
+                   .addReg(SrcReg, getKillRegState(SrcIsKill));
+
+  if (TmpReg) {
+    // Move the high byte into the final destination.
+    buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg);
+
+    // Move the low byte from the scratch space into the final destination.
+    buildMI(MBB, MBBI, AVR::POPRd).addReg(DstLoReg);
+  }
+
+  MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+  MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+  MI.eraseFromParent();
+  return true;
 }
 template <>
@@ -1074,7 +1119,9 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
   OpHi = AVR::STDPtrQRr;
   TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
-  assert(Imm <= 63 && "Offset is out of range");
+  // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
+  // allowed for the instruction, 62 is the limit here.
+  assert(Imm <= 62 && "Offset is out of range");
   auto MIBLO = buildMI(MBB, MBBI, OpLo)
                    .addReg(DstReg)
@@ -1104,7 +1151,9 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
   OpHi = AVR::INRdA;
   TRI->splitReg(DstReg, DstLoReg, DstHiReg);
-  assert(Imm <= 63 && "Address is out of range");
+  // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
+  // allowed for the instruction, 62 is the limit here.
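+  // [Editor's note: illustrative sketch, not part of the patch.] The same
+  // off-by-one reasoning applies to every word-sized expansion touched here:
+  // the pseudo becomes two byte-wide accesses at Imm and Imm+1, and Imm+1
+  // must still fit the instruction's 6-bit field, i.e.
+  //
+  //   constexpr unsigned MaxField = 63;   // largest 6-bit displacement/address
+  //   bool wordAccessFits(unsigned Imm) { return Imm + 1 <= MaxField; }
+  //
+  // which is exactly the "Imm <= 62" bound asserted below.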
+  assert(Imm <= 62 && "Address is out of range");
   auto MIBLO = buildMI(MBB, MBBI, OpLo)
                    .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
@@ -1132,7 +1181,9 @@ bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
   OpHi = AVR::OUTARr;
   TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
-  assert(Imm <= 63 && "Address is out of range");
+  // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
+  // allowed for the instruction, 62 is the limit here.
+  assert(Imm <= 62 && "Address is out of range");
   // 16 bit I/O writes need the high byte first
   auto MIBHI = buildMI(MBB, MBBI, OpHi)
diff --git a/lib/Target/AVR/AVRInstrInfo.cpp b/lib/Target/AVR/AVRInstrInfo.cpp
index 744aa723c416c..1a89a13693e1b 100644
--- a/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/lib/Target/AVR/AVRInstrInfo.cpp
@@ -537,8 +537,7 @@ bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
     llvm_unreachable("unexpected opcode!");
   case AVR::JMPk:
   case AVR::CALLk:
-    assert(BrOffset >= 0 && "offset must be absolute address");
-    return isUIntN(16, BrOffset);
+    return true;
   case AVR::RCALLk:
   case AVR::RJMPk:
     return isIntN(13, BrOffset);
@@ -556,5 +555,20 @@
   }
 }
+unsigned AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+                                            MachineBasicBlock &NewDestBB,
+                                            const DebugLoc &DL,
+                                            int64_t BrOffset,
+                                            RegScavenger *RS) const {
+  // This method inserts a *direct* branch (JMP), despite its name.
+  // LLVM calls this method to fixup unconditional branches; it never calls
+  // insertBranch or some hypothetical "insertDirectBranch".
+  // See lib/CodeGen/BranchRelaxation.cpp for details.
+  // We end up here when a jump is too long for a RJMP instruction.
+  auto &MI = *BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
+
+  return getInstSizeInBytes(MI);
+}
+
 } // end of namespace llvm
diff --git a/lib/Target/AVR/AVRInstrInfo.h b/lib/Target/AVR/AVRInstrInfo.h
index f42d34fb28480..eee8a92c6191c 100644
--- a/lib/Target/AVR/AVRInstrInfo.h
+++ b/lib/Target/AVR/AVRInstrInfo.h
@@ -107,6 +107,12 @@ class AVRInstrInfo : public AVRGenInstrInfo {
   bool isBranchOffsetInRange(unsigned BranchOpc,
                              int64_t BrOffset) const override;
+
+  unsigned insertIndirectBranch(MachineBasicBlock &MBB,
+                                MachineBasicBlock &NewDestBB,
+                                const DebugLoc &DL,
+                                int64_t BrOffset,
+                                RegScavenger *RS) const override;
 private:
   const AVRRegisterInfo RI;
 };
diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td
index 61a227b7bda31..7d1bfc8d85e02 100644
--- a/lib/Target/AVR/AVRInstrInfo.td
+++ b/lib/Target/AVR/AVRInstrInfo.td
@@ -1152,10 +1152,10 @@ isReMaterializable = 1 in
   //
   // Expands to:
   // ld Rd,   P+
-  // ld Rd+1, P+
+  // ld Rd+1, P
   let Constraints = "@earlyclobber $reg" in
   def LDWRdPtr : Pseudo<(outs DREGS:$reg),
-                        (ins PTRDISPREGS:$ptrreg),
+                        (ins PTRREGS:$ptrreg),
                         "ldw\t$reg, $ptrreg",
                         [(set i16:$reg, (load i16:$ptrreg))]>,
                  Requires<[HasSRAM]>;
@@ -1164,7 +1164,7 @@ isReMaterializable = 1 in
 // Indirect loads (with postincrement or predecrement).
 let mayLoad = 1,
 hasSideEffects = 0,
-Constraints = "$ptrreg = $base_wb,@earlyclobber $reg,@earlyclobber $base_wb" in
+Constraints = "$ptrreg = $base_wb,@earlyclobber $reg" in
 {
   def LDRdPtrPi : FSTLD<0,
                         0b01,
@@ -1417,6 +1417,7 @@ def STDWPtrQRr : Pseudo<(outs),
 // Load program memory operations.
 let canFoldAsLoad = 1,
 isReMaterializable = 1,
+mayLoad = 1,
 hasSideEffects = 0 in
 {
   let Defs = [R0],
@@ -1437,8 +1438,7 @@ hasSideEffects = 0 in
                   Requires<[HasLPMX]>;
   // Load program memory, while postincrementing the Z register.
-  let mayLoad = 1,
-  Defs = [R31R30] in
+  let Defs = [R31R30] in
   {
     def LPMRdZPi : FLPMX<0,
                          1,
diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp
index 249dc5512c289..7099b29a8bcdf 100644
--- a/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -203,7 +203,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   // If the offset is too big we have to adjust and restore the frame pointer
   // to materialize a valid load/store with displacement.
   //:TODO: consider using only one adiw/sbiw chain for more than one frame index
-  if (Offset > 63) {
+  if (Offset > 62) {
     unsigned AddOpc = AVR::ADIWRdK, SubOpc = AVR::SBIWRdK;
     int AddOffset = Offset - 63 + 1;
diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 5004736365c7b..2e1adcc6a4fa1 100644
--- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -83,7 +83,7 @@ class AVRAsmParser : public MCTargetAsmParser {
 public:
   AVRAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
                const MCInstrInfo &MII, const MCTargetOptions &Options)
-      : MCTargetAsmParser(Options, STI), STI(STI), Parser(Parser) {
+      : MCTargetAsmParser(Options, STI, MII), STI(STI), Parser(Parser) {
     MCAsmParserExtension::Initialize(Parser);
     MRI = getContext().getRegisterInfo();
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index d18298385adf2..01a09610118ce 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -340,7 +340,8 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup,
   }
 }
-MCObjectWriter *AVRAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
+std::unique_ptr<MCObjectWriter>
+AVRAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
   return createAVRELFObjectWriter(OS,
                                   MCELFObjectTargetWriter::getOSABI(OSType));
 }
diff --git a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index 4a75e3b0d22d2..af615df033aa1 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -38,7 +38,7 @@ class AVRAsmBackend : public MCAsmBackend {
   void adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
                         uint64_t &Value, MCContext *Ctx = nullptr) const;
-  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
+  std::unique_ptr<MCObjectWriter> createObjectWriter(raw_pwrite_stream &OS) const override;
   void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                   const MCValue &Target, MutableArrayRef<char> Data,
diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
index 8bdbfb4178bb4..25da75e63febf 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
@@ -13,6 +13,7 @@
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -118,9 +119,10 @@ unsigned AVRELFObjectWriter::getRelocType(MCContext &Ctx,
   }
 }
-MCObjectWriter *createAVRELFObjectWriter(raw_pwrite_stream &OS,
-                                         uint8_t OSABI) {
-  MCELFObjectTargetWriter *MOTW = new AVRELFObjectWriter(OSABI);
-  return createELFObjectWriter(MOTW, OS, true);
+std::unique_ptr<MCObjectWriter>
+createAVRELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) {
+  std::unique_ptr<MCELFObjectTargetWriter> MOTW(new AVRELFObjectWriter(OSABI));
+  return createELFObjectWriter(std::move(MOTW), OS, true);
 }
 } // end of namespace llvm
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
index 826430e94b9c2..bccce5d307e1b 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
@@ -18,6 +18,7 @@
 #include "InstPrinter/AVRInstPrinter.h"
 #include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
@@ -66,9 +67,12 @@ static MCInstPrinter *createAVRMCInstPrinter(const Triple &T,
 }
 static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
-                                    MCAsmBackend &MAB, raw_pwrite_stream &OS,
-                                    MCCodeEmitter *Emitter, bool RelaxAll) {
-  return createELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
+                                    std::unique_ptr<MCAsmBackend> &&MAB,
+                                    raw_pwrite_stream &OS,
+                                    std::unique_ptr<MCCodeEmitter> &&Emitter,
+                                    bool RelaxAll) {
+  return createELFStreamer(Context, std::move(MAB), OS,
+                           std::move(Emitter), RelaxAll);
 }
 static MCTargetStreamer *
diff --git a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
index 41a574767910a..8053b8d389fc3 100644
--- a/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
+++ b/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
@@ -16,6 +16,8 @@
 #include "llvm/Support/DataTypes.h"
+#include <memory>
+
 namespace llvm {
 class MCAsmBackend;
@@ -43,7 +45,8 @@ MCAsmBackend *createAVRAsmBackend(const Target &T, const MCRegisterInfo &MRI,
                                   const llvm::MCTargetOptions &TO);
 /// Creates an ELF object writer for AVR.
-MCObjectWriter *createAVRELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
+std::unique_ptr<MCObjectWriter>
+createAVRELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
 } // end namespace llvm
diff --git a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 9e251d211d21e..deaa11325809b 100644
--- a/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -28,6 +28,7 @@ namespace {
 struct BPFOperand;
 class BPFAsmParser : public MCTargetAsmParser {
+
   SMLoc getLoc() const { return getParser().getTok().getLoc(); }
   bool PreMatchCheck(OperandVector &Operands);
@@ -68,7 +69,7 @@ class BPFAsmParser : public MCTargetAsmParser {
   BPFAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
                const MCInstrInfo &MII, const MCTargetOptions &Options)
-      : MCTargetAsmParser(Options, STI) {
+      : MCTargetAsmParser(Options, STI, MII) {
     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
   }
 };
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index 1f382f3e73f50..1c12c23c93128 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -40,7 +40,9 @@ namespace {
 class BPFDAGToDAGISel : public SelectionDAGISel {
 public:
-  explicit BPFDAGToDAGISel(BPFTargetMachine &TM) : SelectionDAGISel(TM) {}
+  explicit BPFDAGToDAGISel(BPFTargetMachine &TM) : SelectionDAGISel(TM) {
+    curr_func_ = nullptr;
+  }
   StringRef getPassName() const override {
     return "BPF DAG->DAG Pattern Instruction Selection";
@@ -85,6 +87,8 @@ class BPFDAGToDAGISel : public SelectionDAGISel {
   std::map cs_vals_;
   // Mapping from vreg to load memory opcode
   std::map load_to_vreg_;
+  // Current function
+  const Function *curr_func_;
 };
 } // namespace
@@ -329,6 +333,16 @@ void BPFDAGToDAGISel::PreprocessISelDAG() {
   // are 32-bit registers, but later on, kernel verifier will rewrite
   // it with 64-bit value. Therefore, truncating the value after the
   // load will result in incorrect code.
+
+  // clear the load_to_vreg_ map so that we have a clean start
+  // for this function.
+  if (!curr_func_) {
+    curr_func_ = FuncInfo->Fn;
+  } else if (curr_func_ != FuncInfo->Fn) {
+    load_to_vreg_.clear();
+    curr_func_ = FuncInfo->Fn;
+  }
+
   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
                                        E = CurDAG->allnodes_end();
        I != E;) {
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index d4e06ddccafab..995f206529f2f 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -611,11 +611,15 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
         .addReg(LHS)
         .addReg(MI.getOperand(2).getReg())
         .addMBB(Copy1MBB);
-  else
+  else {
+    int64_t imm32 = MI.getOperand(2).getImm();
+    // sanity check before we build J*_ri instruction.
+    assert (isInt<32>(imm32));
     BuildMI(BB, DL, TII.get(NewCC))
         .addReg(LHS)
-        .addImm(MI.getOperand(2).getImm())
+        .addImm(imm32)
         .addMBB(Copy1MBB);
+  }
   // Copy0MBB:
   //  %FalseValue = ...
diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
index 7d4b03deebe2c..a3ad2ee5e80dc 100644
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -238,9 +238,7 @@ class NEG_RR pattern>
     : TYPE_ALU_JMP {
   bits<4> dst;
-  bits<4> src;
-  let Inst{55-52} = src;
   let Inst{51-48} = dst;
   let BPFClass = Class;
 }
@@ -462,7 +460,7 @@ let usesCustomInserter = 1 in {
                      (ins GPR:$lhs, i64imm:$rhs, i64imm:$imm, GPR:$src, GPR:$src2),
                      "# Select PSEUDO $dst = $lhs $imm $rhs ? $src : $src2",
$src : $src2", [(set i64:$dst, - (BPFselectcc i64:$lhs, (i64 imm:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>; + (BPFselectcc i64:$lhs, (i64immSExt32:$rhs), (i64 imm:$imm), i64:$src, i64:$src2))]>; } // load 64-bit global addr into register diff --git a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp index f5b621f9f8f87..6fc87d79c4398 100644 --- a/lib/Target/BPF/Disassembler/BPFDisassembler.cpp +++ b/lib/Target/BPF/Disassembler/BPFDisassembler.cpp @@ -146,7 +146,8 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, if (Result == MCDisassembler::Fail) return MCDisassembler::Fail; switch (Instr.getOpcode()) { - case BPF::LD_imm64: { + case BPF::LD_imm64: + case BPF::LD_pseudo: { if (Bytes.size() < 16) { Size = 0; return MCDisassembler::Fail; diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 9fc812cdef14f..800700d3dad21 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -31,7 +31,8 @@ class BPFAsmBackend : public MCAsmBackend { const MCValue &Target, MutableArrayRef Data, uint64_t Value, bool IsResolved) const override; - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + std::unique_ptr + createObjectWriter(raw_pwrite_stream &OS) const override; // No instruction requires relaxation bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -88,7 +89,8 @@ void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, } } -MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { +std::unique_ptr +BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createBPFELFObjectWriter(OS, 0, IsLittleEndian); } diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index d5e1d7706edc0..144ea2b697dd5 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/Support/ErrorHandling.h" #include @@ -52,8 +53,9 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, } } -MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI, bool IsLittleEndian) { - MCELFObjectTargetWriter *MOTW = new BPFELFObjectWriter(OSABI); - return createELFObjectWriter(MOTW, OS, IsLittleEndian); +std::unique_ptr +llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, + bool IsLittleEndian) { + return createELFObjectWriter(llvm::make_unique(OSABI), OS, + IsLittleEndian); } diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp index 797904e1c9768..c8fbc0c2207b0 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp @@ -49,11 +49,13 @@ static MCSubtargetInfo *createBPFMCSubtargetInfo(const Triple &TT, return createBPFMCSubtargetInfoImpl(TT, CPU, FS); } -static MCStreamer *createBPFMCStreamer(const Triple &T, - MCContext &Ctx, MCAsmBackend &MAB, - raw_pwrite_stream &OS, MCCodeEmitter *Emitter, +static MCStreamer *createBPFMCStreamer(const Triple &T, MCContext &Ctx, + std::unique_ptr &&MAB, + raw_pwrite_stream &OS, + std::unique_ptr &&Emitter, bool RelaxAll) { - return 
createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); + return createELFStreamer(Ctx, std::move(MAB), OS, std::move(Emitter), + RelaxAll); } static MCInstPrinter *createBPFMCInstPrinter(const Triple &T, diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h index d1c97c9987e1b..6466042f6929a 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h +++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h @@ -17,6 +17,8 @@ #include "llvm/Config/config.h" #include "llvm/Support/DataTypes.h" +#include + namespace llvm { class MCAsmBackend; class MCCodeEmitter; @@ -50,8 +52,9 @@ MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU, const MCTargetOptions &Options); -MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI, bool IsLittleEndian); +std::unique_ptr createBPFELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, + bool IsLittleEndian); } // Defines symbolic names for BPF registers. This defines a mapping from diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index d901abbd16925..d0d8b39b83bc2 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -96,7 +96,6 @@ class HexagonAsmParser : public MCTargetAsmParser { MCAsmParser &Parser; MCAssembler *Assembler; - MCInstrInfo const &MCII; MCInst MCB; bool InBrackets; @@ -155,8 +154,8 @@ class HexagonAsmParser : public MCTargetAsmParser { public: HexagonAsmParser(const MCSubtargetInfo &_STI, MCAsmParser &_Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, _STI), Parser(_Parser), - MCII (MII), MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) { + : MCTargetAsmParser(Options, _STI, MII), Parser(_Parser), + MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) { setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); MCAsmParserExtension::Initialize(_Parser); @@ -462,9 +461,9 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { MCB.setLoc(IDLoc); // Check the bundle for errors. 
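A note on the API change running through the AVR and BPF hunks above: the object-writer and streamer factories now traffic in std::unique_ptr instead of raw owning pointers, so ownership transfer is visible in the type system and enforced at every call site. The following is a minimal, self-contained sketch of that idiom using invented names (ObjectWriter, Streamer), not LLVM's real classes:

#include <memory>
#include <utility>

struct ObjectWriter { virtual ~ObjectWriter() = default; };
struct ELFObjectWriter : ObjectWriter {};

// The factory hands out ownership explicitly; forgetting to free, or
// freeing twice, is no longer expressible without a cast.
std::unique_ptr<ObjectWriter> createObjectWriter() {
  return std::make_unique<ELFObjectWriter>();
}

class Streamer {
  std::unique_ptr<ObjectWriter> OW;

public:
  // An && sink parameter makes every caller spell out std::move.
  explicit Streamer(std::unique_ptr<ObjectWriter> &&OW) : OW(std::move(OW)) {}
};

int main() {
  auto OW = createObjectWriter();
  Streamer S(std::move(OW)); // ownership transfers; OW is now null
}

This is exactly why the createELFStreamer call sites above grow std::move(MAB) and std::move(Emitter) wrappers.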
const MCRegisterInfo *RI = getContext().getRegisterInfo(); - HexagonMCChecker Check(getContext(), MCII, getSTI(), MCB, *RI); + HexagonMCChecker Check(getContext(), MII, getSTI(), MCB, *RI); - bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(), + bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MII, getSTI(), getContext(), MCB, &Check); @@ -608,7 +607,7 @@ bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MatchingInlineAsm)) return true; HexagonMCInstrInfo::extendIfNeeded( - getParser().getContext(), MCII, MCB, *SubInst); + getParser().getContext(), MII, MCB, *SubInst); MCB.addOperand(MCOperand::createInst(SubInst)); if (!InBrackets) return finishBundle(IDLoc, Out); diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index 0b0d48a011ecd..09f72b254974e 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -181,8 +181,8 @@ namespace llvm { } // end namespace llvm void BitTracker::print_cells(raw_ostream &OS) const { - for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) - dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; + for (const std::pair P : Map) + dbgs() << PrintReg(P.first, &ME.TRI) << " -> " << P.second << "\n"; } BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F) @@ -830,18 +830,16 @@ void BT::visitNonBranch(const MachineInstr &MI) { << " cell: " << ME.getCell(RU, Map) << "\n"; } dbgs() << "Outputs:\n"; - for (CellMapType::iterator I = ResMap.begin(), E = ResMap.end(); - I != E; ++I) { - RegisterRef RD(I->first); - dbgs() << " " << PrintReg(I->first, &ME.TRI) << " cell: " + for (const std::pair &P : ResMap) { + RegisterRef RD(P.first); + dbgs() << " " << PrintReg(P.first, &ME.TRI) << " cell: " << ME.getCell(RD, ResMap) << "\n"; } } // Iterate over all definitions of the instruction, and update the // cells accordingly. - for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { // Visit register defs only. if (!MO.isReg() || !MO.isDef()) continue; @@ -926,14 +924,11 @@ void BT::visitBranchesFrom(const MachineInstr &BI) { ++It; } while (FallsThrough && It != End); - using succ_iterator = MachineBasicBlock::const_succ_iterator; - if (!DefaultToAll) { // Need to add all CFG successors that lead to EH landing pads. // There won't be explicit branches to these blocks, but they must // be processed. 
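The BitTracker cleanups in this hunk replace handwritten iterator loops with range-based for over accessors such as B.successors() and MRI.use_nodbg_instructions(Reg). When an interface only exposes a begin()/end() pair, the same style can be recovered with a small range adapter (LLVM provides llvm::make_range/iterator_range for this); the sketch below is a generic stand-in, not the LLVM type:

#include <cstdio>
#include <vector>

// Wraps a begin/end pair so legacy two-iterator APIs compose with
// range-based for.
template <typename It> struct IteratorRange {
  It B, E;
  It begin() const { return B; }
  It end() const { return E; }
};

template <typename It> IteratorRange<It> makeRange(It B, It E) {
  return {B, E};
}

int main() {
  std::vector<int> Uses{1, 2, 3};
  // Before: for (auto I = Uses.begin(), E = Uses.end(); I != E; ++I) ...
  for (int U : makeRange(Uses.begin(), Uses.end()))
    std::printf("use %d\n", U);
}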
- for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) { - const MachineBasicBlock *SB = *I; + for (const MachineBasicBlock *SB : B.successors()) { if (SB->isEHPad()) Targets.insert(SB); } @@ -944,33 +939,27 @@ void BT::visitBranchesFrom(const MachineInstr &BI) { Targets.insert(&*Next); } } else { - for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) - Targets.insert(*I); + for (const MachineBasicBlock *SB : B.successors()) + Targets.insert(SB); } - for (unsigned i = 0, n = Targets.size(); i < n; ++i) { - int TargetN = Targets[i]->getNumber(); - FlowQ.push(CFGEdge(ThisN, TargetN)); - } + for (const MachineBasicBlock *TB : Targets) + FlowQ.push(CFGEdge(ThisN, TB->getNumber())); } void BT::visitUsesOf(unsigned Reg) { if (Trace) dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n"; - using use_iterator = MachineRegisterInfo::use_nodbg_iterator; - - use_iterator End = MRI.use_nodbg_end(); - for (use_iterator I = MRI.use_nodbg_begin(Reg); I != End; ++I) { - MachineInstr *UseI = I->getParent(); - if (!InstrExec.count(UseI)) + for (const MachineInstr &UseI : MRI.use_nodbg_instructions(Reg)) { + if (!InstrExec.count(&UseI)) continue; - if (UseI->isPHI()) - visitPHI(*UseI); - else if (!UseI->isBranch()) - visitNonBranch(*UseI); + if (UseI.isPHI()) + visitPHI(UseI); + else if (!UseI.isBranch()) + visitNonBranch(UseI); else - visitBranchesFrom(*UseI); + visitBranchesFrom(UseI); } } @@ -993,8 +982,8 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { (void)NME; assert((OME-OMB == NME-NMB) && "Substituting registers of different lengths"); - for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { - RegisterCell &RC = I->second; + for (std::pair &P : Map) { + RegisterCell &RC = P.second; for (uint16_t i = 0, w = RC.width(); i < w; ++i) { BitValue &V = RC[i]; if (V.Type != BitValue::Ref || V.RefI.Reg != OldRR.Reg) @@ -1045,10 +1034,9 @@ void BT::run() { const MachineBasicBlock *Entry = MachineFlowGraphTraits::getEntryNode(&MF); unsigned MaxBN = 0; - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - assert(I->getNumber() >= 0 && "Disconnected block"); - unsigned BN = I->getNumber(); + for (const MachineBasicBlock &B : MF) { + assert(B.getNumber() >= 0 && "Disconnected block"); + unsigned BN = B.getNumber(); if (BN > MaxBN) MaxBN = BN; } diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index bbb411a9b4870..ac6a5fcd0812b 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(HexagonCodeGen HexagonBranchRelaxation.cpp HexagonCFGOptimizer.cpp HexagonCommonGEP.cpp + HexagonConstExtenders.cpp HexagonConstPropagation.cpp HexagonCopyToCombine.cpp HexagonEarlyIfConv.cpp @@ -67,3 +68,4 @@ add_subdirectory(AsmParser) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) add_subdirectory(Disassembler) + diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index f9724882272ad..c26ba3db8ef60 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -138,24 +138,65 @@ static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); static DecodeStatus s32_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder); -static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, - const 
void *Decoder); -static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, - const void *Decoder); -static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, - const void *Decoder); -static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, - const void *Decoder); -static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, - const void *Decoder); -static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, - const void *Decoder); -static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, - const void *Decoder); static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); -#include "HexagonDepDecoders.h" +static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const void *Decoder) { + signedDecoder<4>(MI, tmp, Decoder); + return MCDisassembler::Success; +} +static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const void *Decoder) { + signedDecoder<14>(MI, tmp, Decoder); + return MCDisassembler::Success; +} +static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const void *Decoder) { + signedDecoder<8>(MI, tmp, Decoder); + return MCDisassembler::Success; +} +static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const void *Decoder) { + signedDecoder<7>(MI, tmp, Decoder); + return MCDisassembler::Success; +} +static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const void *Decoder) { + signedDecoder<12>(MI, tmp, Decoder); + return MCDisassembler::Success; +} +static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const void *Decoder) { + signedDecoder<3>(MI, tmp, Decoder); + return MCDisassembler::Success; +} +static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const void *Decoder) { + signedDecoder<13>(MI, tmp, Decoder); + return MCDisassembler::Success; +} +static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const void *Decoder) { + signedDecoder<6>(MI, tmp, Decoder); + return MCDisassembler::Success; +} +static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const void *Decoder) { + signedDecoder<9>(MI, tmp, Decoder); + return MCDisassembler::Success; +} +static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const void *Decoder) { + signedDecoder<5>(MI, tmp, Decoder); + return MCDisassembler::Success; +} +static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t, + const void *Decoder) { + signedDecoder<6>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + #include "HexagonGenDisassemblerTables.inc" static MCDisassembler *createHexagonDisassembler(const Target &T, diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index df6f3ea1f1682..3218f2510e5f9 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -25,10 +25,31 @@ include "llvm/Target/Target.td" include "HexagonDepArch.td" // Hexagon ISA Extensions -def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps", "true", - "Hexagon HVX instructions">; -def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", "true", - "Hexagon HVX Double instructions">; +def ExtensionHVXV60: SubtargetFeature<"hvxv60", "HexagonHVXVersion", + "Hexagon::ArchEnum::V60", "Hexagon HVX instructions">; +def ExtensionHVXV62: SubtargetFeature<"hvxv62", "HexagonHVXVersion", + "Hexagon::ArchEnum::V62", "Hexagon HVX instructions", + 
[ExtensionHVXV60]>; +def ExtensionHVX: SubtargetFeature<"hvx", "HexagonHVXVersion", + "Hexagon::ArchEnum::V62", "Hexagon HVX instructions", + [ExtensionHVXV60, + ExtensionHVXV62]>; +def ExtensionHVX64B + : SubtargetFeature<"hvx-length64b", "UseHVX64BOps", "true", + "Hexagon HVX 64B instructions", + [ExtensionHVXV60, ExtensionHVXV62]>; +def ExtensionHVX128B + : SubtargetFeature<"hvx-length128b", "UseHVX128BOps", "true", + "Hexagon HVX 128B instructions", + [ExtensionHVXV60, ExtensionHVXV62]>; + +// This is an alias to ExtensionHVX128B to accept the hvx-double as +// an acceptable subtarget feature. +def ExtensionHVXDbl + : SubtargetFeature<"hvx-double", "UseHVX128BOps", "true", + "Hexagon HVX 128B instructions", + [ExtensionHVXV60, ExtensionHVXV62]>; + def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true", "Use constant-extended calls">; @@ -38,14 +59,21 @@ def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true", def UseMEMOP : Predicate<"HST->useMemOps()">; def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; -def UseHVXDbl : Predicate<"HST->useHVXDblOps()">, - AssemblerPredicate<"ExtensionHVXDbl">; -def UseHVXSgl : Predicate<"HST->useHVXSglOps()">; -def UseHVX : Predicate<"HST->useHVXSglOps() ||HST->useHVXDblOps()">, - AssemblerPredicate<"ExtensionHVX">; - -def Hvx64 : HwMode<"+hvx,-hvx-double">; -def Hvx128 : HwMode<"+hvx,+hvx-double">; +def UseHVX64B : Predicate<"HST->useHVX64BOps()">, + AssemblerPredicate<"ExtensionHVX64B">; +def UseHVX128B : Predicate<"HST->useHVX128BOps()">, + AssemblerPredicate<"ExtensionHVX128B">; +def UseHVX : Predicate<"HST->useHVXOps()">, + AssemblerPredicate<"ExtensionHVXV60">; +def UseHVXV60 : Predicate<"HST->useHVXOps()">, + AssemblerPredicate<"ExtensionHVXV60">; +def UseHVXV62 : Predicate<"HST->useHVXOps()">, + AssemblerPredicate<"ExtensionHVXV62">; + +def Hvx64 : HwMode<"+hvx-length64b">; +def Hvx64old : HwMode<"-hvx-double">; +def Hvx128 : HwMode<"+hvx-length128b">; +def Hvx128old : HwMode<"+hvx-double">; //===----------------------------------------------------------------------===// // Classes used for relation maps. 
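For context on the Hexagon.td hunk above: the old boolean hvx/hvx-double features become an HVX architecture version (hvxv60/hvxv62) plus explicit vector-length features (hvx-length64b/hvx-length128b), with hvx-double kept as a compatibility alias. On the C++ side this implies a subtarget that tracks a version enum and independent length flags; the class below is a hypothetical simplification for illustration, not the real HexagonSubtarget:

#include <cassert>

enum class HVXArch { NoHVX, V60, V62 };

class HexagonSubtargetSketch {
  HVXArch HexagonHVXVersion = HVXArch::NoHVX;
  bool UseHVX64BOps = false;
  bool UseHVX128BOps = false;

public:
  // "+hvxv62,+hvx-length128b" in feature-string terms.
  void enableHVXV62With128B() {
    HexagonHVXVersion = HVXArch::V62; // V62 implies V60 (see the .td deps)
    UseHVX128BOps = true;
  }

  bool useHVXOps() const { return HexagonHVXVersion != HVXArch::NoHVX; }
  bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; }
  bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; }
};

int main() {
  HexagonSubtargetSketch ST;
  ST.enableHVXV62With128B();
  assert(ST.useHVXOps() && ST.useHVX128BOps() && !ST.useHVX64BOps());
}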
@@ -158,7 +186,7 @@ def getNonNVStore : InstrMapping { let ValueCols = [["false"]]; } -def getBaseWithImmOffset : InstrMapping { +def changeAddrMode_abs_io: InstrMapping { let FilterClass = "AddrModeRel"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore", "isFloat"]; @@ -167,7 +195,7 @@ def getBaseWithImmOffset : InstrMapping { let ValueCols = [["BaseImmOffset"]]; } -def getAbsoluteForm : InstrMapping { +def changeAddrMode_io_abs: InstrMapping { let FilterClass = "AddrModeRel"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore", "isFloat"]; @@ -176,7 +204,7 @@ def getAbsoluteForm : InstrMapping { let ValueCols = [["Absolute"]]; } -def getBaseWithRegOffset : InstrMapping { +def changeAddrMode_io_rr: InstrMapping { let FilterClass = "AddrModeRel"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; let ColFields = ["addrMode"]; @@ -184,7 +212,7 @@ def getBaseWithRegOffset : InstrMapping { let ValueCols = [["BaseRegOffset"]]; } -def xformRegToImmOffset : InstrMapping { +def changeAddrMode_rr_io: InstrMapping { let FilterClass = "AddrModeRel"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; let ColFields = ["addrMode"]; @@ -192,7 +220,7 @@ def xformRegToImmOffset : InstrMapping { let ValueCols = [["BaseImmOffset"]]; } -def getBaseWithLongOffset : InstrMapping { +def changeAddrMode_rr_ur: InstrMapping { let FilterClass = "ImmRegShl"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; let ColFields = ["addrMode"]; @@ -200,6 +228,14 @@ def getBaseWithLongOffset : InstrMapping { let ValueCols = [["BaseLongOffset"]]; } +def changeAddrMode_ur_rr : InstrMapping { + let FilterClass = "ImmRegShl"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["addrMode"]; + let KeyCol = ["BaseLongOffset"]; + let ValueCols = [["BaseRegOffset"]]; +} + def getRegForm : InstrMapping { let FilterClass = "ImmRegRel"; let RowFields = ["CextOpcode", "PredSense", "PNewValue"]; @@ -208,14 +244,6 @@ def getRegForm : InstrMapping { let ValueCols = [["reg"]]; } -def getRegShlForm : InstrMapping { - let FilterClass = "ImmRegShl"; - let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; - let ColFields = ["InputType"]; - let KeyCol = ["imm"]; - let ValueCols = [["reg"]]; -} - def notTakenBranchPrediction : InstrMapping { let FilterClass = "PredRel"; let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"]; @@ -254,7 +282,6 @@ include "HexagonPseudo.td" include "HexagonPatterns.td" include "HexagonDepMappings.td" include "HexagonIntrinsics.td" -include "HexagonIntrinsicsDerived.td" include "HexagonMapAsm2IntrinV62.gen.td" def HexagonInstrInfo : InstrInfo; @@ -274,9 +301,9 @@ def : Proc<"hexagonv5", HexagonModelV4, def : Proc<"hexagonv55", HexagonModelV55, [ArchV4, ArchV5, ArchV55]>; def : Proc<"hexagonv60", HexagonModelV60, - [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>; + [ArchV4, ArchV5, ArchV55, ArchV60]>; def : Proc<"hexagonv62", HexagonModelV62, - [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, ExtensionHVX]>; + [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62]>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index 08e08fe5087bc..c8927ec713a59 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -60,12 +60,8 @@ 
HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, // der the initial sequence of formal parameters that are known to be // passed via registers. unsigned InVirtReg, InPhysReg = 0; - const Function &F = *MF.getFunction(); - using arg_iterator = Function::const_arg_iterator; - - for (arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { - const Argument &Arg = *I; + for (const Argument &Arg : MF.getFunction()->args()) { Type *ATy = Arg.getType(); unsigned Width = 0; if (ATy->isIntegerTy()) @@ -190,8 +186,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, unsigned NumDefs = 0; // Sanity verification: there should not be any defs with subregisters. - for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; NumDefs++; @@ -240,8 +235,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, // checking what kind of operand a given instruction has individually // for each instruction, do it here. Global symbols as operands gene- // rally do not provide any useful information. - for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) { - const MachineOperand &MO = MI.getOperand(i); + for (const MachineOperand &MO : MI.operands()) { if (MO.isGlobal() || MO.isBlockAddress() || MO.isSymbol() || MO.isJTI() || MO.isCPI()) return false; @@ -1254,11 +1248,8 @@ unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { } unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const { - using iterator = MachineRegisterInfo::livein_iterator; - - for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { - if (I->first == PReg) - return I->second; - } + for (std::pair P : MRI.liveins()) + if (P.first == PReg) + return P.second; return 0; } diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp index 34a886fe2ab73..d7eb44b92148f 100644 --- a/lib/Target/Hexagon/HexagonBlockRanges.cpp +++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hbr" - #include "HexagonBlockRanges.h" #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" @@ -17,6 +15,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" @@ -31,6 +30,8 @@ using namespace llvm; +#define DEBUG_TYPE "hbr" + bool HexagonBlockRanges::IndexRange::overlaps(const IndexRange &A) const { // If A contains start(), or "this" contains A.start(), then overlap. IndexType S = start(), E = end(), AS = A.start(), AE = A.end(); diff --git a/lib/Target/Hexagon/HexagonBlockRanges.h b/lib/Target/Hexagon/HexagonBlockRanges.h index 769ec7044a0ee..4da5a970a6597 100644 --- a/lib/Target/Hexagon/HexagonBlockRanges.h +++ b/lib/Target/Hexagon/HexagonBlockRanges.h @@ -1,4 +1,4 @@ -//===--- HexagonBlockRanges.h -----------------------------------*- C++ -*-===// +//===- HexagonBlockRanges.h -------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,11 +6,11 @@ // License. See LICENSE.TXT for details. 
 //
 //===----------------------------------------------------------------------===//
-#ifndef HEXAGON_BLOCK_RANGES_H
-#define HEXAGON_BLOCK_RANGES_H
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONBLOCKRANGES_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBLOCKRANGES_H
 
 #include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
 #include <cassert>
 #include <map>
 #include <set>
@@ -23,6 +23,7 @@
 class HexagonSubtarget;
 class MachineBasicBlock;
 class MachineFunction;
 class MachineInstr;
+class MachineRegisterInfo;
 class raw_ostream;
 class TargetInstrInfo;
 class TargetRegisterInfo;
@@ -32,11 +33,12 @@ struct HexagonBlockRanges {
   struct RegisterRef {
     unsigned Reg, Sub;
+
     bool operator<(RegisterRef R) const {
       return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub);
     }
   };
-  typedef std::set<RegisterRef> RegisterSet;
+  using RegisterSet = std::set<RegisterRef>;
 
   // This is to represent an "index", which is an abstraction of a position
   // of an instruction within a basic block.
@@ -49,7 +51,7 @@
       First = 11  // 10th + 1st
     };
 
-    IndexType() : Index(None) {}
+    IndexType() {}
     IndexType(unsigned Idx) : Index(Idx) {}
 
     static bool isInstr(IndexType X) { return X.Index >= First; }
@@ -68,7 +70,7 @@
     bool operator>  (IndexType Idx) const;
     bool operator>= (IndexType Idx) const;
 
-    unsigned Index;
+    unsigned Index = None;
   };
 
   // A range of indices, essentially a representation of a live range.
@@ -138,7 +140,8 @@
     std::map<IndexType, MachineInstr *> Map;
   };
 
-  typedef std::map<RegisterRef, RangeList> RegToRangeMap;
+  using RegToRangeMap = std::map<RegisterRef, RangeList>;
+
   RegToRangeMap computeLiveMap(InstrIndexMap &IndexMap);
   RegToRangeMap computeDeadMap(InstrIndexMap &IndexMap, RegToRangeMap &LiveMap);
   static RegisterSet expandToSubRegs(RegisterRef R,
@@ -241,4 +244,4 @@ raw_ostream &operator<< (raw_ostream &OS,
 
 } // end namespace llvm
 
-#endif // HEXAGON_BLOCK_RANGES_H
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONBLOCKRANGES_H
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index c7b422e7efd09..22794eb50e2ae 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -1,4 +1,5 @@
-//===-- HexagonCFGOptimizer.cpp - CFG optimizations -----------------------===//
+//===- HexagonCFGOptimizer.cpp - CFG optimizations ------------------------===//
+//
 // The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
@@ -7,53 +8,54 @@
 //===----------------------------------------------------------------------===//
 
 #include "Hexagon.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
-#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <vector>
 
 using namespace llvm;
 
 #define DEBUG_TYPE "hexagon_cfg"
 
 namespace llvm {
-
FunctionPass *createHexagonCFGOptimizer(); - void initializeHexagonCFGOptimizerPass(PassRegistry&); -} +FunctionPass *createHexagonCFGOptimizer(); +void initializeHexagonCFGOptimizerPass(PassRegistry&); + +} // end namespace llvm namespace { class HexagonCFGOptimizer : public MachineFunctionPass { - private: void InvertAndChangeJumpTarget(MachineInstr &, MachineBasicBlock *); bool isOnFallThroughPath(MachineBasicBlock *MBB); public: static char ID; + HexagonCFGOptimizer() : MachineFunctionPass(ID) { initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry()); } StringRef getPassName() const override { return "Hexagon CFG Optimizer"; } bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); } }; +} // end anonymous namespace char HexagonCFGOptimizer::ID = 0; @@ -72,7 +74,6 @@ static bool IsConditionalBranch(int Opc) { return false; } - static bool IsUnconditionalJump(int Opc) { return (Opc == Hexagon::J2_jump); } @@ -86,19 +87,15 @@ void HexagonCFGOptimizer::InvertAndChangeJumpTarget( case Hexagon::J2_jumpt: NewOpcode = Hexagon::J2_jumpf; break; - case Hexagon::J2_jumpf: NewOpcode = Hexagon::J2_jumpt; break; - case Hexagon::J2_jumptnewpt: NewOpcode = Hexagon::J2_jumpfnewpt; break; - case Hexagon::J2_jumpfnewpt: NewOpcode = Hexagon::J2_jumptnewpt; break; - default: llvm_unreachable("Cannot handle this case"); } @@ -131,8 +128,6 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { MachineInstr &MI = *MII; int Opc = MI.getOpcode(); if (IsConditionalBranch(Opc)) { - - // // (Case 1) Transform the code if the following condition occurs: // BB1: if (p0) jump BB3 // ...falls-through to BB2 ... @@ -160,7 +155,6 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // Remove BB2 // BB3: ... // BB4: ... - // unsigned NumSuccs = MBB->succ_size(); MachineBasicBlock::succ_iterator SI = MBB->succ_begin(); MachineBasicBlock* FirstSucc = *SI; @@ -200,7 +194,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // Check if the layout successor of BB2 is BB3. bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget); bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) && - JumpAroundTarget->size() >= 1 && + !JumpAroundTarget->empty() && IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) && JumpAroundTarget->pred_size() == 1 && JumpAroundTarget->succ_size() == 1; @@ -223,11 +217,9 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { UncondTarget->moveAfter(JumpAroundTarget); } - // // Correct live-in information. Is used by post-RA scheduler // The live-in to LayoutSucc is now all values live-in to // JumpAroundTarget. 
- // std::vector OrigLiveIn( LayoutSucc->livein_begin(), LayoutSucc->livein_end()); std::vector NewLiveIn( @@ -245,8 +237,6 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { } return true; } -} - //===----------------------------------------------------------------------===// // Public Constructor Functions diff --git a/lib/Target/Hexagon/HexagonConstExtenders.cpp b/lib/Target/Hexagon/HexagonConstExtenders.cpp new file mode 100644 index 0000000000000..40e11451edebb --- /dev/null +++ b/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -0,0 +1,1872 @@ +//===- HexagonConstExtenders.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Pass.h" +#include +#include +#include +#include + +#define DEBUG_TYPE "hexagon-cext-opt" + +using namespace llvm; + +static cl::opt CountThreshold("hexagon-cext-threshold", + cl::init(3), cl::Hidden, cl::ZeroOrMore, + cl::desc("Minimum number of extenders to trigger replacement")); + +static cl::opt ReplaceLimit("hexagon-cext-limit", cl::init(0), + cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum number of replacements")); + +namespace llvm { + void initializeHexagonConstExtendersPass(PassRegistry&); + FunctionPass *createHexagonConstExtenders(); +} + +namespace { + struct OffsetRange { + int32_t Min = INT_MIN, Max = INT_MAX; + uint8_t Align = 1; + + OffsetRange() = default; + OffsetRange(int32_t L, int32_t H, uint8_t A) + : Min(L), Max(H), Align(A) {} + OffsetRange &intersect(OffsetRange A) { + Align = std::max(Align, A.Align); + Min = std::max(Min, A.Min); + Max = std::min(Max, A.Max); + // Canonicalize empty ranges. + if (Min > Max) + std::tie(Min, Max, Align) = std::make_tuple(0, -1, 1); + return *this; + } + OffsetRange &shift(int32_t S) { + assert(alignTo(std::abs(S), Align) == uint64_t(std::abs(S))); + Min += S; + Max += S; + return *this; + } + OffsetRange &extendBy(int32_t D) { + // If D < 0, extend Min, otherwise extend Max. + if (D < 0) + Min = (INT_MIN-D < Min) ? Min+D : INT_MIN; + else + Max = (INT_MAX-D > Max) ? 
Max+D : INT_MAX; + return *this; + } + bool empty() const { + return Min > Max; + } + bool contains(int32_t V) const { + return Min <= V && V <= Max && (V % Align) == 0; + } + bool operator==(const OffsetRange &R) const { + return Min == R.Min && Max == R.Max && Align == R.Align; + } + bool operator!=(const OffsetRange &R) const { + return !operator==(R); + } + bool operator<(const OffsetRange &R) const { + if (Min != R.Min) + return Min < R.Min; + if (Max != R.Max) + return Max < R.Max; + return Align < R.Align; + } + static OffsetRange zero() { return {0, 0, 1}; } + }; + + struct RangeTree { + struct Node { + Node(const OffsetRange &R) : MaxEnd(R.Max), Range(R) {} + unsigned Height = 1; + unsigned Count = 1; + int32_t MaxEnd; + const OffsetRange &Range; + Node *Left = nullptr, *Right = nullptr; + }; + + Node *Root = nullptr; + + void add(const OffsetRange &R) { + Root = add(Root, R); + } + void erase(const Node *N) { + Root = remove(Root, N); + delete N; + } + void order(SmallVectorImpl &Seq) const { + order(Root, Seq); + } + SmallVector nodesWith(int32_t P, bool CheckAlign = true) { + SmallVector Nodes; + nodesWith(Root, P, CheckAlign, Nodes); + return Nodes; + } + void dump() const; + ~RangeTree() { + SmallVector Nodes; + order(Nodes); + for (Node *N : Nodes) + delete N; + } + + private: + void dump(const Node *N) const; + void order(Node *N, SmallVectorImpl &Seq) const; + void nodesWith(Node *N, int32_t P, bool CheckA, + SmallVectorImpl &Seq) const; + + Node *add(Node *N, const OffsetRange &R); + Node *remove(Node *N, const Node *D); + Node *rotateLeft(Node *Lower, Node *Higher); + Node *rotateRight(Node *Lower, Node *Higher); + unsigned height(Node *N) { + return N != nullptr ? N->Height : 0; + } + Node *update(Node *N) { + assert(N != nullptr); + N->Height = 1 + std::max(height(N->Left), height(N->Right)); + if (N->Left) + N->MaxEnd = std::max(N->MaxEnd, N->Left->MaxEnd); + if (N->Right) + N->MaxEnd = std::max(N->MaxEnd, N->Right->MaxEnd); + return N; + } + Node *rebalance(Node *N) { + assert(N != nullptr); + int32_t Balance = height(N->Right) - height(N->Left); + if (Balance < -1) + return rotateRight(N->Left, N); + if (Balance > 1) + return rotateLeft(N->Right, N); + return N; + } + }; + + struct Loc { + MachineBasicBlock *Block = nullptr; + MachineBasicBlock::iterator At; + + Loc(MachineBasicBlock *B, MachineBasicBlock::iterator It) + : Block(B), At(It) { + if (B->end() == It) { + Pos = -1; + } else { + assert(It->getParent() == B); + Pos = std::distance(B->begin(), It); + } + } + bool operator<(Loc A) const { + if (Block != A.Block) + return Block->getNumber() < A.Block->getNumber(); + if (A.Pos == -1) + return Pos != A.Pos; + return Pos != -1 && Pos < A.Pos; + } + private: + int Pos = 0; + }; + + struct HexagonConstExtenders : public MachineFunctionPass { + static char ID; + HexagonConstExtenders() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { + return "Hexagon constant-extender optimization"; + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + struct Register { + Register() = default; + Register(unsigned R, unsigned S) : Reg(R), Sub(S) {} + Register(const MachineOperand &Op) + : Reg(Op.getReg()), Sub(Op.getSubReg()) {} + Register &operator=(const MachineOperand &Op) { + if (Op.isReg()) { + Reg = Op.getReg(); + Sub = Op.getSubReg(); + } else if (Op.isFI()) { + Reg = 
TargetRegisterInfo::index2StackSlot(Op.getIndex()); + } + return *this; + } + bool isVReg() const { + return Reg != 0 && !TargetRegisterInfo::isStackSlot(Reg) && + TargetRegisterInfo::isVirtualRegister(Reg); + } + bool isSlot() const { + return Reg != 0 && TargetRegisterInfo::isStackSlot(Reg); + } + operator MachineOperand() const { + if (isVReg()) + return MachineOperand::CreateReg(Reg, /*Def*/false, /*Imp*/false, + /*Kill*/false, /*Dead*/false, /*Undef*/false, + /*EarlyClobber*/false, Sub); + if (TargetRegisterInfo::isStackSlot(Reg)) { + int FI = TargetRegisterInfo::stackSlot2Index(Reg); + return MachineOperand::CreateFI(FI); + } + llvm_unreachable("Cannot create MachineOperand"); + } + bool operator==(Register R) const { return Reg == R.Reg && Sub == R.Sub; } + bool operator!=(Register R) const { return !operator==(R); } + bool operator<(Register R) const { + // For std::map. + return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub); + } + unsigned Reg = 0, Sub = 0; + }; + + struct ExtExpr { + // A subexpression in which the extender is used. In general, this + // represents an expression where adding D to the extender will be + // equivalent to adding D to the expression as a whole. In other + // words, expr(add(##V,D) = add(expr(##V),D). + + // The original motivation for this are the io/ur addressing modes, + // where the offset is extended. Consider the io example: + // In memw(Rs+##V), the ##V could be replaced by a register Rt to + // form the rr mode: memw(Rt+Rs<<0). In such case, however, the + // register Rt must have exactly the value of ##V. If there was + // another instruction memw(Rs+##V+4), it would need a different Rt. + // Now, if Rt was initialized as "##V+Rs<<0", both of these + // instructions could use the same Rt, just with different offsets. + // Here it's clear that "initializer+4" should be the same as if + // the offset 4 was added to the ##V in the initializer. + + // The only kinds of expressions that support the requirement of + // commuting with addition are addition and subtraction from ##V. + // Include shifting the Rs to account for the ur addressing mode: + // ##Val + Rs << S + // ##Val - Rs + Register Rs; + unsigned S = 0; + bool Neg = false; + + ExtExpr() = default; + ExtExpr(Register RS, bool NG, unsigned SH) : Rs(RS), S(SH), Neg(NG) {} + // Expression is trivial if it does not modify the extender. + bool trivial() const { + return Rs.Reg == 0; + } + bool operator==(const ExtExpr &Ex) const { + return Rs == Ex.Rs && S == Ex.S && Neg == Ex.Neg; + } + bool operator!=(const ExtExpr &Ex) const { + return !operator==(Ex); + } + bool operator<(const ExtExpr &Ex) const { + if (Rs != Ex.Rs) + return Rs < Ex.Rs; + if (S != Ex.S) + return S < Ex.S; + return !Neg && Ex.Neg; + } + }; + + struct ExtDesc { + MachineInstr *UseMI = nullptr; + unsigned OpNum = -1u; + // The subexpression in which the extender is used (e.g. address + // computation). + ExtExpr Expr; + // Optional register that is assigned the value of Expr. + Register Rd; + // Def means that the output of the instruction may differ from the + // original by a constant c, and that the difference can be corrected + // by adding/subtracting c in all users of the defined register. 
+    bool IsDef = false;
+
+    MachineOperand &getOp() {
+      return UseMI->getOperand(OpNum);
+    }
+    const MachineOperand &getOp() const {
+      return UseMI->getOperand(OpNum);
+    }
+  };
+
+  struct ExtRoot {
+    union {
+      const ConstantFP *CFP;    // MO_FPImmediate
+      const char *SymbolName;   // MO_ExternalSymbol
+      const GlobalValue *GV;    // MO_GlobalAddress
+      const BlockAddress *BA;   // MO_BlockAddress
+      int64_t ImmVal;           // MO_Immediate, MO_TargetIndex,
+                                // and MO_ConstantPoolIndex
+    } V;
+    unsigned Kind;              // Same as in MachineOperand.
+    unsigned char TF;           // TargetFlags.
+
+    ExtRoot(const MachineOperand &Op);
+    bool operator==(const ExtRoot &ER) const {
+      return Kind == ER.Kind && V.ImmVal == ER.V.ImmVal;
+    }
+    bool operator!=(const ExtRoot &ER) const {
+      return !operator==(ER);
+    }
+    bool operator<(const ExtRoot &ER) const;
+  };
+
+  struct ExtValue : public ExtRoot {
+    int32_t Offset;
+
+    ExtValue(const MachineOperand &Op);
+    ExtValue(const ExtDesc &ED) : ExtValue(ED.getOp()) {}
+    ExtValue(const ExtRoot &ER, int32_t Off) : ExtRoot(ER), Offset(Off) {}
+    bool operator<(const ExtValue &EV) const;
+    bool operator==(const ExtValue &EV) const {
+      return ExtRoot(*this) == ExtRoot(EV) && Offset == EV.Offset;
+    }
+    bool operator!=(const ExtValue &EV) const {
+      return !operator==(EV);
+    }
+    explicit operator MachineOperand() const;
+  };
+
+  using IndexList = SetVector<unsigned>;
+  using ExtenderInit = std::pair<ExtValue, ExtExpr>;
+  using AssignmentMap = std::map<ExtenderInit, IndexList>;
+  using LocDefMap = std::map<Loc, IndexList>;
+
+  const HexagonInstrInfo *HII = nullptr;
+  const HexagonRegisterInfo *HRI = nullptr;
+  MachineDominatorTree *MDT = nullptr;
+  MachineRegisterInfo *MRI = nullptr;
+  std::vector<ExtDesc> Extenders;
+  std::vector<unsigned> NewRegs;
+
+  bool isStoreImmediate(unsigned Opc) const;
+  bool isRegOffOpcode(unsigned ExtOpc) const ;
+  unsigned getRegOffOpcode(unsigned ExtOpc) const;
+  unsigned getDirectRegReplacement(unsigned ExtOpc) const;
+  OffsetRange getOffsetRange(Register R, const MachineInstr &MI) const;
+  OffsetRange getOffsetRange(const ExtDesc &ED) const;
+  OffsetRange getOffsetRange(Register Rd) const;
+
+  void recordExtender(MachineInstr &MI, unsigned OpNum);
+  void collectInstr(MachineInstr &MI);
+  void collect(MachineFunction &MF);
+  void assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
+                   AssignmentMap &IMap);
+  void calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
+                          LocDefMap &Defs);
+  Register insertInitializer(Loc DefL, const ExtenderInit &ExtI);
+  bool replaceInstrExact(const ExtDesc &ED, Register ExtR);
+  bool replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
+                        Register ExtR, int32_t &Diff);
+  bool replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI);
+  bool replaceExtenders(const AssignmentMap &IMap);
+
+  unsigned getOperandIndex(const MachineInstr &MI,
+                           const MachineOperand &Op) const;
+  const MachineOperand &getPredicateOp(const MachineInstr &MI) const;
+  const MachineOperand &getLoadResultOp(const MachineInstr &MI) const;
+  const MachineOperand &getStoredValueOp(const MachineInstr &MI) const;
+
+  friend struct PrintRegister;
+  friend struct PrintExpr;
+  friend struct PrintInit;
+  friend struct PrintIMap;
+  friend raw_ostream &operator<< (raw_ostream &OS,
+                                  const struct PrintRegister &P);
+  friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintExpr &P);
+  friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintInit &P);
+  friend raw_ostream &operator<< (raw_ostream &OS, const ExtDesc &ED);
+  friend raw_ostream &operator<< (raw_ostream &OS, const ExtRoot &ER);
+  friend raw_ostream
&operator<< (raw_ostream &OS, const ExtValue &EV); + friend raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR); + friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintIMap &P); + }; + + using HCE = HexagonConstExtenders; + + LLVM_ATTRIBUTE_UNUSED + raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) { + if (OR.Min > OR.Max) + OS << '!'; + OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align); + return OS; + } + + struct PrintRegister { + PrintRegister(HCE::Register R, const HexagonRegisterInfo &I) + : Rs(R), HRI(I) {} + HCE::Register Rs; + const HexagonRegisterInfo &HRI; + }; + + LLVM_ATTRIBUTE_UNUSED + raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) { + if (P.Rs.Reg != 0) + OS << PrintReg(P.Rs.Reg, &P.HRI, P.Rs.Sub); + else + OS << "noreg"; + return OS; + } + + struct PrintExpr { + PrintExpr(const HCE::ExtExpr &E, const HexagonRegisterInfo &I) + : Ex(E), HRI(I) {} + const HCE::ExtExpr &Ex; + const HexagonRegisterInfo &HRI; + }; + + LLVM_ATTRIBUTE_UNUSED + raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) { + OS << "## " << (P.Ex.Neg ? "- " : "+ "); + if (P.Ex.Rs.Reg != 0) + OS << PrintReg(P.Ex.Rs.Reg, &P.HRI, P.Ex.Rs.Sub); + else + OS << "__"; + OS << " << " << P.Ex.S; + return OS; + } + + struct PrintInit { + PrintInit(const HCE::ExtenderInit &EI, const HexagonRegisterInfo &I) + : ExtI(EI), HRI(I) {} + const HCE::ExtenderInit &ExtI; + const HexagonRegisterInfo &HRI; + }; + + LLVM_ATTRIBUTE_UNUSED + raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) { + OS << '[' << P.ExtI.first << ", " + << PrintExpr(P.ExtI.second, P.HRI) << ']'; + return OS; + } + + LLVM_ATTRIBUTE_UNUSED + raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) { + assert(ED.OpNum != -1u); + const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent(); + const MachineFunction &MF = *MBB.getParent(); + const auto &HRI = *MF.getSubtarget().getRegisterInfo(); + OS << "bb#" << MBB.getNumber() << ": "; + if (ED.Rd.Reg != 0) + OS << PrintReg(ED.Rd.Reg, &HRI, ED.Rd.Sub); + else + OS << "__"; + OS << " = " << PrintExpr(ED.Expr, HRI); + if (ED.IsDef) + OS << ", def"; + return OS; + } + + LLVM_ATTRIBUTE_UNUSED + raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) { + switch (ER.Kind) { + case MachineOperand::MO_Immediate: + OS << "imm:" << ER.V.ImmVal; + break; + case MachineOperand::MO_FPImmediate: + OS << "fpi:" << *ER.V.CFP; + break; + case MachineOperand::MO_ExternalSymbol: + OS << "sym:" << *ER.V.SymbolName; + break; + case MachineOperand::MO_GlobalAddress: + OS << "gad:" << ER.V.GV->getName(); + break; + case MachineOperand::MO_BlockAddress: + OS << "blk:" << *ER.V.BA; + break; + case MachineOperand::MO_TargetIndex: + OS << "tgi:" << ER.V.ImmVal; + break; + case MachineOperand::MO_ConstantPoolIndex: + OS << "cpi:" << ER.V.ImmVal; + break; + case MachineOperand::MO_JumpTableIndex: + OS << "jti:" << ER.V.ImmVal; + break; + default: + OS << "???:" << ER.V.ImmVal; + break; + } + return OS; + } + + LLVM_ATTRIBUTE_UNUSED + raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtValue &EV) { + OS << HCE::ExtRoot(EV) << " off:" << EV.Offset; + return OS; + } + + struct PrintIMap { + PrintIMap(const HCE::AssignmentMap &M, const HexagonRegisterInfo &I) + : IMap(M), HRI(I) {} + const HCE::AssignmentMap &IMap; + const HexagonRegisterInfo &HRI; + }; + + LLVM_ATTRIBUTE_UNUSED + raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) { + OS << "{\n"; + for (const std::pair &Q : P.IMap) { + OS << " " << 
PrintInit(Q.first, P.HRI) << " -> {"; + for (unsigned I : Q.second) + OS << ' ' << I; + OS << " }\n"; + } + OS << "}\n"; + return OS; + } +} + +INITIALIZE_PASS_BEGIN(HexagonConstExtenders, "hexagon-cext-opt", + "Hexagon constant-extender optimization", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(HexagonConstExtenders, "hexagon-cext-opt", + "Hexagon constant-extender optimization", false, false) + +static unsigned ReplaceCounter = 0; + +char HCE::ID = 0; + +LLVM_DUMP_METHOD void RangeTree::dump() const { + dbgs() << "Root: " << Root << '\n'; + if (Root) + dump(Root); +} + +void RangeTree::dump(const Node *N) const { + dbgs() << "Node: " << N << '\n'; + dbgs() << " Height: " << N->Height << '\n'; + dbgs() << " Count: " << N->Count << '\n'; + dbgs() << " MaxEnd: " << N->MaxEnd << '\n'; + dbgs() << " Range: " << N->Range << '\n'; + dbgs() << " Left: " << N->Left << '\n'; + dbgs() << " Right: " << N->Right << "\n\n"; + + if (N->Left) + dump(N->Left); + if (N->Right) + dump(N->Right); +} + +void RangeTree::order(Node *N, SmallVectorImpl &Seq) const { + if (N == nullptr) + return; + order(N->Left, Seq); + Seq.push_back(N); + order(N->Right, Seq); +} + +void RangeTree::nodesWith(Node *N, int32_t P, bool CheckA, + SmallVectorImpl &Seq) const { + if (N == nullptr || N->MaxEnd < P) + return; + nodesWith(N->Left, P, CheckA, Seq); + if (N->Range.Min <= P) { + if ((CheckA && N->Range.contains(P)) || (!CheckA && P <= N->Range.Max)) + Seq.push_back(N); + nodesWith(N->Right, P, CheckA, Seq); + } +} + +RangeTree::Node *RangeTree::add(Node *N, const OffsetRange &R) { + if (N == nullptr) + return new Node(R); + + if (N->Range == R) { + N->Count++; + return N; + } + + if (R < N->Range) + N->Left = add(N->Left, R); + else + N->Right = add(N->Right, R); + return rebalance(update(N)); +} + +RangeTree::Node *RangeTree::remove(Node *N, const Node *D) { + assert(N != nullptr); + + if (N != D) { + assert(N->Range != D->Range && "N and D should not be equal"); + if (D->Range < N->Range) + N->Left = remove(N->Left, D); + else + N->Right = remove(N->Right, D); + return rebalance(update(N)); + } + + // We got to the node we need to remove. If any of its children are + // missing, simply replace it with the other child. + if (N->Left == nullptr || N->Right == nullptr) + return (N->Left == nullptr) ? N->Right : N->Left; + + // Find the rightmost child of N->Left, remove it and plug it in place + // of N. + Node *M = N->Left; + while (M->Right) + M = M->Right; + M->Left = remove(N->Left, M); + M->Right = N->Right; + return rebalance(update(M)); +} + +RangeTree::Node *RangeTree::rotateLeft(Node *Lower, Node *Higher) { + assert(Higher->Right == Lower); + // The Lower node is on the right from Higher. Make sure that Lower's + // balance is greater to the right. Otherwise the rotation will create + // an unbalanced tree again. + if (height(Lower->Left) > height(Lower->Right)) + Lower = rotateRight(Lower->Left, Lower); + assert(height(Lower->Left) <= height(Lower->Right)); + Higher->Right = Lower->Left; + update(Higher); + Lower->Left = Higher; + update(Lower); + return Lower; +} + +RangeTree::Node *RangeTree::rotateRight(Node *Lower, Node *Higher) { + assert(Higher->Left == Lower); + // The Lower node is on the left from Higher. Make sure that Lower's + // balance is greater to the left. Otherwise the rotation will create + // an unbalanced tree again. 
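RangeTree, defined earlier in this file, is an AVL tree augmented for interval queries: every node caches its subtree height (for rebalancing) and MaxEnd, the largest interval endpoint underneath it, so nodesWith can prune whole subtrees. Below is a stripped-down sketch of the two cached fields and one rotation, assuming C++14 aggregates; note that the patch's update() only widens MaxEnd, while this version recomputes it from scratch:

#include <algorithm>
#include <cstdint>

struct Node {
  int32_t Min, Max;    // the interval stored at this node
  int32_t MaxEnd;      // cached max of Max over this whole subtree
  unsigned Height = 1; // cached AVL height
  Node *Left = nullptr, *Right = nullptr;
};

unsigned height(const Node *N) { return N ? N->Height : 0; }

// Recompute the caches from the children after a structural change.
Node *update(Node *N) {
  N->Height = 1 + std::max(height(N->Left), height(N->Right));
  N->MaxEnd = std::max({N->Max, N->Left ? N->Left->MaxEnd : N->Max,
                        N->Right ? N->Right->MaxEnd : N->Max});
  return N;
}

// Standard AVL left rotation: Lower (the right child) becomes the root.
Node *rotateLeft(Node *Lower, Node *Higher) {
  Higher->Right = Lower->Left;
  update(Higher);
  Lower->Left = Higher;
  return update(Lower);
}

int main() {
  Node A{0, 10, 10}, B{20, 30, 30};
  A.Right = &B;
  update(&A);                      // A: Height 2, MaxEnd 30
  Node *Root = rotateLeft(&B, &A); // B now roots the subtree
  return (Root == &B && Root->MaxEnd == 30) ? 0 : 1;
}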
+ if (height(Lower->Left) < height(Lower->Right)) + Lower = rotateLeft(Lower->Right, Lower); + assert(height(Lower->Left) >= height(Lower->Right)); + Higher->Left = Lower->Right; + update(Higher); + Lower->Right = Higher; + update(Lower); + return Lower; +} + + +HCE::ExtRoot::ExtRoot(const MachineOperand &Op) { + // Always store ImmVal, since it's the field used for comparisons. + V.ImmVal = 0; + if (Op.isImm()) + ; // Keep 0. Do not use Op.getImm() for value here (treat 0 as the root). + else if (Op.isFPImm()) + V.CFP = Op.getFPImm(); + else if (Op.isSymbol()) + V.SymbolName = Op.getSymbolName(); + else if (Op.isGlobal()) + V.GV = Op.getGlobal(); + else if (Op.isBlockAddress()) + V.BA = Op.getBlockAddress(); + else if (Op.isCPI() || Op.isTargetIndex() || Op.isJTI()) + V.ImmVal = Op.getIndex(); + else + llvm_unreachable("Unexpected operand type"); + + Kind = Op.getType(); + TF = Op.getTargetFlags(); +} + +bool HCE::ExtRoot::operator< (const HCE::ExtRoot &ER) const { + if (Kind != ER.Kind) + return Kind < ER.Kind; + switch (Kind) { + case MachineOperand::MO_Immediate: + case MachineOperand::MO_TargetIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + return V.ImmVal < ER.V.ImmVal; + case MachineOperand::MO_FPImmediate: { + const APFloat &ThisF = V.CFP->getValueAPF(); + const APFloat &OtherF = ER.V.CFP->getValueAPF(); + return ThisF.bitcastToAPInt().ult(OtherF.bitcastToAPInt()); + } + case MachineOperand::MO_ExternalSymbol: + return StringRef(V.SymbolName) < StringRef(ER.V.SymbolName); + case MachineOperand::MO_GlobalAddress: + assert(V.GV->hasName() && ER.V.GV->hasName()); + return V.GV->getName() < ER.V.GV->getName(); + case MachineOperand::MO_BlockAddress: { + const BasicBlock *ThisB = V.BA->getBasicBlock(); + const BasicBlock *OtherB = ER.V.BA->getBasicBlock(); + assert(ThisB->getParent() == OtherB->getParent()); + const Function &F = *ThisB->getParent(); + return std::distance(F.begin(), ThisB->getIterator()) < + std::distance(F.begin(), OtherB->getIterator()); + } + } + return V.ImmVal < ER.V.ImmVal; +} + +HCE::ExtValue::ExtValue(const MachineOperand &Op) : ExtRoot(Op) { + if (Op.isImm()) + Offset = Op.getImm(); + else if (Op.isFPImm() || Op.isJTI()) + Offset = 0; + else if (Op.isSymbol() || Op.isGlobal() || Op.isBlockAddress() || + Op.isCPI() || Op.isTargetIndex()) + Offset = Op.getOffset(); + else + llvm_unreachable("Unexpected operand type"); +} + +bool HCE::ExtValue::operator< (const HCE::ExtValue &EV) const { + const ExtRoot &ER = *this; + if (!(ER == ExtRoot(EV))) + return ER < EV; + return Offset < EV.Offset; +} + +HCE::ExtValue::operator MachineOperand() const { + switch (Kind) { + case MachineOperand::MO_Immediate: + return MachineOperand::CreateImm(V.ImmVal + Offset); + case MachineOperand::MO_FPImmediate: + assert(Offset == 0); + return MachineOperand::CreateFPImm(V.CFP); + case MachineOperand::MO_ExternalSymbol: + assert(Offset == 0); + return MachineOperand::CreateES(V.SymbolName, TF); + case MachineOperand::MO_GlobalAddress: + return MachineOperand::CreateGA(V.GV, Offset, TF); + case MachineOperand::MO_BlockAddress: + return MachineOperand::CreateBA(V.BA, Offset, TF); + case MachineOperand::MO_TargetIndex: + return MachineOperand::CreateTargetIndex(V.ImmVal, Offset, TF); + case MachineOperand::MO_ConstantPoolIndex: + return MachineOperand::CreateCPI(V.ImmVal, Offset, TF); + case MachineOperand::MO_JumpTableIndex: + assert(Offset == 0); + default: + llvm_unreachable("Unhandled kind"); + } +} + +bool HCE::isStoreImmediate(unsigned 
Opc) const { + switch (Opc) { + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: + return true; + default: + break; + } + return false; +} + +bool HCE::isRegOffOpcode(unsigned Opc) const { + switch (Opc) { + case Hexagon::L2_loadrub_io: + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadruh_io: + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadri_io: + case Hexagon::L2_loadrd_io: + case Hexagon::L2_loadbzw2_io: + case Hexagon::L2_loadbzw4_io: + case Hexagon::L2_loadbsw2_io: + case Hexagon::L2_loadbsw4_io: + case Hexagon::L2_loadalignh_io: + case Hexagon::L2_loadalignb_io: + case Hexagon::L2_ploadrubt_io: + case Hexagon::L2_ploadrubf_io: + case Hexagon::L2_ploadrbt_io: + case Hexagon::L2_ploadrbf_io: + case Hexagon::L2_ploadruht_io: + case Hexagon::L2_ploadruhf_io: + case Hexagon::L2_ploadrht_io: + case Hexagon::L2_ploadrhf_io: + case Hexagon::L2_ploadrit_io: + case Hexagon::L2_ploadrif_io: + case Hexagon::L2_ploadrdt_io: + case Hexagon::L2_ploadrdf_io: + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerf_io: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerd_io: + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S2_pstorerft_io: + case Hexagon::S2_pstorerff_io: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: + case Hexagon::A2_addi: + return true; + default: + break; + } + return false; +} + +unsigned HCE::getRegOffOpcode(unsigned ExtOpc) const { + // If there exists an instruction that takes a register and offset, + // that corresponds to the ExtOpc, return it, otherwise return 0. 
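getRegOffOpcode below reads the addressing mode out of MCInstrDesc::TSFlags with a shift-and-mask, the standard encoding for per-target instruction properties. Here is a self-contained illustration of that decoding; the field position and mask are made up for the example (Hexagon's real ones live in HexagonBaseInfo.h):

#include <cassert>
#include <cstdint>

// Made-up field layout: bits [43:41] of TSFlags hold the addressing mode.
constexpr unsigned AddrModePos = 41;
constexpr uint64_t AddrModeMask = 0x7;

enum AddrMode : unsigned { NoAddrMode, Absolute, BaseImmOffset, BaseLongOffset };

AddrMode getAddrMode(uint64_t TSFlags) {
  return AddrMode((TSFlags >> AddrModePos) & AddrModeMask);
}

int main() {
  uint64_t Flags = uint64_t(BaseImmOffset) << AddrModePos;
  assert(getAddrMode(Flags) == BaseImmOffset);
}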
+ using namespace Hexagon; + switch (ExtOpc) { + case A2_tfrsi: return A2_addi; + default: + break; + } + const MCInstrDesc &D = HII->get(ExtOpc); + if (D.mayLoad() || D.mayStore()) { + uint64_t F = D.TSFlags; + unsigned AM = (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; + switch (AM) { + case HexagonII::Absolute: + case HexagonII::AbsoluteSet: + case HexagonII::BaseLongOffset: + switch (ExtOpc) { + case PS_loadrubabs: + case L4_loadrub_ap: + case L4_loadrub_ur: return L2_loadrub_io; + case PS_loadrbabs: + case L4_loadrb_ap: + case L4_loadrb_ur: return L2_loadrb_io; + case PS_loadruhabs: + case L4_loadruh_ap: + case L4_loadruh_ur: return L2_loadruh_io; + case PS_loadrhabs: + case L4_loadrh_ap: + case L4_loadrh_ur: return L2_loadrh_io; + case PS_loadriabs: + case L4_loadri_ap: + case L4_loadri_ur: return L2_loadri_io; + case PS_loadrdabs: + case L4_loadrd_ap: + case L4_loadrd_ur: return L2_loadrd_io; + case L4_loadbzw2_ap: + case L4_loadbzw2_ur: return L2_loadbzw2_io; + case L4_loadbzw4_ap: + case L4_loadbzw4_ur: return L2_loadbzw4_io; + case L4_loadbsw2_ap: + case L4_loadbsw2_ur: return L2_loadbsw2_io; + case L4_loadbsw4_ap: + case L4_loadbsw4_ur: return L2_loadbsw4_io; + case L4_loadalignh_ap: + case L4_loadalignh_ur: return L2_loadalignh_io; + case L4_loadalignb_ap: + case L4_loadalignb_ur: return L2_loadalignb_io; + case L4_ploadrubt_abs: return L2_ploadrubt_io; + case L4_ploadrubf_abs: return L2_ploadrubf_io; + case L4_ploadrbt_abs: return L2_ploadrbt_io; + case L4_ploadrbf_abs: return L2_ploadrbf_io; + case L4_ploadruht_abs: return L2_ploadruht_io; + case L4_ploadruhf_abs: return L2_ploadruhf_io; + case L4_ploadrht_abs: return L2_ploadrht_io; + case L4_ploadrhf_abs: return L2_ploadrhf_io; + case L4_ploadrit_abs: return L2_ploadrit_io; + case L4_ploadrif_abs: return L2_ploadrif_io; + case L4_ploadrdt_abs: return L2_ploadrdt_io; + case L4_ploadrdf_abs: return L2_ploadrdf_io; + case PS_storerbabs: + case S4_storerb_ap: + case S4_storerb_ur: return S2_storerb_io; + case PS_storerhabs: + case S4_storerh_ap: + case S4_storerh_ur: return S2_storerh_io; + case PS_storerfabs: + case S4_storerf_ap: + case S4_storerf_ur: return S2_storerf_io; + case PS_storeriabs: + case S4_storeri_ap: + case S4_storeri_ur: return S2_storeri_io; + case PS_storerdabs: + case S4_storerd_ap: + case S4_storerd_ur: return S2_storerd_io; + case S4_pstorerbt_abs: return S2_pstorerbt_io; + case S4_pstorerbf_abs: return S2_pstorerbf_io; + case S4_pstorerht_abs: return S2_pstorerht_io; + case S4_pstorerhf_abs: return S2_pstorerhf_io; + case S4_pstorerft_abs: return S2_pstorerft_io; + case S4_pstorerff_abs: return S2_pstorerff_io; + case S4_pstorerit_abs: return S2_pstorerit_io; + case S4_pstorerif_abs: return S2_pstorerif_io; + case S4_pstorerdt_abs: return S2_pstorerdt_io; + case S4_pstorerdf_abs: return S2_pstorerdf_io; + default: + break; + } + break; + case HexagonII::BaseImmOffset: + if (!isStoreImmediate(ExtOpc)) + return ExtOpc; + break; + default: + break; + } + } + return 0; +} + +unsigned HCE::getDirectRegReplacement(unsigned ExtOpc) const { + switch (ExtOpc) { + case Hexagon::A2_addi: return Hexagon::A2_add; + case Hexagon::A2_andir: return Hexagon::A2_and; + case Hexagon::A2_combineii: return Hexagon::A4_combineri; + case Hexagon::A2_orir: return Hexagon::A2_or; + case Hexagon::A2_paddif: return Hexagon::A2_paddf; + case Hexagon::A2_paddit: return Hexagon::A2_paddt; + case Hexagon::A2_subri: return Hexagon::A2_sub; + case Hexagon::A2_tfrsi: return TargetOpcode::COPY; + case Hexagon::A4_cmpbeqi: 
return Hexagon::A4_cmpbeq; + case Hexagon::A4_cmpbgti: return Hexagon::A4_cmpbgt; + case Hexagon::A4_cmpbgtui: return Hexagon::A4_cmpbgtu; + case Hexagon::A4_cmpheqi: return Hexagon::A4_cmpheq; + case Hexagon::A4_cmphgti: return Hexagon::A4_cmphgt; + case Hexagon::A4_cmphgtui: return Hexagon::A4_cmphgtu; + case Hexagon::A4_combineii: return Hexagon::A4_combineir; + case Hexagon::A4_combineir: return TargetOpcode::REG_SEQUENCE; + case Hexagon::A4_combineri: return TargetOpcode::REG_SEQUENCE; + case Hexagon::A4_rcmpeqi: return Hexagon::A4_rcmpeq; + case Hexagon::A4_rcmpneqi: return Hexagon::A4_rcmpneq; + case Hexagon::C2_cmoveif: return Hexagon::A2_tfrpf; + case Hexagon::C2_cmoveit: return Hexagon::A2_tfrpt; + case Hexagon::C2_cmpeqi: return Hexagon::C2_cmpeq; + case Hexagon::C2_cmpgti: return Hexagon::C2_cmpgt; + case Hexagon::C2_cmpgtui: return Hexagon::C2_cmpgtu; + case Hexagon::C2_muxii: return Hexagon::C2_muxir; + case Hexagon::C2_muxir: return Hexagon::C2_mux; + case Hexagon::C2_muxri: return Hexagon::C2_mux; + case Hexagon::C4_cmpltei: return Hexagon::C4_cmplte; + case Hexagon::C4_cmplteui: return Hexagon::C4_cmplteu; + case Hexagon::C4_cmpneqi: return Hexagon::C4_cmpneq; + case Hexagon::M2_accii: return Hexagon::M2_acci; // T -> T + /* No M2_macsin */ + case Hexagon::M2_macsip: return Hexagon::M2_maci; // T -> T + case Hexagon::M2_mpysin: return Hexagon::M2_mpyi; + case Hexagon::M2_mpysip: return Hexagon::M2_mpyi; + case Hexagon::M2_mpysmi: return Hexagon::M2_mpyi; + case Hexagon::M2_naccii: return Hexagon::M2_nacci; // T -> T + case Hexagon::M4_mpyri_addi: return Hexagon::M4_mpyri_addr; + case Hexagon::M4_mpyri_addr: return Hexagon::M4_mpyrr_addr; // _ -> T + case Hexagon::M4_mpyrr_addi: return Hexagon::M4_mpyrr_addr; // _ -> T + case Hexagon::S4_addaddi: return Hexagon::M2_acci; // _ -> T + case Hexagon::S4_addi_asl_ri: return Hexagon::S2_asl_i_r_acc; // T -> T + case Hexagon::S4_addi_lsr_ri: return Hexagon::S2_lsr_i_r_acc; // T -> T + case Hexagon::S4_andi_asl_ri: return Hexagon::S2_asl_i_r_and; // T -> T + case Hexagon::S4_andi_lsr_ri: return Hexagon::S2_lsr_i_r_and; // T -> T + case Hexagon::S4_ori_asl_ri: return Hexagon::S2_asl_i_r_or; // T -> T + case Hexagon::S4_ori_lsr_ri: return Hexagon::S2_lsr_i_r_or; // T -> T + case Hexagon::S4_subaddi: return Hexagon::M2_subacc; // _ -> T + case Hexagon::S4_subi_asl_ri: return Hexagon::S2_asl_i_r_nac; // T -> T + case Hexagon::S4_subi_lsr_ri: return Hexagon::S2_lsr_i_r_nac; // T -> T + + // Store-immediates: + case Hexagon::S4_storeirbf_io: return Hexagon::S2_pstorerbf_io; + case Hexagon::S4_storeirb_io: return Hexagon::S2_storerb_io; + case Hexagon::S4_storeirbt_io: return Hexagon::S2_pstorerbt_io; + case Hexagon::S4_storeirhf_io: return Hexagon::S2_pstorerhf_io; + case Hexagon::S4_storeirh_io: return Hexagon::S2_storerh_io; + case Hexagon::S4_storeirht_io: return Hexagon::S2_pstorerht_io; + case Hexagon::S4_storeirif_io: return Hexagon::S2_pstorerif_io; + case Hexagon::S4_storeiri_io: return Hexagon::S2_storeri_io; + case Hexagon::S4_storeirit_io: return Hexagon::S2_pstorerit_io; + + default: + break; + } + return 0; +} + +// Return the allowable deviation from the current value of Rb which the +// instruction MI can accommodate. +// The instruction MI is a user of register Rb, which is defined via an +// extender. It may be possible for MI to be tweaked to work for a register +// defined with a slightly different value. For example +// ... = L2_loadrub_io Rb, 0 +// can be modified to be +// ...
= L2_loadrub_io Rb', 1 +// if Rb' = Rb-1. +OffsetRange HCE::getOffsetRange(Register Rb, const MachineInstr &MI) const { + unsigned Opc = MI.getOpcode(); + // Instructions that are constant-extended may be replaced with something + // else that no longer offers the same range as the original. + if (!isRegOffOpcode(Opc) || HII->isConstExtended(MI)) + return OffsetRange::zero(); + + if (Opc == Hexagon::A2_addi) { + const MachineOperand &Op1 = MI.getOperand(1), &Op2 = MI.getOperand(2); + if (Rb != Register(Op1) || !Op2.isImm()) + return OffsetRange::zero(); + OffsetRange R = { -(1<<15)+1, (1<<15)-1, 1 }; + return R.shift(Op2.getImm()); + } + + // HII::getBaseAndOffsetPosition returns the increment position as "offset". + if (HII->isPostIncrement(MI)) + return OffsetRange::zero(); + + const MCInstrDesc &D = HII->get(Opc); + assert(D.mayLoad() || D.mayStore()); + + unsigned BaseP, OffP; + if (!HII->getBaseAndOffsetPosition(MI, BaseP, OffP) || + Rb != Register(MI.getOperand(BaseP)) || + !MI.getOperand(OffP).isImm()) + return OffsetRange::zero(); + + uint64_t F = (D.TSFlags >> HexagonII::MemAccessSizePos) & + HexagonII::MemAccesSizeMask; + uint8_t A = HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F)); + unsigned L = Log2_32(A); + unsigned S = 10+L; // sint11_L + int32_t Min = -alignDown((1<<S)-1, A); + int32_t Off = MI.getOperand(OffP).getImm(); + int32_t Max = Off >= 0 ? 0 : -Off; + + OffsetRange R = { Min, Max, A }; + return R.shift(Off); +} + +// Return the allowable deviation from the current value of the extender ED, +// for which the instruction corresponding to ED can be modified without +// using an extender. +// The instruction uses the extender directly. It will be replaced with +// another instruction, say MJ, where the extender will be replaced with a +// register. MJ can allow some variability with respect to the value of +// that register, as is the case with indexed memory instructions. +OffsetRange HCE::getOffsetRange(const ExtDesc &ED) const { + // The only way that there can be a non-zero range available is if + // the instruction using ED will be converted to an indexed memory + // instruction. + unsigned IdxOpc = getRegOffOpcode(ED.UseMI->getOpcode()); + switch (IdxOpc) { + case 0: + return OffsetRange::zero(); + case Hexagon::A2_addi: // s16 + return { -32767, 32767, 1 }; + case Hexagon::A2_subri: // s10 + return { -511, 511, 1 }; + } + + if (!ED.UseMI->mayLoad() && !ED.UseMI->mayStore()) + return OffsetRange::zero(); + const MCInstrDesc &D = HII->get(IdxOpc); + uint64_t F = (D.TSFlags >> HexagonII::MemAccessSizePos) & + HexagonII::MemAccesSizeMask; + uint8_t A = HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F)); + unsigned L = Log2_32(A); + unsigned S = 10+L; // sint11_L + int32_t Min = -alignDown((1<<S)-1, A); + int32_t Max = alignDown((1<<S)-1, A); + return { Min, Max, A }; +} + +// Get the allowable deviation from the current value of Rd, by checking +// all uses of Rd. +OffsetRange HCE::getOffsetRange(Register Rd) const { + OffsetRange Range; + for (const MachineOperand &Op : MRI->use_operands(Rd.Reg)) { + // Make sure that the register being used by this operand is identical + // to the register that was defined: using a different subregister + // precludes any non-trivial range. + if (Rd != Register(Op)) + return OffsetRange::zero(); + Range.intersect(getOffsetRange(Rd, *Op.getParent())); + } + return Range; +} + +void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) { + unsigned Opc = MI.getOpcode(); + ExtDesc ED; + ED.OpNum = OpNum; + + bool IsLoad = MI.mayLoad(); + bool IsStore = MI.mayStore(); + + if (IsLoad || IsStore) { + unsigned AM = HII->getAddrMode(MI); + switch (AM) { + // (Re: ##Off + Rb<<S) = Rd: ##Val + case HexagonII::Absolute: // (__: ## + __<<_) + break; + case HexagonII::AbsoluteSet: // (Rd: ## + __<<_) + ED.Rd = MI.getOperand(OpNum-1); + ED.IsDef = true; + break; + case HexagonII::BaseImmOffset: // (__: ## + Rs<<0) + ED.Expr.Rs = MI.getOperand(OpNum-1); + break; + case HexagonII::BaseLongOffset: // (__: ## + Rs<<S) + ED.Expr.Rs = MI.getOperand(OpNum-2); + ED.Expr.S = MI.getOperand(OpNum-1).getImm(); + break; + default: + llvm_unreachable("Unexpected addressing mode"); + } + } else if (Opc == Hexagon::A2_tfrsi) { + // (Rd: ##Val) + ED.Rd = MI.getOperand(0); + ED.IsDef = true; + } else if (Opc == Hexagon::A2_addi) { + // (Rd: add(Rs,##Val)) + ED.Rd = MI.getOperand(0); + ED.Expr.Rs = MI.getOperand(1); + ED.IsDef = true; + } else if (Opc == Hexagon::A2_subri) { + // (Rd: sub(##Val,Rs)) + ED.Rd = MI.getOperand(0); + ED.Expr.Rs = MI.getOperand(2); + ED.Expr.Neg = true; + ED.IsDef = true; + } + + ED.UseMI = &MI; + Extenders.push_back(ED); +} + +void HCE::collectInstr(MachineInstr &MI) { + if (!HII->isConstExtended(MI)) + return; + + // Skip some non-convertible instructions. + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case Hexagon::M2_macsin: // There is no Rx -= mpyi(Rs,Rt).
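+ // The remaining cases are similarly non-convertible, e.g. C4_addipc + // adds its immediate to PC, which has no register-operand form.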
+ case Hexagon::C4_addipc: + case Hexagon::S4_or_andi: + case Hexagon::S4_or_andix: + case Hexagon::S4_or_ori: + return; + } + recordExtender(MI, HII->getCExtOpNum(MI)); +} + +void HCE::collect(MachineFunction &MF) { + Extenders.clear(); + for (MachineBasicBlock &MBB : MF) + for (MachineInstr &MI : MBB) + collectInstr(MI); +} + +void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, + AssignmentMap &IMap) { + // Sanity check: make sure that all extenders in the range [Begin..End) + // share the same root ER. + for (unsigned I = Begin; I != End; ++I) + assert(ER == ExtRoot(Extenders[I].getOp())); + + // Construct the list of ranges, such that for each P in Ranges[I], + // a register Reg = ER+P can be used in place of Extender[I]. If the + // instruction allows, uses in the form of Reg+Off are considered + // (here, Off = required_value - P). + std::vector<OffsetRange> Ranges(End-Begin); + + // For each extender that is a def, visit all uses of the defined register, + // and produce an offset range that works for all uses. The def doesn't + // have to be checked, because it can become dead if all uses can be updated + // to use a different reg/offset. + for (unsigned I = Begin; I != End; ++I) { + const ExtDesc &ED = Extenders[I]; + if (!ED.IsDef) + continue; + ExtValue EV(ED); + DEBUG(dbgs() << " =" << I << ". " << EV << " " << ED << '\n'); + assert(ED.Rd.Reg != 0); + Ranges[I-Begin] = getOffsetRange(ED.Rd).shift(EV.Offset); + // A2_tfrsi is a special case: it will be replaced with A2_addi, which + // has a 16-bit signed offset. This means that A2_tfrsi not only has a + // range coming from its uses, but also from the fact that its replacement + // has a range as well. + if (ED.UseMI->getOpcode() == Hexagon::A2_tfrsi) { + int32_t D = alignDown(32767, Ranges[I-Begin].Align); // XXX hardcoded + Ranges[I-Begin].extendBy(-D).extendBy(D); + } + } + + // Visit all non-def extenders. For each one, determine the offset range + // available for it. + for (unsigned I = Begin; I != End; ++I) { + const ExtDesc &ED = Extenders[I]; + if (ED.IsDef) + continue; + ExtValue EV(ED); + DEBUG(dbgs() << " " << I << ". " << EV << " " << ED << '\n'); + OffsetRange Dev = getOffsetRange(ED); + Ranges[I-Begin].intersect(Dev.shift(EV.Offset)); + } + + // Here for each I there is a corresponding Range[I]. Construct the + // inverse map, that to each range will assign the set of indexes in + // [Begin..End) that this range corresponds to. + std::map<OffsetRange, IndexList> RangeMap; + for (unsigned I = Begin; I != End; ++I) + RangeMap[Ranges[I-Begin]].insert(I); + + DEBUG({ + dbgs() << "Ranges\n"; + for (unsigned I = Begin; I != End; ++I) + dbgs() << " " << I << ". " << Ranges[I-Begin] << '\n'; + dbgs() << "RangeMap\n"; + for (auto &P : RangeMap) { + dbgs() << " " << P.first << " ->"; + for (unsigned I : P.second) + dbgs() << ' ' << I; + dbgs() << '\n'; + } + }); + + // Select the definition points, and generate the assignment between + // these points and the uses. + + // For each candidate offset, keep a pair CandData consisting of + // the total number of ranges containing that candidate, and the + // vector of corresponding RangeTree nodes.
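+ // Small worked example with made-up numbers: for Ranges {[0,8], [4,12]}, + // the endpoints 0, 8, 4 and 12 become the candidate offsets; offset 4 + // lies in both ranges, so both extenders can share one initializer ER+4.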
+ using CandData = std::pair<unsigned, SmallVector<RangeTree::Node*,8>>; + std::map<int32_t, CandData> CandMap; + + RangeTree Tree; + for (const OffsetRange &R : Ranges) + Tree.add(R); + SmallVector<RangeTree::Node*,8> Nodes; + Tree.order(Nodes); + + auto MaxAlign = [](const SmallVectorImpl<RangeTree::Node*> &Nodes) { + uint8_t Align = 1; + for (RangeTree::Node *N : Nodes) + Align = std::max(Align, N->Range.Align); + return Align; + }; + + // Construct the set of all potential definition points from the endpoints + // of the ranges. If a given endpoint also belongs to a different range, + // but with a higher alignment, also consider the more-highly-aligned + // value of this endpoint. + std::set<int32_t> CandSet; + for (RangeTree::Node *N : Nodes) { + const OffsetRange &R = N->Range; + uint8_t A0 = MaxAlign(Tree.nodesWith(R.Min, false)); + CandSet.insert(R.Min); + if (R.Align < A0) + CandSet.insert(R.Min < 0 ? -alignDown(-R.Min, A0) : alignTo(R.Min, A0)); + uint8_t A1 = MaxAlign(Tree.nodesWith(R.Max, false)); + CandSet.insert(R.Max); + if (R.Align < A1) + CandSet.insert(R.Max < 0 ? -alignTo(-R.Max, A1) : alignDown(R.Max, A1)); + } + + // Build the assignment map: candidate C -> { list of extender indexes }. + // This has to be done iteratively: + // - pick the candidate that covers the maximum number of extenders, + // - add the candidate to the map, + // - remove the extenders from the pool. + while (true) { + using CMap = std::map<int32_t,unsigned>; + CMap Counts; + for (auto It = CandSet.begin(), Et = CandSet.end(); It != Et; ) { + auto &&V = Tree.nodesWith(*It); + unsigned N = std::accumulate(V.begin(), V.end(), 0u, + [](unsigned Acc, const RangeTree::Node *N) { + return Acc + N->Count; + }); + if (N != 0) + Counts.insert({*It, N}); + It = (N != 0) ? std::next(It) : CandSet.erase(It); + } + if (Counts.empty()) + break; + + // Find the best candidate with respect to the number of extenders covered. + auto BestIt = std::max_element(Counts.begin(), Counts.end(), + [](const CMap::value_type &A, const CMap::value_type &B) { + return A.second < B.second || + (A.second == B.second && A < B); + }); + int32_t Best = BestIt->first; + ExtValue BestV(ER, Best); + for (RangeTree::Node *N : Tree.nodesWith(Best)) { + for (unsigned I : RangeMap[N->Range]) + IMap[{BestV,Extenders[I].Expr}].insert(I); + Tree.erase(N); + } + } + + DEBUG(dbgs() << "IMap (before fixup) = " << PrintIMap(IMap, *HRI)); + + // There is some ambiguity in what initializer should be used, if the + // descriptor's subexpression is non-trivial: it can be the entire + // subexpression (which is what has been done so far), or it can be + // the extender's value itself, if all corresponding extenders have the + // exact value of the initializer (i.e. require offset of 0). + + // To reduce the number of initializers, merge such special cases. + for (std::pair<const ExtenderInit,IndexList> &P : IMap) { + // Skip trivial initializers. + if (P.first.second.trivial()) + continue; + // If the corresponding trivial initializer does not exist, skip this + // entry. + const ExtValue &EV = P.first.first; + AssignmentMap::iterator F = IMap.find({EV, ExtExpr()}); + if (F == IMap.end()) + continue; + // Finally, check if all extenders have the same value as the initializer.
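+ // E.g. an initializer for ER+Rs whose extenders all need offset 0 can be + // folded into the trivial initializer for ER, saving one initializer.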
+ auto SameValue = [&EV,this](unsigned I) { + const ExtDesc &ED = Extenders[I]; + return ExtValue(ED).Offset == EV.Offset; + }; + if (all_of(P.second, SameValue)) { + F->second.insert(P.second.begin(), P.second.end()); + P.second.clear(); + } + } + + DEBUG(dbgs() << "IMap (after fixup) = " << PrintIMap(IMap, *HRI)); +} + +void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs, + LocDefMap &Defs) { + if (Refs.empty()) + return; + + // The placement calculation is somewhat simple right now: it finds a + // single location for the def that dominates all refs. Since this may + // place the def far from the uses, producing several locations for + // defs that collectively dominate all refs could be better. + // For now only do the single one. + DenseSet<MachineBasicBlock*> Blocks; + DenseSet<MachineInstr*> RefMIs; + const ExtDesc &ED0 = Extenders[Refs[0]]; + MachineBasicBlock *DomB = ED0.UseMI->getParent(); + RefMIs.insert(ED0.UseMI); + Blocks.insert(DomB); + for (unsigned i = 1, e = Refs.size(); i != e; ++i) { + const ExtDesc &ED = Extenders[Refs[i]]; + MachineBasicBlock *MBB = ED.UseMI->getParent(); + RefMIs.insert(ED.UseMI); + DomB = MDT->findNearestCommonDominator(DomB, MBB); + Blocks.insert(MBB); + } + +#ifndef NDEBUG + // The block DomB should be dominated by the def of each register used + // in the initializer. + Register Rs = ExtI.second.Rs; // Only one reg allowed now. + const MachineInstr *DefI = Rs.isVReg() ? MRI->getVRegDef(Rs.Reg) : nullptr; + + // This should be guaranteed given that the entire expression is used + // at each instruction in Refs. Add an assertion just in case. + assert(!DefI || MDT->dominates(DefI->getParent(), DomB)); +#endif + + MachineBasicBlock::iterator It; + if (Blocks.count(DomB)) { + // Try to find the latest possible location for the def. + MachineBasicBlock::iterator End = DomB->end(); + for (It = DomB->begin(); It != End; ++It) + if (RefMIs.count(&*It)) + break; + assert(It != End && "Should have found a ref in DomB"); + } else { + // DomB does not contain any refs. + It = DomB->getFirstTerminator(); + } + Loc DefLoc(DomB, It); + Defs.emplace(DefLoc, Refs); +} + +HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) { + unsigned DefR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass); + MachineBasicBlock &MBB = *DefL.Block; + MachineBasicBlock::iterator At = DefL.At; + DebugLoc dl = DefL.Block->findDebugLoc(DefL.At); + const ExtValue &EV = ExtI.first; + MachineOperand ExtOp(EV); + + const ExtExpr &Ex = ExtI.second; + const MachineInstr *InitI = nullptr; + + if (Ex.Rs.isSlot()) { + assert(Ex.S == 0 && "Cannot have a shift of a stack slot"); + assert(!Ex.Neg && "Cannot subtract a stack slot"); + // DefR = PS_fi Rb,##EV + InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::PS_fi), DefR) + .add(MachineOperand(Ex.Rs)) + .add(ExtOp); + } else { + assert((Ex.Rs.Reg == 0 || Ex.Rs.isVReg()) && "Expecting virtual register"); + if (Ex.trivial()) { + // DefR = ##EV + InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_tfrsi), DefR) + .add(ExtOp); + } else if (Ex.S == 0) { + if (Ex.Neg) { + // DefR = sub(##EV,Rb) + InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_subri), DefR) + .add(ExtOp) + .add(MachineOperand(Ex.Rs)); + } else { + // DefR = add(Rb,##EV) + InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_addi), DefR) + .add(MachineOperand(Ex.Rs)) + .add(ExtOp); + } + } else { + unsigned NewOpc = Ex.Neg ?
Hexagon::S4_subi_asl_ri + : Hexagon::S4_addi_asl_ri; + // DefR = add(##EV,asl(Rb,S)) + InitI = BuildMI(MBB, At, dl, HII->get(NewOpc), DefR) + .add(ExtOp) + .add(MachineOperand(Ex.Rs)) + .addImm(Ex.S); + } + } + + assert(InitI); + (void)InitI; + DEBUG(dbgs() << "Inserted def in bb#" << MBB.getNumber() + << " for initializer: " << PrintInit(ExtI, *HRI) + << "\n " << *InitI); + return { DefR, 0 }; +} + +// Replace the extender at index Idx with the register ExtR. +bool HCE::replaceInstrExact(const ExtDesc &ED, Register ExtR) { + MachineInstr &MI = *ED.UseMI; + MachineBasicBlock &MBB = *MI.getParent(); + MachineBasicBlock::iterator At = MI.getIterator(); + DebugLoc dl = MI.getDebugLoc(); + unsigned ExtOpc = MI.getOpcode(); + + // With a few exceptions, direct replacement amounts to creating an + // instruction with a corresponding register opcode, with all operands + // the same, except for the register used in place of the extender. + unsigned RegOpc = getDirectRegReplacement(ExtOpc); + + if (RegOpc == TargetOpcode::REG_SEQUENCE) { + if (ExtOpc == Hexagon::A4_combineri) + BuildMI(MBB, At, dl, HII->get(RegOpc)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .addImm(Hexagon::isub_hi) + .add(MachineOperand(ExtR)) + .addImm(Hexagon::isub_lo); + else if (ExtOpc == Hexagon::A4_combineir) + BuildMI(MBB, At, dl, HII->get(RegOpc)) + .add(MI.getOperand(0)) + .add(MachineOperand(ExtR)) + .addImm(Hexagon::isub_hi) + .add(MI.getOperand(2)) + .addImm(Hexagon::isub_lo); + else + llvm_unreachable("Unexpected opcode became REG_SEQUENCE"); + MBB.erase(MI); + return true; + } + if (ExtOpc == Hexagon::C2_cmpgei || ExtOpc == Hexagon::C2_cmpgeui) { + unsigned NewOpc = ExtOpc == Hexagon::C2_cmpgei ? Hexagon::C2_cmplt + : Hexagon::C2_cmpltu; + BuildMI(MBB, At, dl, HII->get(NewOpc)) + .add(MI.getOperand(0)) + .add(MachineOperand(ExtR)) + .add(MI.getOperand(1)); + MBB.erase(MI); + return true; + } + + if (RegOpc != 0) { + MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(RegOpc)); + unsigned RegN = ED.OpNum; + // Copy all operands except the one that has the extender. + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + if (i != RegN) + MIB.add(MI.getOperand(i)); + else + MIB.add(MachineOperand(ExtR)); + } + MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MBB.erase(MI); + return true; + } + + if ((MI.mayLoad() || MI.mayStore()) && !isStoreImmediate(ExtOpc)) { + // For memory instructions, there is an asymmetry in the addressing + // modes. Addressing modes allowing extenders can be replaced with + // addressing modes that use registers, but the order of operands + // (or even their number) may be different. + // Replacements: + // BaseImmOffset (io) -> BaseRegOffset (rr) + // BaseLongOffset (ur) -> BaseRegOffset (rr) + unsigned RegOpc, Shift; + unsigned AM = HII->getAddrMode(MI); + if (AM == HexagonII::BaseImmOffset) { + RegOpc = HII->changeAddrMode_io_rr(ExtOpc); + Shift = 0; + } else if (AM == HexagonII::BaseLongOffset) { + // Loads: Rd = L4_loadri_ur Rs, S, ## + // Stores: S4_storeri_ur Rs, S, ##, Rt + RegOpc = HII->changeAddrMode_ur_rr(ExtOpc); + Shift = MI.getOperand(MI.mayLoad() ? 
2 : 1).getImm(); + } else { + llvm_unreachable("Unexpected addressing mode"); + } +#ifndef NDEBUG + if (RegOpc == -1u) { + dbgs() << "\nExtOpc: " << HII->getName(ExtOpc) << " has no rr version\n"; + llvm_unreachable("No corresponding rr instruction"); + } +#endif + + unsigned BaseP, OffP; + HII->getBaseAndOffsetPosition(MI, BaseP, OffP); + + // Build an rr instruction: (RegOff + RegBase<<0) + MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(RegOpc)); + // First, add the def for loads. + if (MI.mayLoad()) + MIB.add(getLoadResultOp(MI)); + // Handle possible predication. + if (HII->isPredicated(MI)) + MIB.add(getPredicateOp(MI)); + // Build the address. + MIB.add(MachineOperand(ExtR)); // RegOff + MIB.add(MI.getOperand(BaseP)); // RegBase + MIB.addImm(Shift); // << Shift + // Add the stored value for stores. + if (MI.mayStore()) + MIB.add(getStoredValueOp(MI)); + MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MBB.erase(MI); + return true; + } + +#ifndef NDEBUG + dbgs() << '\n' << MI; +#endif + llvm_unreachable("Unhandled exact replacement"); + return false; +} + +// Replace the extender ED with a form corresponding to the initializer ExtI. +bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI, + Register ExtR, int32_t &Diff) { + MachineInstr &MI = *ED.UseMI; + MachineBasicBlock &MBB = *MI.getParent(); + MachineBasicBlock::iterator At = MI.getIterator(); + DebugLoc dl = MI.getDebugLoc(); + unsigned ExtOpc = MI.getOpcode(); + + if (ExtOpc == Hexagon::A2_tfrsi) { + // A2_tfrsi is a special case: it's replaced with A2_addi, which introduces + // another range. One range is the one that's common to all tfrsi's uses, + // this one is the range of immediates in A2_addi. When calculating ranges, + // the addi's 16-bit argument was included, so now we need to make it such + // that the produced value is in the range for the uses alone. + // Most of the time, simply adding Diff will make the addi produce exact + // result, but if Diff is outside of the 16-bit range, some adjustment + // will be needed. + unsigned IdxOpc = getRegOffOpcode(ExtOpc); + assert(IdxOpc == Hexagon::A2_addi); + + // Clamp Diff to the 16 bit range. + int32_t D = isInt<16>(Diff) ? Diff : (Diff > 32767 ? 32767 : -32767); + BuildMI(MBB, At, dl, HII->get(IdxOpc)) + .add(MI.getOperand(0)) + .add(MachineOperand(ExtR)) + .addImm(D); + Diff -= D; +#ifndef NDEBUG + // Make sure the output is within allowable range for uses. + OffsetRange Uses = getOffsetRange(MI.getOperand(0)); + if (!Uses.contains(Diff)) + dbgs() << "Diff: " << Diff << " out of range " << Uses + << " for " << MI; + assert(Uses.contains(Diff)); +#endif + MBB.erase(MI); + return true; + } + + const ExtValue &EV = ExtI.first; (void)EV; + const ExtExpr &Ex = ExtI.second; (void)Ex; + + if (ExtOpc == Hexagon::A2_addi || ExtOpc == Hexagon::A2_subri) { + // If addi/subri are replaced with the exactly matching initializer, + // they amount to COPY. + // Check that the initializer is an exact match (for simplicity). +#ifndef NDEBUG + bool IsAddi = ExtOpc == Hexagon::A2_addi; + const MachineOperand &RegOp = MI.getOperand(IsAddi ? 1 : 2); + const MachineOperand &ImmOp = MI.getOperand(IsAddi ? 
2 : 1); + assert(Ex.Rs == RegOp && EV == ImmOp && Ex.Neg != IsAddi && + "Initializer mismatch"); +#endif + BuildMI(MBB, At, dl, HII->get(TargetOpcode::COPY)) + .add(MI.getOperand(0)) + .add(MachineOperand(ExtR)); + Diff = 0; + MBB.erase(MI); + return true; + } + if (ExtOpc == Hexagon::M2_accii || ExtOpc == Hexagon::M2_naccii || + ExtOpc == Hexagon::S4_addaddi || ExtOpc == Hexagon::S4_subaddi) { + // M2_accii: add(Rt,add(Rs,V)) (tied) + // M2_naccii: sub(Rt,add(Rs,V)) + // S4_addaddi: add(Rt,add(Rs,V)) + // S4_subaddi: add(Rt,sub(V,Rs)) + // Check that Rs and V match the initializer expression. The Rs+V is the + // combination that is considered "subexpression" for V, although Rx+V + // would also be valid. +#ifndef NDEBUG + bool IsSub = ExtOpc == Hexagon::S4_subaddi; + Register Rs = MI.getOperand(IsSub ? 3 : 2); + ExtValue V = MI.getOperand(IsSub ? 2 : 3); + assert(EV == V && Rs == Ex.Rs && IsSub == Ex.Neg && "Initializer mismatch"); +#endif + unsigned NewOpc = ExtOpc == Hexagon::M2_naccii ? Hexagon::A2_sub + : Hexagon::A2_add; + BuildMI(MBB, At, dl, HII->get(NewOpc)) + .add(MI.getOperand(0)) + .add(MI.getOperand(1)) + .add(MachineOperand(ExtR)); + MBB.erase(MI); + return true; + } + + if (MI.mayLoad() || MI.mayStore()) { + unsigned IdxOpc = getRegOffOpcode(ExtOpc); + assert(IdxOpc && "Expecting indexed opcode"); + MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(IdxOpc)); + // Construct the new indexed instruction. + // First, add the def for loads. + if (MI.mayLoad()) + MIB.add(getLoadResultOp(MI)); + // Handle possible predication. + if (HII->isPredicated(MI)) + MIB.add(getPredicateOp(MI)); + // Build the address. + MIB.add(MachineOperand(ExtR)); + MIB.addImm(Diff); + // Add the stored value for stores. + if (MI.mayStore()) + MIB.add(getStoredValueOp(MI)); + MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MBB.erase(MI); + return true; + } + +#ifndef NDEBUG + dbgs() << '\n' << PrintInit(ExtI, *HRI) << " " << MI; +#endif + llvm_unreachable("Unhandled expr replacement"); + return false; +} + +bool HCE::replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI) { + if (ReplaceLimit.getNumOccurrences()) { + if (ReplaceLimit <= ReplaceCounter) + return false; + ++ReplaceCounter; + } + const ExtDesc &ED = Extenders[Idx]; + assert((!ED.IsDef || ED.Rd.Reg != 0) && "Missing Rd for def"); + const ExtValue &DefV = ExtI.first; + assert(ExtRoot(ExtValue(ED)) == ExtRoot(DefV) && "Extender root mismatch"); + const ExtExpr &DefEx = ExtI.second; + + ExtValue EV(ED); + int32_t Diff = EV.Offset - DefV.Offset; + const MachineInstr &MI = *ED.UseMI; + DEBUG(dbgs() << __func__ << " Idx:" << Idx << " ExtR:" + << PrintRegister(ExtR, *HRI) << " Diff:" << Diff << '\n'); + + // These two addressing modes must be converted into indexed forms + // regardless of what the initializer looks like. + bool IsAbs = false, IsAbsSet = false; + if (MI.mayLoad() || MI.mayStore()) { + unsigned AM = HII->getAddrMode(MI); + IsAbs = AM == HexagonII::Absolute; + IsAbsSet = AM == HexagonII::AbsoluteSet; + } + + // If it's a def, remember all operands that need to be updated. + // If ED is a def, and Diff is not 0, then all uses of the register Rd + // defined by ED must be in the form (Rd, imm), i.e. the immediate offset + // must follow the Rd in the operand list. 
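+ // Illustration with made-up values: if Rd ends up defined as ##V-4 + // instead of ##V (so Diff = 4), a use like memw(Rd+#8) must be rewritten + // as memw(Rd+#12); the RegOps bookkeeping below makes that possible.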
+ std::vector<std::pair<MachineInstr*,unsigned>> RegOps; + if (ED.IsDef && Diff != 0) { + for (MachineOperand &Op : MRI->use_operands(ED.Rd.Reg)) { + MachineInstr &UI = *Op.getParent(); + RegOps.push_back({&UI, getOperandIndex(UI, Op)}); + } + } + + // Replace the instruction. + bool Replaced = false; + if (Diff == 0 && DefEx.trivial() && !IsAbs && !IsAbsSet) + Replaced = replaceInstrExact(ED, ExtR); + else + Replaced = replaceInstrExpr(ED, ExtI, ExtR, Diff); + + if (Diff != 0 && Replaced && ED.IsDef) { + // Update offsets of the def's uses. + for (std::pair<MachineInstr*,unsigned> P : RegOps) { + unsigned J = P.second; + assert(P.first->getNumOperands() > J+1 && + P.first->getOperand(J+1).isImm()); + MachineOperand &ImmOp = P.first->getOperand(J+1); + ImmOp.setImm(ImmOp.getImm() + Diff); + } + // If it was an absolute-set instruction, the "set" part has been removed. + // ExtR will now be the register with the extended value, and since all + // users of Rd have been updated, all that needs to be done is to replace + // Rd with ExtR. + if (IsAbsSet) { + assert(ED.Rd.Sub == 0 && ExtR.Sub == 0); + MRI->replaceRegWith(ED.Rd.Reg, ExtR.Reg); + } + } + + return Replaced; +} + +bool HCE::replaceExtenders(const AssignmentMap &IMap) { + LocDefMap Defs; + bool Changed = false; + + for (const std::pair<const ExtenderInit,IndexList> &P : IMap) { + const IndexList &Idxs = P.second; + if (Idxs.size() < CountThreshold) + continue; + + Defs.clear(); + calculatePlacement(P.first, Idxs, Defs); + for (const std::pair<const Loc,IndexList> &Q : Defs) { + Register DefR = insertInitializer(Q.first, P.first); + NewRegs.push_back(DefR.Reg); + for (unsigned I : Q.second) + Changed |= replaceInstr(I, DefR, P.first); + } + } + return Changed; +} + +unsigned HCE::getOperandIndex(const MachineInstr &MI, + const MachineOperand &Op) const { + for (unsigned i = 0, n = MI.getNumOperands(); i != n; ++i) + if (&MI.getOperand(i) == &Op) + return i; + llvm_unreachable("Not an operand of MI"); +} + +const MachineOperand &HCE::getPredicateOp(const MachineInstr &MI) const { + assert(HII->isPredicated(MI)); + for (const MachineOperand &Op : MI.operands()) { + if (!Op.isReg() || !Op.isUse() || + MRI->getRegClass(Op.getReg()) != &Hexagon::PredRegsRegClass) + continue; + assert(Op.getSubReg() == 0 && "Predicate register with a subregister"); + return Op; + } + llvm_unreachable("Predicate operand not found"); +} + +const MachineOperand &HCE::getLoadResultOp(const MachineInstr &MI) const { + assert(MI.mayLoad()); + return MI.getOperand(0); +} + +const MachineOperand &HCE::getStoredValueOp(const MachineInstr &MI) const { + assert(MI.mayStore()); + return MI.getOperand(MI.getNumExplicitOperands()-1); +} + +bool HCE::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr)); + + HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + MDT = &getAnalysis<MachineDominatorTree>(); + MRI = &MF.getRegInfo(); + AssignmentMap IMap; + + collect(MF); + std::sort(Extenders.begin(), Extenders.end(), + [](const ExtDesc &A, const ExtDesc &B) { + return ExtValue(A) < ExtValue(B); + }); + + bool Changed = false; + DEBUG(dbgs() << "Collected " << Extenders.size() << " extenders\n"); + for (unsigned I = 0, E = Extenders.size(); I != E; ) { + unsigned B = I; + const ExtRoot &T = Extenders[B].getOp(); + while (I != E && ExtRoot(Extenders[I].getOp()) == T) + ++I; + + IMap.clear(); + assignInits(T, B, I, IMap); + Changed |= replaceExtenders(IMap); + } + + DEBUG({ + if (Changed) + MF.print(dbgs() << "After " << getPassName() << '\n', nullptr); +
else + dbgs() << "No changes\n"; + }); + return Changed; +} + +FunctionPass *llvm::createHexagonConstExtenders() { + return new HexagonConstExtenders(); +} diff --git a/lib/Target/Hexagon/HexagonDepArch.h b/lib/Target/Hexagon/HexagonDepArch.h index 1009aa39cefb9..92573d3313267 100644 --- a/lib/Target/Hexagon/HexagonDepArch.h +++ b/lib/Target/Hexagon/HexagonDepArch.h @@ -7,4 +7,11 @@ // //===----------------------------------------------------------------------===// -enum HexagonArchEnum { V4,V5,V55,V60,V62 }; +#ifndef HEXAGON_DEP_ARCH_H +#define HEXAGON_DEP_ARCH_H +namespace llvm { +namespace Hexagon { +enum class ArchEnum { V4, V5, V55, V60, V62 }; +} // namespace Hexagon +} // namespace llvm +#endif // HEXAGON_DEP_ARCH_H diff --git a/lib/Target/Hexagon/HexagonDepArch.td b/lib/Target/Hexagon/HexagonDepArch.td index 5b1d02c136f02..98403956e6ad1 100644 --- a/lib/Target/Hexagon/HexagonDepArch.td +++ b/lib/Target/Hexagon/HexagonDepArch.td @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// -def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "V62", "Enable Hexagon V62 architecture">; +def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "Hexagon::ArchEnum::V62", "Enable Hexagon V62 architecture">; def HasV62T : Predicate<"HST->hasV62TOps()">, AssemblerPredicate<"ArchV62">; -def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Enable Hexagon V60 architecture">; +def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "Hexagon::ArchEnum::V60", "Enable Hexagon V60 architecture">; def HasV60T : Predicate<"HST->hasV60TOps()">, AssemblerPredicate<"ArchV60">; -def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Enable Hexagon V55 architecture">; +def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "Hexagon::ArchEnum::V55", "Enable Hexagon V55 architecture">; def HasV55T : Predicate<"HST->hasV55TOps()">, AssemblerPredicate<"ArchV55">; -def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Enable Hexagon V4 architecture">; +def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "Hexagon::ArchEnum::V4", "Enable Hexagon V4 architecture">; def HasV4T : Predicate<"HST->hasV4TOps()">, AssemblerPredicate<"ArchV4">; -def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Enable Hexagon V5 architecture">; +def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "Hexagon::ArchEnum::V5", "Enable Hexagon V5 architecture">; def HasV5T : Predicate<"HST->hasV5TOps()">, AssemblerPredicate<"ArchV5">; diff --git a/lib/Target/Hexagon/HexagonDepDecoders.h b/lib/Target/Hexagon/HexagonDepDecoders.h deleted file mode 100644 index aa9787ecf0c84..0000000000000 --- a/lib/Target/Hexagon/HexagonDepDecoders.h +++ /dev/null @@ -1,64 +0,0 @@ -//===--- HexagonDepDecoders.h ---------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { - signedDecoder<4>(MI, tmp, Decoder); - return MCDisassembler::Success; -} -static DecodeStatus s29_3ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { - signedDecoder<14>(MI, tmp, Decoder); - return MCDisassembler::Success; -} -static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { - signedDecoder<8>(MI, tmp, Decoder); - return MCDisassembler::Success; -} -static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { - signedDecoder<7>(MI, tmp, Decoder); - return MCDisassembler::Success; -} -static DecodeStatus s31_1ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { - signedDecoder<12>(MI, tmp, Decoder); - return MCDisassembler::Success; -} -static DecodeStatus s3_0ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { - signedDecoder<3>(MI, tmp, Decoder); - return MCDisassembler::Success; -} -static DecodeStatus s30_2ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { - signedDecoder<13>(MI, tmp, Decoder); - return MCDisassembler::Success; -} -static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { - signedDecoder<6>(MI, tmp, Decoder); - return MCDisassembler::Success; -} -static DecodeStatus s6_3ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { - signedDecoder<9>(MI, tmp, Decoder); - return MCDisassembler::Success; -} -static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { - signedDecoder<5>(MI, tmp, Decoder); - return MCDisassembler::Success; -} -static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, - uint64_t, const void *Decoder) { - signedDecoder<6>(MI, tmp, Decoder); - return MCDisassembler::Success; -} diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td index 9d36b2d263b00..e42229fd57a51 100644 --- a/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -26219,6 +26219,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26242,7 +26243,7 @@ def V6_vL32b_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii)", -tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -26253,13 +26254,15 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isCVLoadable = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_cur_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii)", -tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -26270,13 +26273,15 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_cur_ai"; let 
DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_cur_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26288,13 +26293,14 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_cur_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_cur_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26307,6 +26313,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_cur_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26314,7 +26321,7 @@ def V6_vL32b_cur_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26326,6 +26333,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_cur_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26333,7 +26341,7 @@ def V6_vL32b_cur_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii)", -tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -26344,6 +26352,8 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_cur_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26351,7 +26361,7 @@ def V6_vL32b_cur_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = vmem($Rx32++$Mu2)", -tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -26361,6 +26371,8 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_cur_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26368,7 +26380,7 @@ def V6_vL32b_cur_pred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii)", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26379,13 +26391,14 @@ 
let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_cur_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_cur_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26397,6 +26410,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_cur_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26404,7 +26418,7 @@ def V6_vL32b_cur_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000100; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26415,6 +26429,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_cur_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26422,7 +26437,7 @@ def V6_vL32b_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii)", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b011; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -26433,13 +26448,14 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -26451,6 +26467,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26458,7 +26475,7 @@ def V6_vL32b_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000011; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -26469,6 +26486,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26476,7 +26494,7 @@ def V6_vL32b_nt_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii):nt", -tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { let 
Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -26488,13 +26506,15 @@ let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; let isCVLoadable = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_nt_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii):nt", -tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { +tc_b712833a, TypeCVI_VM_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -26506,13 +26526,15 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_nt_cur_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b101; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26525,13 +26547,14 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_cur_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -26545,6 +26568,7 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_cur_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26552,7 +26576,7 @@ def V6_vL32b_nt_cur_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000101; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -26565,6 +26589,7 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_cur_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26572,7 +26597,7 @@ def V6_vL32b_nt_cur_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii):nt", -tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -26584,6 +26609,8 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_nt_cur_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26591,7 +26618,7 @@ def V6_vL32b_nt_cur_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.cur = vmem($Rx32++$Mu2):nt", -tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, 
Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{12-5} = 0b00000001; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -26602,6 +26629,8 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_nt_cur_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26609,7 +26638,7 @@ def V6_vL32b_nt_cur_pred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rt32+#$Ii):nt", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b100; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26621,13 +26650,14 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_cur_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_cur_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.cur = vmem($Rx32++#$Ii):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -26640,6 +26670,7 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_cur_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26647,7 +26678,7 @@ def V6_vL32b_nt_cur_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.cur = vmem($Rx32++$Mu2):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000100; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -26659,6 +26690,7 @@ let isCVLoad = 1; let CVINew = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_cur_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26666,7 +26698,7 @@ def V6_vL32b_nt_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rt32+#$Ii):nt", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b011; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26678,13 +26710,14 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32 = vmem($Rx32++#$Ii):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -26697,6 +26730,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26704,7 +26738,7 @@ def V6_vL32b_nt_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, 
ModRegs:$Mu2), "if (!$Pv4) $Vd32 = vmem($Rx32++$Mu2):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000011; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -26716,6 +26750,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26723,7 +26758,7 @@ def V6_vL32b_nt_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii):nt", -tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -26735,6 +26770,8 @@ let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; let isCVLoadable = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_nt_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26742,7 +26779,7 @@ def V6_vL32b_nt_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32 = vmem($Rx32++$Mu2):nt", -tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -26753,6 +26790,8 @@ let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; let isCVLoadable = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_nt_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26760,7 +26799,7 @@ def V6_vL32b_nt_pred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rt32+#$Ii):nt", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b010; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26771,13 +26810,14 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rx32++#$Ii):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -26789,6 +26829,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26796,7 +26837,7 @@ def V6_vL32b_nt_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32 = vmem($Rx32++$Mu2):nt", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000010; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -26807,6 +26848,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_ppu"; 
let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26814,7 +26856,7 @@ def V6_vL32b_nt_tmp_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii):nt", -tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { +tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -26825,13 +26867,15 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_nt_tmp_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_tmp_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt", -tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b111; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26843,13 +26887,14 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_tmp_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_tmp_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -26862,6 +26907,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_tmp_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26869,7 +26915,7 @@ def V6_vL32b_nt_tmp_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000111; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -26881,6 +26927,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_tmp_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26888,7 +26935,7 @@ def V6_vL32b_nt_tmp_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii):nt", -tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -26899,6 +26946,8 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_nt_tmp_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26906,7 +26955,7 @@ def V6_vL32b_nt_tmp_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.tmp = vmem($Rx32++$Mu2):nt", -tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { 
let Inst{12-5} = 0b00000010; let Inst{31-21} = 0b00101011010; let hasNewValue = 1; @@ -26916,6 +26965,8 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_nt_tmp_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26923,7 +26974,7 @@ def V6_vL32b_nt_tmp_pred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii):nt", -tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b110; let Inst{31-21} = 0b00101000110; let isPredicated = 1; @@ -26934,13 +26985,14 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_tmp_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_nt_tmp_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii):nt", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001110; @@ -26952,6 +27004,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_tmp_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26959,7 +27012,7 @@ def V6_vL32b_nt_tmp_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2):nt", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000110; let Inst{31-21} = 0b00101011110; let isPredicated = 1; @@ -26970,6 +27023,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isNonTemporal = 1; +let BaseOpcode = "V6_vL32b_nt_tmp_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26977,7 +27031,7 @@ def V6_vL32b_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii)", -tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -26988,6 +27042,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isCVLoadable = 1; +let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -26995,7 +27050,7 @@ def V6_vL32b_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32 = vmem($Rx32++$Mu2)", -tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { +tc_eb669007, TypeCVI_VM_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{12-5} = 0b00000000; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -27005,6 +27060,8 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; let isCVLoadable = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27012,7 +27069,7 @@ def V6_vL32b_pred_ai : HInst< (outs HvxVR:$Vd32), 
(ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rt32+#$Ii)", -tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_5cbf490b, TypeCVI_VM_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b010; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -27022,13 +27079,14 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32 = vmem($Rx32++#$Ii)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -27039,6 +27097,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27046,7 +27105,7 @@ def V6_vL32b_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32 = vmem($Rx32++$Mu2)", -tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_da979fb3, TypeCVI_VM_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000010; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -27056,6 +27115,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27063,7 +27123,7 @@ def V6_vL32b_tmp_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii)", -tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]> { +tc_77a4c701, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -27073,13 +27133,15 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_tmp_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_tmp_npred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)", -tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b111; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -27090,13 +27152,14 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_tmp_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_tmp_npred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -27108,6 +27171,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_tmp_pi"; let DecoderNamespace = "EXT_mmvec"; let 
Constraints = "$Rx32 = $Rx32in"; } @@ -27115,7 +27179,7 @@ def V6_vL32b_tmp_npred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if (!$Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000111; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -27126,6 +27190,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_tmp_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27133,7 +27198,7 @@ def V6_vL32b_tmp_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii)", -tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -27143,6 +27208,8 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_tmp_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27150,7 +27217,7 @@ def V6_vL32b_tmp_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, ModRegs:$Mu2), "$Vd32.tmp = vmem($Rx32++$Mu2)", -tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]> { +tc_9c267309, TypeCVI_VM_TMP_LD>, Enc_2ebe3b, Requires<[HasV60T,UseHVX]>, PredRel { let Inst{12-5} = 0b00000010; let Inst{31-21} = 0b00101011000; let hasNewValue = 1; @@ -27159,6 +27226,8 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let isPredicable = 1; +let BaseOpcode = "V6_vL32b_tmp_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27166,7 +27235,7 @@ def V6_vL32b_tmp_pred_ai : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32, s4_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rt32+#$Ii)", -tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]> { +tc_51cd3aab, TypeCVI_VM_TMP_LD>, Enc_8d8a30, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b110; let Inst{31-21} = 0b00101000100; let isPredicated = 1; @@ -27176,13 +27245,14 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_tmp_ai"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32b_tmp_pred_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii), "if ($Pv4) $Vd32.tmp = vmem($Rx32++#$Ii)", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_58a8bf, Requires<[HasV62T,UseHVX]>, PredRel { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001100; @@ -27193,6 +27263,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_tmp_pi"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27200,7 +27271,7 @@ def V6_vL32b_tmp_pred_ppu : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2), "if ($Pv4) $Vd32.tmp = vmem($Rx32++$Mu2)", -tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, Requires<[HasV62T,UseHVX]> { +tc_38208312, TypeCVI_VM_TMP_LD>, Enc_f8c1c4, 
Requires<[HasV62T,UseHVX]>, PredRel { let Inst{10-5} = 0b000110; let Inst{31-21} = 0b00101011100; let isPredicated = 1; @@ -27210,6 +27281,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let mayLoad = 1; +let BaseOpcode = "V6_vL32b_tmp_ppu"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -28109,7 +28181,7 @@ def V6_vS32b_pred_pi : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "if ($Pv4) vmem($Rx32++#$Ii) = $Vs32", -tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]> { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_865390, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00101001101; @@ -28126,13 +28198,14 @@ def V6_vS32b_pred_ppu : HInst< (outs IntRegs:$Rx32), (ins PredRegs:$Pv4, IntRegs:$Rx32in, ModRegs:$Mu2, HvxVR:$Vs32), "if ($Pv4) vmem($Rx32++$Mu2) = $Vs32", -tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]> { +tc_0317c6ca, TypeCVI_VM_ST>, Enc_1ef990, Requires<[HasV60T,UseHVX]>, NewValueRel { let Inst{10-5} = 0b000000; let Inst{31-21} = 0b00101011101; let isPredicated = 1; let addrMode = PostInc; let accessSize = HVXVectorAccess; let mayStore = 1; +let BaseOpcode = "V6_vS32b_ppu"; let isNVStorable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; diff --git a/lib/Target/Hexagon/HexagonDepTimingClasses.h b/lib/Target/Hexagon/HexagonDepTimingClasses.h index 52963034543d8..2a3fb832733b5 100644 --- a/lib/Target/Hexagon/HexagonDepTimingClasses.h +++ b/lib/Target/Hexagon/HexagonDepTimingClasses.h @@ -6,8 +6,14 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +#ifndef TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H +#define TARGET_HEXAGON_HEXAGON_DEP_TIMING_CLASSES_H -static bool is_TC3x(unsigned SchedClass) { +#include "HexagonInstrInfo.h" + +namespace llvm { + +inline bool is_TC3x(unsigned SchedClass) { switch (SchedClass) { case Hexagon::Sched::tc_1000eb10: case Hexagon::Sched::tc_2aaab1e0: @@ -30,7 +36,7 @@ static bool is_TC3x(unsigned SchedClass) { } } -static bool is_TC2early(unsigned SchedClass) { +inline bool is_TC2early(unsigned SchedClass) { switch (SchedClass) { case Hexagon::Sched::tc_35fb9d13: case Hexagon::Sched::tc_cbe45117: @@ -40,7 +46,7 @@ static bool is_TC2early(unsigned SchedClass) { } } -static bool is_TC4x(unsigned SchedClass) { +inline bool is_TC4x(unsigned SchedClass) { switch (SchedClass) { case Hexagon::Sched::tc_09c86199: case Hexagon::Sched::tc_2d1e6f5c: @@ -54,7 +60,7 @@ static bool is_TC4x(unsigned SchedClass) { } } -static bool is_TC2(unsigned SchedClass) { +inline bool is_TC2(unsigned SchedClass) { switch (SchedClass) { case Hexagon::Sched::tc_090485bb: case Hexagon::Sched::tc_1fe8323c: @@ -86,7 +92,7 @@ static bool is_TC2(unsigned SchedClass) { } } -static bool is_TC1(unsigned SchedClass) { +inline bool is_TC1(unsigned SchedClass) { switch (SchedClass) { case Hexagon::Sched::tc_07ac815d: case Hexagon::Sched::tc_1b6011fb: @@ -130,3 +136,6 @@ static bool is_TC1(unsigned SchedClass) { return false; } } +} // namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp index 7c6de6d513e8d..08a016b74650f 100644 --- a/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -1,4 +1,4 @@ -//===--- HexagonGenExtract.cpp --------------------------------------------===// +//===- 
HexagonGenExtract.cpp ----------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/APInt.h" -#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/GraphTraits.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -47,8 +47,8 @@ static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden, namespace llvm { - void initializeHexagonGenExtractPass(PassRegistry&); - FunctionPass *createHexagonGenExtract(); +void initializeHexagonGenExtractPass(PassRegistry&); +FunctionPass *createHexagonGenExtract(); } // end namespace llvm @@ -58,7 +58,7 @@ namespace { public: static char ID; - HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) { + HexagonGenExtract() : FunctionPass(ID) { initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry()); } @@ -78,14 +78,14 @@ namespace { bool visitBlock(BasicBlock *B); bool convert(Instruction *In); - unsigned ExtractCount; + unsigned ExtractCount = 0; DominatorTree *DT; }; - char HexagonGenExtract::ID = 0; - } // end anonymous namespace +char HexagonGenExtract::ID = 0; + INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate " "\"extract\" instructions", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index d504bf810fac1..c1998518114ac 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -111,9 +111,7 @@ namespace { public: static char ID; - HexagonHardwareLoops() : MachineFunctionPass(ID) { - initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry()); - } + HexagonHardwareLoops() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -513,8 +511,8 @@ HexagonHardwareLoops::getComparisonKind(unsigned CondOpc, int64_t IVBump) const { Comparison::Kind Cmp = (Comparison::Kind)0; switch (CondOpc) { - case Hexagon::C2_cmpeqi: case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqi: case Hexagon::C2_cmpeqp: Cmp = Comparison::EQ; break; @@ -522,21 +520,35 @@ case Hexagon::C4_cmpneqi: Cmp = Comparison::NE; break; + case Hexagon::C2_cmplt: + Cmp = Comparison::LTs; + break; + case Hexagon::C2_cmpltu: + Cmp = Comparison::LTu; + break; case Hexagon::C4_cmplte: + case Hexagon::C4_cmpltei: Cmp = Comparison::LEs; break; case Hexagon::C4_cmplteu: + case Hexagon::C4_cmplteui: Cmp = Comparison::LEu; break; - case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtp: + Cmp = Comparison::GTs; + break; case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtui: case Hexagon::C2_cmpgtup: Cmp = Comparison::GTu; break; - case Hexagon::C2_cmpgti: - case Hexagon::C2_cmpgt: - case Hexagon::C2_cmpgtp: - Cmp = Comparison::GTs; + case Hexagon::C2_cmpgei: + Cmp = Comparison::GEs; + break; + case Hexagon::C2_cmpgeui: + Cmp = Comparison::GEs; break; default: return (Comparison::Kind)0; @@ -685,15 +697,21 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, if (InitialValue->isReg()) { unsigned R = InitialValue->getReg(); MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); - if (!MDT->properlyDominates(DefBB, Header)) - return nullptr; + if (!MDT->properlyDominates(DefBB, Header)) { + int64_t V; + if (!checkForImmediate(*InitialValue, V)) + return
nullptr; + } OldInsts.push_back(MRI->getVRegDef(R)); } if (EndValue->isReg()) { unsigned R = EndValue->getReg(); MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); - if (!MDT->properlyDominates(DefBB, Header)) - return nullptr; + if (!MDT->properlyDominates(DefBB, Header)) { + int64_t V; + if (!checkForImmediate(*EndValue, V)) + return nullptr; + } OldInsts.push_back(MRI->getVRegDef(R)); } diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 74405374665c4..b23da692498e5 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -43,6 +43,9 @@ cl::opt<bool> RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced")); +static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden, + cl::init(true), cl::desc("Enable checking of SDNode's single-use status")); + //===----------------------------------------------------------------------===// // Instruction Selector Implementation //===----------------------------------------------------------------------===// @@ -82,10 +85,19 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { // Complex Pattern Selectors. inline bool SelectAddrGA(SDValue &N, SDValue &R); inline bool SelectAddrGP(SDValue &N, SDValue &R); - bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP); + inline bool SelectAnyImm(SDValue &N, SDValue &R); + inline bool SelectAnyInt(SDValue &N, SDValue &R); + bool SelectAnyImmediate(SDValue &N, SDValue &R, uint32_t LogAlign); + bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP, + uint32_t LogAlign); bool SelectAddrFI(SDValue &N, SDValue &R); bool DetectUseSxtw(SDValue &N, SDValue &R); + inline bool SelectAnyImm0(SDValue &N, SDValue &R); + inline bool SelectAnyImm1(SDValue &N, SDValue &R); + inline bool SelectAnyImm2(SDValue &N, SDValue &R); + inline bool SelectAnyImm3(SDValue &N, SDValue &R); + StringRef getPassName() const override { return "Hexagon DAG->DAG Pattern Instruction Selection"; } @@ -126,6 +138,7 @@ class HexagonDAGToDAGISel : public SelectionDAGISel { bool isAlignedMemNode(const MemSDNode *N) const; bool isSmallStackStore(const StoreSDNode *N) const; bool isPositiveHalfWord(const SDNode *N) const; + bool hasOneUse(const SDNode *N) const; // DAG preprocessing functions.
void ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes); @@ -1250,15 +1263,88 @@ bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) { } inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) { - return SelectGlobalAddress(N, R, false); + return SelectGlobalAddress(N, R, false, 0); } inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) { - return SelectGlobalAddress(N, R, true); + return SelectGlobalAddress(N, R, true, 0); +} + +inline bool HexagonDAGToDAGISel::SelectAnyImm(SDValue &N, SDValue &R) { + return SelectAnyImmediate(N, R, 0); +} + +inline bool HexagonDAGToDAGISel::SelectAnyImm0(SDValue &N, SDValue &R) { + return SelectAnyImmediate(N, R, 0); +} +inline bool HexagonDAGToDAGISel::SelectAnyImm1(SDValue &N, SDValue &R) { + return SelectAnyImmediate(N, R, 1); +} +inline bool HexagonDAGToDAGISel::SelectAnyImm2(SDValue &N, SDValue &R) { + return SelectAnyImmediate(N, R, 2); +} +inline bool HexagonDAGToDAGISel::SelectAnyImm3(SDValue &N, SDValue &R) { + return SelectAnyImmediate(N, R, 3); +} + +inline bool HexagonDAGToDAGISel::SelectAnyInt(SDValue &N, SDValue &R) { + EVT T = N.getValueType(); + if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N)) + return false; + R = N; + return true; +} + +bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R, + uint32_t LogAlign) { + auto IsAligned = [LogAlign] (uint64_t V) -> bool { + return alignTo(V, (uint64_t)1 << LogAlign) == V; + }; + + switch (N.getOpcode()) { + case ISD::Constant: { + if (N.getValueType() != MVT::i32) + return false; + int32_t V = cast<ConstantSDNode>(N)->getZExtValue(); + if (!IsAligned(V)) + return false; + R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType()); + return true; + } + case HexagonISD::JT: + case HexagonISD::CP: + // These are assumed to always be aligned to at least an 8-byte boundary. + if (LogAlign > 3) + return false; + R = N.getOperand(0); + return true; + case ISD::ExternalSymbol: + // Symbols may be aligned at any boundary. + if (LogAlign > 0) + return false; + R = N; + return true; + case ISD::BlockAddress: + // Block address is always aligned to at least a 4-byte boundary. + if (LogAlign > 2 || !IsAligned(cast<BlockAddressSDNode>(N)->getOffset())) + return false; + R = N; + return true; + } + + if (SelectGlobalAddress(N, R, false, LogAlign) || + SelectGlobalAddress(N, R, true, LogAlign)) + return true; + + return false; } bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, - bool UseGP) { + bool UseGP, uint32_t LogAlign) { + auto IsAligned = [LogAlign] (uint64_t V) -> bool { + return alignTo(V, (uint64_t)1 << LogAlign) == V; + }; + switch (N.getOpcode()) { case ISD::ADD: { SDValue N0 = N.getOperand(0); @@ -1270,6 +1356,9 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, return false; if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) { SDValue Addr = N0.getOperand(0); + // For the purpose of alignment, sextvalue and zextvalue are the same. + if (!IsAligned(Const->getZExtValue())) + return false; if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) { if (GA->getOpcode() == ISD::TargetGlobalAddress) { uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue(); @@ -1281,6 +1370,8 @@ } break; } + case HexagonISD::CP: + case HexagonISD::JT: case HexagonISD::CONST32: // The operand(0) of CONST32 is TargetGlobalAddress, which is what we // want in the instruction.
@@ -1319,7 +1410,6 @@ bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) { if (N.getValueType() != MVT::i64) return false; - EVT SrcVT; unsigned Opc = N.getOpcode(); switch (Opc) { case ISD::SIGN_EXTEND: @@ -1435,7 +1525,8 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits, bool HexagonDAGToDAGISel::isOrEquivalentToAdd(const SDNode *N) const { assert(N->getOpcode() == ISD::OR); auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); - assert(C); + if (!C) + return false; // Detect when "or" is used to add an offset to a stack object. if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) { @@ -1481,6 +1572,10 @@ bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const { return false; } +bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const { + return !CheckSingleUse || N->hasOneUse(); +} + //////////////////////////////////////////////////////////////////////////////// // Rebalancing of address calculation trees diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index fcde4224a0075..bd5050aae308d 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -356,10 +356,8 @@ static bool CC_HexagonVector(unsigned ValNo, MVT ValVT, }; auto &MF = State.getMachineFunction(); auto &HST = MF.getSubtarget<HexagonSubtarget>(); - bool UseHVX = HST.useHVXOps(); - bool UseHVXDbl = HST.useHVXDblOps(); - if ((UseHVX && !UseHVXDbl) && + if (HST.useHVX64BOps() && (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) { if (unsigned Reg = State.AllocateReg(VecLstS)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } - if ((UseHVX && !UseHVXDbl) && - (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || - LocVT == MVT::v128i8)) { + if (HST.useHVX64BOps() && (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || + LocVT == MVT::v64i16 || LocVT == MVT::v128i8)) { if (unsigned Reg = State.AllocateReg(VecLstD)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } - if ((UseHVX && UseHVXDbl) && - (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || - LocVT == MVT::v256i8)) { + // 128B Mode + if (HST.useHVX128BOps() && (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || + LocVT == MVT::v128i16 || LocVT == MVT::v256i8)) { if (unsigned Reg = State.AllocateReg(VecLstD)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); return false; } - if ((UseHVX && UseHVXDbl) && + if (HST.useHVX128BOps() && (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) { if (unsigned Reg = State.AllocateReg(VecLstS)) { @@ -411,8 +408,6 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, ISD::ArgFlagsTy ArgFlags, CCState &State) { auto &MF = State.getMachineFunction(); auto &HST = MF.getSubtarget<HexagonSubtarget>(); - bool UseHVX = HST.useHVXOps(); - bool UseHVXDbl = HST.useHVXDblOps(); if (LocVT == MVT::i1) { // Return values of type MVT::i1 still need to be assigned to R0, but @@ -442,7 +437,7 @@
static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::Full; } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 || LocVT == MVT::v32i32 || LocVT == MVT::v16i64 || - (LocVT == MVT::v1024i1 && UseHVX && UseHVXDbl)) { + (LocVT == MVT::v1024i1 && HST.useHVX128BOps())) { LocVT = MVT::v32i32; ValVT = MVT::v32i32; LocInfo = CCValAssign::Full; @@ -505,8 +500,6 @@ static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT, ISD::ArgFlagsTy ArgFlags, CCState &State) { auto &MF = State.getMachineFunction(); auto &HST = MF.getSubtarget<HexagonSubtarget>(); - bool UseHVX = HST.useHVXOps(); - bool UseHVXDbl = HST.useHVXDblOps(); if (LocVT == MVT::v16i32) { if (unsigned Reg = State.AllocateReg(Hexagon::V0)) { @@ -514,7 +507,7 @@ static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT, return false; } } else if (LocVT == MVT::v32i32) { - unsigned Req = (UseHVX && UseHVXDbl) ? Hexagon::V0 : Hexagon::W0; + unsigned Req = HST.useHVX128BOps() ? Hexagon::V0 : Hexagon::W0; if (unsigned Reg = State.AllocateReg(Req)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; @@ -684,13 +677,14 @@ SDValue HexagonTargetLowering::LowerCallResult( // as an implicit def to the call (EmitMachineNode). RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1); Glue = TPR.getValue(1); + Chain = TPR.getValue(0); } else { RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(), RVLocs[i].getValVT(), Glue); Glue = RetVal.getValue(2); + Chain = RetVal.getValue(1); } InVals.push_back(RetVal.getValue(0)); - Chain = RetVal.getValue(1); } return Chain; @@ -834,9 +828,9 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DEBUG(dbgs() << "Function needs byte stack align due to call args\n"); // V6 vectors passed by value have 64 or 128 byte alignment depending // on whether we are 64 byte vector mode or 128 byte. - bool UseHVXDbl = Subtarget.useHVXDblOps(); + bool UseHVX128B = Subtarget.useHVX128BOps(); assert(Subtarget.useHVXOps()); - const unsigned ObjAlign = UseHVXDbl ? 128 : 64; + const unsigned ObjAlign = UseHVX128B ? 128 : 64; LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign); MFI.ensureMaxAlignment(LargestAlignSeen); } @@ -946,18 +940,16 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, return false; auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget()); - bool UseHVX = HST.useHVXOps(); - bool UseHVXDbl = HST.useHVXDblOps(); - bool ValidHVXDblType = - (UseHVX && UseHVXDbl) && (VT == MVT::v32i32 || VT == MVT::v16i64 || + bool ValidHVX128BType = + HST.useHVX128BOps() && (VT == MVT::v32i32 || VT == MVT::v16i64 || VT == MVT::v64i16 || VT == MVT::v128i8); bool ValidHVXType = - UseHVX && !UseHVXDbl && (VT == MVT::v16i32 || VT == MVT::v8i64 || + HST.useHVX64BOps() && (VT == MVT::v16i32 || VT == MVT::v8i64 || VT == MVT::v32i16 || VT == MVT::v64i8); - if (ValidHVXDblType || ValidHVXType || - VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) { + if (ValidHVX128BType || ValidHVXType || VT == MVT::i64 || VT == MVT::i32 || + VT == MVT::i16 || VT == MVT::i8) { IsInc = (Ptr->getOpcode() == ISD::ADD); Base = Ptr->getOperand(0); Offset = Ptr->getOperand(1); @@ -978,7 +970,6 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SelectionDAG &DAG) const { EVT VT; - SDValue Ptr; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { VT = LD->getMemoryVT(); @@ -1144,7 +1135,6 @@ SDValue HexagonTargetLowering::LowerFormalArguments( // callee return the result directly through R0/R1.
SmallVector<SDValue, 8> MemOps; - bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -1193,9 +1183,9 @@ SDValue HexagonTargetLowering::LowerFormalArguments( RegInfo.createVirtualRegister(&Hexagon::HvxVRRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); - } else if (UseHVX && UseHVXDbl && - ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 || - RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) { + } else if (Subtarget.useHVX128BOps() && + ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 || + RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) { unsigned VReg = RegInfo.createVirtualRegister(&Hexagon::HvxVRRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); @@ -1208,9 +1198,9 @@ SDValue HexagonTargetLowering::LowerFormalArguments( RegInfo.createVirtualRegister(&Hexagon::HvxWRRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); - } else if (UseHVX && UseHVXDbl && - ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 || - RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) { + } else if (Subtarget.useHVX128BOps() && + ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 || + RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) { unsigned VReg = RegInfo.createVirtualRegister(&Hexagon::HvxWRRegClass); RegInfo.addLiveIn(VA.getLocReg(), VReg); @@ -1710,9 +1700,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, Subtarget(ST) { bool IsV4 = !Subtarget.hasV5TOps(); auto &HRI = *Subtarget.getRegisterInfo(); - bool UseHVX = Subtarget.useHVXOps(); - bool UseHVXSgl = Subtarget.useHVXSglOps(); - bool UseHVXDbl = Subtarget.useHVXDblOps(); setPrefLoopAlignment(4); setPrefFunctionAlignment(4); @@ -1757,7 +1744,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, } if (Subtarget.hasV60TOps()) { - if (Subtarget.useHVXSglOps()) { + if (Subtarget.useHVX64BOps()) { addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass); @@ -1767,7 +1754,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v16i64, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass); - } else if (Subtarget.useHVXDblOps()) { + } else if (Subtarget.useHVX128BOps()) { addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass); @@ -1981,6 +1968,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL, VT, Custom); } + // Extending loads from (native) vectors of i8 into (native) vectors of i16 + // are legal.
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal); + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal); + setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal); + // Types natively supported: for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32, @@ -2005,8 +2001,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); - if (UseHVX) { - if (UseHVXSgl) { + if (Subtarget.useHVXOps()) { + if (Subtarget.useHVX64BOps()) { setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); @@ -2018,7 +2014,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i8, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i16, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom); - } else if (UseHVXDbl) { + } else if (Subtarget.useHVX128BOps()) { setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); @@ -2096,13 +2092,13 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setIndexedStoreAction(ISD::POST_INC, VT, Legal); } - if (UseHVXSgl) { + if (Subtarget.useHVX64BOps()) { for (MVT VT : {MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64, MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) { setIndexedLoadAction(ISD::POST_INC, VT, Legal); setIndexedStoreAction(ISD::POST_INC, VT, Legal); } - } else if (UseHVXDbl) { + } else if (Subtarget.useHVX128BOps()) { for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64, MVT::v256i8, MVT::v128i16, MVT::v64i32, MVT::v32i64}) { setIndexedLoadAction(ISD::POST_INC, VT, Legal); @@ -2367,8 +2363,8 @@ HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) size_t MaskLen = Mask.size(); unsigned SizeInBits = VT.getScalarSizeInBits() * MaskLen; - if ((Subtarget.useHVXSglOps() && SizeInBits == 64 * 8) || - (Subtarget.useHVXDblOps() && SizeInBits == 128 * 8)) { + if ((Subtarget.useHVX64BOps() && SizeInBits == 64 * 8) || + (Subtarget.useHVX128BOps() && SizeInBits == 128 * 8)) { StridedLoadKind Pattern = isStridedLoad(Mask); if (Pattern == StridedLoadKind::NoPattern) return SDValue(); @@ -2631,11 +2627,11 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0); if (UseHVX) { - assert((Width == 64*8 && Subtarget.useHVXSglOps()) || - (Width == 128*8 && Subtarget.useHVXDblOps())); + assert((Width == 64 * 8 && Subtarget.useHVX64BOps()) || + (Width == 128 * 8 && Subtarget.useHVX128BOps())); SDValue Vec1 = Op.getOperand(1); - MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32; - MVT ReTy = Subtarget.useHVXSglOps() ? MVT::v32i32 : MVT::v64i32; + MVT OpTy = Subtarget.useHVX64BOps() ? MVT::v16i32 : MVT::v32i32; + MVT ReTy = Subtarget.useHVX64BOps() ? 
MVT::v32i32 : MVT::v64i32; SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0); SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1); SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0); @@ -2681,7 +2677,7 @@ HexagonTargetLowering::LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, EVT VT = Op.getOperand(0).getValueType(); SDLoc dl(Op); bool UseHVX = Subtarget.useHVXOps(); - bool UseHVXSgl = Subtarget.useHVXSglOps(); + bool UseHVX64B = Subtarget.useHVX64BOps(); // Just in case... if (!VT.isVector() || !UseHVX) @@ -2689,7 +2685,7 @@ HexagonTargetLowering::LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, EVT ResVT = Op.getValueType(); unsigned ResSize = ResVT.getSizeInBits(); - unsigned VectorSizeInBits = UseHVXSgl ? (64 * 8) : (128 * 8); + unsigned VectorSizeInBits = UseHVX64B ? (64 * 8) : (128 * 8); unsigned OpSize = VT.getSizeInBits(); // We deal only with cases where the result is the vector size @@ -2750,7 +2746,13 @@ HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op, MVT SVT = VecVT.getSimpleVT(); uint64_t W = CW->getZExtValue(); - if (W == 32) { + if (W == 1) { + MVT LocVT = MVT::getIntegerVT(SVT.getSizeInBits()); + SDValue VecCast = DAG.getNode(ISD::BITCAST, dl, LocVT, Vec); + SDValue Shifted = DAG.getNode(ISD::SRA, dl, LocVT, VecCast, Offset); + return DAG.getNode(ISD::AND, dl, LocVT, Shifted, + DAG.getConstant(1, dl, LocVT)); + } else if (W == 32) { // Translate this node into EXTRACT_SUBREG. unsigned Subreg = (X == 0) ? Hexagon::isub_lo : 0; @@ -2972,53 +2974,53 @@ HexagonTargetLowering::getConstraintType(StringRef Constraint) const { std::pair<unsigned, const TargetRegisterClass *> HexagonTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { - bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps(); if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': // R0-R31 switch (VT.SimpleTy) { default: - llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + return {0u, nullptr}; case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::f32: - return std::make_pair(0U, &Hexagon::IntRegsRegClass); + return {0u, &Hexagon::IntRegsRegClass}; case MVT::i64: case MVT::f64: - return std::make_pair(0U, &Hexagon::DoubleRegsRegClass); + return {0u, &Hexagon::DoubleRegsRegClass}; } break; case 'a': // M0-M1 - return std::make_pair(0U, &Hexagon::ModRegsRegClass); + if (VT != MVT::i32) + return {0u, nullptr}; + return {0u, &Hexagon::ModRegsRegClass}; case 'q': // q0-q3 switch (VT.getSizeInBits()) { default: - llvm_unreachable("getRegForInlineAsmConstraint Unhandled vector size"); + return {0u, nullptr}; case 512: - return std::make_pair(0U, &Hexagon::HvxQRRegClass); case 1024: - return std::make_pair(0U, &Hexagon::HvxQRRegClass); + return {0u, &Hexagon::HvxQRRegClass}; } break; case 'v': // V0-V31 switch (VT.getSizeInBits()) { default: - llvm_unreachable("getRegForInlineAsmConstraint Unhandled vector size"); + return {0u, nullptr}; case 512: - return std::make_pair(0U, &Hexagon::HvxVRRegClass); + return {0u, &Hexagon::HvxVRRegClass}; case 1024: - if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl) - return std::make_pair(0U, &Hexagon::HvxVRRegClass); - return std::make_pair(0U, &Hexagon::HvxWRRegClass); + if (Subtarget.hasV60TOps() && Subtarget.useHVX128BOps()) + return {0u, &Hexagon::HvxVRRegClass}; + return {0u, &Hexagon::HvxWRRegClass}; case 2048: - return std::make_pair(0U, &Hexagon::HvxWRRegClass); + return {0u, &Hexagon::HvxWRRegClass}; } break; default: - llvm_unreachable("Unknown asm register class"); + return {0u,
nullptr}; } } @@ -3213,7 +3215,7 @@ HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, case MVT::v32i32: case MVT::v16i64: if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() && - Subtarget.useHVXDblOps()) + Subtarget.useHVX128BOps()) RRC = &Hexagon::HvxVRRegClass; else RRC = &Hexagon::HvxWRRegClass; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 2f6da901d8975..a5381c1fb1a83 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1,4 +1,4 @@ -//===-- HexagonInstrInfo.cpp - Hexagon Instruction Information ------------===// +//===- HexagonInstrInfo.cpp - Hexagon Instruction Information -------------===// // // The LLVM Compiler Infrastructure // @@ -13,9 +13,11 @@ #include "HexagonInstrInfo.h" #include "Hexagon.h" +#include "HexagonFrameLowering.h" #include "HexagonHazardRecognizer.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -32,7 +34,9 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" @@ -44,12 +48,17 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include #include #include #include #include +#include +#include using namespace llvm; @@ -91,9 +100,7 @@ static cl::opt UseDFAHazardRec("dfa-hazard-rec", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Use the DFA based hazard recognizer.")); -/// /// Constants for Hexagon instructions. -/// const int Hexagon_MEMW_OFFSET_MAX = 4095; const int Hexagon_MEMW_OFFSET_MIN = -4096; const int Hexagon_MEMD_OFFSET_MAX = 8191; @@ -109,7 +116,8 @@ const int Hexagon_ADDI_OFFSET_MIN = -32768; void HexagonInstrInfo::anchor() {} HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) - : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP) {} + : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), + Subtarget(ST) {} static bool isIntRegForSubInst(unsigned Reg) { return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || @@ -339,7 +347,6 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI, /// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode /// Cond[1] = R /// Cond[2] = Imm -/// bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -576,7 +583,7 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB, SmallPtrSet VisitedBBs; MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, Cond[1].getMBB(), VisitedBBs); - assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); + assert(Loop != nullptr && "Inserting an ENDLOOP without a LOOP"); Loop->getOperand(0).setMBB(TBB); // Add the ENDLOOP after the finding the LOOP0. 
BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); @@ -617,7 +624,7 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB, SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs; MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, Cond[1].getMBB(), VisitedBBs); - assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP"); + assert(Loop != nullptr && "Inserting an ENDLOOP without a LOOP"); Loop->getOperand(0).setMBB(TBB); // Add the ENDLOOP after finding the LOOP0. BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB); @@ -692,7 +699,7 @@ unsigned HexagonInstrInfo::reduceLoopCount(MachineBasicBlock &MBB, unsigned NewLoopCount = createVR(MF, MVT::i32); MachineInstr *NewAdd = BuildMI(&MBB, DL, get(Hexagon::A2_addi), NewLoopCount). addReg(LoopCount).addImm(-1); - const auto &HRI = *MF->getSubtarget<HexagonSubtarget>().getRegisterInfo(); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); // Update the previously generated instructions with the new loop counter. for (SmallVectorImpl<MachineInstr *>::iterator I = PrevInsts.begin(), E = PrevInsts.end(); I != E; ++I) @@ -735,8 +742,7 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - MachineFunction &MF = *MBB.getParent(); - auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); unsigned KillFlag = getKillRegState(KillSrc); if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) { @@ -840,8 +846,7 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned RegAlign = TRI->getSpillAlignment(*RC); unsigned KillFlag = getKillRegState(isKill); bool HasAlloca = MFI.hasVarSizedObjects(); - const auto &HST = MF.getSubtarget<HexagonSubtarget>(); - const HexagonFrameLowering &HFI = *HST.getFrameLowering(); + const HexagonFrameLowering &HFI = *Subtarget.getFrameLowering(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, @@ -906,8 +911,7 @@ void HexagonInstrInfo::loadRegFromStackSlot( unsigned SlotAlign = MFI.getObjectAlignment(FI); unsigned RegAlign = TRI->getSpillAlignment(*RC); bool HasAlloca = MFI.hasVarSizedObjects(); - const auto &HST = MF.getSubtarget<HexagonSubtarget>(); - const HexagonFrameLowering &HFI = *HST.getFrameLowering(); + const HexagonFrameLowering &HFI = *Subtarget.getFrameLowering(); MachineMemOperand *MMO = MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, @@ -973,7 +977,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); DebugLoc DL = MI.getDebugLoc(); unsigned Opc = MI.getOpcode(); @@ -1368,8 +1372,7 @@ bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, bool HexagonInstrInfo::DefinesPredicate(MachineInstr &MI, std::vector<MachineOperand> &Pred) const { - MachineFunction &MF = *MI.getParent()->getParent(); - const auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); for (unsigned oper = 0; oper < MI.getNumOperands(); ++oper) { MachineOperand MO = MI.getOperand(oper); @@ -1399,10 +1402,34 @@ bool HexagonInstrInfo::isPredicable(const MachineInstr &MI) const { return false; if (MI.isCall() || isTailCall(MI)) { - const MachineFunction &MF = *MI.getParent()->getParent(); - if
(!MF.getSubtarget<HexagonSubtarget>().usePredicatedCalls()) + if (!Subtarget.usePredicatedCalls()) return false; } + + // HVX loads are not predicable on v60, but are on v62. + if (!Subtarget.hasV62TOps()) { + switch (MI.getOpcode()) { + case Hexagon::V6_vL32b_ai: + case Hexagon::V6_vL32b_pi: + case Hexagon::V6_vL32b_ppu: + case Hexagon::V6_vL32b_cur_ai: + case Hexagon::V6_vL32b_cur_pi: + case Hexagon::V6_vL32b_cur_ppu: + case Hexagon::V6_vL32b_nt_ai: + case Hexagon::V6_vL32b_nt_pi: + case Hexagon::V6_vL32b_nt_ppu: + case Hexagon::V6_vL32b_tmp_ai: + case Hexagon::V6_vL32b_tmp_pi: + case Hexagon::V6_vL32b_tmp_ppu: + case Hexagon::V6_vL32b_nt_cur_ai: + case Hexagon::V6_vL32b_nt_cur_pi: + case Hexagon::V6_vL32b_nt_cur_ppu: + case Hexagon::V6_vL32b_nt_tmp_ai: + case Hexagon::V6_vL32b_nt_tmp_pi: + case Hexagon::V6_vL32b_nt_tmp_ppu: + return false; + } + } return true; } @@ -1478,10 +1505,8 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, ScheduleHazardRecognizer* HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( const InstrItineraryData *II, const ScheduleDAG *DAG) const { - if (UseDFAHazardRec) { - auto &HST = DAG->MF.getSubtarget<HexagonSubtarget>(); - return new HexagonHazardRecognizer(II, this, HST); - } + if (UseDFAHazardRec) + return new HexagonHazardRecognizer(II, this, Subtarget); return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } @@ -1565,10 +1590,14 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, case Hexagon::A4_cmpbgtui: case Hexagon::A4_cmpheqi: case Hexagon::A4_cmphgti: - case Hexagon::A4_cmphgtui: + case Hexagon::A4_cmphgtui: { SrcReg2 = 0; + const MachineOperand &Op2 = MI.getOperand(2); + if (!Op2.isImm()) + return false; Value = MI.getOperand(2).getImm(); return true; + } } return false; @@ -1580,7 +1609,6 @@ unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return getInstrTimingClassLatency(ItinData, MI); } - DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( const TargetSubtargetInfo &STI) const { const InstrItineraryData *II = STI.getInstrItineraryData(); @@ -1652,12 +1680,20 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, int &Value) const { if (isPostIncrement(MI)) { - unsigned AccessSize; - return getBaseAndOffset(MI, Value, AccessSize); - } - if (MI.getOpcode() == Hexagon::A2_addi) { - Value = MI.getOperand(2).getImm(); - return true; + unsigned BasePos = 0, OffsetPos = 0; + if (!getBaseAndOffsetPosition(MI, BasePos, OffsetPos)) + return false; + const MachineOperand &OffsetOp = MI.getOperand(OffsetPos); + if (OffsetOp.isImm()) { + Value = OffsetOp.getImm(); + return true; + } + } else if (MI.getOpcode() == Hexagon::A2_addi) { + const MachineOperand &AddOp = MI.getOperand(2); + if (AddOp.isImm()) { + Value = AddOp.getImm(); + return true; + } } return false; @@ -1672,6 +1708,7 @@ HexagonInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { ArrayRef<std::pair<unsigned, const char *>> HexagonInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { using namespace HexagonII; + static const std::pair<unsigned, const char *> Flags[] = { {MO_PCREL, "hexagon-pcrel"}, {MO_GOT, "hexagon-got"}, @@ -1690,6 +1727,1 @@ HexagonInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { ArrayRef<std::pair<unsigned, const char *>> HexagonInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { using namespace HexagonII; + static const std::pair<unsigned, const char *> Flags[] = { {HMOTF_ConstExtended, "hexagon-ext"} }; @@ -1723,23 +1761,11 @@ bool HexagonInstrInfo::isAccumulator(const MachineInstr &MI)
const { } bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const { - const MachineFunction *MF = MI.getParent()->getParent(); - const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - - if (!(isTC1(MI)) - && !(QII->isTC2Early(MI)) - && !(MI.getDesc().mayLoad()) - && !(MI.getDesc().mayStore()) - && (MI.getDesc().getOpcode() != Hexagon::S2_allocframe) - && (MI.getDesc().getOpcode() != Hexagon::L2_deallocframe) - && !(QII->isMemOp(MI)) - && !(MI.isBranch()) - && !(MI.isReturn()) - && !MI.isCall()) - return true; - - return false; + return !isTC1(MI) && !isTC2Early(MI) && !MI.getDesc().mayLoad() && + !MI.getDesc().mayStore() && + MI.getDesc().getOpcode() != Hexagon::S2_allocframe && + MI.getDesc().getOpcode() != Hexagon::L2_deallocframe && + !isMemOp(MI) && !MI.isBranch() && !MI.isReturn() && !MI.isCall(); } // Return true if the instruction is a compound branch instruction. @@ -1794,13 +1820,13 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { switch (MI.getOpcode()) { - case Hexagon::L4_return : - case Hexagon::L4_return_t : - case Hexagon::L4_return_f : - case Hexagon::L4_return_tnew_pnt : - case Hexagon::L4_return_fnew_pnt : - case Hexagon::L4_return_tnew_pt : - case Hexagon::L4_return_fnew_pt : + case Hexagon::L4_return: + case Hexagon::L4_return_t: + case Hexagon::L4_return_f: + case Hexagon::L4_return_tnew_pnt: + case Hexagon::L4_return_fnew_pnt: + case Hexagon::L4_return_tnew_pt: + case Hexagon::L4_return_fnew_pt: return true; } return false; @@ -1811,8 +1837,7 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { if (!ProdMI.getDesc().getNumDefs()) return false; - const MachineFunction &MF = *ProdMI.getParent()->getParent(); - const auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); SmallVector<unsigned, 4> DefsA; SmallVector<unsigned, 4> DefsB; @@ -1950,10 +1975,10 @@ bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I, bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const { switch (MI.getOpcode()) { - case Hexagon::J2_callr : - case Hexagon::J2_callrf : - case Hexagon::J2_callrt : - case Hexagon::PS_call_nr : + case Hexagon::J2_callr: + case Hexagon::J2_callrf: + case Hexagon::J2_callrt: + case Hexagon::PS_call_nr: return true; } return false; @@ -1961,13 +1986,13 @@ bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const { bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const { switch (MI.getOpcode()) { - case Hexagon::L4_return : - case Hexagon::L4_return_t : - case Hexagon::L4_return_f : - case Hexagon::L4_return_fnew_pnt : - case Hexagon::L4_return_fnew_pt : - case Hexagon::L4_return_tnew_pnt : - case Hexagon::L4_return_tnew_pt : + case Hexagon::L4_return: + case Hexagon::L4_return_t: + case Hexagon::L4_return_f: + case Hexagon::L4_return_fnew_pnt: + case Hexagon::L4_return_fnew_pt: + case Hexagon::L4_return_tnew_pnt: + case Hexagon::L4_return_tnew_pt: return true; } return false; @@ -1975,13 +2000,13 @@ bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const { bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { switch (MI.getOpcode()) { - case Hexagon::J2_jumpr : - case Hexagon::J2_jumprt : - case Hexagon::J2_jumprf : - case Hexagon::J2_jumprtnewpt : - case Hexagon::J2_jumprfnewpt : - case Hexagon::J2_jumprtnew : - case
Hexagon::J2_jumprfnew : + case Hexagon::J2_jumpr: + case Hexagon::J2_jumprt: + case Hexagon::J2_jumprf: + case Hexagon::J2_jumprtnewpt: + case Hexagon::J2_jumprfnewpt: + case Hexagon::J2_jumprtnew: + case Hexagon::J2_jumprfnew: return true; } return false; @@ -2089,24 +2114,24 @@ bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const { bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return false; - case Hexagon::L4_iadd_memopw_io : - case Hexagon::L4_isub_memopw_io : - case Hexagon::L4_add_memopw_io : - case Hexagon::L4_sub_memopw_io : - case Hexagon::L4_and_memopw_io : - case Hexagon::L4_or_memopw_io : - case Hexagon::L4_iadd_memoph_io : - case Hexagon::L4_isub_memoph_io : - case Hexagon::L4_add_memoph_io : - case Hexagon::L4_sub_memoph_io : - case Hexagon::L4_and_memoph_io : - case Hexagon::L4_or_memoph_io : - case Hexagon::L4_iadd_memopb_io : - case Hexagon::L4_isub_memopb_io : - case Hexagon::L4_add_memopb_io : - case Hexagon::L4_sub_memopb_io : - case Hexagon::L4_and_memopb_io : - case Hexagon::L4_or_memopb_io : + case Hexagon::L4_iadd_memopw_io: + case Hexagon::L4_isub_memopw_io: + case Hexagon::L4_add_memopw_io: + case Hexagon::L4_sub_memopw_io: + case Hexagon::L4_and_memopw_io: + case Hexagon::L4_or_memopw_io: + case Hexagon::L4_iadd_memoph_io: + case Hexagon::L4_isub_memoph_io: + case Hexagon::L4_add_memoph_io: + case Hexagon::L4_sub_memoph_io: + case Hexagon::L4_and_memoph_io: + case Hexagon::L4_or_memoph_io: + case Hexagon::L4_iadd_memopb_io: + case Hexagon::L4_isub_memopb_io: + case Hexagon::L4_add_memopb_io: + case Hexagon::L4_sub_memopb_io: + case Hexagon::L4_and_memopb_io: + case Hexagon::L4_or_memopb_io: case Hexagon::L4_ior_memopb_io: case Hexagon::L4_ior_memoph_io: case Hexagon::L4_ior_memopw_io: @@ -2293,8 +2318,8 @@ bool HexagonInstrInfo::isSolo(const MachineInstr &MI) const { bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { - case Hexagon::STriw_pred : - case Hexagon::LDriw_pred : + case Hexagon::STriw_pred: + case Hexagon::LDriw_pred: return true; default: return false; @@ -2357,7 +2382,6 @@ bool HexagonInstrInfo::isHVXVec(const MachineInstr &MI) const { } // Check if the Offset is a valid auto-inc imm by Load/Store Type. 
-// bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, int Offset) const { int Size = VT.getSizeInBits() / 8; if (Offset % Size != 0) return false; @@ -2469,28 +2493,28 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, return (Offset >= Hexagon_ADDI_OFFSET_MIN) && (Offset <= Hexagon_ADDI_OFFSET_MAX); - case Hexagon::L4_iadd_memopw_io : - case Hexagon::L4_isub_memopw_io : - case Hexagon::L4_add_memopw_io : - case Hexagon::L4_sub_memopw_io : - case Hexagon::L4_and_memopw_io : - case Hexagon::L4_or_memopw_io : + case Hexagon::L4_iadd_memopw_io: + case Hexagon::L4_isub_memopw_io: + case Hexagon::L4_add_memopw_io: + case Hexagon::L4_sub_memopw_io: + case Hexagon::L4_and_memopw_io: + case Hexagon::L4_or_memopw_io: return (0 <= Offset && Offset <= 255); - case Hexagon::L4_iadd_memoph_io : - case Hexagon::L4_isub_memoph_io : - case Hexagon::L4_add_memoph_io : - case Hexagon::L4_sub_memoph_io : - case Hexagon::L4_and_memoph_io : - case Hexagon::L4_or_memoph_io : + case Hexagon::L4_iadd_memoph_io: + case Hexagon::L4_isub_memoph_io: + case Hexagon::L4_add_memoph_io: + case Hexagon::L4_sub_memoph_io: + case Hexagon::L4_and_memoph_io: + case Hexagon::L4_or_memoph_io: return (0 <= Offset && Offset <= 127); - case Hexagon::L4_iadd_memopb_io : - case Hexagon::L4_isub_memopb_io : - case Hexagon::L4_add_memopb_io : - case Hexagon::L4_sub_memopb_io : - case Hexagon::L4_and_memopb_io : - case Hexagon::L4_or_memopb_io : + case Hexagon::L4_iadd_memopb_io: + case Hexagon::L4_isub_memopb_io: + case Hexagon::L4_add_memopb_io: + case Hexagon::L4_sub_memopb_io: + case Hexagon::L4_and_memopb_io: + case Hexagon::L4_or_memopb_io: return (0 <= Offset && Offset <= 63); // LDriw_xxx and STriw_xxx are pseudo operations, so it has to take offset of @@ -2714,19 +2738,19 @@ bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const { // Check addressing mode and retrieve non-ext equivalent instruction. switch (getAddrMode(MI)) { - case HexagonII::Absolute : + case HexagonII::Absolute: // Load/store with absolute addressing mode can be converted into // base+offset mode. - NonExtOpcode = Hexagon::getBaseWithImmOffset(MI.getOpcode()); + NonExtOpcode = Hexagon::changeAddrMode_abs_io(MI.getOpcode()); break; - case HexagonII::BaseImmOffset : + case HexagonII::BaseImmOffset: // Load/store with base+offset addressing mode can be converted into // base+register offset addressing mode. However left shift operand should // be set to 0. - NonExtOpcode = Hexagon::getBaseWithRegOffset(MI.getOpcode()); + NonExtOpcode = Hexagon::changeAddrMode_io_rr(MI.getOpcode()); break; case HexagonII::BaseLongOffset: - NonExtOpcode = Hexagon::getRegShlForm(MI.getOpcode()); + NonExtOpcode = Hexagon::changeAddrMode_ur_rr(MI.getOpcode()); break; default: return false; @@ -2756,10 +2780,9 @@ bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) // Returns true, if a LD insn can be promoted to a cur load. bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const { - auto &HST = MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>(); const uint64_t F = MI.getDesc().TSFlags; return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) && - HST.hasV60TOps(); + Subtarget.hasV60TOps(); } // Returns true, if a ST insn can be promoted to a new-value store.
@@ -2841,10 +2864,6 @@ bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef Cond) const { return !isPredicatedTrue(Cond[0].getImm()); } -short HexagonInstrInfo::getAbsoluteForm(const MachineInstr &MI) const { - return Hexagon::getAbsoluteForm(MI.getOpcode()); -} - unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; @@ -2977,20 +2996,6 @@ SmallVector HexagonInstrInfo::getBranchingInstrs( return Jumpers; } -short HexagonInstrInfo::getBaseWithLongOffset(short Opcode) const { - if (Opcode < 0) - return -1; - return Hexagon::getBaseWithLongOffset(Opcode); -} - -short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr &MI) const { - return Hexagon::getBaseWithLongOffset(MI.getOpcode()); -} - -short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr &MI) const { - return Hexagon::getBaseWithRegOffset(MI.getOpcode()); -} - // Returns Operand Index for the constant extended instruction. unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -3081,7 +3086,6 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC: return HexagonII::HCG_C; - break; } return HexagonII::HCG_None; @@ -3148,7 +3152,6 @@ int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const { return 0; } - // The diagram below shows the steps involved in the conversion of a predicated // store instruction to its .new predicated new-value form. // @@ -3238,8 +3241,8 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: - llvm::report_fatal_error(std::string("Unknown .new type: ") + - std::to_string(MI.getOpcode()).c_str()); + report_fatal_error(std::string("Unknown .new type: ") + + std::to_string(MI.getOpcode())); case Hexagon::S4_storerb_ur: return Hexagon::S4_storerbnew_ur; @@ -3374,15 +3377,13 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, } int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const { - const MachineFunction &MF = *MI.getParent()->getParent(); - const HexagonSubtarget &HST = MF.getSubtarget(); int NewOp = MI.getOpcode(); if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form NewOp = Hexagon::getPredOldOpcode(NewOp); // All Hexagon architectures have prediction bits on dot-new branches, // but only Hexagon V60+ has prediction bits on dot-old ones. Make sure // to pick the right opcode when converting back to dot-old. - if (!HST.getFeatureBits()[Hexagon::ArchV60]) { + if (!Subtarget.getFeatureBits()[Hexagon::ArchV60]) { switch (NewOp) { case Hexagon::J2_jumptpt: NewOp = Hexagon::J2_jumpt; @@ -3407,7 +3408,7 @@ int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const { assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); } - if (HST.hasV60TOps()) + if (Subtarget.hasV60TOps()) return NewOp; // Subtargets prior to V60 didn't support 'taken' forms of predicated jumps. 
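The getDotOldOp hunk above now reads feature bits from the Subtarget member and, on pre-V60 parts, strips the prediction hint from dot-new branch opcodes before converting back. A condensed sketch of the one mapping visible in the hunk; the other cases follow the same shape, and the opcode enumerators are assumed to come from the surrounding Hexagon headers:

// Sketch only: pre-V60 subtargets have no prediction bits on dot-old
// branches, so a hinted opcode must fall back to its plain form.
static unsigned stripTakenHint(unsigned Opc) {
  switch (Opc) {
  case Hexagon::J2_jumptpt:
    return Hexagon::J2_jumpt; // hinted taken-true jump -> plain form
  default:
    return Opc; // nothing to strip; pass the opcode through
  }
}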
@@ -3429,8 +3430,7 @@ int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const { HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( const MachineInstr &MI) const { unsigned DstReg, SrcReg, Src1Reg, Src2Reg; - const MachineFunction &MF = *MI.getParent()->getParent(); - const auto &HRI = *MF.getSubtarget().getRegisterInfo(); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); switch (MI.getOpcode()) { default: @@ -3535,12 +3535,12 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))) return HexagonII::HSIG_L2; break; - case Hexagon::L4_return_t : - case Hexagon::L4_return_f : - case Hexagon::L4_return_tnew_pnt : - case Hexagon::L4_return_fnew_pnt : - case Hexagon::L4_return_tnew_pt : - case Hexagon::L4_return_fnew_pt : + case Hexagon::L4_return_t: + case Hexagon::L4_return_f: + case Hexagon::L4_return_tnew_pnt: + case Hexagon::L4_return_fnew_pnt: + case Hexagon::L4_return_tnew_pt: + case Hexagon::L4_return_fnew_pt: // [if ([!]p0[.new])] dealloc_return SrcReg = MI.getOperand(0).getReg(); if (Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg)) @@ -3798,8 +3798,7 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const { - const MachineFunction &MF = *DefMI.getParent()->getParent(); - const auto &HRI = *MF.getSubtarget().getRegisterInfo(); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); // Get DefIdx and UseIdx for super registers. MachineOperand DefMO = DefMI.getOperand(DefIdx); @@ -3869,16 +3868,15 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const { unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const { using namespace HexagonII; + const uint64_t F = MI.getDesc().TSFlags; unsigned S = (F >> MemAccessSizePos) & MemAccesSizeMask; unsigned Size = getMemAccessSizeInBytes(MemAccessSize(S)); if (Size != 0) return Size; - const MachineFunction &MF = *MI.getParent()->getParent(); - const auto &HRI = *MF.getSubtarget().getRegisterInfo(); - // Handle vector access sizes. + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); switch (S) { case HexagonII::HVXVectorAccess: return HRI.getSpillSize(Hexagon::HvxVRRegClass); @@ -3912,12 +3910,12 @@ short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const { if (MI.getDesc().mayLoad() || MI.getDesc().mayStore()) { // Check addressing mode and retrieve non-ext equivalent instruction. 
switch (getAddrMode(MI)) { - case HexagonII::Absolute : - return Hexagon::getBaseWithImmOffset(MI.getOpcode()); - case HexagonII::BaseImmOffset : - return Hexagon::getBaseWithRegOffset(MI.getOpcode()); + case HexagonII::Absolute: + return Hexagon::changeAddrMode_abs_io(MI.getOpcode()); + case HexagonII::BaseImmOffset: + return Hexagon::changeAddrMode_io_rr(MI.getOpcode()); case HexagonII::BaseLongOffset: - return Hexagon::getRegShlForm(MI.getOpcode()); + return Hexagon::changeAddrMode_ur_rr(MI.getOpcode()); default: return -1; @@ -3998,8 +3996,7 @@ uint64_t HexagonInstrInfo::getType(const MachineInstr &MI) const { } unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { - const TargetSubtargetInfo &ST = MI.getParent()->getParent()->getSubtarget(); - const InstrItineraryData &II = *ST.getInstrItineraryData(); + const InstrItineraryData &II = *Subtarget.getInstrItineraryData(); const InstrStage &IS = *II.beginStage(MI.getDesc().getSchedClass()); return IS.getUnits(); @@ -4097,6 +4094,27 @@ bool HexagonInstrInfo::validateBranchCond(const ArrayRef &Cond) return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); } -short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr &MI) const { - return Hexagon::xformRegToImmOffset(MI.getOpcode()); +// Addressing mode relations. +short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const { + return Opc >= 0 ? Hexagon::changeAddrMode_abs_io(Opc) : Opc; +} + +short HexagonInstrInfo::changeAddrMode_io_abs(short Opc) const { + return Opc >= 0 ? Hexagon::changeAddrMode_io_abs(Opc) : Opc; +} + +short HexagonInstrInfo::changeAddrMode_io_rr(short Opc) const { + return Opc >= 0 ? Hexagon::changeAddrMode_io_rr(Opc) : Opc; +} + +short HexagonInstrInfo::changeAddrMode_rr_io(short Opc) const { + return Opc >= 0 ? Hexagon::changeAddrMode_rr_io(Opc) : Opc; +} + +short HexagonInstrInfo::changeAddrMode_rr_ur(short Opc) const { + return Opc >= 0 ? Hexagon::changeAddrMode_rr_ur(Opc) : Opc; +} + +short HexagonInstrInfo::changeAddrMode_ur_rr(short Opc) const { + return Opc >= 0 ? Hexagon::changeAddrMode_ur_rr(Opc) : Opc; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 5f81fc59f4f10..2f172340c4e51 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -18,8 +18,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/Target/TargetInstrInfo.h" #include #include @@ -29,18 +29,21 @@ namespace llvm { -struct EVT; class HexagonSubtarget; -class HexagonRegisterInfo; +class MachineBranchProbabilityInfo; +class MachineFunction; +class MachineInstr; +class MachineOperand; +class TargetRegisterInfo; class HexagonInstrInfo : public HexagonGenInstrInfo { + const HexagonSubtarget &Subtarget; virtual void anchor(); public: explicit HexagonInstrInfo(HexagonSubtarget &ST); /// TargetInstrInfo overrides. - /// /// If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of @@ -82,7 +85,6 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { /// /// If AllowModify is true, then this routine is allowed to modify the basic /// block (e.g. delete instructions after the unconditional branch). 
- /// bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, @@ -249,7 +251,7 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { /// Allocate and return a hazard recognizer to use for this target when /// scheduling the machine instructions after register allocation. ScheduleHazardRecognizer* - CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; /// For a comparison instruction, return the source registers @@ -323,7 +325,6 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { bool isTailCall(const MachineInstr &MI) const override; /// HexagonInstrInfo specifics. - /// unsigned createVR(MachineFunction* MF, MVT VT) const; @@ -410,13 +411,9 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { bool PredOpcodeHasJMP_c(unsigned Opcode) const; bool predOpcodeHasNot(ArrayRef Cond) const; - short getAbsoluteForm(const MachineInstr &MI) const; unsigned getAddrMode(const MachineInstr &MI) const; unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset, unsigned &AccessSize) const; - short getBaseWithLongOffset(short Opcode) const; - short getBaseWithLongOffset(const MachineInstr &MI) const; - short getBaseWithRegOffset(const MachineInstr &MI) const; SmallVector getBranchingInstrs(MachineBasicBlock& MBB) const; unsigned getCExtOpNum(const MachineInstr &MI) const; HexagonII::CompoundGroup @@ -464,7 +461,33 @@ class HexagonInstrInfo : public HexagonGenInstrInfo { bool reversePredSense(MachineInstr &MI) const; unsigned reversePrediction(unsigned Opcode) const; bool validateBranchCond(const ArrayRef &Cond) const; - short xformRegToImmOffset(const MachineInstr &MI) const; + + // Addressing mode relations. + short changeAddrMode_abs_io(short Opc) const; + short changeAddrMode_io_abs(short Opc) const; + short changeAddrMode_io_rr(short Opc) const; + short changeAddrMode_rr_io(short Opc) const; + short changeAddrMode_rr_ur(short Opc) const; + short changeAddrMode_ur_rr(short Opc) const; + + short changeAddrMode_abs_io(const MachineInstr &MI) const { + return changeAddrMode_abs_io(MI.getOpcode()); + } + short changeAddrMode_io_abs(const MachineInstr &MI) const { + return changeAddrMode_io_abs(MI.getOpcode()); + } + short changeAddrMode_io_rr(const MachineInstr &MI) const { + return changeAddrMode_io_rr(MI.getOpcode()); + } + short changeAddrMode_rr_io(const MachineInstr &MI) const { + return changeAddrMode_rr_io(MI.getOpcode()); + } + short changeAddrMode_rr_ur(const MachineInstr &MI) const { + return changeAddrMode_rr_ur(MI.getOpcode()); + } + short changeAddrMode_ur_rr(const MachineInstr &MI) const { + return changeAddrMode_ur_rr(MI.getOpcode()); + } }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td deleted file mode 100644 index 400c17333f73c..0000000000000 --- a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td +++ /dev/null @@ -1,40 +0,0 @@ -//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// Multiply 64-bit and use lower result -// -// Optimized with intrinisics accumulates -// -def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2), - (i64 - (A2_combinew - (M2_maci - (M2_maci - (i32 - (EXTRACT_SUBREG - (i64 - (M2_dpmpyuu_s0 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), - isub_lo)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - isub_lo)))), - isub_hi)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_lo)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), isub_hi))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), isub_lo)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_hi))), - (i32 - (EXTRACT_SUBREG - (i64 - (M2_dpmpyuu_s0 - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_lo)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - isub_lo)))), isub_lo))))>; - - - diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 6a252df7fc9a1..93f1fd4109a93 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -13,13 +13,37 @@ //===----------------------------------------------------------------------===// #include "HexagonMachineScheduler.h" +#include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Function.h" - +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include #include +#include +#include #include +using namespace llvm; + +#define DEBUG_TYPE "machine-scheduler" + static cl::opt IgnoreBBRegPressure("ignore-bb-reg-pressure", cl::Hidden, cl::ZeroOrMore, cl::init(false)); @@ -40,10 +64,6 @@ static cl::opt DisableTCTie("disable-tc-tie", static cl::opt CheckEarlyAvail("check-early-avail", cl::Hidden, cl::ZeroOrMore, cl::init(true)); -using namespace llvm; - -#define DEBUG_TYPE "machine-scheduler" - /// Save the last formed packet void VLIWResourceModel::savePacket() { OldPacket = Packet; @@ -246,7 +266,7 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); - assert((!llvm::ForceTopDown || !llvm::ForceBottomUp) && + assert((!ForceTopDown || !ForceBottomUp) && "-misched-topdown incompatible with -misched-bottomup"); } @@ -328,7 +348,8 @@ void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { unsigned Width = SchedModel->getIssueWidth(); IssueCount = (IssueCount <= Width) ? 
0 : IssueCount - Width; - assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + assert(MinReadyCycle < std::numeric_limits::max() && + "MinReadyCycle uninitialized"); unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle); if (!HazardRec->isEnabled()) { @@ -383,7 +404,7 @@ void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { void ConvergingVLIWScheduler::VLIWSchedBoundary::releasePending() { // If the available queue is empty, it is safe to reset MinReadyCycle. if (Available.empty()) - MinReadyCycle = UINT_MAX; + MinReadyCycle = std::numeric_limits::max(); // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. @@ -883,7 +904,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { return nullptr; } SUnit *SU; - if (llvm::ForceTopDown) { + if (ForceTopDown) { SU = Top.pickOnlyChoice(); if (!SU) { SchedCandidate TopCand; @@ -894,7 +915,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { SU = TopCand.SU; } IsTopNode = true; - } else if (llvm::ForceBottomUp) { + } else if (ForceBottomUp) { SU = Bot.pickOnlyChoice(); if (!SU) { SchedCandidate BotCand; diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h index 935bcc9f82928..2525d27266680 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -1,4 +1,4 @@ -//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler. ----===// +//===- HexagonMachineScheduler.h - Custom Hexagon MI scheduler --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,25 +14,25 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H -#include "llvm/ADT/PriorityQueue.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineScheduler.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" -#include "llvm/CodeGen/ResourcePriorityQueue.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Target/TargetInstrInfo.h" - -using namespace llvm; +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include +#include namespace llvm { +class SUnit; + class VLIWResourceModel { /// ResourcesModel - Represents VLIW state. /// Not limited to VLIW targets per se, but assumes @@ -43,19 +43,18 @@ class VLIWResourceModel { /// Local packet/bundle model. Purely /// internal to the MI schedulre at the time. - std::vector Packet; + std::vector Packet; /// Total packets created. - unsigned TotalPackets; + unsigned TotalPackets = 0; public: /// Save the last formed packet. - std::vector OldPacket; + std::vector OldPacket; -public: VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM) - : SchedModel(SM), TotalPackets(0) { - ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI); + : SchedModel(SM) { + ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI); // This hard requirement could be relaxed, // but for now do not let it proceed. 
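bumpCycle above retires one packet's worth of issue slots and then advances to the later of CurrCycle+1 and the soonest ready instruction, with the patch replacing the UINT_MAX sentinel by std::numeric_limits. A stand-alone sketch of that arithmetic, assuming the no-hazard path where the boundary can jump straight to the computed cycle:

#include <algorithm>
#include <cassert>
#include <limits>

// Sketch of the VLIWSchedBoundary cycle-advance logic; the field names
// follow the surrounding code, but the struct itself is illustrative.
struct BoundarySketch {
  unsigned IssueCount = 0;
  unsigned CurrCycle = 0;
  unsigned MinReadyCycle = std::numeric_limits<unsigned>::max();

  void bumpCycle(unsigned IssueWidth) {
    // Retire one packet's worth of issued instructions.
    IssueCount = (IssueCount <= IssueWidth) ? 0 : IssueCount - IssueWidth;
    // Same guard the patch adds: MinReadyCycle must have been set.
    assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
           "MinReadyCycle uninitialized");
    // Advance at least one cycle, or to the soonest ready instruction.
    CurrCycle = std::max(CurrCycle + 1, MinReadyCycle);
  }
};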
@@ -89,7 +88,6 @@ class VLIWResourceModel { bool reserveResources(SUnit *SU); void savePacket(); unsigned getTotalPackets() const { return TotalPackets; } - bool isInPacket(SUnit *SU) const { return is_contained(Packet, SU); } }; @@ -114,20 +112,19 @@ class VLIWMachineScheduler : public ScheduleDAGMILive { /// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics /// to balance the schedule. class ConvergingVLIWScheduler : public MachineSchedStrategy { - /// Store the state used by ConvergingVLIWScheduler heuristics, required /// for the lifetime of one invocation of pickNode(). struct SchedCandidate { // The best SUnit candidate. - SUnit *SU; + SUnit *SU = nullptr; // Register pressure values for the best candidate. RegPressureDelta RPDelta; // Best scheduling cost. - int SCost; + int SCost = 0; - SchedCandidate(): SU(nullptr), SCost(0) {} + SchedCandidate() = default; }; /// Represent the type of SchedCandidate found within a single queue. enum CandResult { @@ -138,33 +135,30 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { /// current cycle in whichever direction at has moved, and maintains the state /// of "hazards" and other interlocks at the current cycle. struct VLIWSchedBoundary { - VLIWMachineScheduler *DAG; - const TargetSchedModel *SchedModel; + VLIWMachineScheduler *DAG = nullptr; + const TargetSchedModel *SchedModel = nullptr; ReadyQueue Available; ReadyQueue Pending; - bool CheckPending; + bool CheckPending = false; - ScheduleHazardRecognizer *HazardRec; - VLIWResourceModel *ResourceModel; + ScheduleHazardRecognizer *HazardRec = nullptr; + VLIWResourceModel *ResourceModel = nullptr; - unsigned CurrCycle; - unsigned IssueCount; + unsigned CurrCycle = 0; + unsigned IssueCount = 0; /// MinReadyCycle - Cycle of the soonest available instruction. - unsigned MinReadyCycle; + unsigned MinReadyCycle = std::numeric_limits::max(); // Remember the greatest min operand latency. - unsigned MaxMinLatency; + unsigned MaxMinLatency = 0; /// Pending queues extend the ready queues with the same ID and the /// PendingFlag set. - VLIWSchedBoundary(unsigned ID, const Twine &Name): - DAG(nullptr), SchedModel(nullptr), Available(ID, Name+".A"), - Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"), - CheckPending(false), HazardRec(nullptr), ResourceModel(nullptr), - CurrCycle(0), IssueCount(0), - MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} + VLIWSchedBoundary(unsigned ID, const Twine &Name) + : Available(ID, Name+".A"), + Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P") {} ~VLIWSchedBoundary() { delete ResourceModel; @@ -196,8 +190,8 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { SUnit *pickOnlyChoice(); }; - VLIWMachineScheduler *DAG; - const TargetSchedModel *SchedModel; + VLIWMachineScheduler *DAG = nullptr; + const TargetSchedModel *SchedModel = nullptr; // State of the top and bottom scheduled instruction boundaries. 
VLIWSchedBoundary Top; @@ -211,9 +205,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { LogMaxQID = 2 }; - ConvergingVLIWScheduler() - : DAG(nullptr), SchedModel(nullptr), Top(TopQID, "TopQ"), - Bot(BotQID, "BotQ") {} + ConvergingVLIWScheduler() : Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} void initialize(ScheduleDAGMI *dag) override; @@ -249,6 +241,6 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { #endif }; -} // namespace +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINESCHEDULER_H diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index ae5745bd0227d..f197cc48df28b 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -1,4 +1,4 @@ -//===----- HexagonNewValueJump.cpp - Hexagon Backend New Value Jump -------===// +//===- HexagonNewValueJump.cpp - Hexagon Backend New Value Jump -----------===// // // The LLVM Compiler Infrastructure // @@ -19,28 +19,36 @@ // all, it collapses compare and jump instruction into a new valu jump // intstructions. // -// //===----------------------------------------------------------------------===// + #include "Hexagon.h" #include "HexagonInstrInfo.h" -#include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" -#include "HexagonSubtarget.h" -#include "HexagonTargetMachine.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/PassSupport.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOpcodes.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "hexagon-nvj" @@ -56,12 +64,14 @@ static cl::opt DisableNewValueJumps("disable-nvjump", cl::Hidden, cl::desc("Disable New Value Jumps")); namespace llvm { - FunctionPass *createHexagonNewValueJump(); - void initializeHexagonNewValueJumpPass(PassRegistry&); -} +FunctionPass *createHexagonNewValueJump(); +void initializeHexagonNewValueJumpPass(PassRegistry&); + +} // end namespace llvm namespace { + struct HexagonNewValueJump : public MachineFunctionPass { static char ID; @@ -75,6 +85,7 @@ namespace { StringRef getPassName() const override { return "Hexagon NewValueJump"; } bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); @@ -90,7 +101,7 @@ namespace { bool isNewValueJumpCandidate(const MachineInstr &MI) const; }; -} // end of anonymous namespace +} // end anonymous namespace char HexagonNewValueJump::ID = 0; @@ 
-100,7 +111,6 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_END(HexagonNewValueJump, "hexagon-nvj", "Hexagon NewValueJump", false, false) - // We have identified this II could be feeder to NVJ, // verify that it can be. static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, @@ -109,7 +119,6 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, MachineBasicBlock::iterator end, MachineBasicBlock::iterator skip, MachineFunction &MF) { - // Predicated instruction can not be feeder to NVJ. if (QII->isPredicated(*II)) return false; @@ -144,7 +153,6 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, // p0 = cmp.eq(r21, #0) // if (p0.new) jump:t .LBB29_45 // and result WAR hazards if converted to New Value Jump. - for (unsigned i = 0; i < II->getNumOperands(); ++i) { if (II->getOperand(i).isReg() && (II->getOperand(i).isUse() || II->getOperand(i).isDef())) { @@ -171,7 +179,6 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, // 2. feeder to the compare instruction can be moved before jump. static bool commonChecksToProhibitNewValueJump(bool afterRA, MachineBasicBlock::iterator MII) { - // If store in path, bail out. if (MII->mayStore()) return false; @@ -216,13 +223,16 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, bool optLocation, MachineBasicBlock::iterator end, MachineFunction &MF) { - MachineInstr &MI = *II; // If the second operand of the compare is an imm, make sure it's in the // range specified by the arch. if (!secondReg) { - int64_t v = MI.getOperand(2).getImm(); + const MachineOperand &Op2 = MI.getOperand(2); + if (!Op2.isImm()) + return false; + + int64_t v = Op2.getImm(); bool Valid = false; switch (MI.getOpcode()) { @@ -417,9 +427,7 @@ bool HexagonNewValueJump::isNewValueJumpCandidate( } } - bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" << "********** Function: " << MF.getName() << "\n"); @@ -536,10 +544,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { if (foundJump && !foundCompare && MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == predReg) { - // Not all compares can be new value compare. Arch Spec: 7.6.1.1 if (isNewValueJumpCandidate(MI)) { - assert( (MI.getDesc().isCompare()) && "Only compare instruction can be collapsed into New Value Jump"); @@ -566,7 +572,6 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { } if (foundCompare && foundJump) { - // If "common" checks fail, bail out on this BB. if (!commonChecksToProhibitNewValueJump(afterRA, MII)) break; diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td index f80e0ef9e39fd..232946ec15791 100644 --- a/lib/Target/Hexagon/HexagonOperands.td +++ b/lib/Target/Hexagon/HexagonOperands.td @@ -29,17 +29,5 @@ def u64_0Imm : Operand { let ParserMatchClass = u64_0ImmOperand; } def n1ConstOperand : AsmOperandClass { let Name = "n1Const"; } def n1Const : Operand { let ParserMatchClass = n1ConstOperand; } -// This complex pattern exists only to create a machine instruction operand -// of type "frame index". There doesn't seem to be a way to do that directly -// in the patterns. -def AddrFI : ComplexPattern; - -// These complex patterns are not strictly necessary, since global address -// folding will happen during DAG combining. For distinguishing between GA -// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used. 
-def AddrGA : ComplexPattern; -def AddrGP : ComplexPattern; - - def bblabel : Operand; def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">; diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 6481f97e99e57..c7e5e55a6a715 100644 --- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -128,10 +128,10 @@ bool HexagonOptAddrMode::hasRepForm(MachineInstr &MI, unsigned TfrDefR) { if (HII->getAddrMode(MI) == HexagonII::BaseRegOffset) // Tranform to Absolute plus register offset. - return (HII->getBaseWithLongOffset(MI) >= 0); + return (HII->changeAddrMode_rr_ur(MI) >= 0); else if (HII->getAddrMode(MI) == HexagonII::BaseImmOffset) // Tranform to absolute addressing mode. - return (HII->getAbsoluteForm(MI) >= 0); + return (HII->changeAddrMode_io_abs(MI) >= 0); return false; } @@ -337,7 +337,7 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, if (ImmOpNum == 1) { if (HII->getAddrMode(*OldMI) == HexagonII::BaseRegOffset) { - short NewOpCode = HII->getBaseWithLongOffset(*OldMI); + short NewOpCode = HII->changeAddrMode_rr_ur(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); MIB.add(OldMI->getOperand(0)); @@ -347,7 +347,7 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, OpStart = 4; Changed = true; } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) { - short NewOpCode = HII->getAbsoluteForm(*OldMI); + short NewOpCode = HII->changeAddrMode_io_abs(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)) .add(OldMI->getOperand(0)); @@ -361,9 +361,9 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, Changed = false; DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); - DEBUG(dbgs() << "[TO]: " << MIB << "\n"); + DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); } else if (ImmOpNum == 2 && OldMI->getOperand(3).getImm() == 0) { - short NewOpCode = HII->xformRegToImmOffset(*OldMI); + short NewOpCode = HII->changeAddrMode_rr_io(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); MIB.add(OldMI->getOperand(0)); @@ -372,7 +372,7 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, OpStart = 4; Changed = true; DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); - DEBUG(dbgs() << "[TO]: " << MIB << "\n"); + DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); } if (Changed) @@ -394,7 +394,7 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, MachineInstrBuilder MIB; if (ImmOpNum == 0) { if (HII->getAddrMode(*OldMI) == HexagonII::BaseRegOffset) { - short NewOpCode = HII->getBaseWithLongOffset(*OldMI); + short NewOpCode = HII->changeAddrMode_rr_ur(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); MIB.add(OldMI->getOperand(1)); @@ -403,7 +403,7 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, MIB.add(OldMI->getOperand(3)); OpStart = 4; } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) { - short NewOpCode = HII->getAbsoluteForm(*OldMI); + short NewOpCode = HII->changeAddrMode_io_abs(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), 
HII->get(NewOpCode)); const GlobalValue *GV = ImmOp.getGlobal(); @@ -414,18 +414,17 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, } Changed = true; DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); - DEBUG(dbgs() << "[TO]: " << MIB << "\n"); + DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); } else if (ImmOpNum == 1 && OldMI->getOperand(2).getImm() == 0) { - short NewOpCode = HII->xformRegToImmOffset(*OldMI); + short NewOpCode = HII->changeAddrMode_rr_io(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode)); MIB.add(OldMI->getOperand(0)); MIB.add(ImmOp); - MIB.add(OldMI->getOperand(1)); - OpStart = 2; + OpStart = 3; Changed = true; DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); - DEBUG(dbgs() << "[TO]: " << MIB << "\n"); + DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); } if (Changed) for (unsigned i = OpStart; i < OpEnd; ++i) @@ -436,10 +435,10 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, short HexagonOptAddrMode::getBaseWithLongOffset(const MachineInstr &MI) const { if (HII->getAddrMode(MI) == HexagonII::BaseImmOffset) { - short TempOpCode = HII->getBaseWithRegOffset(MI); - return HII->getBaseWithLongOffset(TempOpCode); - } else - return HII->getBaseWithLongOffset(MI); + short TempOpCode = HII->changeAddrMode_io_rr(MI); + return HII->changeAddrMode_rr_ur(TempOpCode); + } + return HII->changeAddrMode_rr_ur(MI); } bool HexagonOptAddrMode::changeAddAsl(NodeAddr AddAslUN, diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index f185c49b85d63..908355700ecdd 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -7,16 +7,105 @@ // //===----------------------------------------------------------------------===// -// Pattern fragment that combines the value type and the register class -// into a single parameter. +// Table of contents: +// (0) Definitions +// (1) Immediates +// (2) Type casts +// (3) Extend/truncate +// (4) Logical +// (5) Compare +// (6) Select +// (7) Insert/extract +// (8) Shift/permute +// (9) Arithmetic/bitwise +// (10) Bit +// (11) Load +// (12) Store +// (13) Memop +// (14) PIC +// (15) Call +// (16) Branch +// (17) Misc + +// Guidelines (in no particular order): +// 1. Avoid relying on pattern ordering to give preference to one pattern +// over another; prefer using AddedComplexity instead. The reason for +// this is to avoid unintended consequences (caused by altering the +// order) when making changes. The current order of patterns in this +// file obviously does play some role, but none of the ordering was +// deliberately chosen (other than to create a logical structure of +// this file). When making changes, adding AddedComplexity to existing +// patterns may be needed. +// 2. Maintain the logical structure of the file, and try to put new patterns +// in designated sections. +// 3. Do not use the A2_combinew instruction directly; use the Combinew fragment +// instead. It uses REG_SEQUENCE, which is more amenable to optimizations. +// 4. Most selection macros are based on PatFrags. For DAGs that involve +// SDNodes, use pf1/pf2 to convert them to PatFrags. Use common frags +// whenever possible (see the Definitions section). When adding a new +// macro, try to make it general to enable reuse across sections. +// 5. Compound instructions (e.g. Rx+Rs*Rt) are generated under the condition +// that the nested operation has only one use.
Having it separated in case +// of multiple uses avoids duplication of (processor) work. +// 6. The v4 vector instructions (64-bit) are treated as core instructions, +// for example, A2_vaddh is in the "arithmetic" section with A2_add. +// 7. When adding a pattern for an instruction with a constant-extendable +// operand, allow all possible kinds of inputs for the immediate value +// (see AnyImm/anyimm and their variants in the Definitions section). + + +// --(0) Definitions ----------------------------------------------------- +// + +// This complex pattern exists only to create a machine instruction operand +// of type "frame index". There doesn't seem to be a way to do that directly +// in the patterns. +def AddrFI: ComplexPattern; + +// These complex patterns are not strictly necessary, since global address +// folding will happen during DAG combining. For distinguishing between GA +// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used. +def AddrGA: ComplexPattern; +def AddrGP: ComplexPattern; +def AnyImm: ComplexPattern; +def AnyInt: ComplexPattern; + +// Global address or a constant being a multiple of 2^n. +def AnyImm0: ComplexPattern; +def AnyImm1: ComplexPattern; +def AnyImm2: ComplexPattern; +def AnyImm3: ComplexPattern; + + +// Type helper frags. +def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; +def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; +def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; +def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; +def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; + +def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; +def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; +def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; + +def HVI8: PatLeaf<(VecI8 HvxVR:$R)>; +def HVI16: PatLeaf<(VecI16 HvxVR:$R)>; +def HVI32: PatLeaf<(VecI32 HvxVR:$R)>; +def HVI64: PatLeaf<(VecI64 HvxVR:$R)>; + +def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; +def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; +def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; +def HWI64: PatLeaf<(VecPI64 HvxWR:$R)>; // Pattern fragments to extract the low and high subregisters from a // 64-bit value. 
def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>; def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>; -def IsOrAdd: PatFrag<(ops node:$Addr, node:$off), - (or node:$Addr, node:$off), [{ return isOrEquivalentToAdd(N); }]>; +def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ + return isOrEquivalentToAdd(N); +}]>; def IsVecOff : PatLeaf<(i32 imm), [{ int32_t V = N->getSExtValue(); @@ -28,922 +117,1266 @@ def IsVecOff : PatLeaf<(i32 imm), [{ return isInt<4>(V >> L); }]>; -def IsPow2_32 : PatLeaf<(i32 imm), [{ +def IsPow2_32: PatLeaf<(i32 imm), [{ uint32_t V = N->getZExtValue(); return isPowerOf2_32(V); }]>; -def IsPow2_64 : PatLeaf<(i64 imm), [{ +def IsPow2_64: PatLeaf<(i64 imm), [{ uint64_t V = N->getZExtValue(); return isPowerOf2_64(V); }]>; -def IsNPow2_32 : PatLeaf<(i32 imm), [{ +def IsNPow2_32: PatLeaf<(i32 imm), [{ uint32_t NV = ~N->getZExtValue(); return isPowerOf2_32(NV); }]>; -def IsPow2_64L : PatLeaf<(i64 imm), [{ +def IsPow2_64L: PatLeaf<(i64 imm), [{ uint64_t V = N->getZExtValue(); return isPowerOf2_64(V) && Log2_64(V) < 32; }]>; -def IsPow2_64H : PatLeaf<(i64 imm), [{ +def IsPow2_64H: PatLeaf<(i64 imm), [{ uint64_t V = N->getZExtValue(); return isPowerOf2_64(V) && Log2_64(V) >= 32; }]>; -def IsNPow2_64L : PatLeaf<(i64 imm), [{ +def IsNPow2_64L: PatLeaf<(i64 imm), [{ uint64_t NV = ~N->getZExtValue(); return isPowerOf2_64(NV) && Log2_64(NV) < 32; }]>; -def IsNPow2_64H : PatLeaf<(i64 imm), [{ +def IsNPow2_64H: PatLeaf<(i64 imm), [{ uint64_t NV = ~N->getZExtValue(); return isPowerOf2_64(NV) && Log2_64(NV) >= 32; }]>; -def SDEC1 : SDNodeXForm: PatLeaf<(i32 imm), + "uint64_t V = N->getZExtValue();" # + "return isUInt<" # Width # ">(V) && V > " # Arg # ";" +>; + +def SDEC1: SDNodeXFormgetSExtValue(); return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32); }]>; -def UDEC1 : SDNodeXFormgetZExtValue(); assert(V >= 1); return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32); }]>; -def UDEC32 : SDNodeXFormgetZExtValue(); assert(V >= 32); return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32); }]>; -def Log2_32 : SDNodeXFormgetZExtValue(); return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32); }]>; -def Log2_64 : SDNodeXFormgetZExtValue(); return CurDAG->getTargetConstant(Log2_64(V), SDLoc(N), MVT::i32); }]>; -def LogN2_32 : SDNodeXFormgetZExtValue(); return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32); }]>; -def LogN2_64 : SDNodeXFormgetZExtValue(); return CurDAG->getTargetConstant(Log2_64(NV), SDLoc(N), MVT::i32); }]>; -def ToZext64: OutPatFrag<(ops node:$Rs), - (i64 (A4_combineir 0, (i32 $Rs)))>; -def ToSext64: OutPatFrag<(ops node:$Rs), - (i64 (A2_sxtw (i32 $Rs)))>; +def NegImm8: SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); +}]>; + +def NegImm16: SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); +}]>; +def NegImm32: SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); +}]>; -class T_CMP_pat - : Pat<(i1 (OpNode I32:$src1, ImmPred:$src2)), - (MI IntRegs:$src1, ImmPred:$src2)>; -def : T_CMP_pat ; -def : T_CMP_pat ; -def : T_CMP_pat ; +// Helpers for type promotions/contractions. 
+def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>; +def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_tfrrp (i32 $Rs)))>; +def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>; +def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>; -def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, - [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt), + (REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>; -def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; -def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; +def addrga: PatLeaf<(i32 AddrGA:$Addr)>; +def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; +def anyimm: PatLeaf<(i32 AnyImm:$Imm)>; +def anyint: PatLeaf<(i32 AnyInt:$Imm)>; -// Pats for instruction selection. -class BinOp32_pat - : Pat<(ResT (Op I32:$Rs, I32:$Rt)), - (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>; +// Global address or an aligned constant. +def anyimm0: PatLeaf<(i32 AnyImm0:$Addr)>; +def anyimm1: PatLeaf<(i32 AnyImm1:$Addr)>; +def anyimm2: PatLeaf<(i32 AnyImm2:$Addr)>; +def anyimm3: PatLeaf<(i32 AnyImm3:$Addr)>; -def: BinOp32_pat; -def: BinOp32_pat; -def: BinOp32_pat; -def: BinOp32_pat; -def: BinOp32_pat; +def f32ImmPred : PatLeaf<(f32 fpimm:$F)>; +def f64ImmPred : PatLeaf<(f64 fpimm:$F)>; -def: BinOp32_pat; -def: BinOp32_pat; +// This complex pattern is really only to detect various forms of +// sign-extension i32->i64. The selected value will be of type i64 +// whose low word is the value being extended. The high word is +// unspecified. +def Usxtw: ComplexPattern; -// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones -// that reverse the order of the operands. -class RevCmp : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>; +def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; +def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; +def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; -// Pats for compares. They use PatFrags as operands, not SDNodes, -// since seteq/setgt/etc. are defined as ParFrags. -class T_cmp32_rr_pat - : Pat<(VT (Op I32:$Rs, I32:$Rt)), - (MI IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off), + (PS_fi (i32 AddrFI:$Rs), imm:$off)>; -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat, i1>; -def: T_cmp32_rr_pat, i1>; +def alignedload: PatFrag<(ops node:$a), (load $a), [{ + return isAlignedMemNode(dyn_cast(N)); +}]>; -def: Pat<(select I1:$Pu, I32:$Rs, I32:$Rt), - (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>; +def unalignedload: PatFrag<(ops node:$a), (load $a), [{ + return !isAlignedMemNode(dyn_cast(N)); +}]>; -def: Pat<(add I32:$Rs, s32_0ImmPred:$s16), - (A2_addi I32:$Rs, imm:$s16)>; +def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ + return isAlignedMemNode(dyn_cast(N)); +}]>; -def: Pat<(or I32:$Rs, s32_0ImmPred:$s10), - (A2_orir IntRegs:$Rs, imm:$s10)>; -def: Pat<(and I32:$Rs, s32_0ImmPred:$s10), - (A2_andir IntRegs:$Rs, imm:$s10)>; +def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ + return !isAlignedMemNode(dyn_cast(N)); +}]>; -def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs), - (A2_subri imm:$s10, IntRegs:$Rs)>; -// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). -def: Pat<(not I32:$src1), - (A2_subri -1, IntRegs:$src1)>; +// Converters from unary/binary SDNode to PatFrag. 
+class pf1 : PatFrag<(ops node:$a), (Op node:$a)>; +class pf2 : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>; -def TruncI64ToI32: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); -}]>; +class Not2 + : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>; -def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>; -def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi (TruncI64ToI32 $s8))>; +class Su + : PatFrag; -def : Pat<(select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs), - (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>; +// Main selection macros. -def : Pat<(select I1:$Pu, I32:$Rs, s32_0ImmPred:$s8), - (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>; +class OpR_R_pat + : Pat<(ResVT (Op RegPred:$Rs)), (MI RegPred:$Rs)>; -def : Pat<(select I1:$Pu, s32_0ImmPred:$s8, s8_0ImmPred:$S8), - (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>; +class OpR_RI_pat + : Pat<(ResType (Op RegPred:$Rs, ImmPred:$I)), + (MI RegPred:$Rs, imm:$I)>; -def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>; -def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>; -def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>; -def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>; +class OpR_RR_pat + : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), + (MI RsPred:$Rs, RtPred:$Rt)>; -class T_vcmp_pat - : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))), - (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>; +class AccRRI_pat + : Pat<(AccOp RegPred:$Rx, (Op RegPred:$Rs, ImmPred:$I)), + (MI RegPred:$Rx, RegPred:$Rs, imm:$I)>; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; +class AccRRR_pat + : Pat<(AccOp RsPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)), + (MI RsPred:$Rx, RsPred:$Rs, RtPred:$Rt)>; -// Add halfword. -def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16), - (A2_addh_l16_ll I32:$src1, I32:$src2)>; +multiclass SelMinMax_pats { + def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$A, Val:$B), + (InstA Val:$A, Val:$B)>; + def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$B, Val:$A), + (InstB Val:$A, Val:$B)>; +} -def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)), - (A2_addh_l16_hl I32:$src1, I32:$src2)>; -def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)), - (A2_addh_h16_ll I32:$src1, I32:$src2)>; +// Frags for commonly used SDNodes. +def Add: pf2; def And: pf2; def Sra: pf2; +def Sub: pf2; def Or: pf2; def Srl: pf2; +def Mul: pf2; def Xor: pf2; def Shl: pf2; -// Subtract halfword. -def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16), - (A2_subh_l16_ll I32:$src1, I32:$src2)>; -def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)), - (A2_subh_h16_ll I32:$src1, I32:$src2)>; +// --(1) Immediate ------------------------------------------------------- +// -// Here, depending on the operand being selected, we'll either generate a -// min or max instruction. -// Ex: -// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected -// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'. -// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value -// is selected and the corresponding HexagonInst is passed in 'SwapInst'. 
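The deleted comment above spells out the idiom that the new SelMinMax_pats multiclass (earlier in this diff) keeps: the same compare appears in two select shapes, and only the operand order decides whether the larger or the smaller value survives, hence the Inst/SwapInst pair. In plain C++ terms, for the signed i32 case (A2_max/A2_min being the corresponding Hexagon instructions):

// Sketch: the two select shapes matched by the min/max patterns.
static int selMax(int a, int b) { return (a > b) ? a : b; } // 'Inst' case
static int selMin(int a, int b) { return (a > b) ? b : a; } // 'SwapInst' case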
+def SDTHexagonCONST32 + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<0>]>; -multiclass T_MinMax_pats { - def: Pat<(select (i1 (Op Val:$src1, Val:$src2)), Val:$src1, Val:$src2), - (Inst Val:$src1, Val:$src2)>; - def: Pat<(select (i1 (Op Val:$src1, Val:$src2)), Val:$src2, Val:$src1), - (SwapInst Val:$src1, Val:$src2)>; -} +def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; +def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; +def HexagonCONST32: SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; +def HexagonCONST32_GP: SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; -def IsPosHalf : PatLeaf<(i32 IntRegs:$a), [{ - return isPositiveHalfWord(N); +def TruncI64ToI32: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); }]>; -multiclass MinMax_pats { - defm: T_MinMax_pats; +def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>; +def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi (TruncI64ToI32 $s8))>; - def: Pat<(sext_inreg (select (i1 (Op IsPosHalf:$src1, IsPosHalf:$src2)), - IsPosHalf:$src1, IsPosHalf:$src2), - i16), - (Inst IntRegs:$src1, IntRegs:$src2)>; +def: Pat<(HexagonCONST32 tglobaltlsaddr:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonCONST32 bbl:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonCONST32 tglobaladdr:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonCONST32_GP tblockaddress:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonCONST32_GP tglobaladdr:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonJT tjumptable:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonCP tconstpool:$A), (A2_tfrsi imm:$A)>; - def: Pat<(sext_inreg (select (i1 (Op IsPosHalf:$src1, IsPosHalf:$src2)), - IsPosHalf:$src2, IsPosHalf:$src1), - i16), - (SwapInst IntRegs:$src1, IntRegs:$src2)>; -} +def: Pat<(i1 0), (PS_false)>; +def: Pat<(i1 1), (PS_true)>; +def: Pat<(i64 imm:$v), (CONST64 imm:$v)>; -let AddedComplexity = 200 in { - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; -} +def ftoi : SDNodeXFormgetValueAPF().bitcastToAPInt(); + return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N), + MVT::getIntegerVT(I.getBitWidth())); +}]>; -class T_cmp64_rr_pat - : Pat<(i1 (CmpOp I64:$Rs, I64:$Rt)), - (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>; +def: Pat<(f32ImmPred:$f), (A2_tfrsi (ftoi $f))>; +def: Pat<(f64ImmPred:$f), (CONST64 (ftoi $f))>; -def: T_cmp64_rr_pat; -def: T_cmp64_rr_pat; -def: T_cmp64_rr_pat; -def: T_cmp64_rr_pat>; -def: T_cmp64_rr_pat>; +def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>; -def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>; +// --(2) Type cast ------------------------------------------------------- +// -def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>; +let Predicates = [HasV5T] in { + def: OpR_R_pat, f64, F32>; + def: OpR_R_pat, f32, F64>; -def: Pat<(i1 (not I1:$Ps)), (C2_not PredRegs:$Ps)>; + def: OpR_R_pat, f32, I32>; + def: OpR_R_pat, f32, I64>; + def: OpR_R_pat, f64, I32>; + def: OpR_R_pat, f64, I64>; -def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, 
I1:$Pt)>; + def: OpR_R_pat, f32, I32>; + def: OpR_R_pat, f32, I64>; + def: OpR_R_pat, f64, I32>; + def: OpR_R_pat, f64, I64>; -def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; + def: OpR_R_pat, i32, F32>; + def: OpR_R_pat, i32, F64>; + def: OpR_R_pat, i64, F32>; + def: OpR_R_pat, i64, F64>; -def: Pat<(br bb:$dst), (J2_jump b30_2Imm:$dst)>; -def: Pat<(brcond I1:$src1, bb:$block), (J2_jumpt PredRegs:$src1, bb:$block)>; -def: Pat<(brind I32:$dst), (J2_jumpr IntRegs:$dst)>; + def: OpR_R_pat, i32, F32>; + def: OpR_R_pat, i32, F64>; + def: OpR_R_pat, i64, F32>; + def: OpR_R_pat, i64, F64>; +} -def: Pat<(retflag), (PS_jmpret (i32 R31))>; -def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>; +// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. +let Predicates = [HasV5T] in { + def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>; + def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>; + def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>; + def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>; +} -// Patterns to select load-indexed (i.e. load from base+offset). -multiclass Loadx_pat { - def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; - def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), - (VT (MI AddrFI:$fi, imm:$Off))>; - def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))), - (VT (MI AddrFI:$fi, imm:$Off))>; - def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))), - (VT (MI IntRegs:$Rs, imm:$Off))>; - def: Pat<(VT (Load I32:$Rs)), (VT (MI IntRegs:$Rs, 0))>; +multiclass Cast_pat { + def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>; + def: Pat<(Ta (bitconvert (Tb RC:$Rs))), (Ta RC:$Rs)>; } +// Bit convert vector types to integers. +defm: Cast_pat; +defm: Cast_pat; +defm: Cast_pat; +defm: Cast_pat; +defm: Cast_pat; + + +// --(3) Extend/truncate ------------------------------------------------- +// + +def: Pat<(sext_inreg I32:$Rs, i8), (A2_sxtb I32:$Rs)>; +def: Pat<(sext_inreg I32:$Rs, i16), (A2_sxth I32:$Rs)>; +def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>; +def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>; +def: Pat<(sext_inreg I64:$Rs, i8), (A2_sxtw (A2_sxtb (LoReg $Rs)))>; + +def: Pat<(i64 (sext I1:$Pu)), + (Combinew (C2_muxii PredRegs:$Pu, -1, 0), + (C2_muxii PredRegs:$Pu, -1, 0))>; + +def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>; +def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; +def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; + +def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>; +def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>; +def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>; + +def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>; +def: Pat<(i1 (trunc I64:$Rs)), (C2_tfrrp (LoReg $Rs))>; + let AddedComplexity = 20 in { - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - // No sextloadi1. + def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>; + def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>; } -// Sign-extending loads of i1 need to replicate the lowest bit throughout -// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should -// do the trick. 
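The comment above (part of the block being moved) explains the sextloadi1 lowering that follows: a loaded i1 is 0 or 1, so subtracting it from 0 yields 0 or -1, which is exactly the 32-bit sign-extension of the low bit. A self-checking C++ rendering of the trick, assuming two's-complement wraparound (guaranteed since C++20):

#include <cstdint>

// 0 - v for v in {0, 1} gives 0x00000000 or 0xFFFFFFFF; this is what the
// pattern's A2_subri 0 does to the zero-extended byte load.
constexpr std::int32_t sextI1(std::uint32_t Loaded) {
  return static_cast<std::int32_t>(0u - (Loaded & 1u));
}
static_assert(sextI1(0) == 0 && sextI1(1) == -1, "0 - v sign-extends i1");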
-let AddedComplexity = 20 in -def: Pat<(i32 (sextloadi1 I32:$Rs)), - (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; - -def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>; -def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>; -def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>; +def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; +def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; -def: Pat<(mul IntRegs:$Rs, u32_0ImmPred:$u8), - (M2_mpysip IntRegs:$Rs, imm:$u8)>; -def: Pat<(ineg (mul IntRegs:$Rs, u8_0ImmPred:$u8)), - (M2_mpysin IntRegs:$Rs, imm:$u8)>; -def: Pat<(mul IntRegs:$src1, s32_0ImmPred:$src2), - (M2_mpysmi IntRegs:$src1, imm:$src2)>; -def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), - (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>; -def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1), - (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; -def: Pat<(add (add IntRegs:$src2, s32_0ImmPred:$src3), IntRegs:$src1), - (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>; -def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1), - (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -class T_MType_acc_pat1 - : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)), - (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>; - -class T_MType_acc_pat2 - : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))), - (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def : T_MType_acc_pat2 ; -def : T_MType_acc_pat1 ; - -def : T_MType_acc_pat1 ; -def : T_MType_acc_pat2 ; - -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; - -class T_MType_acc_pat3 - : Pat <(secOp I32:$src1, (firstOp I32:$src2, (not I32:$src3))), - (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: T_MType_acc_pat3 ; -def: T_MType_acc_pat3 ; -def: T_MType_acc_pat3 ; +def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; -// This complex pattern is really only to detect various forms of -// sign-extension i32->i64. The selected value will be of type i64 -// whose low word is the value being extended. The high word is -// unspecified. 
-def Usxtw : ComplexPattern; +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), + (Combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; -def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; -def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; -def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), + (Combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; -def: Pat<(i32 (trunc (sra (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), - (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(i32 (trunc (srl (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), - (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; +// Truncate: from vector B copy all 'E'ven 'B'yte elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6]; +def: Pat<(v4i8 (trunc V4I16:$Rs)), + (S2_vtrunehb V4I16:$Rs)>; -def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)), - (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>; +// Truncate: from vector B copy all 'O'dd 'B'yte elements: +// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; +// S2_vtrunohb -def: Pat<(mul Sext64:$Rs, Sext64:$Rt), - (M2_dpmpyss_s0 (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; +// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; +// S2_vtruneh -// Multiply and accumulate, use full result. -// Rxx[+-]=mpy(Rs,Rt) +def: Pat<(v2i16 (trunc V2I32:$Rs)), + (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; -def: Pat<(add I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)), - (M2_dpmpyss_acc_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(sub I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)), - (M2_dpmpyss_nac_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; +// --(4) Logical --------------------------------------------------------- +// -def: Pat<(add I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))), - (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; +def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>; +def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>; -def: Pat<(add I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))), - (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, I1>; +def: OpR_RR_pat, i1, I1>; -def: Pat<(sub I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))), - (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; +// op(Ps, op(Pt, Pu)) +def: AccRRR_pat, I1, I1>; +def: AccRRR_pat, I1, I1>; +def: AccRRR_pat, I1, I1>; +def: AccRRR_pat, I1, I1>; -def: Pat<(sub I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))), - (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; +// op(Ps, op(Pt, ~Pu)) +def: AccRRR_pat>, I1, I1>; +def: AccRRR_pat>, I1, I1>; +def: AccRRR_pat>, I1, I1>; +def: AccRRR_pat>, I1, I1>; -class Storepi_pat - : Pat<(Store Value:$src1, I32:$src2, Offset:$offset), - (MI I32:$src2, imm:$offset, Value:$src1)>; -def: Storepi_pat; -def: Storepi_pat; -def: Storepi_pat; -def: Storepi_pat; +// --(5) Compare --------------------------------------------------------- +// -// Patterns for generating stores, where the address takes different forms: -// - frameindex, -// - frameindex + offset, -// - base + offset, -// - simple (base address without offset). -// These would usually be used together (via Storex_pat defined below), but -// in some cases one may want to apply different properties (such as -// AddedComplexity) to the individual patterns. 
-class Storex_fi_pat - : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; -multiclass Storex_fi_add_pat { - def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; - def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; -} -multiclass Storex_add_pat { - def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; - def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; -} -class Storex_simple_pat - : Pat<(Store Value:$Rt, I32:$Rs), - (MI IntRegs:$Rs, 0, Value:$Rt)>; +// Avoid negated comparisons, i.e. those of form "Pd = !cmp(...)". +// These cannot form compounds (e.g. J4_cmpeqi_tp0_jump_nt). -// Patterns for generating stores, where the address takes different forms, -// and where the value being stored is transformed through the value modifier -// ValueMod. The address forms are same as above. -class Storexm_fi_pat - : Pat<(Store Value:$Rs, AddrFI:$fi), - (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; -multiclass Storexm_fi_add_pat { - def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; - def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; -} -multiclass Storexm_add_pat { - def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; - def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; -} -class Storexm_simple_pat - : Pat<(Store Value:$Rt, I32:$Rs), - (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; -multiclass Storex_pat { - def: Storex_fi_pat ; - defm: Storex_fi_add_pat ; - defm: Storex_add_pat ; -} +def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)), + (C2_cmpgti I32:$Rs, (SDEC1 imm:$s10))>; +def: Pat<(i1 (setuge I32:$Rs, u32_0ImmPred:$u9)), + (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9))>; -multiclass Storexm_pat { - def: Storexm_fi_pat ; - defm: Storexm_fi_add_pat ; - defm: Storexm_add_pat ; -} +def: Pat<(i1 (setlt I32:$Rs, s32_0ImmPred:$s10)), + (C2_not (C2_cmpgti I32:$Rs, (SDEC1 imm:$s10)))>; +def: Pat<(i1 (setult I32:$Rs, u32_0ImmPred:$u9)), + (C2_not (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9)))>; -// Regular stores in the DAG have two operands: value and address. -// Atomic stores also have two, but they are reversed: address, value. -// To use atomic stores with the patterns, they need to have their operands -// swapped. This relies on the knowledge that the F.Fragment uses names -// "ptr" and "val". -class SwapSt - : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, +// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones +// that reverse the order of the operands. 
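To illustrate why this operand reversal is sound: every ordered comparison has a mirror form, so for example setlt(a, b) can be matched as a greater-than with the operands swapped, letting one "greater-than" instruction cover both orders. A quick C++ check:

  #include <cassert>

  int main() {
    for (int a = -2; a <= 2; ++a)
      for (int b = -2; b <= 2; ++b) {
        assert((a < b) == (b > a));    // setlt(a,b) == setgt(b,a)
        assert((a <= b) == (b >= a));  // setle(a,b) == setge(b,a)
      }
  }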
+class RevCmp + : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment, F.PredicateCode, F.OperandTransform>; -let AddedComplexity = 20 in { - defm: Storex_pat; - defm: Storex_pat; - defm: Storex_pat; - defm: Storex_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, I32>; +def: OpR_RR_pat, i1, I32>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, I64>; +def: OpR_RR_pat, i1, I64>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V8I8>; +def: OpR_RR_pat, v8i1, V8I8>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V8I8>; +def: OpR_RR_pat, v8i1, V8I8>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V4I16>; +def: OpR_RR_pat, v4i1, V4I16>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V4I16>; +def: OpR_RR_pat, v4i1, V4I16>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V2I32>; +def: OpR_RR_pat, v2i1, V2I32>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V2I32>; +def: OpR_RR_pat, v2i1, V2I32>; +def: OpR_RR_pat; +def: OpR_RR_pat; - defm: Storex_pat, I32, s32_0ImmPred, S2_storerb_io>; - defm: Storex_pat, I32, s31_1ImmPred, S2_storerh_io>; - defm: Storex_pat, I32, s30_2ImmPred, S2_storeri_io>; - defm: Storex_pat, I64, s29_3ImmPred, S2_storerd_io>; -} +let Predicates = [HasV5T] in { + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat, i1, F32>; + def: OpR_RR_pat, i1, F32>; + def: OpR_RR_pat, i1, F32>; + def: OpR_RR_pat, i1, F32>; + def: OpR_RR_pat; + + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat, i1, F64>; + def: OpR_RR_pat, i1, F64>; + def: OpR_RR_pat, i1, F64>; + def: OpR_RR_pat, i1, F64>; + def: OpR_RR_pat; +} + +// Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds. + +def: Pat<(i1 (setne I32:$Rs, anyimm:$u5)), + (C2_not (C2_cmpeqi I32:$Rs, imm:$u5))>; +def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)), + (C2_not (C2_cmpgti I32:$Rs, imm:$u5))>; +def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)), + (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>; + +def: Pat<(i1 (setne I32:$Rs, I32:$Rt)), + (C2_not (C2_cmpeq I32:$Rs, I32:$Rt))>; +def: Pat<(i1 (setle I32:$Rs, I32:$Rt)), + (C2_not (C2_cmpgt I32:$Rs, I32:$Rt))>; +def: Pat<(i1 (setule I32:$Rs, I32:$Rt)), + (C2_not (C2_cmpgtu I32:$Rs, I32:$Rt))>; +def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), + (C2_not (C2_cmpgt I32:$Rt, I32:$Rs))>; +def: Pat<(i1 (setuge I32:$Rs, I32:$Rt)), + (C2_not (C2_cmpgtu I32:$Rt, I32:$Rs))>; + +def: Pat<(i1 (setle I64:$Rs, I64:$Rt)), + (C2_not (C2_cmpgtp I64:$Rs, I64:$Rt))>; +def: Pat<(i1 (setne I64:$Rs, I64:$Rt)), + (C2_not (C2_cmpeqp I64:$Rs, I64:$Rt))>; +def: Pat<(i1 (setge I64:$Rs, I64:$Rt)), + (C2_not (C2_cmpgtp I64:$Rt, I64:$Rs))>; +def: Pat<(i1 (setuge I64:$Rs, I64:$Rt)), + (C2_not (C2_cmpgtup I64:$Rt, I64:$Rs))>; +def: Pat<(i1 (setule I64:$Rs, I64:$Rt)), + (C2_not (C2_cmpgtup I64:$Rs, I64:$Rt))>; -// Simple patterns should be tried with the least priority. 
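The rewrites above reduce everything to cmpgt/cmpgtu: comparisons without a direct instruction are expressed either by negating the greater-than form or, for immediates, by adjusting the constant by one. A small C++ sanity check of the identities:

  #include <cassert>

  int main() {
    for (int a = -3; a <= 3; ++a)
      for (int b = -3; b <= 3; ++b) {
        assert((a != b) == !(a == b));    // setne  -> !cmpeq
        assert((a <= b) == !(a > b));     // setle  -> !cmpgt
        assert((a >= b) == !(b > a));     // setge  -> !cmpgt, swapped
        assert((a >= b) == (a > b - 1));  // setge c -> cmpgt (c-1), cf. SDEC1
      }
  }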
-def: Storex_simple_pat; -def: Storex_simple_pat; -def: Storex_simple_pat; -def: Storex_simple_pat; +let AddedComplexity = 100 in { + def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)), + (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 255), 0)), + (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 65535), 0)), + (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 65535), 0)), + (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; +} -def: Storex_simple_pat, I32, S2_storerb_io>; -def: Storex_simple_pat, I32, S2_storerh_io>; -def: Storex_simple_pat, I32, S2_storeri_io>; -def: Storex_simple_pat, I64, S2_storerd_io>; +// PatFrag for AsserZext which takes the original type as a parameter. +def SDTAssertZext: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0,1>]>; +def AssertZextSD: SDNode<"ISD::AssertZext", SDTAssertZext>; +class AssertZext: PatFrag<(ops node:$A), (AssertZextSD $A, T)>; -let AddedComplexity = 20 in { - defm: Storexm_pat; - defm: Storexm_pat; - defm: Storexm_pat; +multiclass Cmpb_pat { + def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)), + (MI I32:$Rs, imm:$I)>; + def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)), + (MI I32:$Rs, imm:$I)>; } -def: Storexm_simple_pat; -def: Storexm_simple_pat; -def: Storexm_simple_pat; +multiclass CmpbN_pat { + def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)), + (C2_not (MI I32:$Rs, imm:$I))>; + def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)), + (C2_not (MI I32:$Rs, imm:$I))>; +} -def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; -def: Pat <(i64 (sext_inreg I64:$src, i32)), (A2_sxtw (LoReg I64:$src))>; +multiclass CmpbND_pat { + def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)), + (C2_not (MI I32:$Rs, (UDEC1 imm:$I)))>; + def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)), + (C2_not (MI I32:$Rs, (UDEC1 imm:$I)))>; +} -def: Pat<(select (i1 (setlt I32:$src, 0)), (sub 0, I32:$src), I32:$src), - (A2_abs IntRegs:$src)>; +let AddedComplexity = 200 in { + defm: Cmpb_pat , IsUGT<8,31>, 255>; + defm: CmpbN_pat , IsUGT<8,31>, 255>; + defm: Cmpb_pat , IsUGT<32,31>, 255>; + defm: CmpbN_pat , IsUGT<32,31>, 255>; + defm: Cmpb_pat , IsUGT<32,31>, 65535>; + defm: CmpbN_pat , IsUGT<32,31>, 65535>; + defm: CmpbND_pat, IsUGT<32,32>, 255>; + defm: CmpbND_pat, IsUGT<32,32>, 65535>; +} + +def: Pat<(i32 (zext (i1 (seteq I32:$Rs, I32:$Rt)))), + (A4_rcmpeq I32:$Rs, I32:$Rt)>; +def: Pat<(i32 (zext (i1 (setne I32:$Rs, I32:$Rt)))), + (A4_rcmpneq I32:$Rs, I32:$Rt)>; +def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))), + (A4_rcmpeqi I32:$Rs, imm:$s8)>; +def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))), + (A4_rcmpneqi I32:$Rs, imm:$s8)>; + +def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), + (C2_xor I1:$Ps, I1:$Pt)>; -let AddedComplexity = 50 in -def: Pat<(xor (add (sra I32:$src, (i32 31)), - I32:$src), - (sra I32:$src, (i32 31))), - (A2_abs IntRegs:$src)>; +def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), + (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(sra I32:$src, u5_0ImmPred:$u5), - (S2_asr_i_r IntRegs:$src, imm:$u5)>; -def: Pat<(srl I32:$src, u5_0ImmPred:$u5), - (S2_lsr_i_r IntRegs:$src, imm:$u5)>; -def: Pat<(shl I32:$src, u5_0ImmPred:$u5), - (S2_asl_i_r IntRegs:$src, imm:$u5)>; +def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), + (A2_vcmpheq (ToZext64 
$Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(sra (add (sra I32:$src1, u5_0ImmPred:$src2), 1), (i32 1)), - (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>; +def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), + (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; -def : Pat<(not I64:$src1), - (A2_notp DoubleRegs:$src1)>; +// Floating-point comparisons with checks for ordered/unordered status. -// Count leading zeros. -def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>; -def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; +class T3 + : OutPatFrag<(ops node:$Rs, node:$Rt), + (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>; -// Count trailing zeros: 32-bit. -def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>; +class OpmR_RR_pat + : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), + (Output RsPred:$Rs, RtPred:$Rt)>; -// Count leading ones. -def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>; -def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; +class Cmpuf: T3; +class Cmpud: T3; -// Count trailing ones: 32-bit. -def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>; +class Cmpufn: T3; +class Cmpudn: T3; -let AddedComplexity = 20 in { // Complexity greater than and/or/xor - def: Pat<(and I32:$Rs, IsNPow2_32:$V), - (S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>; - def: Pat<(or I32:$Rs, IsPow2_32:$V), - (S2_setbit_i IntRegs:$Rs, (Log2_32 $V))>; - def: Pat<(xor I32:$Rs, IsPow2_32:$V), - (S2_togglebit_i IntRegs:$Rs, (Log2_32 $V))>; +let Predicates = [HasV5T] in { + def: OpmR_RR_pat, setueq, i1, F32>; + def: OpmR_RR_pat, setuge, i1, F32>; + def: OpmR_RR_pat, setugt, i1, F32>; + def: OpmR_RR_pat, RevCmp, i1, F32>; + def: OpmR_RR_pat, RevCmp, i1, F32>; + def: OpmR_RR_pat, setune, i1, F32>; - def: Pat<(and I32:$Rs, (not (shl 1, I32:$Rt))), - (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(or I32:$Rs, (shl 1, I32:$Rt)), - (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(xor I32:$Rs, (shl 1, I32:$Rt)), - (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>; + def: OpmR_RR_pat, setueq, i1, F64>; + def: OpmR_RR_pat, setuge, i1, F64>; + def: OpmR_RR_pat, setugt, i1, F64>; + def: OpmR_RR_pat, RevCmp, i1, F64>; + def: OpmR_RR_pat, RevCmp, i1, F64>; + def: OpmR_RR_pat, setune, i1, F64>; } -// Clr/set/toggle bit for 64-bit values with immediate bit index. 
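For reference, the single-bit operations the clrbit/setbit/togglebit selections compute: an and/or/xor with an inverted-power-of-two or power-of-two mask touches exactly one bit, with the bit index recovered as Log2 of the mask. A minimal C++ sketch, names illustrative:

  #include <cassert>
  #include <cstdint>

  uint32_t clrbit(uint32_t x, unsigned n) { return x & ~(1u << n); }   // and ~2^n
  uint32_t setbit(uint32_t x, unsigned n) { return x |  (1u << n); }   // or   2^n
  uint32_t togglebit(uint32_t x, unsigned n) { return x ^ (1u << n); } // xor  2^n

  int main() {
    assert(clrbit(0xFFu, 3) == 0xF7u);
    assert(setbit(0x00u, 4) == 0x10u);
    assert(togglebit(0x10u, 4) == 0x00u);
  }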
-let AddedComplexity = 20 in { // Complexity greater than and/or/xor - def: Pat<(and I64:$Rss, IsNPow2_64L:$V), - (REG_SEQUENCE DoubleRegs, - (i32 (HiReg $Rss)), isub_hi, - (S2_clrbit_i (LoReg $Rss), (LogN2_64 $V)), isub_lo)>; - def: Pat<(and I64:$Rss, IsNPow2_64H:$V), - (REG_SEQUENCE DoubleRegs, - (S2_clrbit_i (HiReg $Rss), (UDEC32 (i32 (LogN2_64 $V)))), - isub_hi, - (i32 (LoReg $Rss)), isub_lo)>; +class Outn + : OutPatFrag<(ops node:$Rs, node:$Rt), + (C2_not (MI $Rs, $Rt))>; - def: Pat<(or I64:$Rss, IsPow2_64L:$V), - (REG_SEQUENCE DoubleRegs, - (i32 (HiReg $Rss)), isub_hi, - (S2_setbit_i (LoReg $Rss), (Log2_64 $V)), isub_lo)>; - def: Pat<(or I64:$Rss, IsPow2_64H:$V), - (REG_SEQUENCE DoubleRegs, - (S2_setbit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), - isub_hi, - (i32 (LoReg $Rss)), isub_lo)>; +let Predicates = [HasV5T] in { + def: OpmR_RR_pat, setone, i1, F32>; + def: OpmR_RR_pat, setne, i1, F32>; - def: Pat<(xor I64:$Rss, IsPow2_64L:$V), - (REG_SEQUENCE DoubleRegs, - (i32 (HiReg $Rss)), isub_hi, - (S2_togglebit_i (LoReg $Rss), (Log2_64 $V)), isub_lo)>; - def: Pat<(xor I64:$Rss, IsPow2_64H:$V), - (REG_SEQUENCE DoubleRegs, - (S2_togglebit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), - isub_hi, - (i32 (LoReg $Rss)), isub_lo)>; + def: OpmR_RR_pat, setone, i1, F64>; + def: OpmR_RR_pat, setne, i1, F64>; + + def: OpmR_RR_pat, seto, i1, F32>; + def: OpmR_RR_pat, seto, i1, F64>; } -let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. - def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), - (S2_tstbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>; - def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)), - (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(i1 (trunc I32:$Rs)), - (S2_tstbit_i IntRegs:$Rs, 0)>; - def: Pat<(i1 (trunc I64:$Rs)), - (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; + +// --(6) Select ---------------------------------------------------------- +// + +def: Pat<(select I1:$Pu, I32:$Rs, I32:$Rt), + (C2_mux I1:$Pu, I32:$Rs, I32:$Rt)>; +def: Pat<(select I1:$Pu, anyimm:$s8, I32:$Rs), + (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>; +def: Pat<(select I1:$Pu, I32:$Rs, anyimm:$s8), + (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>; +def: Pat<(select I1:$Pu, anyimm:$s8, s8_0ImmPred:$S8), + (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>; + +def: Pat<(select (not I1:$Pu), I32:$Rs, I32:$Rt), + (C2_mux I1:$Pu, I32:$Rt, I32:$Rs)>; +def: Pat<(select (not I1:$Pu), s8_0ImmPred:$S8, anyimm:$s8), + (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>; +def: Pat<(select (not I1:$Pu), anyimm:$s8, I32:$Rs), + (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>; +def: Pat<(select (not I1:$Pu), I32:$Rs, anyimm:$s8), + (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>; + +// Map from a 64-bit select to an emulated 64-bit mux. +// Hexagon does not support 64-bit MUXes; so emulate with combines. 
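The emulation mentioned in the comment is just two independent 32-bit selects, one per half of the register pair, recombined. A C++ sketch (mux64 is an illustrative name, not an instruction):

  #include <cassert>
  #include <cstdint>

  uint64_t mux64(bool p, uint64_t s, uint64_t t) {
    uint32_t hi = p ? (uint32_t)(s >> 32) : (uint32_t)(t >> 32); // C2_mux on HiReg
    uint32_t lo = p ? (uint32_t)s : (uint32_t)t;                 // C2_mux on LoReg
    return ((uint64_t)hi << 32) | lo;                            // Combinew
  }

  int main() {
    assert(mux64(true,  0x1111222233334444ULL, 0) == 0x1111222233334444ULL);
    assert(mux64(false, 0, 0xAAAABBBBCCCCDDDDULL) == 0xAAAABBBBCCCCDDDDULL);
  }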
+def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt), + (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), + (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; + +let Predicates = [HasV5T] in { + def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I), + (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; + def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt), + (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; + def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt), + (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>; + def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt), + (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), + (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; + + def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt), + (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>; + def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt), + (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>; + + def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs), + (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; + def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I), + (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; +} + +def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt), + (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; +def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt), + (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; +def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt), + (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), + (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; + +def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt), + (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; +def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt), + (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>; +def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt), + (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; + + +class HvxSel_pat + : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt), + (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>; + +let Predicates = [HasV60T,UseHVX] in { + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; } -let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. - def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)), - (C2_bitsclri IntRegs:$Rs, u6_0ImmPred:$u6)>; - def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), 0)), - (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>; +// From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw). +def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw), + (C2_or (C2_and I1:$Pu, I1:$Pv), + (C2_andn I1:$Pw, I1:$Pu))>; + + +def IsPosHalf : PatLeaf<(i32 IntRegs:$a), [{ + return isPositiveHalfWord(N); +}]>; + +multiclass SelMinMax16_pats { + def: Pat<(sext_inreg (select (i1 (CmpOp IsPosHalf:$Rs, IsPosHalf:$Rt)), + IsPosHalf:$Rs, IsPosHalf:$Rt), i16), + (InstA IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(sext_inreg (select (i1 (CmpOp IsPosHalf:$Rs, IsPosHalf:$Rt)), + IsPosHalf:$Rt, IsPosHalf:$Rs), i16), + (InstB IntRegs:$Rs, IntRegs:$Rt)>; } -let AddedComplexity = 10 in // Complexity greater than compare reg-reg. 
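The LegalizeDAG identity quoted above, (Pu ? Pv : Pw) == (Pu & Pv) | (!Pu & Pw), is easy to verify exhaustively over 1-bit values:

  #include <cassert>

  int main() {
    for (int p = 0; p <= 1; ++p)
      for (int v = 0; v <= 1; ++v)
        for (int w = 0; w <= 1; ++w)
          assert((p ? v : w) == ((p & v) | (!p & w)));
  }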
-def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)), - (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; +let AddedComplexity = 200 in { + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; +} -def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))), - (i32 8)), - (i32 (zextloadi8 (add I32:$b, 2)))), - (i32 16)), - (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), - (zextloadi8 I32:$b)), - (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; +let AddedComplexity = 200 in { + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; +} -// Patterns for loads of i1: -def: Pat<(i1 (load AddrFI:$fi)), - (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; -def: Pat<(i1 (load (add I32:$Rs, s32_0ImmPred:$Off))), - (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; -def: Pat<(i1 (load I32:$Rs)), - (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; +let AddedComplexity = 100, Predicates = [HasV5T] in { + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; +} + + +// --(7) Insert/extract -------------------------------------------------- +// + +def SDTHexagonINSERT: + SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; +def SDTHexagonINSERTRP: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i64>]>; + +def HexagonINSERT: SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; +def HexagonINSERTRP: SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; + +def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>; +def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), + (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>; +def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), + (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; +def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), + (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; -def I1toI32: OutPatFrag<(ops node:$Rs), - (C2_muxii (i1 $Rs), 1, 0)>; +def SDTHexagonEXTRACTU + : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDTHexagonEXTRACTURP + : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i64>]>; -def I32toI1: OutPatFrag<(ops node:$Rs), - (i1 (C2_tfrrp (i32 $Rs)))>; +def HexagonEXTRACTU: SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; +def HexagonEXTRACTURP: SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; -defm: Storexm_pat; -def: Storexm_simple_pat; +def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5), + (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>; +def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6), + (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>; +def: Pat<(HexagonEXTRACTURP I32:$Rs, I64:$Rt), + (S2_extractu_rp I32:$Rs, I64:$Rt)>; +def: Pat<(HexagonEXTRACTURP I64:$Rs, I64:$Rt), + (S2_extractup_rp I64:$Rs, I64:$Rt)>; -def: Pat<(sra (add (sra I64:$src, u6_0ImmPred:$u6), 1), (i32 1)), - (S2_asr_i_p_rnd DoubleRegs:$src, imm:$u6)>, 
Requires<[HasV5T]>; -def: Pat<(sra I64:$src, u6_0ImmPred:$u6), - (S2_asr_i_p DoubleRegs:$src, imm:$u6)>; -def: Pat<(srl I64:$src, u6_0ImmPred:$u6), - (S2_lsr_i_p DoubleRegs:$src, imm:$u6)>; -def: Pat<(shl I64:$src, u6_0ImmPred:$u6), - (S2_asl_i_p DoubleRegs:$src, imm:$u6)>; +def SDTHexagonVSPLAT: + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; -let AddedComplexity = 100 in +def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>; + +def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>; +def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>; +def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)), + (A2_combineii imm:$s8, imm:$s8)>; +def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>; + + +// --(8) Shift/permute --------------------------------------------------- +// + +def SDTHexagonI64I32I32: SDTypeProfile<1, 2, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, + SDTCisSubVecOfVec<1, 0>]>; +def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>; + +def HexagonPACKHL: SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; +def HexagonCOMBINE: SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; +def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; +def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>; +def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>; + +def: OpR_RR_pat, i64, I32>; + +def: Pat<(HexagonCOMBINE I32:$Rs, I32:$Rt), (Combinew $Rs, $Rt)>; + +// The complexity of the combines involving immediates should be greater +// than the complexity of the combine with two registers. +let AddedComplexity = 50 in { + def: Pat<(HexagonCOMBINE I32:$Rs, anyimm:$s8), + (A4_combineri IntRegs:$Rs, imm:$s8)>; + def: Pat<(HexagonCOMBINE anyimm:$s8, I32:$Rs), + (A4_combineir imm:$s8, IntRegs:$Rs)>; +} + +// The complexity of the combine with two immediates should be greater than +// the complexity of a combine involving a register. +let AddedComplexity = 75 in { + def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, anyimm:$u6), + (A4_combineii imm:$s8, imm:$u6)>; + def: Pat<(HexagonCOMBINE anyimm:$s8, s8_0ImmPred:$S8), + (A2_combineii imm:$s8, imm:$S8)>; +} + +let Predicates = [UseHVX] in { + def: OpR_RR_pat, VecPI32, HVI32>; + def: OpR_RR_pat, VecI8, HVI8>; + def: OpR_RR_pat, VecI8, HVI8>; + def: OpR_RR_pat, VecI16, HVI16>; + def: OpR_RR_pat, VecI16, HVI16>; +} + +def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>; +def: Pat<(bswap I64:$Rss), (Combinew (A2_swiz (LoReg $Rss)), + (A2_swiz (HiReg $Rss)))>; + +def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt), (S4_lsli imm:$s6, I32:$Rt)>; +def: Pat<(shl I32:$Rs, (i32 16)), (A2_aslh I32:$Rs)>; +def: Pat<(sra I32:$Rs, (i32 16)), (A2_asrh I32:$Rs)>; + +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; + +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; + + +def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)), + (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>; +def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)), + (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5T]>; + +// Prefer S2_addasl_rrri over S2_asl_i_r_acc. 
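Both instructions named in the comment compute an add of a left-shifted operand, rd = rt + (rs << u3); the addasl form writes a fresh destination instead of accumulating into a tied operand, which is presumably why it gets the higher complexity here. A sketch of the operation:

  #include <cassert>
  #include <cstdint>

  uint32_t addasl(uint32_t rt, uint32_t rs, unsigned u3) {
    return rt + (rs << u3);   // add of shifted-left operand
  }

  int main() {
    assert(addasl(10, 3, 2) == 22);   // 10 + (3 << 2)
  }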
+let AddedComplexity = 120 in def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)), (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>; -def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; -def: Pat<(HexagonBARRIER), (Y2_barrier)>; +let AddedComplexity = 100 in { + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; +} -def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off), - (PS_fi (i32 AddrFI:$Rs), s32_0ImmPred:$off)>; +let AddedComplexity = 100 in { + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; +} + + +class OpshIRI_pat + : Pat<(Op anyimm:$u8, (ShOp RegPred:$Rs, ImmPred:$U5)), + (MI anyimm:$u8, RegPred:$Rs, imm:$U5)>; + +let AddedComplexity = 200 in { + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; +} + +// Prefer this pattern to S2_asl_i_p_or for the special case of joining +// two 32-bit words into a 64-bit word. +let AddedComplexity = 200 in +def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)), + (Combinew I32:$a, I32:$b)>; +def: Pat<(or (or (or (shl (Zext64 (and I32:$b, (i32 65535))), (i32 16)), + (Zext64 (and I32:$a, (i32 65535)))), + (shl (Aext64 (and I32:$c, (i32 65535))), (i32 32))), + (shl (Aext64 I32:$d), (i32 48))), + (Combinew (A2_combine_ll I32:$d, I32:$c), + (A2_combine_ll I32:$b, I32:$a))>; -// Support for generating global address. -// Taken from X86InstrInfo.td. 
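The special case singled out above, an or of a value shifted up by 32 with a zero-extended low word, is nothing more than placing two 32-bit words side by side, so it selects to a plain register-pair combine. Sketch:

  #include <cassert>
  #include <cstdint>

  uint64_t join_words(uint32_t a, uint32_t b) {   // illustrative name
    return ((uint64_t)a << 32) | (uint64_t)b;     // a in HiReg, b in LoReg
  }

  int main() {
    assert(join_words(0x12345678u, 0x9ABCDEF0u) == 0x123456789ABCDEF0ULL);
  }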
-def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, i32>, - SDTCisPtrTy<0>]>; -def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; -def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; +def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))), + (i32 8)), + (i32 (zextloadi8 (add I32:$b, 2)))), + (i32 16)), + (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), + (zextloadi8 I32:$b)), + (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; -// Map TLS addressses to A2_tfrsi. -def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s32_0Imm:$addr)>; -def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s32_0Imm:$label)>; -def: Pat<(i64 imm:$v), (CONST64 imm:$v)>; -def: Pat<(i1 0), (PS_false)>; -def: Pat<(i1 1), (PS_true)>; +def SDTHexagonVShift + : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>; -// Pseudo instructions. -def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>; -def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>; +def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>; +def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>; +def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>; -def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; -def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def: OpR_RI_pat, v2i32, V2I32, u5_0ImmPred>; +def: OpR_RI_pat, v4i16, V4I16, u4_0ImmPred>; +def: OpR_RI_pat, v2i32, V2I32, u5_0ImmPred>; +def: OpR_RI_pat, v4i16, V4I16, u4_0ImmPred>; +def: OpR_RI_pat, v2i32, V2I32, u5_0ImmPred>; +def: OpR_RI_pat, v4i16, V4I16, u4_0ImmPred>; + +def: OpR_RR_pat, v2i32, V2I32, I32>; +def: OpR_RR_pat, v4i16, V4I16, I32>; +def: OpR_RR_pat, v2i32, V2I32, I32>; +def: OpR_RR_pat, v4i16, V4I16, I32>; +def: OpR_RR_pat, v2i32, V2I32, I32>; +def: OpR_RR_pat, v4i16, V4I16, I32>; + +def: Pat<(sra V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))), + (S2_asr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(srl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))), + (S2_lsr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(shl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))), + (S2_asl_i_vw V2I32:$b, imm:$c)>; +def: Pat<(sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), + (S2_asr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), + (S2_lsr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), + (S2_asl_i_vh V4I16:$b, imm:$c)>; -def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain, -// Optional Flag and Variable Arguments. -// Its 1 Operand has pointer type. -def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +// --(9) Arithmetic/bitwise ---------------------------------------------- +// +def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>; +def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>; +def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>; -def: Pat<(callseq_start timm:$amt, timm:$amt2), - (ADJCALLSTACKDOWN imm:$amt, imm:$amt2)>; -def: Pat<(callseq_end timm:$amt1, timm:$amt2), - (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>; +let Predicates = [HasV5T] in { + def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>; + def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>; -//Tail calls. 
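A note on the fabs/fneg selections a few lines up: for IEEE-754 single precision the sign occupies bit 31, so fabs reduces to clearing that bit and fneg to toggling it, which is exactly what the clrbit/togglebit instructions do. A C++ sketch of the fabs case:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  float fabs_bit(float f) {
    uint32_t u;
    std::memcpy(&u, &f, sizeof u);   // view the float's bits
    u &= ~(1u << 31);                // clear the sign bit, cf. S2_clrbit_i ..., 31
    std::memcpy(&f, &u, sizeof f);
    return f;
  }

  int main() {
    assert(fabs_bit(-2.5f) == 2.5f);
    assert(fabs_bit(2.5f) == 2.5f);
  }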
-def: Pat<(HexagonTCRet tglobaladdr:$dst), - (PS_tailcall_i tglobaladdr:$dst)>; -def: Pat<(HexagonTCRet texternalsym:$dst), - (PS_tailcall_i texternalsym:$dst)>; -def: Pat<(HexagonTCRet I32:$dst), - (PS_tailcall_r I32:$dst)>; - -// Map from r0 = and(r1, 65535) to r0 = zxth(r1) -def: Pat<(and I32:$src1, 65535), - (A2_zxth IntRegs:$src1)>; - -// Map from r0 = and(r1, 255) to r0 = zxtb(r1). -def: Pat<(and I32:$src1, 255), - (A2_zxtb IntRegs:$src1)>; - -// Map Add(p1, true) to p1 = not(p1). -// Add(p1, false) should never be produced, -// if it does, it got to be mapped to NOOP. -def: Pat<(add I1:$src1, -1), - (C2_not PredRegs:$src1)>; - -// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). -def: Pat<(select (not I1:$src1), s8_0ImmPred:$src2, s32_0ImmPred:$src3), - (C2_muxii PredRegs:$src1, s32_0ImmPred:$src3, s8_0ImmPred:$src2)>; - -// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = C2_muxir(p0, r1, #i) -def: Pat<(select (not I1:$src1), s32_0ImmPred:$src2, - I32:$src3), - (C2_muxir PredRegs:$src1, IntRegs:$src3, s32_0ImmPred:$src2)>; - -// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = C2_muxri (p0, #i, r1) -def: Pat<(select (not I1:$src1), IntRegs:$src2, s32_0ImmPred:$src3), - (C2_muxri PredRegs:$src1, s32_0ImmPred:$src3, IntRegs:$src2)>; - -// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. -def: Pat<(brcond (not I1:$src1), bb:$offset), - (J2_jumpf PredRegs:$src1, bb:$offset)>; - -// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo). -def: Pat<(i64 (sext_inreg I64:$src1, i32)), - (A2_sxtw (LoReg DoubleRegs:$src1))>; - -// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)). -def: Pat<(i64 (sext_inreg I64:$src1, i16)), - (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>; - -// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)). 
-def: Pat<(i64 (sext_inreg I64:$src1, i8)), - (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; - -def: Pat<(brcond (i1 (setne I32:$Rs, I32:$Rt)), bb:$offset), - (J2_jumpf (C2_cmpeq I32:$Rs, I32:$Rt), bb:$offset)>; -def: Pat<(brcond (i1 (setne I32:$Rs, s10_0ImmPred:$s10)), bb:$offset), - (J2_jumpf (C2_cmpeqi I32:$Rs, imm:$s10), bb:$offset)>; -def: Pat<(brcond (i1 (setne I1:$Pu, (i1 -1))), bb:$offset), - (J2_jumpf PredRegs:$Pu, bb:$offset)>; -def: Pat<(brcond (i1 (setne I1:$Pu, (i1 0))), bb:$offset), - (J2_jumpt PredRegs:$Pu, bb:$offset)>; - -// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) -def: Pat<(brcond (i1 (setlt I32:$Rs, s8_0ImmPred:$s8)), bb:$offset), - (J2_jumpf (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s8)), bb:$offset)>; + def: Pat<(fabs F64:$Rs), + (Combinew (S2_clrbit_i (HiReg $Rs), 31), + (i32 (LoReg $Rs)))>; + def: Pat<(fneg F64:$Rs), + (Combinew (S2_togglebit_i (HiReg $Rs), 31), + (i32 (LoReg $Rs)))>; +} +let AddedComplexity = 50 in +def: Pat<(xor (add (sra I32:$Rs, (i32 31)), + I32:$Rs), + (sra I32:$Rs, (i32 31))), + (A2_abs I32:$Rs)>; + + +def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>; +def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>; +def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>; +def: Pat<(sub anyimm:$s10, I32:$Rs), (A2_subri imm:$s10, I32:$Rs)>; + +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i64, I64>; +def: OpR_RR_pat, i64, I64>; + +def: OpR_RR_pat; +def: OpR_RR_pat; + +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; + +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; + +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; + +def: OpR_RR_pat; +def: OpR_RR_pat, i32, I32>; +def: OpR_RR_pat, i32, I32>; +def: OpR_RI_pat; +def: OpR_RI_pat; + +// Arithmetic on predicates. +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; -// Map from a 64-bit select to an emulated 64-bit mux. -// Hexagon does not support 64-bit MUXes; so emulate with combines. -def: Pat<(select I1:$src1, I64:$src2, - I64:$src3), - (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2), - (HiReg DoubleRegs:$src3)), - (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2), - (LoReg DoubleRegs:$src3)))>; - -// Map from a 1-bit select to logical ops. -// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3). -def: Pat<(select I1:$src1, I1:$src2, I1:$src3), - (C2_or (C2_and PredRegs:$src1, PredRegs:$src2), - (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>; - -// Map for truncating from 64 immediates to 32 bit immediates. -def: Pat<(i32 (trunc I64:$src)), - (LoReg DoubleRegs:$src)>; - -// Map for truncating from i64 immediates to i1 bit immediates. -def: Pat<(i1 (trunc I64:$src)), - (C2_tfrrp (LoReg DoubleRegs:$src))>; - -// rs <= rt -> !(rs > rt). -let AddedComplexity = 30 in -def: Pat<(i1 (setle I32:$src1, s32_0ImmPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>; - -// rs <= rt -> !(rs > rt). -def : Pat<(i1 (setle I32:$src1, I32:$src2)), - (i1 (C2_not (C2_cmpgt I32:$src1, I32:$src2)))>; - -// Rss <= Rtt -> !(Rss > Rtt). 
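The AddedComplexity 50 pattern just above recognizes the classic branchless absolute value: with m = x >> 31 (arithmetic shift, so 0 or -1), (x + m) ^ m yields |x|. A quick C++ check:

  #include <cassert>
  #include <cstdint>

  int32_t abs_branchless(int32_t x) {
    int32_t m = x >> 31;    // 0 for x >= 0, -1 for x < 0
    return (x + m) ^ m;     // conditional negate via two's complement
  }

  int main() {
    assert(abs_branchless(-5) == 5);
    assert(abs_branchless(7) == 7);
    assert(abs_branchless(0) == 0);
  }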
-def: Pat<(i1 (setle I64:$src1, I64:$src2)), - (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Map cmpne -> cmpeq. -// Hexagon_TODO: We should improve on this. -// rs != rt -> !(rs == rt). -let AddedComplexity = 30 in -def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)), - (C2_not (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2))>; - -// Convert setne back to xor for hexagon since we compute w/ pred registers. -def: Pat<(i1 (setne I1:$src1, I1:$src2)), - (C2_xor PredRegs:$src1, PredRegs:$src2)>; - -// Map cmpne(Rss) -> !cmpew(Rss). -// rs != rt -> !(rs == rt). -def: Pat<(i1 (setne I64:$src1, I64:$src2)), - (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; - -// rs >= rt -> rt <= rs -def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), - (C4_cmplte I32:$Rt, I32:$Rs)>; +let Predicates = [HasV5T] in { + def: OpR_RR_pat, f32, F32>; + def: OpR_RR_pat, f32, F32>; + def: OpR_RR_pat, f32, F32>; + def: OpR_RR_pat, f32, F32>; + def: OpR_RR_pat, f32, F32>; +} -let AddedComplexity = 30 in -def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)), - (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s10))>; - -// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). -// rss >= rtt -> !(rtt > rss). -def: Pat<(i1 (setge I64:$src1, I64:$src2)), - (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>; - -// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). -// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). -// rs < rt -> !(rs >= rt). -let AddedComplexity = 30 in -def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2)))>; - -// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) -def: Pat<(i1 (setuge I32:$src1, 0)), - (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>; - -// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) -def: Pat<(i1 (setuge I32:$src1, u32_0ImmPred:$src2)), - (C2_cmpgtui IntRegs:$src1, (UDEC1 u32_0ImmPred:$src2))>; - -// Generate cmpgtu(Rs, #u9) -def: Pat<(i1 (setugt I32:$src1, u32_0ImmPred:$src2)), - (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>; - -// Map from Rs >= Rt -> !(Rt > Rs). -// rs >= rt -> !(rt > rs). -def: Pat<(i1 (setuge I64:$src1, I64:$src2)), - (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>; - -// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1). -// Map from (Rs <= Rt) -> !(Rs > Rt). -def: Pat<(i1 (setule I64:$src1, I64:$src2)), - (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Sign extends. -// sext i1->i32 -def: Pat<(i32 (sext I1:$Pu)), - (C2_muxii I1:$Pu, -1, 0)>; - -// sext i1->i64 -def: Pat<(i64 (sext I1:$Pu)), - (A2_combinew (C2_muxii PredRegs:$Pu, -1, 0), - (C2_muxii PredRegs:$Pu, -1, 0))>; +// In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add, +// over add-add with individual multiplies as inputs. +let AddedComplexity = 10 in { + def: AccRRI_pat, I32, u32_0ImmPred>; + def: AccRRI_pat, I32, u32_0ImmPred>; + def: AccRRR_pat, I32, I32>; +} -// Zero extends. 
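The reduction shape mentioned in the multiply-add comment looks like this in source form; each loop step is a single accumulate-of-product, which the raised complexity steers toward one mac instruction (e.g. M2_maci) instead of a multiply feeding a separate add:

  #include <cassert>

  int dot(const int *a, const int *b, int n) {   // illustrative helper
    int acc = 0;
    for (int i = 0; i < n; ++i)
      acc += a[i] * b[i];   // one multiply-accumulate per step
    return acc;
  }

  int main() {
    int a[] = {1, 2, 3}, b[] = {4, 5, 6};
    assert(dot(a, b, 3) == 32);
  }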
-// zext i1->i32 -def: Pat<(i32 (zext I1:$Pu)), - (C2_muxii PredRegs:$Pu, 1, 0)>; +def: AccRRI_pat, I32, s32_0ImmPred>; +def: AccRRI_pat, I32, s32_0ImmPred>; +def: AccRRR_pat, I32, I32>; -// zext i1->i64 -def: Pat<(i64 (zext I1:$Pu)), - (ToZext64 (C2_muxii PredRegs:$Pu, 1, 0))>; -// zext i32->i64 -def: Pat<(Zext64 I32:$Rs), - (ToZext64 IntRegs:$Rs)>; +def: Pat<(ineg (mul I32:$Rs, u8_0ImmPred:$u8)), + (M2_mpysin IntRegs:$Rs, imm:$u8)>; -// Map from Rs = Pd to Pd = mux(Pd, #1, #0) -def: Pat<(i32 (anyext I1:$Pu)), - (C2_muxii PredRegs:$Pu, 1, 0)>; +def n8_0ImmPred: PatLeaf<(i32 imm), [{ + int64_t V = N->getSExtValue(); + return -255 <= V && V <= 0; +}]>; -// Map from Rss = Pd to Rdd = combine(#0, (mux(Pd, #1, #0))) -def: Pat<(i64 (anyext I1:$Pu)), - (ToZext64 (C2_muxii PredRegs:$Pu, 1, 0))>; +// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) +def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8), + (M2_mpysin I32:$Rs, (NegImm8 imm:$n8))>; -// Clear the sign bit in a 64-bit register. -def ClearSign : OutPatFrag<(ops node:$Rss), - (A2_combinew (S2_clrbit_i (HiReg $Rss), 31), (LoReg $Rss))>; +def: Pat<(add Sext64:$Rs, I64:$Rt), + (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>; + +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I64, I64>; + +def: AccRRR_pat>, I32, I32>; +def: AccRRR_pat>, I32, I32>; +def: AccRRR_pat>, I32, I32>; + +// S4_addaddi and S4_subaddi don't have tied operands, so give them +// a bit of preference. +let AddedComplexity = 30 in { + def: Pat<(add I32:$Rs, (Su I32:$Ru, anyimm:$s6)), + (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>; + def: Pat<(add anyimm:$s6, (Su I32:$Rs, I32:$Ru)), + (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>; + def: Pat<(add I32:$Rs, (Su anyimm:$s6, I32:$Ru)), + (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>; + def: Pat<(sub (Su I32:$Rs, anyimm:$s6), I32:$Ru), + (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>; + def: Pat<(add (Su I32:$Rs, I32:$Ru), anyimm:$s6), + (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>; +} + +def: Pat<(or I32:$Ru, (Su I32:$Rx, anyimm:$s10)), + (S4_or_andix IntRegs:$Ru, IntRegs:$Rx, imm:$s10)>; +def: Pat<(or I32:$Rx, (Su I32:$Rs, anyimm:$s10)), + (S4_or_andi IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>; +def: Pat<(or I32:$Rx, (Su I32:$Rs, anyimm:$s10)), + (S4_or_ori IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>; + + +def: Pat<(i32 (trunc (sra (Su Sext64:$Rs, Sext64:$Rt), (i32 32)))), + (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; +def: Pat<(i32 (trunc (srl (Su Sext64:$Rs, Sext64:$Rt), (i32 32)))), + (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; + +def: Pat<(mul (Zext64 I32:$Rs), (Zext64 I32:$Rt)), + (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>; +def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)), + (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>; +def: Pat<(mul Sext64:$Rs, Sext64:$Rt), + (M2_dpmpyss_s0 (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; + +def: Pat<(add I64:$Rx, (Su Sext64:$Rs, Sext64:$Rt)), + (M2_dpmpyss_acc_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; +def: Pat<(sub I64:$Rx, (Su Sext64:$Rs, Sext64:$Rt)), + (M2_dpmpyss_nac_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; +def: Pat<(add I64:$Rx, (Su (Aext64 I32:$Rs), (Aext64 I32:$Rt))), + (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; +def: Pat<(add I64:$Rx, (Su (Zext64 I32:$Rs), (Zext64 I32:$Rt))), + (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; +def: Pat<(sub 
I64:$Rx, (Su (Aext64 I32:$Rs), (Aext64 I32:$Rt))), + (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; +def: Pat<(sub I64:$Rx, (Su (Zext64 I32:$Rs), (Zext64 I32:$Rt))), + (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; + +// Add halfword. +def: Pat<(sext_inreg (add I32:$Rt, I32:$Rs), i16), + (A2_addh_l16_ll I32:$Rt, I32:$Rs)>; +def: Pat<(sra (add (shl I32:$Rt, (i32 16)), I32:$Rs), (i32 16)), + (A2_addh_l16_hl I32:$Rt, I32:$Rs)>; +def: Pat<(shl (add I32:$Rt, I32:$Rs), (i32 16)), + (A2_addh_h16_ll I32:$Rt, I32:$Rs)>; + +// Subtract halfword. +def: Pat<(sext_inreg (sub I32:$Rt, I32:$Rs), i16), + (A2_subh_l16_ll I32:$Rt, I32:$Rs)>; +def: Pat<(sra (add (shl I32:$Rt, (i32 16)), I32:$Rs), (i32 16)), + (A2_addh_l16_hl I32:$Rt, I32:$Rs)>; +def: Pat<(shl (sub I32:$Rt, I32:$Rs), (i32 16)), + (A2_subh_h16_ll I32:$Rt, I32:$Rs)>; + +def: Pat<(mul I64:$Rss, I64:$Rtt), + (Combinew + (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))), + (LoReg $Rss), + (HiReg $Rtt)), + (LoReg $Rtt), + (HiReg $Rss)), + (i32 (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)))))>; def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt), (A2_addp @@ -954,8 +1387,7 @@ def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt), (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32), (HiReg $Rss), (LoReg $Rtt)), - (A2_combinew (A2_tfrsi 0), - (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))), + (A4_combineir 0, (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))), 32), (HiReg $Rss), (HiReg $Rtt)), @@ -975,6 +1407,10 @@ def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>; // = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A'] // = (unsigned product AB) - 2^64 [s(A)B'+s(B)A'] +// Clear the sign bit in a 64-bit register. +def ClearSign : OutPatFrag<(ops node:$Rss), + (Combinew (S2_clrbit_i (HiReg $Rss), 31), (i32 (LoReg $Rss)))>; + def : Pat <(mulhs I64:$Rss, I64:$Rtt), (A2_subp (MulHU $Rss, $Rtt), @@ -982,466 +1418,660 @@ def : Pat <(mulhs I64:$Rss, I64:$Rtt), (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)), (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>; -// Hexagon specific ISD nodes. 
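The 64x64 multiply pattern above is the schoolbook decomposition: writing A = 2^32*Ah + Al and B = 2^32*Bh + Bl, the low 64 bits of A*B are Al*Bl + 2^32*(Al*Bh + Ah*Bl); the Ah*Bh term lands entirely above bit 63, so two macs on top of the unsigned Al*Bl product suffice. A C++ sketch:

  #include <cassert>
  #include <cstdint>

  uint64_t mul64_lo(uint64_t a, uint64_t b) {
    uint32_t ah = (uint32_t)(a >> 32), al = (uint32_t)a;
    uint32_t bh = (uint32_t)(b >> 32), bl = (uint32_t)b;
    uint64_t lo = (uint64_t)al * bl;                          // M2_dpmpyuu_s0
    uint32_t mid = (uint32_t)(lo >> 32) + al * bh + ah * bl;  // two macs, mod 2^32
    return ((uint64_t)mid << 32) | (uint32_t)lo;              // Combinew
  }

  int main() {
    assert(mul64_lo(0x123456789ULL, 0x0FEDCBA987ULL) ==
           0x123456789ULL * 0x0FEDCBA987ULL);
  }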
-def SDTHexagonALLOCA : SDTypeProfile<1, 2, - [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, - [SDNPHasChain]>; - +def: Pat<(add (Su I32:$Rs, u6_0ImmPred:$U6), anyimm:$u6), + (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; +def: Pat<(add (Su I32:$Rs, I32:$Rt), anyimm:$u6), + (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(add I32:$Ru, (Su I32:$Rs, u6_2ImmPred:$u6_2)), + (M4_mpyri_addr_u2 IntRegs:$Ru, imm:$u6_2, IntRegs:$Rs)>; +def: Pat<(add I32:$Ru, (Su I32:$Rs, anyimm:$u6)), + (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$u6)>; +def: Pat<(add I32:$Ru, (Su I32:$Ry, I32:$Rs)), + (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>; -def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)), - (PS_alloca IntRegs:$Rs, imm:$A)>; -def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; -def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; +let Predicates = [HasV5T] in { + def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx), + (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>; + def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx), + (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>; + def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx), + (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>; +} -def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi imm:$dst)>; -def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi imm:$dst)>; -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(sub I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(and I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(or I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +def: Pat<(mul V2I32:$Rs, V2I32:$Rt), + (PS_vmulw V2I32:$Rs, V2I32:$Rt)>; +def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)), + (PS_vmulw_acc V2I32:$Rx, V2I32:$Rs, V2I32:$Rt)>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(sub I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(and I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(or I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +// Add/subtract two v4i8: Hexagon does not have an insn for this one, so +// we use the double add v8i8, and use only the low part of the result. 
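The emulation the comment describes: zero-extend the 32-bit vector so its four bytes occupy the low half of a register pair, perform the eight-lane byte add, and keep only the low word. Lane-wise, the effect is per-byte addition with wraparound, as this C++ sketch shows (names illustrative):

  #include <cassert>
  #include <cstdint>

  uint32_t add_v4i8(uint32_t a, uint32_t b) {
    uint32_t r = 0;
    for (int i = 0; i < 4; ++i) {
      uint8_t ai = (uint8_t)(a >> (8 * i)), bi = (uint8_t)(b >> (8 * i));
      r |= (uint32_t)(uint8_t)(ai + bi) << (8 * i);  // each byte wraps mod 256
    }
    return r;
  }

  int main() {
    assert(add_v4i8(0x01FF0203u, 0x01010101u) == 0x02000304u);
  }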
+def: Pat<(add V4I8:$Rs, V4I8:$Rt), + (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>; +def: Pat<(sub V4I8:$Rs, V4I8:$Rt), + (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>; -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(sub I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(and I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(or I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -let AddedComplexity = 100 in -def: Pat<(xor I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +// Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two +// half-words, and saturates the result to a 32-bit value, except the +// saturation never happens (it can only occur with scaling). +def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), + (LoReg (S2_vtrunewh (A2_combineii 0, 0), + (M2_vmpy2s_s0 V2I16:$Rs, V2I16:$Rt)))>; +def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), + (S2_vtrunewh (M2_vmpy2s_s0 (HiReg $Rs), (HiReg $Rt)), + (M2_vmpy2s_s0 (LoReg $Rs), (LoReg $Rt)))>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(sub I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(and I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(or I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -let AddedComplexity = 100 in -def: Pat<(xor I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +// Multiplies two v4i8 vectors. +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>, + Requires<[HasV5T]>; -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(sub I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(and I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(or I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -let AddedComplexity = 100 in -def: Pat<(xor I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +// Multiplies two v8i8 vectors. 
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>, + Requires<[HasV5T]>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(sub I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(and I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(or I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -let AddedComplexity = 100 in -def: Pat<(xor I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +// --(10) Bit ------------------------------------------------------------ +// -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(xor I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +// Count leading zeros. 
+def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(xor I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +// Count trailing zeros. +def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>; +def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; - -def: Pat<(sra I64:$src1, I32:$src2), (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>; -def: Pat<(srl I64:$src1, I32:$src2), (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>; -def: Pat<(shl I64:$src1, I32:$src2), (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>; -def: Pat<(shl I64:$src1, I32:$src2), (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>; - -def: Pat<(sra I32:$src1, I32:$src2), (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>; -def: Pat<(srl I32:$src1, I32:$src2), (S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>; -def: Pat<(shl I32:$src1, I32:$src2), (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>; -def: Pat<(shl I32:$src1, I32:$src2), (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>; +// Count leading ones. +def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; -def SDTHexagonINSERT: - SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; -def SDTHexagonINSERTRP: - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisVT<3, i64>]>; +// Count trailing ones. 
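These count-ones forms (leading ones above, trailing ones below) all reduce to counting zeros of the complement, which is exactly the (ctlz/cttz (not ...)) shapes being matched. Sketch, illustration only:

#include <cstdint>

unsigned cl1(uint32_t x) { return x == ~0u ? 32 : __builtin_clz(~x); } // leading ones
unsigned ct1(uint32_t x) { return x == ~0u ? 32 : __builtin_ctz(~x); } // trailing ones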
+def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>; +def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; -def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; -def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; +// Define leading/trailing patterns that require zero-extensions to 64 bits. +def: Pat<(i64 (ctlz I64:$Rss)), (ToZext64 (S2_cl0p I64:$Rss))>; +def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>; +def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>; +def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>; -def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), - (S2_insert I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>; -def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), - (S2_insertp I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>; -def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), - (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; -def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), - (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; +def: Pat<(i64 (ctpop I64:$Rss)), (ToZext64 (S5_popcountp I64:$Rss))>; +def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>; -let AddedComplexity = 100 in -def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), - (i32 (extloadi8 (add I32:$b, 3))), - 24, 8), - (i32 16)), - (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), - (zextloadi8 I32:$b)), - (A2_swiz (L2_loadri_io I32:$b, 0))>; +def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>; +def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>; -def SDTHexagonEXTRACTU: - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; -def SDTHexagonEXTRACTURP: - SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i64>]>; - -def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; -def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; - -def: Pat<(HexagonEXTRACTU I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3), - (S2_extractu I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>; -def: Pat<(HexagonEXTRACTU I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3), - (S2_extractup I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>; -def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2), - (S2_extractu_rp I32:$src1, I64:$src2)>; -def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2), - (S2_extractup_rp I64:$src1, I64:$src2)>; -def n8_0ImmPred: PatLeaf<(i32 imm), [{ - int64_t V = N->getSExtValue(); - return -255 <= V && V <= 0; +let AddedComplexity = 20 in { // Complexity greater than and/or/xor + def: Pat<(and I32:$Rs, IsNPow2_32:$V), + (S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>; + def: Pat<(or I32:$Rs, IsPow2_32:$V), + (S2_setbit_i IntRegs:$Rs, (Log2_32 $V))>; + def: Pat<(xor I32:$Rs, IsPow2_32:$V), + (S2_togglebit_i IntRegs:$Rs, (Log2_32 $V))>; + + def: Pat<(and I32:$Rs, (not (shl 1, I32:$Rt))), + (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(or I32:$Rs, (shl 1, I32:$Rt)), + (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(xor I32:$Rs, (shl 1, I32:$Rt)), + (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>; +} + +// Clr/set/toggle bit for 64-bit values with immediate bit index. 
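Both the 32-bit block above and the 64-bit block that follows rest on the same identities: IsPow2/IsNPow2 classify the constant operand, and Log2/LogN2 recover the bit index that clrbit/setbit/togglebit take. In plain C++, illustration only:

#include <cstdint>

uint32_t clrbit(uint32_t x, unsigned n) { return x & ~(1u << n); } // and with inverted power of 2
uint32_t setbit(uint32_t x, unsigned n) { return x |  (1u << n); } // or with power of 2
uint32_t tglbit(uint32_t x, unsigned n) { return x ^  (1u << n); } // xor with power of 2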
+let AddedComplexity = 20 in { // Complexity greater than and/or/xor + def: Pat<(and I64:$Rss, IsNPow2_64L:$V), + (Combinew (i32 (HiReg $Rss)), + (S2_clrbit_i (LoReg $Rss), (LogN2_64 $V)))>; + def: Pat<(and I64:$Rss, IsNPow2_64H:$V), + (Combinew (S2_clrbit_i (HiReg $Rss), (UDEC32 (i32 (LogN2_64 $V)))), + (i32 (LoReg $Rss)))>; + + def: Pat<(or I64:$Rss, IsPow2_64L:$V), + (Combinew (i32 (HiReg $Rss)), + (S2_setbit_i (LoReg $Rss), (Log2_64 $V)))>; + def: Pat<(or I64:$Rss, IsPow2_64H:$V), + (Combinew (S2_setbit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), + (i32 (LoReg $Rss)))>; + + def: Pat<(xor I64:$Rss, IsPow2_64L:$V), + (Combinew (i32 (HiReg $Rss)), + (S2_togglebit_i (LoReg $Rss), (Log2_64 $V)))>; + def: Pat<(xor I64:$Rss, IsPow2_64H:$V), + (Combinew (S2_togglebit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), + (i32 (LoReg $Rss)))>; +} + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. + def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), + (S2_tstbit_i IntRegs:$Rs, imm:$u5)>; + def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)), + (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (trunc I32:$Rs)), + (S2_tstbit_i IntRegs:$Rs, 0)>; + def: Pat<(i1 (trunc I64:$Rs)), + (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; +} + +let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. + def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)), + (C2_bitsclri IntRegs:$Rs, imm:$u6)>; + def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), 0)), + (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>; +} + +let AddedComplexity = 10 in // Complexity greater than compare reg-reg. +def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)), + (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; + +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. + def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), + (S4_ntstbit_i I32:$Rs, imm:$u5)>; + def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)), + (S4_ntstbit_r I32:$Rs, I32:$Rt)>; +} + +// Add extra complexity to prefer these instructions over bitsset/bitsclr. +// The reason is that tstbit/ntstbit can be folded into a compound instruction: +// if ([!]tstbit(...)) jump ... +let AddedComplexity = 100 in +def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), + (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; + +let AddedComplexity = 100 in +def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), + (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; + +// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be +// represented as a compare against "value & 0xFF", which is an exact match +// for cmpb (same for cmph). The patterns below do not contain any additional +// complexity that would make them preferable, and if they were actually used +// instead of cmpb/cmph, they would result in a compare against register that +// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). 
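To make the last note concrete, and to summarize this block, here are the shapes in question written out in C++ (illustration only, not part of the patch): the single-bit and mask tests that S2_tstbit_i and C2_bitsclr select, and the masked i8 compare that is deliberately left to cmpb (A4_cmpbeq):

#include <cstdint>

bool tstbit(uint32_t x, unsigned n)  { return (x & (1u << n)) != 0; }     // S2_tstbit_i
bool bitsclr(uint32_t x, uint32_t m) { return (x & m) == 0; }             // C2_bitsclr
bool cmpb_eq(uint32_t x, uint32_t y) { return (x & 0xFF) == (y & 0xFF); } // cmpb shape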
+def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)), + (C4_nbitsclri I32:$Rs, imm:$u6)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), + (C4_nbitsclr I32:$Rs, I32:$Rt)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), + (C4_nbitsset I32:$Rs, I32:$Rt)>; + + +// --(11) Load ----------------------------------------------------------- +// + +def extloadv2i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2i8; +}]>; +def extloadv4i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4i8; }]>; -// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) -def: Pat<(mul I32:$src1, (ineg n8_0ImmPred:$src2)), - (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>; +def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2i8; +}]>; +def zextloadv4i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4i8; +}]>; -multiclass MinMax_pats_p { - defm: T_MinMax_pats; +def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2i8; +}]>; +def sextloadv4i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4i8; +}]>; + +// Patterns to select load-indexed: Rs + Off. +// - frameindex [+ imm], +multiclass Loadxfi_pat { + def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; + def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; + def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; } -def: Pat<(add Sext64:$Rs, I64:$Rt), - (A2_addsp (LoReg Sext64:$Rs), DoubleRegs:$Rt)>; +// Patterns to select load-indexed: Rs + Off. +// - base reg [+ imm] +multiclass Loadxgi_pat { + def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))), + (VT (MI IntRegs:$Rs, imm:$Off))>; + def: Pat<(VT (Load (IsOrAdd I32:$Rs, ImmPred:$Off))), + (VT (MI IntRegs:$Rs, imm:$Off))>; + def: Pat<(VT (Load I32:$Rs)), (VT (MI IntRegs:$Rs, 0))>; +} -let AddedComplexity = 200 in { - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; +// Patterns to select load-indexed: Rs + Off. Combines Loadxfi + Loadxgi. +multiclass Loadxi_pat { + defm: Loadxfi_pat; + defm: Loadxgi_pat; } -def callv3 : SDNode<"HexagonISD::CALL", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// Patterns to select load reg indexed: Rs + Off with a value modifier. +// - frameindex [+ imm] +multiclass Loadxfim_pat { + def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; + def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; + def: Pat<(VT (Load AddrFI:$fi)), (VT (ValueMod (MI AddrFI:$fi, 0)))>; +} -def callv3nr : SDNode<"HexagonISD::CALLnr", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// Patterns to select load reg indexed: Rs + Off with a value modifier. +// - base reg [+ imm] +multiclass Loadxgim_pat { + def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))), + (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; + def: Pat<(VT (Load (IsOrAdd I32:$Rs, ImmPred:$Off))), + (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; + def: Pat<(VT (Load I32:$Rs)), (VT (ValueMod (MI IntRegs:$Rs, 0)))>; +} +// Patterns to select load reg indexed: Rs + Off with a value modifier. 
+// Combines Loadxfim + Loadxgim. +multiclass Loadxim_pat { + defm: Loadxfim_pat; + defm: Loadxgim_pat; +} -// Map call instruction -def : Pat<(callv3 I32:$dst), - (J2_callr I32:$dst)>; -def : Pat<(callv3 tglobaladdr:$dst), - (J2_call tglobaladdr:$dst)>; -def : Pat<(callv3 texternalsym:$dst), - (J2_call texternalsym:$dst)>; -def : Pat<(callv3 tglobaltlsaddr:$dst), - (J2_call tglobaltlsaddr:$dst)>; +// Patterns to select load reg reg-indexed: Rs + Rt< { + let AddedComplexity = 40 in + def: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; -def : Pat<(callv3nr I32:$dst), - (PS_callr_nr I32:$dst)>; -def : Pat<(callv3nr tglobaladdr:$dst), - (PS_call_nr tglobaladdr:$dst)>; -def : Pat<(callv3nr texternalsym:$dst), - (PS_call_nr texternalsym:$dst)>; + let AddedComplexity = 20 in + def: Pat<(VT (Load (add I32:$Rs, I32:$Rt))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; +} +// Patterns to select load reg reg-indexed: Rs + Rt< { + let AddedComplexity = 40 in + def: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), + (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2)))>; -def addrga: PatLeaf<(i32 AddrGA:$Addr)>; -def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; + let AddedComplexity = 20 in + def: Pat<(VT (Load (add I32:$Rs, I32:$Rt))), + (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, 0)))>; +} +// Pattern to select load long-offset reg-indexed: Addr + Rt< + : Pat<(VT (Load (add (shl IntRegs:$Rt, u2_0ImmPred:$u2), ImmPred:$Addr))), + (VT (MI IntRegs:$Rt, imm:$u2, ImmPred:$Addr))>; -// Pats for instruction selection. +class Loadxum_pat + : Pat<(VT (Load (add (shl IntRegs:$Rt, u2_0ImmPred:$u2), ImmPred:$Addr))), + (VT (ValueMod (MI IntRegs:$Rt, imm:$u2, ImmPred:$Addr)))>; -// A class to embed the usual comparison patfrags within a zext to i32. -// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same -// names, or else the frag's "body" won't match the operands. -class CmpInReg - : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>; +// Pattern to select load absolute. +class Loada_pat + : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; -def: T_cmp32_rr_pat, i32>; -def: T_cmp32_rr_pat, i32>; +// Pattern to select load absolute with value modifier. +class Loadam_pat + : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat, i1>; -def: T_cmp32_rr_pat, i1>; +let AddedComplexity = 20 in { + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + // No sextloadi1. 
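Stepping back from the Loadx* machinery defined above, one detail worth spelling out: the multiclasses match IsOrAdd in addition to add, because front ends may form an address as (or base, offset) when the base is known to be aligned, and for disjoint bits the two are the same value. Sketch, illustration only (the assertion states the assumption):

#include <cstdint>
#include <cassert>

uint32_t addr(uint32_t base, uint32_t off) {
  assert((base & off) == 0);  // e.g. aligned base, small offset
  return base | off;          // equal to base + off under the assertion
}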
-let AddedComplexity = 100 in { - def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), - 255), 0)), - (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), - 255), 0)), - (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; - def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), - 65535), 0)), - (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), - 65535), 0)), - (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; } -def: Pat<(i32 (zext (i1 (seteq I32:$Rs, s32_0ImmPred:$s8)))), - (A4_rcmpeqi IntRegs:$Rs, s32_0ImmPred:$s8)>; -def: Pat<(i32 (zext (i1 (setne I32:$Rs, s32_0ImmPred:$s8)))), - (A4_rcmpneqi IntRegs:$Rs, s32_0ImmPred:$s8)>; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; -// Preserve the S2_tstbit_r generation -def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, I32:$src2)), - I32:$src1)), 0)))), - (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>; +let AddedComplexity = 60 in { + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; +} + +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; + +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; + +// Absolute address -// The complexity of the combines involving immediates should be greater -// than the complexity of the combine with two registers. 
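Returning to the new Loadxr_pat instances above: the shifted form corresponds to ordinary scaled array indexing, Rs + (Rt << #u2). In C++ terms, illustration only:

#include <cstdint>

int32_t load_scaled(const int32_t *base, uint32_t i) {
  return base[i];  // memw(Rs + Rt<<#2): the 4-byte element size is the <<2
}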
-let AddedComplexity = 50 in { -def: Pat<(HexagonCOMBINE IntRegs:$r, s32_0ImmPred:$i), - (A4_combineri IntRegs:$r, s32_0ImmPred:$i)>; +let AddedComplexity = 60 in { + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; +} -def: Pat<(HexagonCOMBINE s32_0ImmPred:$i, IntRegs:$r), - (A4_combineir s32_0ImmPred:$i, IntRegs:$r)>; +let AddedComplexity = 30 in { + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; +} + +// GP-relative address + +let AddedComplexity = 100 in { + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; +} + +let AddedComplexity = 70 in { + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; } -// The complexity of the combine with two immediates should be greater than -// the complexity of a combine involving a register. -let AddedComplexity = 75 in { -def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, u32_0ImmPred:$u6), - (A4_combineii imm:$s8, imm:$u6)>; -def: Pat<(HexagonCOMBINE s32_0ImmPred:$s8, s8_0ImmPred:$S8), - (A2_combineii imm:$s8, imm:$S8)>; + +// Sign-extending loads of i1 need to replicate the lowest bit throughout +// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should +// do the trick. +let AddedComplexity = 20 in +def: Pat<(i32 (sextloadi1 I32:$Rs)), + (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; + +// Patterns for loads of i1: +def: Pat<(i1 (load AddrFI:$fi)), + (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; +def: Pat<(i1 (load (add I32:$Rs, anyimm0:$Off))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; +def: Pat<(i1 (load I32:$Rs)), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; + +// HVX loads + +multiclass HvxLd_pat { + def: Pat<(VT (Load I32:$Rt)), (MI I32:$Rt, 0)>; + def: Pat<(VT (Load (add I32:$Rt, ImmPred:$s))), (MI I32:$Rt, imm:$s)>; +} + + +let Predicates = [UseHVX] in { + multiclass HvxLdVs_pat { + defm: HvxLd_pat; + defm: HvxLd_pat; + defm: HvxLd_pat; + defm: HvxLd_pat; + } + defm: HvxLdVs_pat; + defm: HvxLdVs_pat; + defm: HvxLdVs_pat; + + multiclass HvxLdWs_pat { + defm: HvxLd_pat; + defm: HvxLd_pat; + defm: HvxLd_pat; + defm: HvxLd_pat; + } + defm: HvxLdWs_pat; + defm: HvxLdWs_pat; + defm: HvxLdWs_pat; } -// Patterns to generate indexed loads with different forms of the address: +// --(12) Store ---------------------------------------------------------- +// + + +class Storepi_pat + : Pat<(Store Value:$Rt, I32:$Rx, Offset:$s4), + (MI I32:$Rx, imm:$s4, Value:$Rt)>; + +def: Storepi_pat; +def: Storepi_pat; +def: Storepi_pat; +def: Storepi_pat; + +// Patterns for generating stores, where the address takes different forms: // - frameindex, +// - frameindex + offset, // - base + offset, -// - base (without offset). 
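One device from the load section above deserves a concrete form: the "0-v" trick for sextloadi1. For a loaded value v that can only be 0 or 1, subtracting it from zero replicates the low bit through the register. Sketch, illustration only:

#include <cstdint>

int32_t sext_i1(uint32_t v) {
  return int32_t(0u - v);  // 0 -> 0, 1 -> -1 (0xFFFFFFFF)
}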
-multiclass Loadxm_pat { - def: Pat<(VT (Load AddrFI:$fi)), - (VT (ValueMod (MI AddrFI:$fi, 0)))>; - def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))), - (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; - def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), - (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; - def: Pat<(VT (Load I32:$Rs)), - (VT (ValueMod (MI IntRegs:$Rs, 0)))>; -} - -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; - -// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). -def: Pat<(Aext64 I32:$src1), (ToZext64 IntRegs:$src1)>; - -multiclass T_LoadAbsReg_Pat { - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3)))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tglobaladdr:$src2)))), - (MI IntRegs:$src1, 0, tglobaladdr:$src2)>; - - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tconstpool:$src3)))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tconstpool:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tconstpool:$src2)))), - (MI IntRegs:$src1, 0, tconstpool:$src2)>; - - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tjumptable:$src3)))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tjumptable:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tjumptable:$src2)))), - (MI IntRegs:$src1, 0, tjumptable:$src2)>; +// - simple (base address without offset). +// These would usually be used together (via Storexi_pat defined below), but +// in some cases one may want to apply different properties (such as +// AddedComplexity) to the individual patterns. +class Storexi_fi_pat + : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; + +multiclass Storexi_fi_add_pat { + def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; + def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; } -let AddedComplexity = 60 in { -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; +multiclass Storexi_add_pat { + def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; + def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; +} + +class Storexi_base_pat + : Pat<(Store Value:$Rt, I32:$Rs), + (MI IntRegs:$Rs, 0, Value:$Rt)>; -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; +// Patterns for generating stores, where the address takes different forms, +// and where the value being stored is transformed through the value modifier +// ValueMod. The address forms are same as above. +class Storexim_fi_pat + : Pat<(Store Value:$Rs, AddrFI:$fi), + (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; +multiclass Storexim_fi_add_pat { + def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; + def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; } -// 'def pats' for load instructions with base + register offset and non-zero -// immediate value. Immediate value is used to left-shift the second -// register operand. 
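For the new Storexi_*/Storexim_* classes above: splitting the address shapes apart lets properties such as AddedComplexity be applied to each form individually. As equivalent C++ stores (illustration only, names invented):

#include <cstdint>

void store_forms(int32_t *frame_slot, int32_t *base, int32_t v) {
  *frame_slot = v;  // frameindex:       memw(#fi) = v
  base[2]     = v;  // base + immediate: memw(Rs+#8) = v
  *base       = v;  // plain base:       memw(Rs+#0) = v
}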
-class Loadxs_pat - : Pat<(VT (Load (add I32:$Rs, - (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; +multiclass Storexim_add_pat { + def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; + def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; +} -let AddedComplexity = 40 in { - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; -} - -// 'def pats' for load instruction base + register offset and -// zero immediate value. -class Loadxs_simple_pat - : Pat<(VT (Load (add I32:$Rs, I32:$Rt))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; +class Storexim_base_pat + : Pat<(Store Value:$Rt, I32:$Rs), + (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>; -let AddedComplexity = 20 in { - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; -} - -let AddedComplexity = 40 in -multiclass T_StoreAbsReg_Pats { - def : Pat<(stOp (VT RC:$src4), - (add (shl I32:$src1, u2_0ImmPred:$src2), - u32_0ImmPred:$src3)), - (MI IntRegs:$src1, u2_0ImmPred:$src2, u32_0ImmPred:$src3, RC:$src4)>; - - def : Pat<(stOp (VT RC:$src4), - (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; - - def : Pat<(stOp (VT RC:$src4), - (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))), - (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; -} - -defm : T_StoreAbsReg_Pats ; -defm : T_StoreAbsReg_Pats ; -defm : T_StoreAbsReg_Pats ; -defm : T_StoreAbsReg_Pats ; - -class Storexs_pat - : Pat<(Store Value:$Ru, (add I32:$Rs, - (i32 (shl I32:$Rt, u2_0ImmPred:$u2)))), - (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; +multiclass Storexi_pat { + defm: Storexi_fi_add_pat ; + def: Storexi_fi_pat ; + defm: Storexi_add_pat ; +} -let AddedComplexity = 40 in { - def: Storexs_pat; - def: Storexs_pat; - def: Storexs_pat; - def: Storexs_pat; +multiclass Storexim_pat { + defm: Storexim_fi_add_pat ; + def: Storexim_fi_pat ; + defm: Storexim_add_pat ; } -def s30_2ProperPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<30,2>(v) && !isShiftedInt<29,3>(v); -}]>; -def RoundTo8 : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(Imm & -8, SDLoc(N), MVT::i32); -}]>; +// Reg< + : Pat<(Store Value:$Rt, (add (shl I32:$Ru, u2_0ImmPred:$u2), ImmPred:$A)), + (MI IntRegs:$Ru, imm:$u2, ImmPred:$A, Value:$Rt)>; -let AddedComplexity = 40 in -def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)), - (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>; +// Reg< + : Pat<(Store Value:$Ru, (add I32:$Rs, (shl I32:$Rt, u2_0ImmPred:$u2))), + (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; -class Store_rr_pat +// Reg + Reg +class Storexr_add_pat : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)), (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>; -let AddedComplexity = 20 in { - def: Store_rr_pat; - def: Store_rr_pat; - def: Store_rr_pat; - def: Store_rr_pat; -} +class Storea_pat + : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; + +class Stoream_pat + : Pat<(Store Value:$val, Addr:$addr), + (MI Addr:$addr, (ValueMod Value:$val))>; +// Regular stores in the DAG have two operands: value and 
address. +// Atomic stores also have two, but they are reversed: address, value. +// To use atomic stores with the patterns, they need to have their operands +// swapped. This relies on the knowledge that the F.Fragment uses names +// "ptr" and "val". +class SwapSt + : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, + F.OperandTransform>; def IMM_BYTE : SDNodeXFormgetSExtValue(); return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); }]>; def IMM_HALF : SDNodeXFormgetSExtValue(); return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); }]>; def IMM_WORD : SDNodeXForm; def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; -// Emit store-immediate, but only when the stored value will not be constant- -// extended. The reason for that is that there is no pass that can optimize -// constant extenders in store-immediate instructions. In some cases we can -// end up will a number of such stores, all of which store the same extended -// value (e.g. after unrolling a loop that initializes floating point array). - -// Predicates to determine if the 16-bit immediate is expressible as a sign- -// extended 8-bit immediate. Store-immediate-halfword will ignore any bits -// beyond 0..15, so we don't care what is in there. - -def i16in8ImmPred: PatLeaf<(i32 imm), [{ - int64_t v = (int16_t)N->getSExtValue(); - return v == (int64_t)(int8_t)v; -}]>; - -// Predicates to determine if the 32-bit immediate is expressible as a sign- -// extended 8-bit immediate. -def i32in8ImmPred: PatLeaf<(i32 imm), [{ - int64_t v = (int32_t)N->getSExtValue(); - return v == (int64_t)(int8_t)v; -}]>; - +// Even though the offset is not extendable in the store-immediate, we +// can still generate the fi# in the base address. If the final offset +// is not valid for the instruction, we will replace it with a scratch +// register. class SmallStackStore : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{ return isSmallStackStore(cast(N)); }]>; -let AddedComplexity = 40 in { - // Even though the offset is not extendable in the store-immediate, we - // can still generate the fi# in the base address. If the final offset - // is not valid for the instruction, we will replace it with a scratch - // register. - def: Storexm_fi_pat , s32_0ImmPred, - ToImmByte, S4_storeirb_io>; - def: Storexm_fi_pat , i16in8ImmPred, - ToImmHalf, S4_storeirh_io>; - def: Storexm_fi_pat , i32in8ImmPred, - ToImmWord, S4_storeiri_io>; - -// defm: Storexm_fi_add_pat ; -// defm: Storexm_fi_add_pat ; -// defm: Storexm_fi_add_pat ; - - defm: Storexm_add_pat; - defm: Storexm_add_pat; - defm: Storexm_add_pat; -} - -def: Storexm_simple_pat; -def: Storexm_simple_pat; -def: Storexm_simple_pat; - -// op(Ps, op(Pt, Pu)) -class LogLog_pat - : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))), - (MI I1:$Ps, I1:$Pt, I1:$Pu)>; +// This is the complement of SmallStackStore. +class LargeStackStore + : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{ + return !isSmallStackStore(cast(N)); +}]>; -// op(Ps, op(Pt, ~Pu)) -class LogLogNot_pat - : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))), - (MI I1:$Ps, I1:$Pt, I1:$Pu)>; - -def: LogLog_pat; -def: LogLog_pat; -def: LogLog_pat; -def: LogLog_pat; - -def: LogLogNot_pat; -def: LogLogNot_pat; -def: LogLogNot_pat; -def: LogLogNot_pat; - -//===----------------------------------------------------------------------===// -// PIC: Support for PIC compilations. 
The patterns and SD nodes defined -// below are needed to support code generation for PIC -//===----------------------------------------------------------------------===// - -def SDT_HexagonAtGot - : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; -def SDT_HexagonAtPcrel - : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +// Preferred addressing modes for various combinations of stored value +// and address computation. +// For stores where the address and value are both immediates, prefer +// store-immediate. The reason is that the constant-extender optimization +// can replace store-immediate with a store-register, but there is nothing +// to generate a store-immediate out of a store-register. +// +// C R F F+C R+C R+R R<; -// AT_PCREL address-of-global -def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; -def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), - (L2_loadri_io I32:$got, imm:$addr)>; -def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), - (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; -def: Pat<(HexagonAtPcrel I32:$addr), - (C4_addipc imm:$addr)>; +// First, match the unusual case of doubleword store into Reg+Imm4, i.e. +// a store where the offset Imm4 is a multiple of 4, but not of 8. This +// implies that Reg is also a proper multiple of 4. To still generate a +// doubleword store, add 4 to Reg, and subtract 4 from the offset. -def: Pat<(i64 (and I64:$Rs, (i64 (not I64:$Rt)))), - (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>; -def: Pat<(i64 (or I64:$Rs, (i64 (not I64:$Rt)))), - (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>; +def s30_2ProperPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedInt<30,2>(v) && !isShiftedInt<29,3>(v); +}]>; +def RoundTo8 : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(Imm & -8, SDLoc(N), MVT::i32); +}]>; -def: Pat<(add I32:$Rs, (add I32:$Ru, s32_0ImmPred:$s6)), - (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>; +let AddedComplexity = 150 in +def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)), + (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>; -// Rd=add(Rs,sub(#s6,Ru)) -def: Pat<(add I32:$src1, (sub s32_0ImmPred:$src2, - I32:$src3)), - (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; +class Storexi_abs_pat + : Pat<(Store Value:$val, anyimm:$addr), + (MI (ToI32 $addr), 0, Value:$val)>; +class Storexim_abs_pat + : Pat<(Store Value:$val, anyimm:$addr), + (MI (ToI32 $addr), 0, (ValueMod Value:$val))>; -// Rd=sub(add(Rs,#s6),Ru) -def: Pat<(sub (add I32:$src1, s32_0ImmPred:$src2), - I32:$src3), - (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; +let AddedComplexity = 140 in { + def: Storexim_abs_pat; + def: Storexim_abs_pat; + def: Storexim_abs_pat; -// Rd=add(sub(Rs,Ru),#s6) -def: Pat<(add (sub I32:$src1, I32:$src3), - (s32_0ImmPred:$src2)), - (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; + def: Storexi_abs_pat; + def: Storexi_abs_pat; + def: Storexi_abs_pat; +} -def: Pat<(xor I64:$dst2, - (xor I64:$Rss, I64:$Rtt)), - (M4_xor_xacc DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt)>; -def: Pat<(or I32:$Ru, (and (i32 IntRegs:$_src_), s32_0ImmPred:$s10)), - (S4_or_andix IntRegs:$Ru, IntRegs:$_src_, imm:$s10)>; +// GP-relative address +let AddedComplexity = 120 in { + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat, I32, addrgp, S2_storerbgp>; + def: Storea_pat, I32, 
addrgp, S2_storerhgp>; + def: Storea_pat, I32, addrgp, S2_storerigp>; + def: Storea_pat, I64, addrgp, S2_storerdgp>; + + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; +} + +// Absolute address +let AddedComplexity = 110 in { + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat, I32, anyimm0, PS_storerbabs>; + def: Storea_pat, I32, anyimm1, PS_storerhabs>; + def: Storea_pat, I32, anyimm2, PS_storeriabs>; + def: Storea_pat, I64, anyimm3, PS_storerdabs>; + + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; +} + +// Reg<; + def: Storexu_shl_pat; + def: Storexu_shl_pat; + def: Storexu_shl_pat; + def: Storexu_shl_pat; + def: Storexu_shl_pat; -def: Pat<(or I32:$src1, (and I32:$Rs, s32_0ImmPred:$s10)), - (S4_or_andi IntRegs:$src1, IntRegs:$Rs, imm:$s10)>; + def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)), + (S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>; +} -def: Pat<(or I32:$src1, (or I32:$Rs, s32_0ImmPred:$s10)), - (S4_or_ori IntRegs:$src1, IntRegs:$Rs, imm:$s10)>; +// Reg<; + def: Storexr_shl_pat; + def: Storexr_shl_pat; + def: Storexr_shl_pat; + def: Storexr_shl_pat; + def: Storexr_shl_pat; + def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)), + (S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>; +} +class SS_ : SmallStackStore; +class LS_ : LargeStackStore; -// Count trailing zeros: 64-bit. -def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; +multiclass IMFA_ { + defm: Storexim_fi_add_pat; +} +multiclass IFA_ { + defm: Storexi_fi_add_pat; +} -// Count trailing ones: 64-bit. -def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; +// Fi+Imm, store-immediate +let AddedComplexity = 80 in { + defm: IMFA_, anyint, u6_0ImmPred, ToImmByte, S4_storeirb_io>; + defm: IMFA_, anyint, u6_1ImmPred, ToImmHalf, S4_storeirh_io>; + defm: IMFA_, anyint, u6_2ImmPred, ToImmWord, S4_storeiri_io>; -// Define leading/trailing patterns that require zero-extensions to 64 bits. -def: Pat<(i64 (ctlz I64:$Rss)), (ToZext64 (S2_cl0p I64:$Rss))>; -def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>; -def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>; -def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>; + defm: IFA_, anyimm, u6_0ImmPred, S4_storeirb_io>; + defm: IFA_, anyimm, u6_1ImmPred, S4_storeirh_io>; + defm: IFA_, anyimm, u6_2ImmPred, S4_storeiri_io>; -def: Pat<(i64 (ctpop I64:$Rss)), (ToZext64 (S5_popcountp I64:$Rss))>; -def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>; + // For large-stack stores, generate store-register (prefer explicit Fi + // in the address). 
+ defm: IMFA_, anyimm, u6_0ImmPred, ToI32, S2_storerb_io>; + defm: IMFA_, anyimm, u6_1ImmPred, ToI32, S2_storerh_io>; + defm: IMFA_, anyimm, u6_2ImmPred, ToI32, S2_storeri_io>; +} -def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>; -def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>; +// Fi, store-immediate +let AddedComplexity = 70 in { + def: Storexim_fi_pat, anyint, ToImmByte, S4_storeirb_io>; + def: Storexim_fi_pat, anyint, ToImmHalf, S4_storeirh_io>; + def: Storexim_fi_pat, anyint, ToImmWord, S4_storeiri_io>; -def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>; -def: Pat<(bswap I64:$Rss), (A2_combinew (A2_swiz (LoReg $Rss)), - (A2_swiz (HiReg $Rss)))>; + def: Storexi_fi_pat, anyimm, S4_storeirb_io>; + def: Storexi_fi_pat, anyimm, S4_storeirh_io>; + def: Storexi_fi_pat, anyimm, S4_storeiri_io>; -let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. - def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), - (S4_ntstbit_i I32:$Rs, u5_0ImmPred:$u5)>; - def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)), - (S4_ntstbit_r I32:$Rs, I32:$Rt)>; + // For large-stack stores, generate store-register (prefer explicit Fi + // in the address). + def: Storexim_fi_pat, anyimm, ToI32, S2_storerb_io>; + def: Storexim_fi_pat, anyimm, ToI32, S2_storerh_io>; + def: Storexim_fi_pat, anyimm, ToI32, S2_storeri_io>; } -// Add extra complexity to prefer these instructions over bitsset/bitsclr. -// The reason is that tstbit/ntstbit can be folded into a compound instruction: -// if ([!]tstbit(...)) jump ... -let AddedComplexity = 100 in -def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), - (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; +// Fi+Imm, Fi, store-register +let AddedComplexity = 60 in { + defm: Storexi_fi_add_pat; + defm: Storexi_fi_add_pat; + defm: Storexi_fi_add_pat; + defm: Storexi_fi_add_pat; + defm: Storexi_fi_add_pat; + defm: Storexi_fi_add_pat; + defm: Storexim_fi_add_pat; -let AddedComplexity = 100 in -def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), - (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; + def: Storexi_fi_pat; + def: Storexi_fi_pat; + def: Storexi_fi_pat; + def: Storexi_fi_pat; + def: Storexi_fi_pat; + def: Storexi_fi_pat; + def: Storexim_fi_pat; +} -// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be -// represented as a compare against "value & 0xFF", which is an exact match -// for cmpb (same for cmph). The patterns below do not contain any additional -// complexity that would make them preferable, and if they were actually used -// instead of cmpb/cmph, they would result in a compare against register that -// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). 
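A worked instance of the doubleword-store rewrite introduced earlier in this store section (s30_2ProperPred/RoundTo8): for an offset that is a multiple of 4 but not of 8, say 12, the selected code adds 4 to the base and stores at 12 & -8 == 8, so the effective address is unchanged. Sketch, illustration only (memcpy stands in for the 64-bit store):

#include <cstdint>
#include <cstring>

void store_d_off12(uint8_t *rs, int64_t v) {
  // memd(Rs+#12) is emitted as: Rx = add(Rs,#4); memd(Rx+#8)
  uint8_t *rx = rs + 4;
  std::memcpy(rx + 8, &v, 8);  // 4 + 8 == 12, and 8 is a proper multiple of 8
}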
-def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)), - (C4_nbitsclri I32:$Rs, u6_0ImmPred:$u6)>; -def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), - (C4_nbitsclr I32:$Rs, I32:$Rt)>; -def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), - (C4_nbitsset I32:$Rs, I32:$Rt)>; +multiclass IMRA_ { + defm: Storexim_add_pat; +} +multiclass IRA_ { + defm: Storexi_add_pat; +} -def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), u32_0ImmPred:$u6), - (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; -def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), - (HexagonCONST32 tglobaladdr:$global)), - (M4_mpyri_addi tglobaladdr:$global, IntRegs:$Rs, imm:$U6)>; -def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$u6), - (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(add (mul I32:$Rs, I32:$Rt), - (HexagonCONST32 tglobaladdr:$global)), - (M4_mpyrr_addi tglobaladdr:$global, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(add I32:$src1, (mul I32:$src3, u6_2ImmPred:$src2)), - (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>; -def: Pat<(add I32:$src1, (mul I32:$src3, u32_0ImmPred:$src2)), - (M4_mpyri_addr IntRegs:$src1, IntRegs:$src3, imm:$src2)>; +// Reg+Imm, store-immediate +let AddedComplexity = 50 in { + defm: IMRA_; + defm: IMRA_; + defm: IMRA_; -def: Pat<(add I32:$Ru, (mul (i32 IntRegs:$_src_), I32:$Rs)), - (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs)>; + defm: IRA_; + defm: IRA_; + defm: IRA_; +} -def: T_vcmp_pat; +// Reg+Imm, store-register +let AddedComplexity = 40 in { + defm: Storexi_pat; + defm: Storexi_pat; + defm: Storexi_pat; + defm: Storexi_pat; + defm: Storexi_pat; + defm: Storexi_pat; -class T_Shift_CommOp_pat - : Pat<(Op (ShOp IntRegs:$Rx, u5_0ImmPred:$U5), u32_0ImmPred:$u8), - (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>; + defm: Storexim_pat; + defm: Storexim_pat; + defm: Storexim_pat; + defm: Storexim_pat; -let AddedComplexity = 200 in { - def : T_Shift_CommOp_pat ; - def : T_Shift_CommOp_pat ; - def : T_Shift_CommOp_pat ; - def : T_Shift_CommOp_pat ; + defm: Storexi_pat, I32, anyimm0, S2_storerb_io>; + defm: Storexi_pat, I32, anyimm1, S2_storerh_io>; + defm: Storexi_pat, I32, anyimm2, S2_storeri_io>; + defm: Storexi_pat, I64, anyimm3, S2_storerd_io>; } +// Reg+Reg let AddedComplexity = 30 in { - def : T_Shift_CommOp_pat ; - def : T_Shift_CommOp_pat ; -} - -class T_Shift_Op_pat - : Pat<(Op u32_0ImmPred:$u8, (ShOp IntRegs:$Rx, u5_0ImmPred:$U5)), - (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>; + def: Storexr_add_pat; + def: Storexr_add_pat; + def: Storexr_add_pat; + def: Storexr_add_pat; + def: Storexr_add_pat; + def: Storexr_add_pat; -def : T_Shift_Op_pat ; -def : T_Shift_Op_pat ; - -let AddedComplexity = 200 in { - def: Pat<(add addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)), - (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; - def: Pat<(add addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)), - (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; - def: Pat<(sub addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)), - (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; - def: Pat<(sub addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)), - (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; + def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)), + (S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>; } -def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt), - (S4_lsli imm:$s6, IntRegs:$Rt)>; +// Reg, store-immediate +let AddedComplexity = 20 in { + def: Storexim_base_pat; + def: Storexim_base_pat; + def: 
Storexim_base_pat; + + def: Storexi_base_pat; + def: Storexi_base_pat; + def: Storexi_base_pat; +} + +// Reg, store-register +let AddedComplexity = 10 in { + def: Storexi_base_pat; + def: Storexi_base_pat; + def: Storexi_base_pat; + def: Storexi_base_pat; + def: Storexi_base_pat; + def: Storexi_base_pat; + + def: Storexim_base_pat; + def: Storexim_base_pat; + def: Storexim_base_pat; + def: Storexim_base_pat; + + def: Storexi_base_pat, I32, S2_storerb_io>; + def: Storexi_base_pat, I32, S2_storerh_io>; + def: Storexi_base_pat, I32, S2_storeri_io>; + def: Storexi_base_pat, I64, S2_storerd_io>; +} + +// HVX stores + +multiclass HvxSt_pat { + def: Pat<(Store Value:$Vs, I32:$Rt), + (MI I32:$Rt, 0, Value:$Vs)>; + def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$s)), + (MI I32:$Rt, imm:$s, Value:$Vs)>; +} + +let Predicates = [UseHVX] in { + multiclass HvxStVs_pat { + defm: HvxSt_pat; + defm: HvxSt_pat; + defm: HvxSt_pat; + defm: HvxSt_pat; + } + defm: HvxStVs_pat; + defm: HvxStVs_pat; + defm: HvxStVs_pat; + + multiclass HvxStWs_pat { + defm: HvxSt_pat; + defm: HvxSt_pat; + defm: HvxSt_pat; + defm: HvxSt_pat; + } + defm: HvxStWs_pat; + defm: HvxStWs_pat; + defm: HvxStWs_pat; +} -//===----------------------------------------------------------------------===// -// MEMOP -//===----------------------------------------------------------------------===// +// --(13) Memop ---------------------------------------------------------- +// def m5_0Imm8Pred : PatLeaf<(i32 imm), [{ int8_t V = N->getSExtValue(); @@ -1751,25 +2454,10 @@ def LogN2_16 : SDNodeXFormgetTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32); }]>; -def NegImm8 : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); -}]>; - -def NegImm16 : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); -}]>; - -def NegImm32 : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); -}]>; - def IdImm : SDNodeXForm; -multiclass Memopxr_simple_pat { +multiclass Memopxr_base_pat { // Addr: i32 def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs), (MI I32:$Rs, 0, I32:$A)>; @@ -1798,11 +2486,11 @@ multiclass Memopxr_add_pat { - defm: Memopxr_simple_pat ; - defm: Memopxr_add_pat ; + defm: Memopxr_base_pat ; + defm: Memopxr_add_pat ; } -let AddedComplexity = 180 in { +let AddedComplexity = 200 in { // add reg defm: Memopxr_pat; @@ -1865,9 +2553,8 @@ let AddedComplexity = 180 in { } -multiclass Memopxi_simple_pat { +multiclass Memopxi_base_pat { // Addr: i32 def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs), (MI I32:$Rs, 0, (ArgMod Arg:$A))>; @@ -1898,12 +2585,11 @@ multiclass Memopxi_add_pat { - defm: Memopxi_simple_pat ; - defm: Memopxi_add_pat ; + defm: Memopxi_base_pat ; + defm: Memopxi_add_pat ; } - -let AddedComplexity = 200 in { +let AddedComplexity = 220 in { // add imm defm: Memopxi_pat; @@ -1997,1244 +2683,152 @@ let AddedComplexity = 200 in { Log2_32, L4_ior_memopw_io>; } -def : T_CMP_pat ; -def : T_CMP_pat ; -def : T_CMP_pat ; - -// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1). -def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)), - (C4_cmpltei IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>; - -// rs != rt -> !(rs == rt). 
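For reference, the shape a "memop" pattern covers: a load, an ALU update, and a store back to the same address, folded into a single instruction such as L4_add_memopw_io or (via the LogN2 transforms) L4_iand_memopw_io. In C++ terms, illustration only:

#include <cstdint>

void memop_add(uint32_t *p, uint32_t v)    { *p += v; }          // memw(Rs) += Rt
void memop_clrbit(uint32_t *p, unsigned n) { *p &= ~(1u << n); } // memw(Rs) = clrbit(...)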
-def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)), - (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>; - -// For the sequence -// zext( setult ( and(Rs, 255), u8)) -// Use the isdigit transformation below - -def u7_0PosImmPred : ImmLeaf 0 && isUInt<7>(Imm); -}]>; - - -// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' -// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;. -// The isdigit transformation relies on two 'clever' aspects: -// 1) The data type is unsigned which allows us to eliminate a zero test after -// biasing the expression by 48. We are depending on the representation of -// the unsigned types, and semantics. -// 2) The front end has converted <= 9 into < 10 on entry to LLVM +// --(14) PIC ------------------------------------------------------------ // -// For the C code: -// retval = ((c>='0') & (c<='9')) ? 1 : 0; -// The code is transformed upstream of llvm into -// retval = (c-48) < 10 ? 1 : 0; -let AddedComplexity = 139 in -def: Pat<(i32 (zext (i1 (setult (and I32:$src1, 255), u7_0PosImmPred:$src2)))), - (C2_muxii (A4_cmpbgtui IntRegs:$src1, (UDEC1 imm:$src2)), 0, 1)>; +def SDT_HexagonAtGot + : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_HexagonAtPcrel + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -class Loada_pat - : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; +// AT_GOT address-of-GOT, address-of-global, offset-in-global +def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; +// AT_PCREL address-of-global +def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; -class Loadam_pat - : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), + (L2_loadri_io I32:$got, imm:$addr)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), + (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; +def: Pat<(HexagonAtPcrel I32:$addr), + (C4_addipc imm:$addr)>; -class Storea_pat - : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; -class Stoream_pat - : Pat<(Store Value:$val, Addr:$addr), - (MI Addr:$addr, (ValueMod Value:$val))>; +// --(15) Call ----------------------------------------------------------- +// -let AddedComplexity = 30 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; +// Pseudo instructions. 
+def SDT_SPCallSeqStart + : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def SDT_SPCallSeqEnd + : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - def: Stoream_pat; - def: Stoream_pat; - def: Stoream_pat; -} +def callseq_start: SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end: SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def: Storea_pat, I32, addrgp, S2_storerbgp>; -def: Storea_pat, I32, addrgp, S2_storerhgp>; -def: Storea_pat, I32, addrgp, S2_storerigp>; -def: Storea_pat, I64, addrgp, S2_storerdgp>; +def SDT_SPCall: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; +def HexagonTCRet: SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def callv3: SDNode<"HexagonISD::CALL", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def callv3nr: SDNode<"HexagonISD::CALLnr", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; - // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" - // to "r0 = 1; memw(#foo) = r0" - let AddedComplexity = 100 in - def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), - (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>; -} +def: Pat<(callseq_start timm:$amt, timm:$amt2), + (ADJCALLSTACKDOWN imm:$amt, imm:$amt2)>; +def: Pat<(callseq_end timm:$amt1, timm:$amt2), + (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>; -class LoadAbs_pats - : Pat <(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))), - (VT (MI tglobaladdr:$absaddr))>; +def: Pat<(HexagonTCRet tglobaladdr:$dst), (PS_tailcall_i tglobaladdr:$dst)>; +def: Pat<(HexagonTCRet texternalsym:$dst), (PS_tailcall_i texternalsym:$dst)>; +def: Pat<(HexagonTCRet I32:$dst), (PS_tailcall_r I32:$dst)>; -let AddedComplexity = 30 in { - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; -} +def: Pat<(callv3 I32:$dst), (J2_callr I32:$dst)>; +def: Pat<(callv3 tglobaladdr:$dst), (J2_call tglobaladdr:$dst)>; +def: Pat<(callv3 texternalsym:$dst), (J2_call texternalsym:$dst)>; +def: Pat<(callv3 tglobaltlsaddr:$dst), (J2_call tglobaltlsaddr:$dst)>; -let AddedComplexity = 30 in -def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))), - (ToZext64 (PS_loadrubabs tglobaladdr:$absaddr))>; +def: Pat<(callv3nr I32:$dst), (PS_callr_nr I32:$dst)>; +def: Pat<(callv3nr tglobaladdr:$dst), (PS_call_nr tglobaladdr:$dst)>; +def: Pat<(callv3nr texternalsym:$dst), (PS_call_nr texternalsym:$dst)>; -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; +def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; -def: Loadam_pat; -def: Loadam_pat; +def: Pat<(retflag), (PS_jmpret (i32 R31))>; +def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>; -def: Stoream_pat; -def: Stoream_pat; -// Map from load(globaladdress) -> mem[u][bhwd](#foo) -class LoadGP_pats - : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))), - (VT (MI tglobaladdr:$global))>; +// --(16) Branch --------------------------------------------------------- +// -let AddedComplexity = 100 in { - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; 
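To make the isdigit pattern above concrete: after biasing by 48, a single unsigned compare replaces the two-sided range check, which is exactly the setult-of-masked-value form being matched. Illustration only:

int is_digit(unsigned char c) {
  // Front ends turn (c >= '0' && c <= '9') into exactly this shape:
  return (unsigned)(c - 48) < 10u;
}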
- def: LoadGP_pats ; - def: LoadGP_pats ; -} - -// When the Interprocedural Global Variable optimizer realizes that a certain -// global variable takes only two constant values, it shrinks the global to -// a boolean. Catch those loads here in the following 3 patterns. -let AddedComplexity = 100 in { - def: LoadGP_pats ; - def: LoadGP_pats ; -} +def: Pat<(br bb:$dst), (J2_jump b30_2Imm:$dst)>; +def: Pat<(brind I32:$dst), (J2_jumpr I32:$dst)>; -// Transfer global address into a register -def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>; -def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi imm:$Rs)>; -def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>; +def: Pat<(brcond I1:$Pu, bb:$dst), + (J2_jumpt I1:$Pu, bb:$dst)>; +def: Pat<(brcond (not I1:$Pu), bb:$dst), + (J2_jumpf I1:$Pu, bb:$dst)>; +def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst), + (J2_jumpf I1:$Pu, bb:$dst)>; +def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst), + (J2_jumpt I1:$Pu, bb:$dst)>; -let AddedComplexity = 30 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Stoream_pat; - def: Stoream_pat; - def: Stoream_pat; -} +// --(17) Misc ----------------------------------------------------------- -let AddedComplexity = 30 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; +// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' +// for C code of the form r = (c>='0' && c<='9') ? 1 : 0. +// The isdigit transformation relies on two 'clever' aspects: +// 1) The data type is unsigned which allows us to eliminate a zero test after +// biasing the expression by 48. We are depending on the representation of +// the unsigned types, and semantics. +// 2) The front end has converted <= 9 into < 10 on entry to LLVM. +// +// For the C code: +// retval = (c >= '0' && c <= '9') ? 1 : 0; +// The code is transformed upstream of llvm into +// retval = (c-48) < 10 ? 1 : 0; - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; +def u7_0PosImmPred : ImmLeaf 0 && isUInt<7>(Imm); +}]>; - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; -} +let AddedComplexity = 139 in +def: Pat<(i32 (zext (i1 (setult (and I32:$Rs, 255), u7_0PosImmPred:$u7)))), + (C2_muxii (A4_cmpbgtui IntRegs:$Rs, (UDEC1 imm:$u7)), 0, 1)>; -// Indexed store word - global address. -// memw(Rs+#u6:2)=#S8 let AddedComplexity = 100 in -defm: Storex_add_pat; - -// Load from a global address that has only one use in the current basic block. -let AddedComplexity = 100 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; -} - -// Store to a global address that has only one use in the current basic block. -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - - def: Stoream_pat; -} - -// i8/i16/i32 -> i64 loads -// We need a complexity of 120 here to override preceding handling of -// zextload. 
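The byte-gather idiom behind the HexagonINSERT pattern above rests on a simple identity: assembling a word from four byte loads in the order opposite to the target's byte order equals one word load followed by a byte swap (A2_swiz). A hedged C++ sketch of that identity, not the exact DAG shape:

#include <cstdint>
#include <cstring>

uint32_t gather_then_swap(const uint8_t *b) {
  uint32_t w;
  std::memcpy(&w, b, 4);        // one word load (L2_loadri_io)
  return __builtin_bswap32(w);  // byte swap (A2_swiz); equals
                                // (b[0]<<24)|(b[1]<<16)|(b[2]<<8)|b[3]
                                // on a little-endian target like Hexagon
}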
-let AddedComplexity = 120 in { - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; - - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; - - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; -} - -let AddedComplexity = 100 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; -} - -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; -} - -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; - -def: Storea_pat, I32, addrgp, PS_storerbabs>; -def: Storea_pat, I32, addrgp, PS_storerhabs>; -def: Storea_pat, I32, addrgp, PS_storeriabs>; -def: Storea_pat, I64, addrgp, PS_storerdabs>; - -// Prefer this pattern to S2_asl_i_p_or for the special case of joining -// two 32-bit words into a 64-bit word. -let AddedComplexity = 200 in -def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)), - (A2_combinew I32:$a, I32:$b)>; +def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), + (i32 (extloadi8 (add I32:$b, 3))), + 24, 8), + (i32 16)), + (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), + (zextloadi8 I32:$b)), + (A2_swiz (L2_loadri_io I32:$b, 0))>; -def: Pat<(or (or (or (shl (i64 (zext (and I32:$b, (i32 65535)))), (i32 16)), - (i64 (zext (i32 (and I32:$a, (i32 65535)))))), - (shl (i64 (anyext (and I32:$c, (i32 65535)))), (i32 32))), - (shl (Aext64 I32:$d), (i32 48))), - (A2_combinew (A2_combine_ll I32:$d, I32:$c), - (A2_combine_ll I32:$b, I32:$a))>; // We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH // because the SDNode ISD::PREFETCH has properties MayLoad and MayStore. // We don't really want either one here. -def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; -def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, - [SDNPHasChain]>; +def SDTHexagonDCFETCH: SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; +def HexagonDCFETCH: SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, + [SDNPHasChain]>; def: Pat<(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3), (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>; def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)), (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>; -def f32ImmPred : PatLeaf<(f32 fpimm:$F)>; -def f64ImmPred : PatLeaf<(f64 fpimm:$F)>; - -def ftoi : SDNodeXFormgetValueAPF().bitcastToAPInt(); - return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N), - MVT::getIntegerVT(I.getBitWidth())); -}]>; - - -def: Pat<(sra (i64 (add (sra I64:$src1, u6_0ImmPred:$src2), 1)), (i32 1)), - (S2_asr_i_p_rnd I64:$src1, imm:$src2)>; - -let AddedComplexity = 20 in { - defm: Loadx_pat; - defm: Loadx_pat; -} - -let AddedComplexity = 60 in { - defm : T_LoadAbsReg_Pat ; - defm : T_LoadAbsReg_Pat ; -} - -let AddedComplexity = 40 in { - def: Loadxs_pat; - def: Loadxs_pat; -} - -let AddedComplexity = 20 in { - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; -} +def SDTHexagonALLOCA + : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def HexagonALLOCA + : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, [SDNPHasChain]>; -let AddedComplexity = 80 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; -} - -let AddedComplexity = 100 in { - def: LoadGP_pats ; - def: LoadGP_pats ; -} - -let AddedComplexity = 20 in { - defm: Storex_pat; - defm: Storex_pat; -} - -// Simple patterns should be tried with the least priority. 
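Earlier in this hunk, the new A2_swiz pattern folds a by-hand, byte-reversed word load (four byte loads at $b..$b+3 merged with shifts and a HexagonINSERT) into a single L2_loadri_io followed by A2_swiz. Assuming A2_swiz performs a 32-bit byte reversal, the source idiom and its replacement behave like this hedged C++ sketch (names hypothetical; __builtin_bswap32 is a GCC/Clang builtin standing in for the swiz):

#include <cassert>
#include <cstdint>
#include <cstring>

// The scalar idiom the DAG pattern matches: assemble a word from four
// individual byte loads in reversed byte order.
static uint32_t loadReversed(const uint8_t *B) {
  return (uint32_t(B[0]) << 24) | (uint32_t(B[1]) << 16) |
         (uint32_t(B[2]) << 8)  |  uint32_t(B[3]);
}

// What the pattern selects instead: one word load plus a byte reversal
// (on Hexagon, L2_loadri_io + A2_swiz).
static uint32_t loadThenSwiz(const uint8_t *B) {
  uint32_t W;
  std::memcpy(&W, B, sizeof(W));   // little-endian word load
  return __builtin_bswap32(W);     // byte reversal, i.e. the "swiz"
}

int main() {
  uint8_t Buf[4] = {0x12, 0x34, 0x56, 0x78};
  assert(loadReversed(Buf) == loadThenSwiz(Buf));
  return 0;
}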
-def: Storex_simple_pat; -def: Storex_simple_pat; - -let AddedComplexity = 60 in { - defm : T_StoreAbsReg_Pats ; - defm : T_StoreAbsReg_Pats ; -} - -let AddedComplexity = 40 in { - def: Storexs_pat; - def: Storexs_pat; -} - -let AddedComplexity = 20 in { - def: Store_rr_pat; - def: Store_rr_pat; -} - -let AddedComplexity = 80 in { - def: Storea_pat; - def: Storea_pat; -} - -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; -} - -defm: Storex_pat; -defm: Storex_pat; -def: Storex_simple_pat; -def: Storex_simple_pat; - -def: Pat<(fadd F32:$src1, F32:$src2), - (F2_sfadd F32:$src1, F32:$src2)>; - -def: Pat<(fsub F32:$src1, F32:$src2), - (F2_sfsub F32:$src1, F32:$src2)>; - -def: Pat<(fmul F32:$src1, F32:$src2), - (F2_sfmpy F32:$src1, F32:$src2)>; - -let Predicates = [HasV5T] in { - def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)), (F2_sfmin F32:$Rs, F32:$Rt)>; - def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)), (F2_sfmax F32:$Rs, F32:$Rt)>; -} - -let AddedComplexity = 100, Predicates = [HasV5T] in { - class SfSel12 - : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rs, F32:$Rt), - (MI F32:$Rs, F32:$Rt)>; - class SfSel21 - : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rt, F32:$Rs), - (MI F32:$Rs, F32:$Rt)>; - - def: SfSel12; - def: SfSel12; - def: SfSel12; - def: SfSel12; - def: SfSel21; - def: SfSel21; - def: SfSel21; - def: SfSel21; -} - -class T_fcmp32_pat - : Pat<(i1 (OpNode F32:$src1, F32:$src2)), - (MI F32:$src1, F32:$src2)>; -class T_fcmp64_pat - : Pat<(i1 (OpNode F64:$src1, F64:$src2)), - (MI F64:$src1, F64:$src2)>; - -def: T_fcmp32_pat; -def: T_fcmp32_pat; -def: T_fcmp32_pat; -def: T_fcmp32_pat; - -def: T_fcmp64_pat; -def: T_fcmp64_pat; -def: T_fcmp64_pat; -def: T_fcmp64_pat; - -let Predicates = [HasV5T] in -multiclass T_fcmp_pats { - // IntRegs - def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), - (IntMI F32:$src1, F32:$src2)>; - // DoubleRegs - def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), - (DoubleMI F64:$src1, F64:$src2)>; -} - -defm : T_fcmp_pats ; -defm : T_fcmp_pats ; -defm : T_fcmp_pats ; - -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations. 
-//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass unord_Pats { - // IntRegs - def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (IntMI F32:$src1, F32:$src2))>; - - // DoubleRegs - def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (DoubleMI F64:$src1, F64:$src2))>; -} - -defm : unord_Pats ; -defm : unord_Pats ; -defm : unord_Pats ; - -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for the following dags: -// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2)) -// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2) -// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2) -// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2)) -//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass eq_ordgePats { - // IntRegs - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (C2_not (IntMI F32:$src1, F32:$src2))>; - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (IntMI F32:$src1, F32:$src2)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (IntMI F32:$src1, F32:$src2)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (C2_not (IntMI F32:$src1, F32:$src2))>; - - // DoubleRegs - def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (C2_not (DoubleMI F64:$src1, F64:$src2))>; - def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (DoubleMI F64:$src1, F64:$src2)>; - def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (DoubleMI F64:$src1, F64:$src2)>; - def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (C2_not (DoubleMI F64:$src1, F64:$src2))>; -} - -defm : eq_ordgePats; -defm : eq_ordgePats; -defm : eq_ordgePats; - -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for the following dags: -// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1)) -// seteq(setolt(op1, op2), 1) -> setogt(op2, op1) -// setne(setolt(op1, op2), 0) -> setogt(op2, op1) -// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1)) -//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass eq_ordltPats { - // IntRegs - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (C2_not (IntMI F32:$src2, F32:$src1))>; - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (IntMI F32:$src2, F32:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (IntMI F32:$src2, F32:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (C2_not (IntMI F32:$src2, F32:$src1))>; - - // DoubleRegs - def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (C2_not (DoubleMI F64:$src2, F64:$src1))>; - def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (DoubleMI F64:$src2, F64:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (DoubleMI F64:$src2, F64:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (C2_not (DoubleMI F64:$src2, F64:$src1))>; -} - -defm : eq_ordltPats; -defm : eq_ordltPats; - - -// o. seto inverse of setuo. 
http://llvm.org/docs/LangRef.html#i_fcmp -let Predicates = [HasV5T] in { - def: Pat<(i1 (seto F32:$src1, F32:$src2)), - (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>; - def: Pat<(i1 (seto F32:$src1, f32ImmPred:$src2)), - (C2_not (F2_sfcmpuo (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; - def: Pat<(i1 (seto F64:$src1, F64:$src2)), - (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>; - def: Pat<(i1 (seto F64:$src1, f64ImmPred:$src2)), - (C2_not (F2_dfcmpuo (CONST64 (ftoi $src2)), F64:$src1))>; -} - -// Ordered lt. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setolt F32:$src1, F32:$src2)), - (F2_sfcmpgt F32:$src2, F32:$src1)>; - def: Pat<(i1 (setolt F32:$src1, f32ImmPred:$src2)), - (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; - def: Pat<(i1 (setolt F64:$src1, F64:$src2)), - (F2_dfcmpgt F64:$src2, F64:$src1)>; - def: Pat<(i1 (setolt F64:$src1, f64ImmPred:$src2)), - (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>; -} - -// Unordered lt. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setult F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (F2_sfcmpgt F32:$src2, F32:$src1))>; - def: Pat<(i1 (setult F32:$src1, f32ImmPred:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))), - (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; - def: Pat<(i1 (setult F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (F2_dfcmpgt F64:$src2, F64:$src1))>; - def: Pat<(i1 (setult F64:$src1, f64ImmPred:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))), - (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1))>; -} - -// Ordered le. -let Predicates = [HasV5T] in { - // rs <= rt -> rt >= rs. - def: Pat<(i1 (setole F32:$src1, F32:$src2)), - (F2_sfcmpge F32:$src2, F32:$src1)>; - def: Pat<(i1 (setole F32:$src1, f32ImmPred:$src2)), - (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; - - // Rss <= Rtt -> Rtt >= Rss. - def: Pat<(i1 (setole F64:$src1, F64:$src2)), - (F2_dfcmpge F64:$src2, F64:$src1)>; - def: Pat<(i1 (setole F64:$src1, f64ImmPred:$src2)), - (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>; -} - -// Unordered le. -let Predicates = [HasV5T] in { -// rs <= rt -> rt >= rs. - def: Pat<(i1 (setule F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (F2_sfcmpge F32:$src2, F32:$src1))>; - def: Pat<(i1 (setule F32:$src1, f32ImmPred:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))), - (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; - def: Pat<(i1 (setule F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (F2_dfcmpge F64:$src2, F64:$src1))>; - def: Pat<(i1 (setule F64:$src1, f64ImmPred:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))), - (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1))>; -} - -// Ordered ne. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setone F32:$src1, F32:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; - def: Pat<(i1 (setone F64:$src1, F64:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; - def: Pat<(i1 (setone F32:$src1, f32ImmPred:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>; - def: Pat<(i1 (setone F64:$src1, f64ImmPred:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>; -} - -// Unordered ne. 
-let Predicates = [HasV5T] in {
-  def: Pat<(i1 (setune F32:$src1, F32:$src2)),
-           (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
-                  (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
-  def: Pat<(i1 (setune F64:$src1, F64:$src2)),
-           (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
-                  (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
-  def: Pat<(i1 (setune F32:$src1, f32ImmPred:$src2)),
-           (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
-                  (C2_not (F2_sfcmpeq F32:$src1,
-                                      (f32 (A2_tfrsi (ftoi $src2))))))>;
-  def: Pat<(i1 (setune F64:$src1, f64ImmPred:$src2)),
-           (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
-                  (C2_not (F2_dfcmpeq F64:$src1,
-                                      (CONST64 (ftoi $src2)))))>;
-}
-
-// Besides set[o|u][comparisons], we also need set[comparisons].
-let Predicates = [HasV5T] in {
-  // lt.
-  def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
-           (F2_sfcmpgt F32:$src2, F32:$src1)>;
-  def: Pat<(i1 (setlt F32:$src1, f32ImmPred:$src2)),
-           (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
-  def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
-           (F2_dfcmpgt F64:$src2, F64:$src1)>;
-  def: Pat<(i1 (setlt F64:$src1, f64ImmPred:$src2)),
-           (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
-
-  // le.
-  // rs <= rt -> rt >= rs.
-  def: Pat<(i1 (setle F32:$src1, F32:$src2)),
-           (F2_sfcmpge F32:$src2, F32:$src1)>;
-  def: Pat<(i1 (setle F32:$src1, f32ImmPred:$src2)),
-           (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
-
-  // Rss <= Rtt -> Rtt >= Rss.
-  def: Pat<(i1 (setle F64:$src1, F64:$src2)),
-           (F2_dfcmpge F64:$src2, F64:$src1)>;
-  def: Pat<(i1 (setle F64:$src1, f64ImmPred:$src2)),
-           (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
-
-  // ne.
-  def: Pat<(i1 (setne F32:$src1, F32:$src2)),
-           (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
-  def: Pat<(i1 (setne F64:$src1, F64:$src2)),
-           (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
-  def: Pat<(i1 (setne F32:$src1, f32ImmPred:$src2)),
-           (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
-  def: Pat<(i1 (setne F64:$src1, f64ImmPred:$src2)),
-           (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
-}
-
-
-def: Pat<(f64 (fpextend F32:$Rs)), (F2_conv_sf2df F32:$Rs)>;
-def: Pat<(f32 (fpround F64:$Rs)), (F2_conv_df2sf F64:$Rs)>;
-
-def: Pat<(f32 (sint_to_fp I32:$Rs)), (F2_conv_w2sf I32:$Rs)>;
-def: Pat<(f32 (sint_to_fp I64:$Rs)), (F2_conv_d2sf I64:$Rs)>;
-def: Pat<(f64 (sint_to_fp I32:$Rs)), (F2_conv_w2df I32:$Rs)>;
-def: Pat<(f64 (sint_to_fp I64:$Rs)), (F2_conv_d2df I64:$Rs)>;
-
-def: Pat<(f32 (uint_to_fp I32:$Rs)), (F2_conv_uw2sf I32:$Rs)>;
-def: Pat<(f32 (uint_to_fp I64:$Rs)), (F2_conv_ud2sf I64:$Rs)>;
-def: Pat<(f64 (uint_to_fp I32:$Rs)), (F2_conv_uw2df I32:$Rs)>;
-def: Pat<(f64 (uint_to_fp I64:$Rs)), (F2_conv_ud2df I64:$Rs)>;
-
-def: Pat<(i32 (fp_to_sint F32:$Rs)), (F2_conv_sf2w_chop F32:$Rs)>;
-def: Pat<(i32 (fp_to_sint F64:$Rs)), (F2_conv_df2w_chop F64:$Rs)>;
-def: Pat<(i64 (fp_to_sint F32:$Rs)), (F2_conv_sf2d_chop F32:$Rs)>;
-def: Pat<(i64 (fp_to_sint F64:$Rs)), (F2_conv_df2d_chop F64:$Rs)>;
-
-def: Pat<(i32 (fp_to_uint F32:$Rs)), (F2_conv_sf2uw_chop F32:$Rs)>;
-def: Pat<(i32 (fp_to_uint F64:$Rs)), (F2_conv_df2uw_chop F64:$Rs)>;
-def: Pat<(i64 (fp_to_uint F32:$Rs)), (F2_conv_sf2ud_chop F32:$Rs)>;
-def: Pat<(i64 (fp_to_uint F64:$Rs)), (F2_conv_df2ud_chop F64:$Rs)>;
-
-// Bitcast is different from [fp|sint|uint]_to_[sint|uint|fp].
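A recurring recipe runs through the compare lowerings above: Hexagon only has ordered eq/gt/ge float compares plus an explicit "unordered" test, so lt/le are expressed by swapping the operands of gt/ge, and the unordered predicates OR in F2_sfcmpuo. A minimal C++ model of one case, setult (helper names invented for illustration):

#include <cassert>
#include <cmath>

// Models F2_sfcmpuo: true iff the operands are unordered (either is NaN).
static bool cmpUO(float A, float B) { return std::isnan(A) || std::isnan(B); }

// Models F2_sfcmpgt: ordered greater-than.
static bool cmpGT(float A, float B) { return A > B; }

// setult(A, B) = "unordered, or less-than". Lowered above as
//   C2_or(F2_sfcmpuo(A, B), F2_sfcmpgt(B, A))
// with swapped operands so that "<" is expressed via a ">" instruction.
static bool setULT(float A, float B) { return cmpUO(A, B) || cmpGT(B, A); }

int main() {
  float NaN = std::nanf("");
  assert(setULT(1.0f, 2.0f));   // ordered, 1 < 2
  assert(!setULT(2.0f, 1.0f));  // ordered, not less-than
  assert(setULT(NaN, 1.0f));    // unordered operands satisfy setult
  return 0;
}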
-let Predicates = [HasV5T] in { - def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>; - def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>; - def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>; - def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>; -} - -def : Pat <(fma F32:$src2, F32:$src3, F32:$src1), - (F2_sffma F32:$src1, F32:$src2, F32:$src3)>; - -def : Pat <(fma (fneg F32:$src2), F32:$src3, F32:$src1), - (F2_sffms F32:$src1, F32:$src2, F32:$src3)>; - -def : Pat <(fma F32:$src2, (fneg F32:$src3), F32:$src1), - (F2_sffms F32:$src1, F32:$src2, F32:$src3)>; - -def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$imm), - (C2_muxir I1:$Pu, F32:$Rs, (ftoi $imm))>, - Requires<[HasV5T]>; - -def: Pat<(select I1:$Pu, f32ImmPred:$imm, F32:$Rt), - (C2_muxri I1:$Pu, (ftoi $imm), F32:$Rt)>, - Requires<[HasV5T]>; - -def: Pat<(select I1:$src1, F32:$src2, F32:$src3), - (C2_mux I1:$src1, F32:$src2, F32:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4), - (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select I1:$src1, F64:$src2, F64:$src3), - (C2_vmux I1:$src1, F64:$src2, F64:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4), - (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>, - Requires<[HasV5T]>; - -// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = mux(p0, #i, r1) -def: Pat<(select (not I1:$src1), f32ImmPred:$src2, F32:$src3), - (C2_muxir I1:$src1, F32:$src3, (ftoi $src2))>, - Requires<[HasV5T]>; - -// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = mux(p0, r1, #i) -def: Pat<(select (not I1:$src1), F32:$src2, f32ImmPred:$src3), - (C2_muxri I1:$src1, (ftoi $src3), F32:$src2)>, - Requires<[HasV5T]>; - -def: Pat<(i32 (fp_to_sint F64:$src1)), - (LoReg (F2_conv_df2d_chop F64:$src1))>, - Requires<[HasV5T]>; - -def : Pat <(fabs F32:$src1), - (S2_clrbit_i F32:$src1, 31)>, - Requires<[HasV5T]>; - -def : Pat <(fneg F32:$src1), - (S2_togglebit_i F32:$src1, 31)>, - Requires<[HasV5T]>; - -def: Pat<(fabs F64:$Rs), - (REG_SEQUENCE DoubleRegs, - (S2_clrbit_i (HiReg $Rs), 31), isub_hi, - (i32 (LoReg $Rs)), isub_lo)>; - -def: Pat<(fneg F64:$Rs), - (REG_SEQUENCE DoubleRegs, - (S2_togglebit_i (HiReg $Rs), 31), isub_hi, - (i32 (LoReg $Rs)), isub_lo)>; - -def: Pat<(mul I64:$Rss, I64:$Rtt), - (A2_combinew - (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))), - (LoReg $Rss), - (HiReg $Rtt)), - (LoReg $Rtt), - (HiReg $Rss)), - (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))))>; - -def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ - return isAlignedMemNode(dyn_cast(N)); -}]>; - -def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{ - return !isAlignedMemNode(dyn_cast(N)); -}]>; - -def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ - return isAlignedMemNode(dyn_cast(N)); -}]>; - -def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ - return !isAlignedMemNode(dyn_cast(N)); -}]>; - - -multiclass vS32b_ai_pats { - // Aligned stores - def : Pat<(alignednontemporalstore (VTSgl HvxVR:$src1), IntRegs:$addr), - (V6_vS32b_nt_ai IntRegs:$addr, 0, (VTSgl HvxVR:$src1))>; - def : Pat<(alignedstore (VTSgl HvxVR:$src1), IntRegs:$addr), - (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl HvxVR:$src1))>; - def : Pat<(unalignedstore (VTSgl HvxVR:$src1), IntRegs:$addr), - (V6_vS32Ub_ai IntRegs:$addr, 0, (VTSgl HvxVR:$src1))>; - - // Fold Add R+OFF into vector 
store. - let AddedComplexity = 10 in { - def : Pat<(alignednontemporalstore (VTSgl HvxVR:$src1), - (add IntRegs:$src2, IsVecOff:$offset)), - (V6_vS32b_nt_ai IntRegs:$src2, imm:$offset, - (VTSgl HvxVR:$src1))>; - def : Pat<(alignedstore (VTSgl HvxVR:$src1), - (add IntRegs:$src2, IsVecOff:$offset)), - (V6_vS32b_ai IntRegs:$src2, imm:$offset, - (VTSgl HvxVR:$src1))>; - def : Pat<(unalignedstore (VTSgl HvxVR:$src1), - (add IntRegs:$src2, IsVecOff:$offset)), - (V6_vS32Ub_ai IntRegs:$src2, imm:$offset, - (VTSgl HvxVR:$src1))>; - } -} - -defm : vS32b_ai_pats ; -defm : vS32b_ai_pats ; -defm : vS32b_ai_pats ; -defm : vS32b_ai_pats ; - - -multiclass vL32b_ai_pats { - // Aligned loads - def : Pat < (VTSgl (alignednontemporalload IntRegs:$addr)), - (V6_vL32b_nt_ai IntRegs:$addr, 0) >; - def : Pat < (VTSgl (alignedload IntRegs:$addr)), - (V6_vL32b_ai IntRegs:$addr, 0) >; - def : Pat < (VTSgl (unalignedload IntRegs:$addr)), - (V6_vL32Ub_ai IntRegs:$addr, 0) >; - - // Fold Add R+OFF into vector load. - let AddedComplexity = 10 in { - def : Pat<(VTSgl (alignednontemporalload (add IntRegs:$src2, IsVecOff:$offset))), - (V6_vL32b_nt_ai IntRegs:$src2, imm:$offset)>; - def : Pat<(VTSgl (alignedload (add IntRegs:$src2, IsVecOff:$offset))), - (V6_vL32b_ai IntRegs:$src2, imm:$offset)>; - def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, IsVecOff:$offset))), - (V6_vL32Ub_ai IntRegs:$src2, imm:$offset)>; - } -} - -defm : vL32b_ai_pats ; -defm : vL32b_ai_pats ; -defm : vL32b_ai_pats ; -defm : vL32b_ai_pats ; - -multiclass STrivv_pats { - def : Pat<(alignednontemporalstore (VTSgl HvxWR:$src1), IntRegs:$addr), - (PS_vstorerw_nt_ai IntRegs:$addr, 0, (VTSgl HvxWR:$src1))>; - def : Pat<(alignedstore (VTSgl HvxWR:$src1), IntRegs:$addr), - (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl HvxWR:$src1))>; - def : Pat<(unalignedstore (VTSgl HvxWR:$src1), IntRegs:$addr), - (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl HvxWR:$src1))>; -} - -defm : STrivv_pats ; -defm : STrivv_pats ; -defm : STrivv_pats ; -defm : STrivv_pats ; - -multiclass LDrivv_pats { - def : Pat<(VTSgl (alignednontemporalload I32:$addr)), - (PS_vloadrw_nt_ai I32:$addr, 0)>; - def : Pat<(VTSgl (alignedload I32:$addr)), - (PS_vloadrw_ai I32:$addr, 0)>; - def : Pat<(VTSgl (unalignedload I32:$addr)), - (PS_vloadrwu_ai I32:$addr, 0)>; -} - -defm : LDrivv_pats ; -defm : LDrivv_pats ; -defm : LDrivv_pats ; -defm : LDrivv_pats ; - -let Predicates = [HasV60T] in { - def: Pat<(select I1:$Pu, (VecI32 HvxVR:$Vs), HvxVR:$Vt), - (PS_vselect I1:$Pu, HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(select I1:$Pu, (VecPI32 HvxWR:$Vs), HvxWR:$Vt), - (PS_wselect I1:$Pu, HvxWR:$Vs, HvxWR:$Vt)>; -} - - -def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, - SDTCisSubVecOfVec<1, 0>]>; - -def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; - -def: Pat<(VecPI32 (HexagonVCOMBINE (VecI32 HvxVR:$Vs), (VecI32 HvxVR:$Vt))), - (V6_vcombine HvxVR:$Vs, HvxVR:$Vt)>; - -def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>; - -def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>; -def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>; - -def: Pat<(VecI8 (HexagonVPACKE (VecI8 HvxVR:$Vs), (VecI8 HvxVR:$Vt))), - (V6_vpackeb HvxVR:$Vs, HvxVR:$Vt)>; -def: Pat<(VecI8 (HexagonVPACKO (VecI8 HvxVR:$Vs), (VecI8 HvxVR:$Vt))), - (V6_vpackob HvxVR:$Vs, HvxVR:$Vt)>; -def: Pat<(VecI16 (HexagonVPACKE (VecI16 HvxVR:$Vs), (VecI16 HvxVR:$Vt))), - (V6_vpackeh HvxVR:$Vs, HvxVR:$Vt)>; -def: Pat<(VecI16 (HexagonVPACKO (VecI16 HvxVR:$Vs), (VecI16 HvxVR:$Vt))), 
- (V6_vpackoh HvxVR:$Vs, HvxVR:$Vt)>; - -def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; -def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; -def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; -def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; -def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; -def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; -def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; -def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; - - -multiclass bitconvert_32 { - def : Pat <(b (bitconvert (a IntRegs:$src))), - (b IntRegs:$src)>; - def : Pat <(a (bitconvert (b IntRegs:$src))), - (a IntRegs:$src)>; -} - -multiclass bitconvert_64 { - def : Pat <(b (bitconvert (a DoubleRegs:$src))), - (b DoubleRegs:$src)>; - def : Pat <(a (bitconvert (b DoubleRegs:$src))), - (a DoubleRegs:$src)>; -} - -// Bit convert vector types to integers. -defm : bitconvert_32; -defm : bitconvert_32; -defm : bitconvert_64; -defm : bitconvert_64; -defm : bitconvert_64; - -def: Pat<(sra (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), - (S2_asr_i_vh DoubleRegs:$src1, imm:$src2)>; -def: Pat<(srl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), - (S2_lsr_i_vh DoubleRegs:$src1, imm:$src2)>; -def: Pat<(shl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), - (S2_asl_i_vh DoubleRegs:$src1, imm:$src2)>; - -def: Pat<(sra (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), - (S2_asr_i_vw DoubleRegs:$src1, imm:$src2)>; -def: Pat<(srl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), - (S2_lsr_i_vw DoubleRegs:$src1, imm:$src2)>; -def: Pat<(shl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), - (S2_asl_i_vw DoubleRegs:$src1, imm:$src2)>; - -def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), - (A2_svaddh IntRegs:$src1, IntRegs:$src2)>; - -def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), - (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; - -def SDTHexagonVSPLAT: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; -def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>; - -// Replicate the low 8-bits from 32-bits input register into each of the -// four bytes of 32-bits destination register. -def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>; - -// Replicate the low 16-bits from 32-bits input register into each of the -// four halfwords of 64-bits destination register. 
-def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>; - -def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)), - (A2_combineii imm:$s8, imm:$s8)>; -def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (A2_combinew I32:$Rs, I32:$Rs)>; - - -class VArith_pat - : Pat <(Op Type:$Rss, Type:$Rtt), - (MI Type:$Rss, Type:$Rtt)>; - -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; - -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; - -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; - -def: Pat<(v2i32 (sra V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), - (S2_asr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (srl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), - (S2_lsr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (shl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), - (S2_asl_i_vw V2I32:$b, imm:$c)>; - -def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), - (S2_asr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), - (S2_lsr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), - (S2_asl_i_vh V4I16:$b, imm:$c)>; - - -def SDTHexagonVShift - : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>; - -def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>; -def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>; -def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>; - -def: Pat<(v2i32 (HexagonVASL V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVASL V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVASR V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVASR V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVLSR V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVLSR V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; - -class vshift_rr_pat - : Pat <(Op Value:$Rs, I32:$Rt), - (MI Value:$Rs, I32:$Rt)>; - -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; - - -class vcmp_vi1_pat - : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)), - (MI InVal:$Rs, InVal:$Rt)>; - -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; - -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; - -def: Pat<(mul V2I32:$Rs, V2I32:$Rt), - (PS_vmulw DoubleRegs:$Rs, DoubleRegs:$Rt)>; -def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)), - (PS_vmulw_acc DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt)>; - - -// Adds two v4i8: Hexagon does not have an insn for this one, so we -// use the double add v8i8, and use only the low part of the result. -def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), - (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>; - -// Subtract two v4i8: Hexagon does not have an insn for this one, so we -// use the double sub v8i8, and use only the low part of the result. -def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), - (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>; - -// -// No 32 bit vector mux. 
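The v4i8 add/sub patterns above and the 32-bit vector mux patterns that follow share one trick: widen the 32-bit vector to 64 bits (ToZext64), run the existing 64-bit instruction, and keep only the low word (LoReg). A hedged C++ sketch of the lane arithmetic for the add case, assuming plain modular 8-bit lanes:

#include <cassert>
#include <cstdint>

// Models the A2_vaddub step: lane-wise 8-bit add across a 64-bit value,
// with no carries leaking between lanes.
static uint64_t vaddub(uint64_t A, uint64_t B) {
  uint64_t R = 0;
  for (int I = 0; I < 8; ++I) {
    uint64_t LaneA = (A >> (8 * I)) & 0xff;
    uint64_t LaneB = (B >> (8 * I)) & 0xff;
    R |= ((LaneA + LaneB) & 0xff) << (8 * I);
  }
  return R;
}

// The full ToZext64 / A2_vaddub / LoReg sequence: zero-extension leaves
// the high lanes zero, so truncating back to 32 bits is exact.
static uint32_t addV4I8(uint32_t A, uint32_t B) {
  return static_cast<uint32_t>(vaddub(A, B));
}

int main() {
  // Lane by lane: 04+08, 03+07, 02+06, 01+05 -- no cross-lane carries.
  assert(addV4I8(0x01020304u, 0x05060708u) == 0x06080a0cu);
  return 0;
}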
-// -def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)), - (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; -def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)), - (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; - -// -// 64-bit vector mux. -// -def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)), - (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; -def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)), - (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>; -def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)), - (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; - -// -// No 32 bit vector compare. -// -def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), - (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>; - -def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), - (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>; - - -class InvertCmp_pat - : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)), - (InvMI Value:$Rt, Value:$Rs)>; - -// Map from a compare operation to the corresponding instruction with the -// order of operands reversed, e.g. x > y --> cmp.lt(y,x). -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; - -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; - -// Map from vcmpne(Rss) -> !vcmpew(Rss). -// rs != rt -> !(rs == rt). -def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), - (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; - - -// Truncate: from vector B copy all 'E'ven 'B'yte elements: -// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6]; -def: Pat<(v4i8 (trunc V4I16:$Rs)), - (S2_vtrunehb V4I16:$Rs)>; - -// Truncate: from vector B copy all 'O'dd 'B'yte elements: -// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; -// S2_vtrunohb - -// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: -// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; -// S2_vtruneh - -def: Pat<(v2i16 (trunc V2I32:$Rs)), - (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; - -def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; -def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; -def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; - -// Sign extends a v2i8 into a v2i32. -def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), - (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; - -// Sign extends a v2i16 into a v2i32. -def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), - (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; - - -// Multiplies two v2i16 and returns a v2i32. We are using here the -// saturating multiply, as hexagon does not provide a non saturating -// vector multiply, and saturation does not impact the result that is -// in double precision of the operands. - -// Multiplies two v2i16 vectors: as Hexagon does not have a multiply -// with the C semantics for this one, this pattern uses the half word -// multiply vmpyh that takes two v2i16 and returns a v2i32. 
This is -// then truncated to fit this back into a v2i16 and to simulate the -// wrap around semantics for unsigned in C. -def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt), - (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>; - -def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), - (LoReg (S2_vtrunewh (A2_combineii 0, 0), - (vmpyh V2I16:$Rs, V2I16:$Rt)))>; - -// Multiplies two v4i16 vectors. -def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), - (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)), - (vmpyh (LoReg $Rs), (LoReg $Rt)))>; - -def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt), - (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))), - (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>; - -// Multiplies two v4i8 vectors. -def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), - (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>, - Requires<[HasV5T]>; - -def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), - (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>; - -// Multiplies two v8i8 vectors. -def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), - (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))), - (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>, - Requires<[HasV5T]>; - -def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), - (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), - (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; - -// Truncated store from v4i16 to v4i8. -def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v4i8; }]>; - -// Truncated store from v2i32 to v2i16. -def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v2i16; }]>; - -def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt), - (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs), - (LoReg $Rs))))>; - -def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt), - (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>; - - -// Zero and sign extended load from v2i8 into v2i16. -def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v2i8; }]>; - -def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v2i8; }]>; - -def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)), - (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>; - -def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)), - (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>; - -def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)), - (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>; - -def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)), - (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>; +def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)), + (PS_alloca IntRegs:$Rs, imm:$A)>; +def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; +def: Pat<(HexagonBARRIER), (Y2_barrier)>; // Read cycle counter. -// def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, [SDNPHasChain]>; diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td index 094e0fbcac816..b2d66317b66e6 100644 --- a/lib/Target/Hexagon/HexagonPseudo.td +++ b/lib/Target/Hexagon/HexagonPseudo.td @@ -247,11 +247,14 @@ def PS_aligna : Pseudo<(outs IntRegs:$Rd), (ins u32_0Imm:$A), "", []>; // This simplifies the frame-index elimination code. 
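The HexagonPseudo.td hunk below marks PS_fi and PS_fia as constant-extendable with a signed 16-bit extent (isExtendable, isExtentSigned, opExtentBits = 16). Assuming the usual Hexagon constant-extender rules (an immediate outside its native extent costs an extra extender word in the packet), the encodability check those fields describe amounts to this small sketch (function name invented):

#include <cassert>
#include <cstdint>

// Mirrors opExtentBits = 16 / isExtentSigned = 1: offsets within the
// signed 16-bit extent encode directly; anything wider needs a
// constant extender.
static bool needsConstExtender(int64_t Off, unsigned ExtentBits = 16) {
  int64_t Lo = -(int64_t(1) << (ExtentBits - 1));
  int64_t Hi = (int64_t(1) << (ExtentBits - 1)) - 1;
  return Off < Lo || Off > Hi;
}

int main() {
  assert(!needsConstExtender(32767));   // max s16, fits
  assert(needsConstExtender(32768));    // one past, must be extended
  assert(!needsConstExtender(-32768));  // min s16, fits
  return 0;
}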
// let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1, - isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in { + isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0, isExtendable = 1, + isExtentSigned = 1, opExtentBits = 16, opExtentAlign = 0 in { + let opExtendable = 2 in def PS_fi : Pseudo<(outs IntRegs:$Rd), - (ins IntRegs:$fi, s32_0Imm:$off), "">; + (ins IntRegs:$fi, s32_0Imm:$off), "">; + let opExtendable = 3 in def PS_fia : Pseudo<(outs IntRegs:$Rd), - (ins IntRegs:$Rs, IntRegs:$fi, s32_0Imm:$off), "">; + (ins IntRegs:$Rs, IntRegs:$fi, s32_0Imm:$off), "">; } class CondStr { @@ -424,7 +427,7 @@ class LDrivv_template def PS_vloadrw_ai: LDrivv_template, Requires<[HasV60T,UseHVX]>; def PS_vloadrw_nt_ai: LDrivv_template, - Requires<[HasV60T,UseHVXSgl]>; + Requires<[HasV60T,UseHVX]>; def PS_vloadrwu_ai: LDrivv_template, Requires<[HasV60T,UseHVX]>; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 27b3fb72a203b..e491c757670d9 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -118,11 +118,11 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { bool HasEHReturn = MF->getInfo()->hasEHReturn(); switch (MF->getSubtarget().getHexagonArchVersion()) { - case HexagonSubtarget::V4: - case HexagonSubtarget::V5: - case HexagonSubtarget::V55: - case HexagonSubtarget::V60: - case HexagonSubtarget::V62: + case Hexagon::ArchEnum::V4: + case Hexagon::ArchEnum::V5: + case Hexagon::ArchEnum::V55: + case Hexagon::ArchEnum::V60: + case Hexagon::ArchEnum::V62: return HasEHReturn ? CalleeSavedRegsV3EHReturn : CalleeSavedRegsV3; } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index b2e952a761226..51ef37f39a73b 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -216,25 +216,33 @@ let Namespace = "Hexagon" in { // HVX types -def VecI1 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v512i1, v1024i1, v512i1]>; -def VecI8 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v64i8, v128i8, v64i8]>; -def VecI16 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v32i16, v64i16, v32i16]>; -def VecI32 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v16i32, v32i32, v16i32]>; -def VecI64 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v8i64, v16i64, v8i64]>; -def VecPI8 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v128i8, v256i8, v128i8]>; -def VecPI16 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v64i16, v128i16, v64i16]>; -def VecPI32 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v32i32, v64i32, v32i32]>; -def VecPI64 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], - [v16i64, v32i64, v16i64]>; - +def VecI1 + : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], + [v512i1, v512i1, v1024i1, v1024i1, v512i1]>; +def VecI8 + : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], + [v64i8, v64i8, v128i8, v128i8, v64i8]>; +def VecI16 + : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], + [v32i16, v32i16, v64i16, v64i16, v32i16]>; +def VecI32 + : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], + [v16i32, v16i32, v32i32, v32i32, v16i32]>; +def VecI64 + : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], + [v8i64, v8i64, v16i64, v16i64, v8i64]>; +def VecPI8 + : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], + [v128i8, 
v128i8, v256i8, v256i8, v128i8]>;
+def VecPI16
+  : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
+                      [v64i16, v64i16, v128i16, v128i16, v64i16]>;
+def VecPI32
+  : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
+                      [v32i32, v32i32, v64i32, v64i32, v32i32]>;
+def VecPI64
+  : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
+                      [v16i64, v16i64, v32i64, v32i64, v16i64]>;
 
 // Register classes.
 //
diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp
index af1bf48b63206..d1816cbc7528a 100644
--- a/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -1,4 +1,4 @@
-//===--- HexagonStoreWidening.cpp------------------------------------------===//
+//===- HexagonStoreWidening.cpp -------------------------------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -27,7 +27,6 @@
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -55,8 +54,8 @@ using namespace llvm;
 
 namespace llvm {
-  FunctionPass *createHexagonStoreWidening();
-  void initializeHexagonStoreWideningPass(PassRegistry&);
+FunctionPass *createHexagonStoreWidening();
+void initializeHexagonStoreWideningPass(PassRegistry&);
 
 } // end namespace llvm
 
@@ -91,8 +90,8 @@ namespace {
   private:
     static const int MaxWideSize = 4;
 
-    typedef std::vector<MachineInstr*> InstrGroup;
-    typedef std::vector<InstrGroup> InstrGroupList;
+    using InstrGroup = std::vector<MachineInstr *>;
+    using InstrGroupList = std::vector<InstrGroup>;
 
     bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO);
     bool instrAliased(InstrGroup &Stores, const MachineInstr *MI);
@@ -109,9 +108,15 @@ namespace {
     bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2);
   };
 
+} // end anonymous namespace
+
 char HexagonStoreWidening::ID = 0;
 
-} // end anonymous namespace
+INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
+                "Hexagon Store Widening", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
+                "Hexagon Store Widening", false, false)
 
 // Some local helper functions...
 
 static unsigned getBaseAddressRegister(const MachineInstr *MI) {
@@ -143,12 +148,6 @@ static const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
   return **MI->memoperands_begin();
 }
 
-INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
-                "Hexason Store Widening", false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
-                "Hexagon Store Widening", false, false)
-
 // Filtering function: any stores whose opcodes are not "approved" of by
 // this function will not be subjected to widening.
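For context on the pass this hunk reshuffles: store widening merges adjacent narrow stores of constant data into a single wider store, up to MaxWideSize (4 bytes above). A target-neutral, little-endian sketch of the rewrite (helper names invented):

#include <cassert>
#include <cstdint>
#include <cstring>

// Conceptually what HexagonStoreWidening does: two adjacent byte stores
// of constants become one halfword store with the combined immediate.
static void storeNarrow(uint8_t *P) {
  P[0] = 0x34;                    // memb(R+#0) = #0x34
  P[1] = 0x12;                    // memb(R+#1) = #0x12
}

static void storeWidened(uint8_t *P) {
  uint16_t V = 0x1234;            // combined immediate
  std::memcpy(P, &V, sizeof(V));  // memh(R+#0) = #0x1234
}

int main() {
  uint8_t A[2], B[2];
  storeNarrow(A);
  storeWidened(B);
  assert(std::memcmp(A, B, 2) == 0);
  return 0;
}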
inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) { diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index cc2e02d94d900..7ec4c34504bda 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -53,14 +53,6 @@ static cl::opt EnableIEEERndNear("enable-hexagon-ieee-rnd-near", static cl::opt EnableBSBSched("enable-bsb-sched", cl::Hidden, cl::ZeroOrMore, cl::init(true)); -static cl::opt EnableHexagonHVXDouble("enable-hexagon-hvx-double", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable Hexagon Double Vector eXtensions")); - -static cl::opt EnableHexagonHVX("enable-hexagon-hvx", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable Hexagon Vector eXtensions")); - static cl::opt EnableTCLatencySched("enable-tc-latency-sched", cl::Hidden, cl::ZeroOrMore, cl::init(false)); @@ -112,12 +104,12 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - static std::map CpuTable { - { "hexagonv4", V4 }, - { "hexagonv5", V5 }, - { "hexagonv55", V55 }, - { "hexagonv60", V60 }, - { "hexagonv62", V62 }, + static std::map CpuTable{ + {"hexagonv4", Hexagon::ArchEnum::V4}, + {"hexagonv5", Hexagon::ArchEnum::V5}, + {"hexagonv55", Hexagon::ArchEnum::V55}, + {"hexagonv60", Hexagon::ArchEnum::V60}, + {"hexagonv62", Hexagon::ArchEnum::V62}, }; auto FoundIt = CpuTable.find(CPUString); @@ -126,8 +118,8 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { else llvm_unreachable("Unrecognized Hexagon processor version"); - UseHVXOps = false; - UseHVXDblOps = false; + UseHVX128BOps = false; + UseHVX64BOps = false; UseLongCalls = false; UseMemOps = DisableMemOps ? false : EnableMemOps; @@ -136,10 +128,6 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { ParseSubtargetFeatures(CPUString, FS); - if (EnableHexagonHVX.getPosition()) - UseHVXOps = EnableHexagonHVX; - if (EnableHexagonHVXDouble.getPosition()) - UseHVXDblOps = EnableHexagonHVXDouble; if (OverrideLongCalls.getPosition()) UseLongCalls = OverrideLongCalls; diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 3cce198d170d8..54cf8e11d0669 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -14,9 +14,10 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H +#include "HexagonDepArch.h" #include "HexagonFrameLowering.h" -#include "HexagonInstrInfo.h" #include "HexagonISelLowering.h" +#include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSelectionDAGInfo.h" #include "llvm/ADT/SmallSet.h" @@ -45,14 +46,13 @@ class Triple; class HexagonSubtarget : public HexagonGenSubtargetInfo { virtual void anchor(); - bool UseMemOps, UseHVXOps, UseHVXDblOps; + bool UseMemOps, UseHVX64BOps, UseHVX128BOps; bool UseLongCalls; bool ModeIEEERndNear; public: -#include "HexagonDepArch.h" - - HexagonArchEnum HexagonArchVersion; + Hexagon::ArchEnum HexagonArchVersion; + Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::V4; /// True if the target should use Back-Skip-Back scheduling. This is the /// default for V60. 
bool UseBSBScheduling; @@ -113,19 +113,35 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { void ParseSubtargetFeatures(StringRef CPU, StringRef FS); bool useMemOps() const { return UseMemOps; } - bool hasV5TOps() const { return getHexagonArchVersion() >= V5; } - bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; } - bool hasV55TOps() const { return getHexagonArchVersion() >= V55; } - bool hasV55TOpsOnly() const { return getHexagonArchVersion() == V55; } - bool hasV60TOps() const { return getHexagonArchVersion() >= V60; } - bool hasV60TOpsOnly() const { return getHexagonArchVersion() == V60; } - bool hasV62TOps() const { return getHexagonArchVersion() >= V62; } - bool hasV62TOpsOnly() const { return getHexagonArchVersion() == V62; } + bool hasV5TOps() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V5; + } + bool hasV5TOpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V5; + } + bool hasV55TOps() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V55; + } + bool hasV55TOpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V55; + } + bool hasV60TOps() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V60; + } + bool hasV60TOpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V60; + } + bool hasV62TOps() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V62; + } + bool hasV62TOpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V62; + } bool modeIEEERndNear() const { return ModeIEEERndNear; } - bool useHVXOps() const { return UseHVXOps; } - bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; } - bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; } + bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::V4; } + bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; } + bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; } bool useLongCalls() const { return UseLongCalls; } bool usePredicatedCalls() const; @@ -149,7 +165,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { return Hexagon_SMALL_DATA_THRESHOLD; } - const HexagonArchEnum &getHexagonArchVersion() const { + const Hexagon::ArchEnum &getHexagonArchVersion() const { return HexagonArchVersion; } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 7b2774f8c408e..a66e212a76530 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -28,6 +28,9 @@ using namespace llvm; +static cl::opt EnableCExtOpt("hexagon-cext", cl::Hidden, cl::ZeroOrMore, + cl::init(true), cl::desc("Enable Hexagon constant-extender optimization")); + static cl::opt EnableRDFOpt("rdf-opt", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable RDF-based optimizations")); @@ -119,9 +122,11 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", namespace llvm { extern char &HexagonExpandCondsetsID; + void initializeHexagonConstExtendersPass(PassRegistry&); void initializeHexagonEarlyIfConversionPass(PassRegistry&); void initializeHexagonExpandCondsetsPass(PassRegistry&); void initializeHexagonGenMuxPass(PassRegistry&); + void initializeHexagonHardwareLoopsPass(PassRegistry&); void initializeHexagonLoopIdiomRecognizePass(PassRegistry&); void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&); void initializeHexagonNewValueJumpPass(PassRegistry&); @@ -135,6 +140,7 @@ namespace llvm { FunctionPass 
*createHexagonCallFrameInformation(); FunctionPass *createHexagonCFGOptimizer(); FunctionPass *createHexagonCommonGEP(); + FunctionPass *createHexagonConstExtenders(); FunctionPass *createHexagonConstPropagationPass(); FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonEarlyIfConversion(); @@ -176,8 +182,10 @@ extern "C" void LLVMInitializeHexagonTarget() { RegisterTargetMachine X(getTheHexagonTarget()); PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeHexagonConstExtendersPass(PR); initializeHexagonEarlyIfConversionPass(PR); initializeHexagonGenMuxPass(PR); + initializeHexagonHardwareLoopsPass(PR); initializeHexagonLoopIdiomRecognizePass(PR); initializeHexagonVectorLoopCarriedReusePass(PR); initializeHexagonNewValueJumpPass(PR); @@ -340,6 +348,8 @@ bool HexagonPassConfig::addInstSelector() { void HexagonPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { + if (EnableCExtOpt) + addPass(createHexagonConstExtenders()); if (EnableExpandCondsets) insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID); if (!DisableStoreWidening) diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index a6df6afae7871..f43db53950fd3 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -1,4 +1,4 @@ -//===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===// +//===- HexagonPacketizer.cpp - VLIW packetizer ----------------------------===// // // The LLVM Compiler Infrastructure // @@ -16,18 +16,38 @@ // prune the dependence. // //===----------------------------------------------------------------------===// + #include "HexagonVLIWPacketizer.h" +#include "Hexagon.h" +#include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" -#include "HexagonTargetMachine.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include using namespace llvm; @@ -51,15 +71,18 @@ static cl::opt DisableVecDblNVStores("disable-vecdbl-nv-stores", extern cl::opt ScheduleInlineAsm; namespace llvm { - FunctionPass *createHexagonPacketizer(); - void initializeHexagonPacketizerPass(PassRegistry&); -} +FunctionPass *createHexagonPacketizer(); +void initializeHexagonPacketizerPass(PassRegistry&); + +} // end namespace llvm namespace { + class HexagonPacketizer : public MachineFunctionPass { public: static char ID; + HexagonPacketizer() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -72,8 +95,10 @@ namespace { AU.addPreserved(); 
MachineFunctionPass::getAnalysisUsage(AU); } + StringRef getPassName() const override { return "Hexagon Packetizer"; } bool runOnMachineFunction(MachineFunction &Fn) override; + MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); @@ -84,8 +109,9 @@ namespace { const HexagonRegisterInfo *HRI; }; - char HexagonPacketizer::ID = 0; -} +} // end anonymous namespace + +char HexagonPacketizer::ID = 0; INITIALIZE_PASS_BEGIN(HexagonPacketizer, "hexagon-packetizer", "Hexagon Packetizer", false, false) @@ -103,9 +129,9 @@ HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, HII = MF.getSubtarget().getInstrInfo(); HRI = MF.getSubtarget().getRegisterInfo(); - addMutation(make_unique()); - addMutation(make_unique()); - addMutation(make_unique()); + addMutation(llvm::make_unique()); + addMutation(llvm::make_unique()); + addMutation(llvm::make_unique()); } // Check if FirstI modifies a register that SecondI reads. @@ -167,7 +193,6 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI, return NextIt; } - bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { if (DisablePacketizer || skipFunction(*MF.getFunction())) return false; @@ -187,7 +212,6 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { // DFA state table should not be empty. assert(Packetizer.getResourceTracker() && "Empty DFA table!"); - // // Loop over all basic blocks and remove KILL pseudo-instructions // These instructions confuse the dependence analysis. Consider: // D0 = ... (Insn 0) @@ -196,7 +220,6 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { // Here, Insn 1 will result in the dependence graph not emitting an output // dependence between Insn 0 and Insn 2. This can lead to incorrect // packetization - // for (auto &MB : MF) { auto End = MB.end(); auto MI = MB.begin(); @@ -239,7 +262,6 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { return true; } - // Reserve resources for a constant extender. Trigger an assertion if the // reservation fails. void HexagonPacketizerList::reserveResourcesForConstExt() { @@ -262,7 +284,6 @@ bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { return Avail; } - bool HexagonPacketizerList::isCallDependent(const MachineInstr &MI, SDep::Kind DepType, unsigned DepReg) { // Check for LR dependence. @@ -308,7 +329,6 @@ static bool isControlFlow(const MachineInstr &MI) { return MI.getDesc().isTerminator() || MI.getDesc().isCall(); } - /// Returns true if the instruction modifies a callee-saved register. static bool doesModifyCalleeSavedReg(const MachineInstr &MI, const TargetRegisterInfo *TRI) { @@ -423,7 +443,7 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI, bool HexagonPacketizerList::promoteToDotNew(MachineInstr &MI, SDep::Kind DepType, MachineBasicBlock::iterator &MII, const TargetRegisterClass* RC) { - assert (DepType == SDep::Data); + assert(DepType == SDep::Data); int NewOpcode; if (RC == &Hexagon::PredRegsRegClass) NewOpcode = HII->getDotNewPredOp(MI, MBPI); @@ -476,6 +496,48 @@ void HexagonPacketizerList::useCalleesSP(MachineInstr &MI) { Off.setImm(Off.getImm() + FrameSize + HEXAGON_LRFP_SIZE); } +/// Return true if we can update the offset in MI so that MI and MJ +/// can be packetized together. 
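// A hedged illustration of the offset update implemented below (the
// instruction syntax is approximate Hexagon assembly, not taken from the
// patch):
//
//   MJ:  r1 = memw(r0++#8)     ; post-increment load, Incr = 8
//   MI:  r2 = memw(r0+#4)      ; in program order, executes after MJ
//
// Within one packet, both slots read the pre-packet value of r0, so MI's
// address would be short by MJ's increment. Rewriting MI as
//
//   MI': r2 = memw(r0+#12)     ; original offset 4 plus Incr 8
//
// preserves the address and lets the pair be packetized. ChangedOffset
// remembers the original immediate so undoChangedOffset() can restore it
// if the packet attempt is abandoned.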
+bool HexagonPacketizerList::updateOffset(SUnit *SUI, SUnit *SUJ) { + assert(SUI->getInstr() && SUJ->getInstr()); + MachineInstr &MI = *SUI->getInstr(); + MachineInstr &MJ = *SUJ->getInstr(); + + unsigned BPI, OPI; + if (!HII->getBaseAndOffsetPosition(MI, BPI, OPI)) + return false; + unsigned BPJ, OPJ; + if (!HII->getBaseAndOffsetPosition(MJ, BPJ, OPJ)) + return false; + unsigned Reg = MI.getOperand(BPI).getReg(); + if (Reg != MJ.getOperand(BPJ).getReg()) + return false; + // Make sure that the dependences do not restrict adding MI to the packet. + // That is, ignore anti dependences, and make sure the only data dependence + // involves the specific register. + for (const auto &PI : SUI->Preds) + if (PI.getKind() != SDep::Anti && + (PI.getKind() != SDep::Data || PI.getReg() != Reg)) + return false; + int Incr; + if (!HII->getIncrementValue(MJ, Incr)) + return false; + + int64_t Offset = MI.getOperand(OPI).getImm(); + MI.getOperand(OPI).setImm(Offset + Incr); + ChangedOffset = Offset; + return true; +} + +/// Undo the changed offset. This is needed if the instruction cannot be +/// added to the current packet due to a different instruction. +void HexagonPacketizerList::undoChangedOffset(MachineInstr &MI) { + unsigned BP, OP; + if (!HII->getBaseAndOffsetPosition(MI, BP, OP)) + llvm_unreachable("Unable to find base and offset operands."); + MI.getOperand(OP).setImm(ChangedOffset); +} + enum PredicateKind { PK_False, PK_True, @@ -551,7 +613,6 @@ static const MachineOperand &getAbsSetOperand(const MachineInstr &MI) { return MI.getOperand(1); } - // Can be new value store? // Following restrictions are to be respected in convert a store into // a new value store. @@ -869,7 +930,6 @@ bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr &MI, return false; } - /// Gets the predicate register of a predicated instruction. static unsigned getPredicatedRegister(MachineInstr &MI, const HexagonInstrInfo *QII) { @@ -962,6 +1022,7 @@ void HexagonPacketizerList::initPacketizerState() { GlueToNewValueJump = false; GlueAllocframeStore = false; FoundSequentialDependence = false; + ChangedOffset = INT64_MAX; } // Ignore bundling of pseudo instructions. @@ -1015,7 +1076,6 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) { return false; } - // Quick check if instructions MI and MJ cannot coexist in the same packet. // Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm", // but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm". @@ -1063,7 +1123,6 @@ static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ, return false; } - // Full, symmetric check. bool HexagonPacketizerList::cannotCoexist(const MachineInstr &MI, const MachineInstr &MJ) { @@ -1279,11 +1338,9 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { if (NOp1.isReg() && I.getOperand(0).getReg() == NOp1.getReg()) secondRegMatch = true; - for (auto T : CurrentPacketMIs) { - SUnit *PacketSU = MIToSUnit.find(T)->second; - MachineInstr &PI = *PacketSU->getInstr(); + for (MachineInstr *PI : CurrentPacketMIs) { // NVJ can not be part of the dual jump - Arch Spec: section 7.8. - if (PI.isCall()) { + if (PI->isCall()) { Dependence = true; break; } @@ -1295,22 +1352,22 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // 3. If the second operand of the nvj is newified, (which means // first operand is also a reg), first reg is not defined in // the same packet. 
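// For illustration (approximate syntax, not from the patch): a new-value
// jump consumes a register produced in the same packet, e.g.
//
//   {
//     r0 = memub(r1+#0)
//     if (cmp.eq(r0.new, #0)) jump:nt .LBB0_3
//   }
//
// The checks below enforce the restrictions listed above: no call, store,
// allocframe, or loop-end instruction may already be in the packet, and
// nothing in the packet may redefine the jump's other register operand.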
-    if (PI.getOpcode() == Hexagon::S2_allocframe || PI.mayStore() ||
-        HII->isLoopN(PI)) {
+    if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
+        HII->isLoopN(*PI)) {
       Dependence = true;
       break;
     }
     // Check #2/#3.
     const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
-    if (OpR.isReg() && PI.modifiesRegister(OpR.getReg(), HRI)) {
+    if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
       Dependence = true;
       break;
     }
   }
+  GlueToNewValueJump = true;
   if (Dependence)
     return false;
-  GlueToNewValueJump = true;
 }
 // There is no dependency between a prolog instruction and its successor.
@@ -1442,7 +1499,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
     if (StoreJ) {
       // Two stores are only allowed on V4+. Load following store is never
       // allowed.
-      if (LoadI) {
+      if (LoadI && alias(J, I)) {
         FoundSequentialDependence = true;
         break;
       }
@@ -1551,6 +1608,23 @@ bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
     useCalleesSP(I);
     GlueAllocframeStore = false;
   }
+
+  if (ChangedOffset != INT64_MAX)
+    undoChangedOffset(I);
+
+  if (GlueToNewValueJump) {
+    // Putting I and J together would prevent the new-value jump from being
+    // packetized with the producer. In that case I and J must be separated.
+    GlueToNewValueJump = false;
+    return false;
+  }
+
+  if (ChangedOffset == INT64_MAX && updateOffset(SUI, SUJ)) {
+    FoundSequentialDependence = false;
+    Dependence = false;
+    return true;
+  }
+
   return false;
 }
@@ -1559,7 +1633,7 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
   MachineBasicBlock::iterator MII = MI.getIterator();
   MachineBasicBlock *MBB = MI.getParent();
-  if (CurrentPacketMIs.size() == 0)
+  if (CurrentPacketMIs.empty())
     PacketStalls = false;
   PacketStalls |= producesStall(MI);
@@ -1637,7 +1711,6 @@ bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
   return !producesStall(MI);
 }
-
 // V60 forward scheduling.
 bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
   // If the packet already stalls, then ignore the stall from a subsequent
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index adb92b6dc8557..cbdd2367429d4 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -1,18 +1,33 @@
-#ifndef HEXAGONVLIWPACKETIZER_H
-#define HEXAGONVLIWPACKETIZER_H
+//===- HexagonPacketizer.h - VLIW packetizer --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONVLIWPACKETIZER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONVLIWPACKETIZER_H
 
 #include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include <vector>
 
 namespace llvm {
+
 class HexagonInstrInfo;
 class HexagonRegisterInfo;
+class MachineBranchProbabilityInfo;
+class MachineFunction;
+class MachineInstr;
+class MachineLoopInfo;
+class TargetRegisterClass;
 
 class HexagonPacketizerList : public VLIWPacketizerList {
   // Vector of instructions assigned to the packet that has just been created.
-  std::vector<MachineInstr*> OldPacketMIs;
+  std::vector<MachineInstr *> OldPacketMIs;
 
   // Has the instruction been promoted to a dot-new instruction.
bool PromotedToDotNew; @@ -23,6 +38,9 @@ class HexagonPacketizerList : public VLIWPacketizerList { // Has the feeder instruction been glued to new value jump. bool GlueToNewValueJump; + // This holds the offset value, when pruning the dependences. + int64_t ChangedOffset; + // Check if there is a dependence between some instruction already in this // packet and this instruction. bool Dependence; @@ -48,7 +66,6 @@ class HexagonPacketizerList : public VLIWPacketizerList { const HexagonRegisterInfo *HRI; public: - // Ctor. HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, const MachineBranchProbabilityInfo *MBPI); @@ -103,14 +120,18 @@ class HexagonPacketizerList : public VLIWPacketizerList { bool demoteToDotOld(MachineInstr &MI); bool useCallersSP(MachineInstr &MI); void useCalleesSP(MachineInstr &MI); + bool updateOffset(SUnit *SUI, SUnit *SUJ); + void undoChangedOffset(MachineInstr &MI); bool arePredicatesComplements(MachineInstr &MI1, MachineInstr &MI2); bool restrictingDepExistInPacket(MachineInstr&, unsigned); bool isNewifiable(const MachineInstr &MI, const TargetRegisterClass *NewRC); bool isCurifiable(MachineInstr &MI); bool cannotCoexist(const MachineInstr &MI, const MachineInstr &MJ); - inline bool isPromotedToDotNew() const { + + bool isPromotedToDotNew() const { return PromotedToDotNew; } + bool tryAllocateResourcesForConstExt(bool Reserve); bool canReserveResourcesForConstExt(); void reserveResourcesForConstExt(); @@ -120,6 +141,7 @@ class HexagonPacketizerList : public VLIWPacketizerList { bool hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J); bool producesStall(const MachineInstr &MI); }; -} // namespace llvm -#endif // HEXAGONVLIWPACKETIZER_H +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONVLIWPACKETIZER_H diff --git a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp index 77dc5f5eee740..a0fdc70e141a5 100644 --- a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp +++ b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -6,6 +6,7 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// // This pass removes the computation of provably redundant expressions that have // been computed earlier in a previous iteration. It relies on the use of PHIs // to identify loop carried dependences. This is scalar replacement for vector @@ -112,23 +113,42 @@ // 1. Num of edges in DepChain = Number of Instructions in DepChain = Number of // iterations of carried dependence + 1. // 2. All instructions in the DepChain except the last are PHIs. 
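+// For example, a dependence carried across two iterations appears as a
+// three-instruction chain PHI -> PHI -> I, for which iterations() returns 2.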
+// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexagon-vlcr" - #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Triple.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/ADT/Statistic.h" -#include +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include +#include +#include #include +#include +#include + using namespace llvm; +#define DEBUG_TYPE "hexagon-vlcr" + STATISTIC(HexagonNumVectorLoopCarriedReuse, "Number of values that were reused from a previous iteration."); @@ -136,17 +156,24 @@ static cl::opt HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim", cl::Hidden, cl::desc("Maximum distance of loop carried dependences that are handled"), cl::init(2), cl::ZeroOrMore); + namespace llvm { - void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&); - Pass *createHexagonVectorLoopCarriedReusePass(); -} + +void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&); +Pass *createHexagonVectorLoopCarriedReusePass(); + +} // end namespace llvm + namespace { + // See info about DepChain in the comments at the top of this file. - typedef SmallVector ChainOfDependences; + using ChainOfDependences = SmallVector; + class DepChain { ChainOfDependences Chain; + public: - bool isIdentical(DepChain &Other) { + bool isIdentical(DepChain &Other) const { if (Other.size() != size()) return false; ChainOfDependences &OtherChain = Other.getChain(); @@ -156,30 +183,39 @@ namespace { } return true; } + ChainOfDependences &getChain() { return Chain; } - int size() { + + int size() const { return Chain.size(); } + void clear() { Chain.clear(); } + void push_back(Instruction *I) { Chain.push_back(I); } - int iterations() { + + int iterations() const { return size() - 1; } - Instruction *front() { + + Instruction *front() const { return Chain.front(); } - Instruction *back() { + + Instruction *back() const { return Chain.back(); } + Instruction *&operator[](const int index) { return Chain[index]; } + friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D); }; @@ -194,19 +230,21 @@ namespace { OS << *CD[ChainSize-1] << "\n"; return OS; } -} -namespace { + struct ReuseValue { - Instruction *Inst2Replace; + Instruction *Inst2Replace = nullptr; + // In the new PHI node that we'll construct this is the value that'll be // used over the backedge. This is teh value that gets reused from a // previous iteration. 
- Instruction * BackedgeInst; - ReuseValue() : Inst2Replace(nullptr), BackedgeInst(nullptr) {}; + Instruction *BackedgeInst = nullptr; + + ReuseValue() = default; + void reset() { Inst2Replace = nullptr; BackedgeInst = nullptr; } bool isDefined() { return Inst2Replace != nullptr; } }; - typedef struct ReuseValue ReuseValue; + LLVM_ATTRIBUTE_UNUSED raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) { OS << "** ReuseValue ***\n"; @@ -214,21 +252,21 @@ namespace { OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n"; return OS; } -} -namespace { class HexagonVectorLoopCarriedReuse : public LoopPass { public: static char ID; + explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) { PassRegistry *PR = PassRegistry::getPassRegistry(); initializeHexagonVectorLoopCarriedReusePass(*PR); } + StringRef getPassName() const override { return "Hexagon-specific loop carried reuse for HVX vectors"; } - void getAnalysisUsage(AnalysisUsage &AU) const override { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); @@ -254,9 +292,9 @@ namespace { DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2); bool isEquivalentOperation(Instruction *I1, Instruction *I2); bool canReplace(Instruction *I); - }; -} + +} // end anonymous namespace char HexagonVectorLoopCarriedReuse::ID = 0; @@ -276,7 +314,7 @@ bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) { return false; // Work only on innermost loops. - if (L->getSubLoops().size() != 0) + if (!L->getSubLoops().empty()) return false; // Work only on single basic blocks loops. @@ -302,6 +340,21 @@ bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1, return false; } } + + // If both the Instructions are of Vector Type and any of the element + // is integer constant, check their values too for equivalence. 
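+  // The check is positional: non-constant elements are skipped here, and a
+  // constant operand of I1 must be matched by an equal-valued constant in
+  // the same operand position of I2.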
+ if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) { + unsigned NumOperands = I1->getNumOperands(); + for (unsigned i = 0; i < NumOperands; ++i) { + ConstantInt *C1 = dyn_cast(I1->getOperand(i)); + ConstantInt *C2 = dyn_cast(I2->getOperand(i)); + if(!C1) continue; + assert(C2); + if (C1->getSExtValue() != C2->getSExtValue()) + return false; + } + } + return true; } @@ -396,8 +449,8 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() { } } ReuseCandidate.reset(); - return; } + Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op, BasicBlock *BB) { PHINode *PN = dyn_cast(Op); @@ -405,6 +458,7 @@ Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op, Value *ValueInBlock = PN->getIncomingValueForBlock(BB); return ValueInBlock; } + void HexagonVectorLoopCarriedReuse::reuseValue() { DEBUG(dbgs() << ReuseCandidate); Instruction *Inst2Replace = ReuseCandidate.Inst2Replace; @@ -476,12 +530,12 @@ void HexagonVectorLoopCarriedReuse::reuseValue() { } bool HexagonVectorLoopCarriedReuse::doVLCR() { - assert((CurLoop->getSubLoops().size() == 0) && + assert(CurLoop->getSubLoops().empty() && "Can do VLCR on the innermost loop only"); assert((CurLoop->getNumBlocks() == 1) && "Can do VLCR only on single block loops"); - bool Changed; + bool Changed = false; bool Continue; DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n"); @@ -502,6 +556,7 @@ bool HexagonVectorLoopCarriedReuse::doVLCR() { } while (Continue); return Changed; } + void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I, DepChain &D) { PHINode *PN = dyn_cast(I); @@ -536,7 +591,6 @@ void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I, D.push_back(PN); findDepChainFromPHI(BEInst, D); } - return; } bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1, @@ -548,6 +602,7 @@ bool HexagonVectorLoopCarriedReuse::isDepChainBtwn(Instruction *I1, } return false; } + DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1, Instruction *I2) { for (auto *D : Dependences) { @@ -556,6 +611,7 @@ DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1, } return nullptr; } + void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() { BasicBlock *BB = CurLoop->getHeader(); for (auto I = BB->begin(), E = BB->end(); I != E && isa(I); ++I) { @@ -575,6 +631,7 @@ void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() { dbgs() << *Dependences[i] << "\n"; }); } + Pass *llvm::createHexagonVectorLoopCarriedReusePass() { return new HexagonVectorLoopCarriedReuse(); } diff --git a/lib/Target/Hexagon/HexagonVectorPrint.cpp b/lib/Target/Hexagon/HexagonVectorPrint.cpp index 085d4645df064..45931a930dc38 100644 --- a/lib/Target/Hexagon/HexagonVectorPrint.cpp +++ b/lib/Target/Hexagon/HexagonVectorPrint.cpp @@ -1,4 +1,4 @@ -//===-- HexagonVectorPrint.cpp - Generate vector printing instructions -===// +//===- HexagonVectorPrint.cpp - Generate vector printing instructions -----===// // // The LLVM Compiler Infrastructure // @@ -13,8 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexagon-vector-print" - #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" #include "llvm/ADT/StringRef.h" @@ -31,34 +29,36 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOpcodes.h" #include #include using namespace llvm; +#define DEBUG_TYPE "hexagon-vector-print" + static cl::opt 
TraceHexVectorStoresOnly("trace-hex-vector-stores-only", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enables tracing of vector stores")); namespace llvm { - FunctionPass *createHexagonVectorPrint(); - void initializeHexagonVectorPrintPass(PassRegistry&); +FunctionPass *createHexagonVectorPrint(); +void initializeHexagonVectorPrintPass(PassRegistry&); } // end namespace llvm namespace { class HexagonVectorPrint : public MachineFunctionPass { - const HexagonSubtarget *QST; - const HexagonInstrInfo *QII; - const HexagonRegisterInfo *QRI; + const HexagonSubtarget *QST = nullptr; + const HexagonInstrInfo *QII = nullptr; + const HexagonRegisterInfo *QRI = nullptr; public: static char ID; - HexagonVectorPrint() - : MachineFunctionPass(ID), QST(nullptr), QII(nullptr), QRI(nullptr) { + HexagonVectorPrint() : MachineFunctionPass(ID) { initializeHexagonVectorPrintPass(*PassRegistry::getPassRegistry()); } @@ -67,10 +67,10 @@ class HexagonVectorPrint : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &Fn) override; }; -char HexagonVectorPrint::ID = 0; - } // end anonymous namespace +char HexagonVectorPrint::ID = 0; + static bool isVecReg(unsigned Reg) { return (Reg >= Hexagon::V0 && Reg <= Hexagon::V31) || (Reg >= Hexagon::W0 && Reg <= Hexagon::W15) @@ -97,7 +97,6 @@ static void addAsmInstr(MachineBasicBlock *MBB, unsigned Reg, MachineBasicBlock::instr_iterator I, const DebugLoc &DL, const HexagonInstrInfo *QII, MachineFunction &Fn) { - std::string VDescStr = ".long 0x1dffe0" + getStringReg(Reg); const char *cstr = Fn.createExternalSymbolName(VDescStr); unsigned ExtraInfo = InlineAsm::Extra_HasSideEffects; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 2a0edda8dcee8..31da9fa06d000 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -65,7 +65,8 @@ class HexagonAsmBackend : public MCAsmBackend { OSABI(OSABI), CPU(CPU), MCII(T.createMCInstrInfo()), RelaxTarget(new MCInst *), Extender(nullptr) {} - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + std::unique_ptr + createObjectWriter(raw_pwrite_stream &OS) const override { return createHexagonELFObjectWriter(OS, OSABI, CPU); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index b975e31310946..12aa1bd9b2a0d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/HexagonFixupKinds.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -297,9 +298,9 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx, } } -MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI, - StringRef CPU) { - MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true); +std::unique_ptr +llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, + StringRef CPU) { + auto MOTW = llvm::make_unique(OSABI, CPU); + return createELFObjectWriter(std::move(MOTW), OS, /*IsLittleEndian*/ true); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp 
b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 47007e08a2ff9..691e269cb91f5 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -18,7 +18,9 @@ #include "MCTargetDesc/HexagonMCShuffler.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -43,6 +45,19 @@ static cl::opt GPSize cl::Prefix, cl::init(8)); +HexagonMCELFStreamer::HexagonMCELFStreamer( + MCContext &Context, std::unique_ptr TAB, + raw_pwrite_stream &OS, std::unique_ptr Emitter) + : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)), + MCII(createHexagonMCInstrInfo()) {} + +HexagonMCELFStreamer::HexagonMCELFStreamer( + MCContext &Context, std::unique_ptr TAB, + raw_pwrite_stream &OS, std::unique_ptr Emitter, + MCAssembler *Assembler) + : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)), + MCII(createHexagonMCInstrInfo()) {} + void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB, const MCSubtargetInfo &STI, bool) { assert(MCB.getOpcode() == Hexagon::BUNDLE); @@ -149,10 +164,11 @@ void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, namespace llvm { - MCStreamer *createHexagonELFStreamer(Triple const &TT, MCContext &Context, - MCAsmBackend &MAB, - raw_pwrite_stream &OS, MCCodeEmitter *CE) { - return new HexagonMCELFStreamer(Context, MAB, OS, CE); +MCStreamer *createHexagonELFStreamer(Triple const &TT, MCContext &Context, + std::unique_ptr MAB, + raw_pwrite_stream &OS, + std::unique_ptr CE) { + return new HexagonMCELFStreamer(Context, std::move(MAB), OS, std::move(CE)); } } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h index 024dff1a2f97c..c6fa0021d86b5 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h @@ -22,17 +22,14 @@ class HexagonMCELFStreamer : public MCELFStreamer { std::unique_ptr MCII; public: - HexagonMCELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_pwrite_stream &OS, MCCodeEmitter *Emitter) - : MCELFStreamer(Context, TAB, OS, Emitter), - MCII(createHexagonMCInstrInfo()) {} - - HexagonMCELFStreamer(MCContext &Context, - MCAsmBackend &TAB, - raw_pwrite_stream &OS, MCCodeEmitter *Emitter, - MCAssembler *Assembler) : - MCELFStreamer(Context, TAB, OS, Emitter), - MCII (createHexagonMCInstrInfo()) {} + HexagonMCELFStreamer(MCContext &Context, std::unique_ptr TAB, + raw_pwrite_stream &OS, + std::unique_ptr Emitter); + + HexagonMCELFStreamer(MCContext &Context, std::unique_ptr TAB, + raw_pwrite_stream &OS, + std::unique_ptr Emitter, + MCAssembler *Assembler); void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, bool) override; @@ -45,8 +42,9 @@ class HexagonMCELFStreamer : public MCELFStreamer { }; MCStreamer *createHexagonELFStreamer(Triple const &TT, MCContext &Context, - MCAsmBackend &MAB, raw_pwrite_stream &OS, - MCCodeEmitter *CE); + std::unique_ptr MAB, + raw_pwrite_stream &OS, + std::unique_ptr CE); } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 1a361548f9386..6f48169be8cfd 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ 
b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -20,6 +20,8 @@ #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCELFStreamer.h" @@ -224,13 +226,13 @@ createMCAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, return new HexagonTargetAsmStreamer(S, OS, IsVerboseAsm, *IP); } -static MCStreamer *createMCStreamer(Triple const &T, - MCContext &Context, - MCAsmBackend &MAB, +static MCStreamer *createMCStreamer(Triple const &T, MCContext &Context, + std::unique_ptr &&MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, + std::unique_ptr &&Emitter, bool RelaxAll) { - return createHexagonELFStreamer(T, Context, MAB, OS, Emitter); + return createHexagonELFStreamer(T, Context, std::move(MAB), OS, + std::move(Emitter)); } static MCTargetStreamer * @@ -286,7 +288,7 @@ MCSubtargetInfo *Hexagon_MC::createHexagonMCSubtargetInfo(const Triple &TT, } MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl(TT, CPUName, ArchFS); - if (X->getFeatureBits()[Hexagon::ExtensionHVXDbl]) { + if (X->getFeatureBits()[Hexagon::ExtensionHVX128B]) { llvm::FeatureBitset Features = X->getFeatureBits(); X->setFeatureBits(Features.set(Hexagon::ExtensionHVX)); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index 6bb69be6142e5..30d75dbc84e2c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -63,8 +63,9 @@ MCAsmBackend *createHexagonAsmBackend(const Target &T, const Triple &TT, StringRef CPU, const MCTargetOptions &Options); -MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI, StringRef CPU); +std::unique_ptr +createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, + StringRef CPU); unsigned HexagonGetLastSlot(); diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index 6d9e234ee1425..de58ddff3397c 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -903,15 +903,18 @@ void DataFlowGraph::build(unsigned Options) { NodeList Blocks = Func.Addr->members(*this); // Collect information about block references. - BlockRefsMap RefM; - buildBlockRefs(EA, RefM); + RegisterSet AllRefs; + for (NodeAddr BA : Blocks) + for (NodeAddr IA : BA.Addr->members(*this)) + for (NodeAddr RA : IA.Addr->members(*this)) + AllRefs.insert(RA.Addr->getRegRef(*this)); // Collect function live-ins and entry block live-ins. MachineRegisterInfo &MRI = MF.getRegInfo(); MachineBasicBlock &EntryB = *EA.Addr->getCode(); assert(EntryB.pred_empty() && "Function entry block has predecessors"); - for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) - LiveIns.insert(RegisterRef(I->first)); + for (std::pair P : MRI.liveins()) + LiveIns.insert(RegisterRef(P.first)); if (MRI.tracksLiveness()) { for (auto I : EntryB.liveins()) LiveIns.insert(RegisterRef(I.PhysReg, I.LaneMask)); @@ -964,9 +967,9 @@ void DataFlowGraph::build(unsigned Options) { // of references that will require phi definitions in that block. BlockRefsMap PhiM; for (NodeAddr BA : Blocks) - recordDefsForDF(PhiM, RefM, BA); + recordDefsForDF(PhiM, BA); for (NodeAddr BA : Blocks) - buildPhis(PhiM, RefM, BA); + buildPhis(PhiM, AllRefs, BA); // Link all the refs. This will recursively traverse the dominator tree. 
  DefStackMap DM;
@@ -1394,29 +1397,9 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
   }
 }
 
-// Build a map that for each block will have the set of all references from
-// that block, and from all blocks dominated by it.
-void DataFlowGraph::buildBlockRefs(NodeAddr<BlockNode*> BA,
-      BlockRefsMap &RefM) {
-  RegisterSet &Refs = RefM[BA.Id];
-  MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
-  assert(N);
-  for (auto I : *N) {
-    MachineBasicBlock *SB = I->getBlock();
-    NodeAddr<BlockNode*> SBA = findBlock(SB);
-    buildBlockRefs(SBA, RefM);
-    const RegisterSet &RefsS = RefM[SBA.Id];
-    Refs.insert(RefsS.begin(), RefsS.end());
-  }
-
-  for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
-    for (NodeAddr<RefNode*> RA : IA.Addr->members(*this))
-      Refs.insert(RA.Addr->getRegRef(*this));
-}
-
 // Scan all defs in the block node BA and record in PhiM the locations of
 // phi nodes corresponding to these defs.
-void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM,
       NodeAddr<BlockNode*> BA) {
   // Check all defs from block BA and record them in each block in BA's
   // iterated dominance frontier. This information will later be used to
@@ -1446,14 +1429,6 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
     IDF.insert(F->second.begin(), F->second.end());
   }
 
-  // Get the register references that are reachable from this block.
-  RegisterSet &Refs = RefM[BA.Id];
-  for (auto DB : IDF) {
-    NodeAddr<BlockNode*> DBA = findBlock(DB);
-    const RegisterSet &RefsD = RefM[DBA.Id];
-    Refs.insert(RefsD.begin(), RefsD.end());
-  }
-
   // Finally, add the set of defs to each block in the iterated dominance
   // frontier.
   for (auto DB : IDF) {
@@ -1464,7 +1439,7 @@
 // Given the locations of phi nodes in the map PhiM, create the phi nodes
 // that are located in the block node BA.
-void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
       NodeAddr<BlockNode*> BA) {
   // Check if this block has any DF defs, i.e. if there are any defs
   // that this block is in the iterated dominance frontier of.
@@ -1488,9 +1463,8 @@
     MaxDF.insert(MaxCoverIn(I, HasDF->second));
 
   std::vector<RegisterRef> MaxRefs;
-  RegisterSet &RefB = RefM[BA.Id];
   for (RegisterRef I : MaxDF)
-    MaxRefs.push_back(MaxCoverIn(I, RefB));
+    MaxRefs.push_back(MaxCoverIn(I, AllRefs));
 
   // Now, for each R in MaxRefs, get the alias closure of R. If the closure
   // only has R in it, create a phi with a def for R.
Otherwise, create a phi, diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h index b1366c7ffecf1..399b401c5ff6e 100644 --- a/lib/Target/Hexagon/RDFGraph.h +++ b/lib/Target/Hexagon/RDFGraph.h @@ -846,10 +846,8 @@ namespace rdf { using BlockRefsMap = std::map; void buildStmt(NodeAddr BA, MachineInstr &In); - void buildBlockRefs(NodeAddr BA, BlockRefsMap &RefM); - void recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM, - NodeAddr BA); - void buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM, + void recordDefsForDF(BlockRefsMap &PhiM, NodeAddr BA); + void buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs, NodeAddr BA); void removeUnusedPhis(); diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 1394ac7210f2f..3f24c3ef39020 100644 --- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -36,7 +36,7 @@ #include #include -namespace llvm { +using namespace llvm; // Auto-generated by TableGen static unsigned MatchRegisterName(StringRef Name); @@ -85,7 +85,7 @@ class LanaiAsmParser : public MCTargetAsmParser { public: LanaiAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, STI), Parser(Parser), + : MCTargetAsmParser(Options, STI, MII), Parser(Parser), Lexer(Parser.getLexer()), SubtargetInfo(STI) { setAvailableFeatures( ComputeAvailableFeatures(SubtargetInfo.getFeatureBits())); @@ -1226,5 +1226,3 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/, extern "C" void LLVMInitializeLanaiAsmParser() { RegisterMCAsmParser x(getTheLanaiTarget()); } - -} // end namespace llvm diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp index bbce5f670c99e..c4935746f5ad1 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp @@ -53,7 +53,8 @@ class LanaiAsmBackend : public MCAsmBackend { const MCValue &Target, MutableArrayRef Data, uint64_t Value, bool IsResolved) const override; - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + std::unique_ptr + createObjectWriter(raw_pwrite_stream &OS) const override; // No instruction requires relaxation bool fixupNeedsRelaxation(const MCFixup & /*Fixup*/, uint64_t /*Value*/, @@ -126,7 +127,7 @@ void LanaiAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, } } -MCObjectWriter * +std::unique_ptr LanaiAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createLanaiELFObjectWriter(OS, MCELFObjectTargetWriter::getOSABI(OSType)); diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp index 64cd3342ac18b..3c40176d2f60c 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/LanaiFixupKinds.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -86,8 +87,8 @@ bool LanaiELFObjectWriter::needsRelocateWithSymbol(const MCSymbol & /*SD*/, } } -MCObjectWriter *llvm::createLanaiELFObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI) { - MCELFObjectTargetWriter *MOTW = new LanaiELFObjectWriter(OSABI); - return createELFObjectWriter(MOTW, OS, 
/*IsLittleEndian=*/false); +std::unique_ptr +llvm::createLanaiELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { + return createELFObjectWriter(llvm::make_unique(OSABI), + OS, /*IsLittleEndian=*/false); } diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp index bcbde2b8b7947..74d929450ed25 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp @@ -60,12 +60,15 @@ createLanaiMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { } static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, - MCAsmBackend &MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll) { + std::unique_ptr &&MAB, + raw_pwrite_stream &OS, + std::unique_ptr &&Emitter, + bool RelaxAll) { if (!T.isOSBinFormatELF()) llvm_unreachable("OS not supported"); - return createELFStreamer(Context, MAB, OS, Emitter, RelaxAll); + return createELFStreamer(Context, std::move(MAB), OS, std::move(Emitter), + RelaxAll); } static MCInstPrinter *createLanaiMCInstPrinter(const Triple & /*T*/, diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h index 8adaf4cea4202..5bc84ad83870f 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h +++ b/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h @@ -42,8 +42,8 @@ MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TheTriple, StringRef CPU, const MCTargetOptions &Options); -MCObjectWriter *createLanaiELFObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI); +std::unique_ptr +createLanaiELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); } // namespace llvm // Defines symbolic names for Lanai registers. This defines a mapping from diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 9bbb430962eca..59f78ed5cd63f 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -473,7 +473,7 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, sti), + : MCTargetAsmParser(Options, sti, MII), ABI(MipsABIInfo::computeTargetABI(Triple(sti.getTargetTriple()), sti.getCPU(), Options)) { MCAsmParserExtension::Initialize(parser); @@ -5859,14 +5859,21 @@ OperandMatchResultTy MipsAsmParser::parseInvNum(OperandVector &Operands) { MCAsmParser &Parser = getParser(); const MCExpr *IdVal; - // If the first token is '$' we may have register operand. - if (Parser.getTok().is(AsmToken::Dollar)) - return MatchOperand_NoMatch; + // If the first token is '$' we may have register operand. We have to reject + // cases where it is not a register. Complicating the matter is that + // register names are not reserved across all ABIs. + // Peek past the dollar to see if it's a register name for this ABI. SMLoc S = Parser.getTok().getLoc(); + if (Parser.getTok().is(AsmToken::Dollar)) { + return matchCPURegisterName(Parser.getLexer().peekTok().getString()) == -1 + ? 
MatchOperand_ParseFail
+               : MatchOperand_NoMatch;
+  }
   if (getParser().parseExpression(IdVal))
     return MatchOperand_ParseFail;
   const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal);
-  assert(MCE && "Unexpected MCExpr type.");
+  if (!MCE)
+    return MatchOperand_NoMatch;
   int64_t Val = MCE->getValue();
   SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
   Operands.push_back(MipsOperand::CreateImm(
@@ -6584,6 +6591,10 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) {
     setFeatureBits(Mips::FeatureDSP, "dsp");
     getTargetStreamer().emitDirectiveSetDsp();
     break;
+  case Mips::FeatureDSPR2:
+    setFeatureBits(Mips::FeatureDSPR2, "dspr2");
+    getTargetStreamer().emitDirectiveSetDspr2();
+    break;
   case Mips::FeatureMicroMips:
     setFeatureBits(Mips::FeatureMicroMips, "micromips");
     getTargetStreamer().emitDirectiveSetMicroMips();
@@ -6928,6 +6939,8 @@ bool MipsAsmParser::parseDirectiveSet() {
     return parseSetFeature(Mips::FeatureMips64r6);
   } else if (Tok.getString() == "dsp") {
     return parseSetFeature(Mips::FeatureDSP);
+  } else if (Tok.getString() == "dspr2") {
+    return parseSetFeature(Mips::FeatureDSPR2);
   } else if (Tok.getString() == "nodsp") {
     return parseSetNoDspDirective();
   } else if (Tok.getString() == "msa") {
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 15195564301fd..002fa512b2132 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -1283,9 +1283,9 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
       return Result;
   }
 
-  if (hasMips32r6() && isFP64()) {
-    DEBUG(dbgs() << "Trying MicroMips32r6FP64 table (32-bit opcodes):\n");
-    Result = decodeInstruction(DecoderTableMicroMips32r6FP6432, Instr, Insn,
+  if (isFP64()) {
+    DEBUG(dbgs() << "Trying MicroMipsFP64 table (32-bit opcodes):\n");
+    Result = decodeInstruction(DecoderTableMicroMipsFP6432, Instr, Insn,
                                Address, this, STI);
     if (Result != MCDisassembler::Fail) {
       Size = 4;
@@ -1368,6 +1368,14 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
       return Result;
   }
 
+  if (isFP64()) {
+    DEBUG(dbgs() << "Trying MipsFP64 (64 bit FPU) table (32-bit opcodes):\n");
+    Result = decodeInstruction(DecoderTableMipsFP6432, Instr, Insn,
+                               Address, this, STI);
+    if (Result != MCDisassembler::Fail)
+      return Result;
+  }
+
   DEBUG(dbgs() << "Trying Mips table (32-bit opcodes):\n");
   // Calling the auto-generated decoder function.
Result = diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 512267320c118..1ad524c069696 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -210,7 +210,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } -MCObjectWriter * +std::unique_ptr MipsAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createMipsELFObjectWriter(OS, TheTriple, IsN32); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index 409d4e2bf92d1..406b820edae57 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -37,7 +37,8 @@ class MipsAsmBackend : public MCAsmBackend { StringRef CPU, bool N32) : TheTriple(TT), IsLittle(TT.isLittleEndian()), IsN32(N32) {} - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + std::unique_ptr + createObjectWriter(raw_pwrite_stream &OS) const override; void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 4339eca93f147..6d2f098a6b32a 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" @@ -655,12 +656,13 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, } } -MCObjectWriter *llvm::createMipsELFObjectWriter(raw_pwrite_stream &OS, - const Triple &TT, bool IsN32) { +std::unique_ptr +llvm::createMipsELFObjectWriter(raw_pwrite_stream &OS, const Triple &TT, + bool IsN32) { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); bool IsN64 = TT.isArch64Bit() && !IsN32; bool HasRelocationAddend = TT.isArch64Bit(); - auto *MOTW = new MipsELFObjectWriter(OSABI, HasRelocationAddend, IsN64, - TT.isLittleEndian()); - return createELFObjectWriter(MOTW, OS, TT.isLittleEndian()); + auto MOTW = llvm::make_unique( + OSABI, HasRelocationAddend, IsN64, TT.isLittleEndian()); + return createELFObjectWriter(std::move(MOTW), OS, TT.isLittleEndian()); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index f658aadff22fe..4b8f9c7a680c1 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -11,7 +11,9 @@ #include "MipsOptionRecord.h" #include "MipsTargetStreamer.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbolELF.h" @@ -19,6 +21,16 @@ using namespace llvm; +MipsELFStreamer::MipsELFStreamer(MCContext &Context, + std::unique_ptr MAB, + raw_pwrite_stream &OS, + std::unique_ptr Emitter) + : MCELFStreamer(Context, std::move(MAB), OS, std::move(Emitter)) { + RegInfoRecord = new MipsRegInfoRecord(this, Context); + MipsOptionRecords.push_back( + std::unique_ptr(RegInfoRecord)); +} + void MipsELFStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo 
&STI, bool) { MCELFStreamer::EmitInstruction(Inst, STI); @@ -77,10 +89,9 @@ void MipsELFStreamer::EmitMipsOptionRecords() { I->EmitMipsOptionRecord(); } -MCELFStreamer *llvm::createMipsELFStreamer(MCContext &Context, - MCAsmBackend &MAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll) { - return new MipsELFStreamer(Context, MAB, OS, Emitter); +MCELFStreamer *llvm::createMipsELFStreamer( + MCContext &Context, std::unique_ptr MAB, + raw_pwrite_stream &OS, std::unique_ptr Emitter, + bool RelaxAll) { + return new MipsELFStreamer(Context, std::move(MAB), OS, std::move(Emitter)); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index f5eda112817ef..2fe9b08b645ac 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -33,13 +33,9 @@ class MipsELFStreamer : public MCELFStreamer { SmallVector Labels; public: - MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter) - : MCELFStreamer(Context, MAB, OS, Emitter) { - RegInfoRecord = new MipsRegInfoRecord(this, Context); - MipsOptionRecords.push_back( - std::unique_ptr(RegInfoRecord)); - } + MipsELFStreamer(MCContext &Context, std::unique_ptr MAB, + raw_pwrite_stream &OS, + std::unique_ptr Emitter); /// Overriding this function allows us to add arbitrary behaviour before the /// \p Inst is actually emitted. For example, we can inspect the operands and @@ -69,9 +65,11 @@ class MipsELFStreamer : public MCELFStreamer { void createPendingLabelRelocs(); }; -MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, +MCELFStreamer *createMipsELFStreamer(MCContext &Context, + std::unique_ptr MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll); + std::unique_ptr Emitter, + bool RelaxAll); } // end namespace llvm #endif // LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSELFSTREAMER_H diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h index 687b800c24096..dfacf43545164 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -22,9 +22,11 @@ bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx, bool baseRegNeedsLoadStoreMask(unsigned Reg); // This function creates an MCELFStreamer for Mips NaCl. 
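+// (The streamer it returns sandboxes memory accesses by masking the base
+// registers identified via isBasePlusOffsetMemoryAccess() and
+// baseRegNeedsLoadStoreMask() above.)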
-MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, +MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, + std::unique_ptr TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll); + std::unique_ptr Emitter, + bool RelaxAll); } #endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index e05cbc55ffeed..8fcd8aa4c19ba 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -19,6 +19,7 @@ #include "MipsMCNaCl.h" #include "MipsTargetStreamer.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" @@ -91,13 +92,17 @@ static MCInstPrinter *createMipsMCInstPrinter(const Triple &T, } static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, - MCAsmBackend &MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll) { + std::unique_ptr &&MAB, + raw_pwrite_stream &OS, + std::unique_ptr &&Emitter, + bool RelaxAll) { MCStreamer *S; if (!T.isOSNaCl()) - S = createMipsELFStreamer(Context, MAB, OS, Emitter, RelaxAll); + S = createMipsELFStreamer(Context, std::move(MAB), OS, std::move(Emitter), + RelaxAll); else - S = createMipsNaClELFStreamer(Context, MAB, OS, Emitter, RelaxAll); + S = createMipsNaClELFStreamer(Context, std::move(MAB), OS, + std::move(Emitter), RelaxAll); return S; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 3b46c5c494986..abbf08ed212ff 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -16,6 +16,8 @@ #include "llvm/Support/DataTypes.h" +#include + namespace llvm { class MCAsmBackend; class MCCodeEmitter; @@ -47,8 +49,8 @@ MCAsmBackend *createMipsAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU, const MCTargetOptions &Options); -MCObjectWriter *createMipsELFObjectWriter(raw_pwrite_stream &OS, - const Triple &TT, bool IsN32); +std::unique_ptr +createMipsELFObjectWriter(raw_pwrite_stream &OS, const Triple &TT, bool IsN32); namespace MIPS_MC { StringRef selectMipsCPU(const Triple &TT, StringRef CPU); diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index 9266f0e216d11..d878cf82e26d4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -20,7 +20,9 @@ #include "Mips.h" #include "MipsELFStreamer.h" #include "MipsMCNaCl.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/ErrorHandling.h" @@ -40,9 +42,10 @@ const unsigned LoadStoreStackMaskReg = Mips::T7; class MipsNaClELFStreamer : public MipsELFStreamer { public: - MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_pwrite_stream &OS, MCCodeEmitter *Emitter) - : MipsELFStreamer(Context, TAB, OS, Emitter) {} + MipsNaClELFStreamer(MCContext &Context, std::unique_ptr TAB, + raw_pwrite_stream &OS, + std::unique_ptr Emitter) + : MipsELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)) {} ~MipsNaClELFStreamer() override = default; @@ -255,11 +258,13 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) { return Reg != Mips::SP && Reg != Mips::T8; } 
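All of the streamer and object-writer hunks in this patch apply the same ownership migration: factories that previously took MCAsmBackend& and a raw MCCodeEmitter* now take std::unique_ptr and move it down into the base class. A minimal standalone sketch of that pattern (the Backend, Emitter, and Streamer names below are illustrative stand-ins, not LLVM's MC classes):

#include <memory>
#include <utility>

struct Backend {};  // stand-in for MCAsmBackend
struct Emitter {};  // stand-in for MCCodeEmitter

class Streamer {    // stand-in for MCELFStreamer
  std::unique_ptr<Backend> TAB;  // the streamer now owns its backend
  std::unique_ptr<Emitter> CE;   // ...and its code emitter
public:
  Streamer(std::unique_ptr<Backend> TAB, std::unique_ptr<Emitter> CE)
      : TAB(std::move(TAB)), CE(std::move(CE)) {}
};

// Factory in the new shape: the caller hands ownership over and the factory
// forwards it, so no raw pointer with an unclear lifetime is left behind.
Streamer *createStreamer(std::unique_ptr<Backend> TAB,
                         std::unique_ptr<Emitter> CE) {
  return new Streamer(std::move(TAB), std::move(CE));
}

int main() {
  std::unique_ptr<Streamer> S(createStreamer(std::make_unique<Backend>(),
                                             std::make_unique<Emitter>()));
}

The createMipsNaClELFStreamer change just below is this same shape applied to the real MC types.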
-MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, +MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, + std::unique_ptr TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, + std::unique_ptr Emitter, bool RelaxAll) { - MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter); + MipsNaClELFStreamer *S = + new MipsNaClELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)); if (RelaxAll) S->getAssembler().setRelaxAll(true); diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 2907b77158575..f01a800afc5eb 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -98,6 +98,7 @@ void MipsTargetStreamer::emitDirectiveSetHardFloat() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); } +void MipsTargetStreamer::emitDirectiveSetDspr2() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {} bool MipsTargetStreamer::emitDirectiveCpRestore( @@ -547,6 +548,11 @@ void MipsTargetAsmStreamer::emitDirectiveSetDsp() { MipsTargetStreamer::emitDirectiveSetDsp(); } +void MipsTargetAsmStreamer::emitDirectiveSetDspr2() { + OS << "\t.set\tdspr2\n"; + MipsTargetStreamer::emitDirectiveSetDspr2(); +} + void MipsTargetAsmStreamer::emitDirectiveSetNoDsp() { OS << "\t.set\tnodsp\n"; MipsTargetStreamer::emitDirectiveSetNoDsp(); diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td index 23e8b32d48900..425e75e14c8b3 100644 --- a/lib/Target/Mips/MicroMips32r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td @@ -1485,7 +1485,7 @@ def MTC1_MMR6 : StdMMR6Rel, MTC1_MMR6_DESC, MTC1_MMR6_ENC, ISA_MICROMIPS32R6; def MTC2_MMR6 : StdMMR6Rel, MTC2_MMR6_ENC, MTC2_MMR6_DESC, ISA_MICROMIPS32R6; def MTHC0_MMR6 : R6MMR6Rel, MTHC0_MMR6_ENC, MTHC0_MMR6_DESC, ISA_MICROMIPS32R6; def MTHC1_D32_MMR6 : StdMMR6Rel, MTHC1_D32_MMR6_DESC, MTHC1_MMR6_ENC, ISA_MICROMIPS32R6; -let DecoderNamespace = "MicroMips32r6FP64" in { +let DecoderNamespace = "MicroMipsFP64" in { def MTHC1_D64_MMR6 : R6MMR6Rel, MTHC1_D64_MMR6_DESC, MTHC1_MMR6_ENC, ISA_MICROMIPS32R6; } @@ -1496,7 +1496,7 @@ def MFC2_MMR6 : StdMMR6Rel, MFC2_MMR6_ENC, MFC2_MMR6_DESC, ISA_MICROMIPS32R6; def MFHC0_MMR6 : R6MMR6Rel, MFHC0_MMR6_ENC, MFHC0_MMR6_DESC, ISA_MICROMIPS32R6; def MFHC1_D32_MMR6 : StdMMR6Rel, MFHC1_D32_MMR6_DESC, MFHC1_MMR6_ENC, ISA_MICROMIPS32R6; -let DecoderNamespace = "MicroMips32r6FP64" in { +let DecoderNamespace = "MicroMipsFP64" in { def MFHC1_D64_MMR6 : StdMMR6Rel, MFHC1_D64_MMR6_DESC, MFHC1_MMR6_ENC, ISA_MICROMIPS32R6; } @@ -1729,7 +1729,7 @@ def BC2EQZC_MMR6 : R6MMR6Rel, MipsR6Inst, BC2EQZC_MMR6_ENC, BC2EQZC_MMR6_DESC, ISA_MICROMIPS32R6; def BC2NEZC_MMR6 : R6MMR6Rel, MipsR6Inst, BC2NEZC_MMR6_ENC, BC2NEZC_MMR6_DESC, ISA_MICROMIPS32R6; -let DecoderNamespace = "MicroMips32r6FP64" in { +let DecoderNamespace = "MicroMipsFP64" in { def LDC1_D64_MMR6 : StdMMR6Rel, LDC1_D64_MMR6_DESC, LDC1_MMR6_ENC, ISA_MICROMIPS32R6 { let BaseOpcode = "LDC164"; diff --git a/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/lib/Target/Mips/MicroMipsDSPInstrInfo.td index f82f82fc7e45b..20c1ab5a99982 100644 --- a/lib/Target/Mips/MicroMipsDSPInstrInfo.td +++ b/lib/Target/Mips/MicroMipsDSPInstrInfo.td @@ -415,6 +415,13 @@ class BITREV_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<"bitrev", 
                                            int_mips_bitrev,
 class BPOSGE32_MM_DESC : BPOSGE32_DESC_BASE<"bposge32", brtarget_mm,
                                             NoItinerary>;
+let DecoderNamespace = "MicroMipsDSP", Arch = "mmdsp",
+    AdditionalPredicates = [HasDSP, InMicroMips] in {
+  def LWDSP_MM : Load<"lw", DSPROpnd, null_frag, II_LW>, DspMMRel,
+                 LW_FM_MM<0x3f>;
+  def SWDSP_MM : Store<"sw", DSPROpnd, null_frag, II_SW>, DspMMRel,
+                 LW_FM_MM<0x3e>;
+}
 // Instruction defs.
 // microMIPS DSP Rev 1
 def ADDQ_PH_MM : DspMMRel, ADDQ_PH_MM_ENC, ADDQ_PH_DESC;
diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td
index f0bbc84048769..49025cc1570a0 100644
--- a/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -1,33 +1,49 @@
-let isCodeGenOnly = 1, Predicates = [InMicroMips] in {
+//==- MicroMipsInstrFPU.td - microMIPS FPU Instruction Info -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the microMIPS FPU instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in {
 def FADD_S_MM : MMRel, ADDS_FT<"add.s", FGR32Opnd, II_ADD_S, 1, fadd>,
-                ADDS_FM_MM<0, 0x30>;
+                ADDS_FM_MM<0, 0x30>, ISA_MICROMIPS;
 def FDIV_S_MM : MMRel, ADDS_FT<"div.s", FGR32Opnd, II_DIV_S, 0, fdiv>,
-                ADDS_FM_MM<0, 0xf0>;
+                ADDS_FM_MM<0, 0xf0>, ISA_MICROMIPS;
 def FMUL_S_MM : MMRel, ADDS_FT<"mul.s", FGR32Opnd, II_MUL_S, 1, fmul>,
-                ADDS_FM_MM<0, 0xb0>;
+                ADDS_FM_MM<0, 0xb0>, ISA_MICROMIPS;
 def FSUB_S_MM : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>,
-                ADDS_FM_MM<0, 0x70>;
+                ADDS_FM_MM<0, 0x70>, ISA_MICROMIPS;
 def FADD_MM : MMRel, ADDS_FT<"add.d", AFGR64Opnd, II_ADD_D, 1, fadd>,
-              ADDS_FM_MM<1, 0x30>;
+              ADDS_FM_MM<1, 0x30>, ISA_MICROMIPS;
 def FDIV_MM : MMRel, ADDS_FT<"div.d", AFGR64Opnd, II_DIV_D, 0, fdiv>,
-              ADDS_FM_MM<1, 0xf0>;
+              ADDS_FM_MM<1, 0xf0>, ISA_MICROMIPS;
 def FMUL_MM : MMRel, ADDS_FT<"mul.d", AFGR64Opnd, II_MUL_D, 1, fmul>,
-              ADDS_FM_MM<1, 0xb0>;
+              ADDS_FM_MM<1, 0xb0>, ISA_MICROMIPS;
 def FSUB_MM : MMRel, ADDS_FT<"sub.d", AFGR64Opnd, II_SUB_D, 0, fsub>,
-              ADDS_FM_MM<1, 0x70>;
+              ADDS_FM_MM<1, 0x70>, ISA_MICROMIPS;
 def LWXC1_MM : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>,
-               LWXC1_FM_MM<0x48>, INSN_MIPS4_32R2_NOT_32R6_64R6;
+               LWXC1_FM_MM<0x48>, ISA_MICROMIPS32_NOT_MIPS32R6;
 def SWXC1_MM : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>,
-               SWXC1_FM_MM<0x88>, INSN_MIPS4_32R2_NOT_32R6_64R6;
+               SWXC1_FM_MM<0x88>, ISA_MICROMIPS32_NOT_MIPS32R6;
+
+// FIXME: These instruction definitions are incorrect. They should be 64-bit
+// FPU only.
 def LUXC1_MM : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>,
-               LWXC1_FM_MM<0x148>, INSN_MIPS5_32R2_NOT_32R6_64R6;
+               LWXC1_FM_MM<0x148>, ISA_MICROMIPS32_NOT_MIPS32R6;
 def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>,
-               SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2_NOT_32R6_64R6;
+               SWXC1_FM_MM<0x188>, ISA_MICROMIPS32_NOT_MIPS32R6;
 def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
-                  CEQS_FM_MM<0> {
+                  CEQS_FM_MM<0>, ISA_MICROMIPS32_NOT_MIPS32R6 {
   // FIXME: This is required to work around the fact that these instructions
   // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
   // fcc register set used directly.
@@ -35,255 +51,299 @@ def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
 }
 
 def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>,
-                  CEQS_FM_MM<1> {
+                  CEQS_FM_MM<1>, ISA_MICROMIPS32_NOT_MIPS32R6 {
   // FIXME: This is required to work around the fact that these instructions
   // only use $fcc0. Ideally, MipsFPCmp nodes could be removed and the
   // fcc register set used directly.
   bits<3> fcc = 0;
 }
-def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>,
-              BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6;
-def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, II_BC1T, MIPS_BRANCH_T>,
-              BC1F_FM_MM<0x1d>, ISA_MIPS1_NOT_32R6_64R6;
+}
+
+let DecoderNamespace = "MicroMips" in {
+  def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>,
+                BC1F_FM_MM<0x1c>, ISA_MICROMIPS32_NOT_MIPS32R6;
+  def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, II_BC1T, MIPS_BRANCH_T>,
+                BC1F_FM_MM<0x1d>, ISA_MICROMIPS32_NOT_MIPS32R6;
+}
+
+let isCodeGenOnly = 1 in {
 def CVT_W_S_MM : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>,
-                 ROUND_W_FM_MM<0, 0x24>;
-def ROUND_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>,
-                   ROUND_W_FM_MM<0, 0xec>;
+                 ROUND_W_FM_MM<0, 0x24>, ISA_MICROMIPS;
+def ROUND_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd,
+                                              II_ROUND>, ROUND_W_FM_MM<0, 0xec>,
+                   ISA_MICROMIPS;
 def CEIL_W_MM : MMRel, ABSS_FT<"ceil.w.d", FGR32Opnd, AFGR64Opnd, II_CEIL>,
-                ROUND_W_FM_MM<1, 0x6c>;
+                ROUND_W_FM_MM<1, 0x6c>, ISA_MICROMIPS, FGR_32;
 def CVT_W_MM : MMRel, ABSS_FT<"cvt.w.d", FGR32Opnd, AFGR64Opnd, II_CVT>,
-               ROUND_W_FM_MM<1, 0x24>;
+               ROUND_W_FM_MM<1, 0x24>, ISA_MICROMIPS, FGR_32;
 def FLOOR_W_MM : MMRel, ABSS_FT<"floor.w.d", FGR32Opnd, AFGR64Opnd, II_FLOOR>,
-                 ROUND_W_FM_MM<1, 0x2c>;
-def ROUND_W_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd, II_ROUND>,
-                 ROUND_W_FM_MM<1, 0xec>;
+                 ROUND_W_FM_MM<1, 0x2c>, ISA_MICROMIPS, FGR_32;
+def ROUND_W_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd,
+                                            II_ROUND>, ROUND_W_FM_MM<1, 0xec>,
+                 ISA_MICROMIPS, FGR_32;
 def TRUNC_W_MM : MMRel, ABSS_FT<"trunc.w.d", FGR32Opnd, AFGR64Opnd, II_TRUNC>,
-                 ROUND_W_FM_MM<1, 0xac>;
+                 ROUND_W_FM_MM<1, 0xac>, ISA_MICROMIPS, FGR_32;
 def FSQRT_MM : MMRel, ABSS_FT<"sqrt.d", AFGR64Opnd, AFGR64Opnd, II_SQRT_D,
-               fsqrt>, ROUND_W_FM_MM<1, 0x28>;
+               fsqrt>, ROUND_W_FM_MM<1, 0x28>,
+               ISA_MICROMIPS, FGR_32;
 def CVT_L_S_MM : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>,
-                 ROUND_W_FM_MM<0, 0x4>, INSN_MIPS3_32R2;
+                 ROUND_W_FM_MM<0, 0x4>, ISA_MICROMIPS, FGR_64;
 def CVT_L_D64_MM : MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>,
-                   ROUND_W_FM_MM<1, 0x4>, INSN_MIPS3_32R2;
+                   ROUND_W_FM_MM<1, 0x4>, ISA_MICROMIPS, FGR_64;
+
+}
 
-def FABS_S_MM : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>,
-                ABS_FM_MM<0, 0xd>;
+let DecoderNamespace = "MicroMips" in {
+  def FABS_S_MM : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>,
+                  ABS_FM_MM<0, 0xd>, ISA_MICROMIPS;
+  def FABS_MM : MMRel, ABSS_FT<"abs.d", AFGR64Opnd, AFGR64Opnd, II_ABS, fabs>,
+                ABS_FM_MM<1, 0xd>, ISA_MICROMIPS, FGR_32;
+}
+
+let isCodeGenOnly = 1 in {
 def FMOV_S_MM : MMRel, ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>,
-                ABS_FM_MM<0, 0x1>;
+                ABS_FM_MM<0, 0x1>, ISA_MICROMIPS;
 def FNEG_S_MM : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>,
-                ABS_FM_MM<0, 0x2d>;
+                ABS_FM_MM<0, 0x2d>, ISA_MICROMIPS;
 def CVT_D_S_MM : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, II_CVT>,
-                 ABS_FM_MM<0, 0x4d>;
+
ABS_FM_MM<0, 0x4d>, ISA_MICROMIPS, FGR_32; def CVT_D32_W_MM : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, II_CVT>, - ABS_FM_MM<1, 0x4d>; + ABS_FM_MM<1, 0x4d>, ISA_MICROMIPS, FGR_32; def CVT_S_D32_MM : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, II_CVT>, - ABS_FM_MM<0, 0x6d>; + ABS_FM_MM<0, 0x6d>, ISA_MICROMIPS, FGR_32; def CVT_S_W_MM : MMRel, ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, II_CVT>, - ABS_FM_MM<1, 0x6d>; + ABS_FM_MM<1, 0x6d>, ISA_MICROMIPS; -def FABS_MM : MMRel, ABSS_FT<"abs.d", AFGR64Opnd, AFGR64Opnd, II_ABS, fabs>, - ABS_FM_MM<1, 0xd>; def FNEG_MM : MMRel, ABSS_FT<"neg.d", AFGR64Opnd, AFGR64Opnd, II_NEG, fneg>, - ABS_FM_MM<1, 0x2d>; + ABS_FM_MM<1, 0x2d>, ISA_MICROMIPS, FGR_32; def FMOV_D32_MM : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>, - ABS_FM_MM<1, 0x1>, FGR_32; + ABS_FM_MM<1, 0x1>, ISA_MICROMIPS, FGR_32; def MOVZ_I_S_MM : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, - II_MOVZ_S>, CMov_I_F_FM_MM<0x78, 0>; + II_MOVZ_S>, CMov_I_F_FM_MM<0x78, 0>, + ISA_MICROMIPS32_NOT_MIPS32R6; def MOVN_I_S_MM : MMRel, CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, - II_MOVN_S>, CMov_I_F_FM_MM<0x38, 0>; + II_MOVN_S>, CMov_I_F_FM_MM<0x38, 0>, + ISA_MICROMIPS32_NOT_MIPS32R6; def MOVZ_I_D32_MM : MMRel, CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd, - II_MOVZ_D>, CMov_I_F_FM_MM<0x78, 1>; + II_MOVZ_D>, CMov_I_F_FM_MM<0x78, 1>, + ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def MOVN_I_D32_MM : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, - II_MOVN_D>, CMov_I_F_FM_MM<0x38, 1>; + II_MOVN_D>, CMov_I_F_FM_MM<0x38, 1>, + ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def MOVT_S_MM : MMRel, CMov_F_F_FT<"movt.s", FGR32Opnd, II_MOVT_S, - MipsCMovFP_T>, CMov_F_F_FM_MM<0x60, 0>; + MipsCMovFP_T>, CMov_F_F_FM_MM<0x60, 0>, + ISA_MICROMIPS32_NOT_MIPS32R6; def MOVF_S_MM : MMRel, CMov_F_F_FT<"movf.s", FGR32Opnd, II_MOVF_S, - MipsCMovFP_F>, CMov_F_F_FM_MM<0x20, 0>; + MipsCMovFP_F>, CMov_F_F_FM_MM<0x20, 0>, + ISA_MICROMIPS32_NOT_MIPS32R6; def MOVT_D32_MM : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, II_MOVT_D, - MipsCMovFP_T>, CMov_F_F_FM_MM<0x60, 1>; + MipsCMovFP_T>, CMov_F_F_FM_MM<0x60, 1>, + ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def MOVF_D32_MM : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, II_MOVF_D, - MipsCMovFP_F>, CMov_F_F_FM_MM<0x20, 1>; + MipsCMovFP_F>, CMov_F_F_FM_MM<0x20, 1>, + ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def MFC1_MM : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, - II_MFC1, bitconvert>, MFC1_FM_MM<0x80>; + II_MFC1, bitconvert>, MFC1_FM_MM<0x80>, + ISA_MICROMIPS; def MTC1_MM : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, - II_MTC1, bitconvert>, MFC1_FM_MM<0xa0>; + II_MTC1, bitconvert>, MFC1_FM_MM<0xa0>, + ISA_MICROMIPS; def MADD_S_MM : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>, - MADDS_FM_MM<0x1>; + MADDS_FM_MM<0x1>, ISA_MICROMIPS32_NOT_MIPS32R6; def MSUB_S_MM : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>, - MADDS_FM_MM<0x21>; + MADDS_FM_MM<0x21>, ISA_MICROMIPS32_NOT_MIPS32R6; def NMADD_S_MM : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>, - MADDS_FM_MM<0x2>; + MADDS_FM_MM<0x2>, ISA_MICROMIPS32_NOT_MIPS32R6; def NMSUB_S_MM : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>, - MADDS_FM_MM<0x22>; + MADDS_FM_MM<0x22>, ISA_MICROMIPS32_NOT_MIPS32R6; def MADD_D32_MM : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>, - MADDS_FM_MM<0x9>; + MADDS_FM_MM<0x9>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def MSUB_D32_MM : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM_MM<0x29>; + MADDS_FM_MM<0x29>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def 
NMADD_D32_MM : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>, - MADDS_FM_MM<0xa>; + MADDS_FM_MM<0xa>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; def NMSUB_D32_MM : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>, - MADDS_FM_MM<0x2a>; + MADDS_FM_MM<0x2a>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; } -let AdditionalPredicates = [InMicroMips] in { - def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, - II_FLOOR>, ROUND_W_FM_MM<0, 0x2c>; - def TRUNC_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd, - FGR32Opnd, II_TRUNC>, ROUND_W_FM_MM<0, 0xac>; - def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, - ROUND_W_FM_MM<0, 0x6c>; - def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, - fsqrt>, ROUND_W_FM_MM<0, 0x28>; - def MTHC1_MM : MMRel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>, - MFC1_FM_MM<0xe0>, ISA_MIPS32R2, FGR_32; - def MFHC1_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>, - MFC1_FM_MM<0xc0>, ISA_MIPS32R2, FGR_32; - let DecoderNamespace = "MicroMips" in { - def CFC1_MM : MMRel, MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, II_CFC1>, - MFC1_FM_MM<0x40>; - def CTC1_MM : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, II_CTC1>, - MFC1_FM_MM<0x60>; - def RECIP_S_MM : MMRel, ABSS_FT<"recip.s", FGR32Opnd, FGR32Opnd, - II_RECIP_S>, - ROUND_W_FM_MM<0b0, 0b01001000>; - def RECIP_D_MM : MMRel, ABSS_FT<"recip.d", AFGR64Opnd, AFGR64Opnd, - II_RECIP_D>, ROUND_W_FM_MM<0b1, 0b01001000>; - def RSQRT_S_MM : MMRel, ABSS_FT<"rsqrt.s", FGR32Opnd, FGR32Opnd, +def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, + II_FLOOR>, ROUND_W_FM_MM<0, 0x2c>, + ISA_MICROMIPS; +def TRUNC_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd, + FGR32Opnd, II_TRUNC>, + ROUND_W_FM_MM<0, 0xac>, ISA_MICROMIPS; +def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, + ROUND_W_FM_MM<0, 0x6c>, ISA_MICROMIPS; +def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, + fsqrt>, ROUND_W_FM_MM<0, 0x28>, ISA_MICROMIPS; +def MTHC1_MM : MMRel, MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>, + MFC1_FM_MM<0xe0>, ISA_MICROMIPS, FGR_32; +def MFHC1_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>, + MFC1_FM_MM<0xc0>, ISA_MICROMIPS, FGR_32; + +let DecoderNamespace = "MicroMips" in { + def CFC1_MM : MMRel, MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, II_CFC1>, + MFC1_FM_MM<0x40>, ISA_MICROMIPS; + def CTC1_MM : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, II_CTC1>, + MFC1_FM_MM<0x60>, ISA_MICROMIPS; + def RECIP_S_MM : MMRel, ABSS_FT<"recip.s", FGR32Opnd, FGR32Opnd, II_RECIP_S>, - ROUND_W_FM_MM<0b0, 0b00001000>; - def RSQRT_D_MM : MMRel, ABSS_FT<"rsqrt.d", AFGR64Opnd, AFGR64Opnd, - II_RECIP_D>, ROUND_W_FM_MM<0b1, 0b00001000>; + ROUND_W_FM_MM<0b0, 0b01001000>, ISA_MICROMIPS; + def RECIP_D32_MM : MMRel, ABSS_FT<"recip.d", AFGR64Opnd, AFGR64Opnd, + II_RECIP_D>, + ROUND_W_FM_MM<0b1, 0b01001000>, ISA_MICROMIPS, FGR_32 { + let BaseOpcode = "RECIP_D32"; } - let DecoderNamespace = "MicroMips", DecoderMethod = "DecodeFMemMMR2" in { - def LDC1_MM : MMRel, LW_FT<"ldc1", AFGR64Opnd, mem_mm_16, II_LDC1, load>, - LW_FM_MM<0x2f>, FGR_32 { - let BaseOpcode = "LDC132"; - } - def SDC1_MM : MMRel, SW_FT<"sdc1", AFGR64Opnd, mem_mm_16, II_SDC1, store>, - LW_FM_MM<0x2e>, FGR_32; - def LWC1_MM : MMRel, LW_FT<"lwc1", FGR32Opnd, mem_mm_16, II_LWC1, load>, - LW_FM_MM<0x27>; - def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>, - LW_FM_MM<0x26>; + let DecoderNamespace = "MicroMipsFP64" in 
+ def RECIP_D64_MM : MMRel, ABSS_FT<"recip.d", FGR64Opnd, FGR64Opnd, + II_RECIP_D>, + ROUND_W_FM_MM<0b1, 0b01001000>, ISA_MICROMIPS, FGR_64; + def RSQRT_S_MM : MMRel, ABSS_FT<"rsqrt.s", FGR32Opnd, FGR32Opnd, + II_RECIP_S>, + ROUND_W_FM_MM<0b0, 0b00001000>; + def RSQRT_D32_MM : MMRel, ABSS_FT<"rsqrt.d", AFGR64Opnd, AFGR64Opnd, + II_RECIP_D>, + ROUND_W_FM_MM<0b1, 0b00001000>, ISA_MICROMIPS, FGR_32 { + let BaseOpcode = "RSQRT_D32"; } + let DecoderNamespace = "MicroMipsFP64" in + def RSQRT_D64_MM : MMRel, ABSS_FT<"rsqrt.d", FGR64Opnd, FGR64Opnd, + II_RECIP_D>, + ROUND_W_FM_MM<0b1, 0b00001000>, ISA_MICROMIPS, FGR_64; +} - multiclass C_COND_MM<string TypeStr, RegisterOperand RC, bits<2> fmt, - InstrItinClass itin> { - def C_F_#NAME#_MM : MMRel, C_COND_FT<"f", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 0> { - let BaseOpcode = "c.f."#NAME; - let isCommutable = 1; - } - def C_UN_#NAME#_MM : MMRel, C_COND_FT<"un", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 1> { - let BaseOpcode = "c.un."#NAME; - let isCommutable = 1; - } - def C_EQ_#NAME#_MM : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 2> { - let BaseOpcode = "c.eq."#NAME; - let isCommutable = 1; - } - def C_UEQ_#NAME#_MM : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 3> { - let BaseOpcode = "c.ueq."#NAME; - let isCommutable = 1; - } - def C_OLT_#NAME#_MM : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 4> { - let BaseOpcode = "c.olt."#NAME; - } - def C_ULT_#NAME#_MM : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 5> { - let BaseOpcode = "c.ult."#NAME; - } - def C_OLE_#NAME#_MM : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 6> { - let BaseOpcode = "c.ole."#NAME; - } - def C_ULE_#NAME#_MM : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 7> { - let BaseOpcode = "c.ule."#NAME; - } - def C_SF_#NAME#_MM : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 8> { - let BaseOpcode = "c.sf."#NAME; - let isCommutable = 1; - } - def C_NGLE_#NAME#_MM : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 9> { - let BaseOpcode = "c.ngle."#NAME; - } - def C_SEQ_#NAME#_MM : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 10> { - let BaseOpcode = "c.seq."#NAME; - let isCommutable = 1; - } - def C_NGL_#NAME#_MM : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 11> { - let BaseOpcode = "c.ngl."#NAME; - } - def C_LT_#NAME#_MM : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 12> { - let BaseOpcode = "c.lt."#NAME; - } - def C_NGE_#NAME#_MM : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 13> { - let BaseOpcode = "c.nge."#NAME; - } - def C_LE_#NAME#_MM : MMRel, C_COND_FT<"le", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 14> { - let BaseOpcode = "c.le."#NAME; - } - def C_NGT_#NAME#_MM : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>, - C_COND_FM_MM<fmt, 15> { - let BaseOpcode = "c.ngt."#NAME; - } +let DecoderNamespace = "MicroMips", DecoderMethod = "DecodeFMemMMR2" in { + def LDC1_MM : MMRel, LW_FT<"ldc1", AFGR64Opnd, mem_mm_16, II_LDC1, load>, + LW_FM_MM<0x2f>, ISA_MICROMIPS, FGR_32 { + let BaseOpcode = "LDC132"; + } + def SDC1_MM : MMRel, SW_FT<"sdc1", AFGR64Opnd, mem_mm_16, II_SDC1, store>, + LW_FM_MM<0x2e>, ISA_MICROMIPS, FGR_32; + def LWC1_MM : MMRel, LW_FT<"lwc1", FGR32Opnd, mem_mm_16, II_LWC1, load>, + LW_FM_MM<0x27>, ISA_MICROMIPS; + def SWC1_MM : MMRel, SW_FT<"swc1", FGR32Opnd, mem_mm_16, II_SWC1, store>, + LW_FM_MM<0x26>, ISA_MICROMIPS; +} + +multiclass C_COND_MM<string TypeStr, RegisterOperand RC, bits<2> fmt, + InstrItinClass itin> { + def C_F_#NAME#_MM : MMRel, C_COND_FT<"f", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 0> { + let BaseOpcode = "c.f."#NAME; + let isCommutable = 1; + } + def C_UN_#NAME#_MM :
MMRel, C_COND_FT<"un", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 1> { + let BaseOpcode = "c.un."#NAME; + let isCommutable = 1; } + def C_EQ_#NAME#_MM : MMRel, C_COND_FT<"eq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 2> { + let BaseOpcode = "c.eq."#NAME; + let isCommutable = 1; + } + def C_UEQ_#NAME#_MM : MMRel, C_COND_FT<"ueq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 3> { + let BaseOpcode = "c.ueq."#NAME; + let isCommutable = 1; + } + def C_OLT_#NAME#_MM : MMRel, C_COND_FT<"olt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 4> { + let BaseOpcode = "c.olt."#NAME; + } + def C_ULT_#NAME#_MM : MMRel, C_COND_FT<"ult", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 5> { + let BaseOpcode = "c.ult."#NAME; + } + def C_OLE_#NAME#_MM : MMRel, C_COND_FT<"ole", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 6> { + let BaseOpcode = "c.ole."#NAME; + } + def C_ULE_#NAME#_MM : MMRel, C_COND_FT<"ule", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 7> { + let BaseOpcode = "c.ule."#NAME; + } + def C_SF_#NAME#_MM : MMRel, C_COND_FT<"sf", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 8> { + let BaseOpcode = "c.sf."#NAME; + let isCommutable = 1; + } + def C_NGLE_#NAME#_MM : MMRel, C_COND_FT<"ngle", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 9> { + let BaseOpcode = "c.ngle."#NAME; + } + def C_SEQ_#NAME#_MM : MMRel, C_COND_FT<"seq", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 10> { + let BaseOpcode = "c.seq."#NAME; + let isCommutable = 1; + } + def C_NGL_#NAME#_MM : MMRel, C_COND_FT<"ngl", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 11> { + let BaseOpcode = "c.ngl."#NAME; + } + def C_LT_#NAME#_MM : MMRel, C_COND_FT<"lt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 12> { + let BaseOpcode = "c.lt."#NAME; + } + def C_NGE_#NAME#_MM : MMRel, C_COND_FT<"nge", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 13> { + let BaseOpcode = "c.nge."#NAME; + } + def C_LE_#NAME#_MM : MMRel, C_COND_FT<"le", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 14> { + let BaseOpcode = "c.le."#NAME; + } + def C_NGT_#NAME#_MM : MMRel, C_COND_FT<"ngt", TypeStr, RC, itin>, + C_COND_FM_MM<fmt, 15> { + let BaseOpcode = "c.ngt."#NAME; + } +} - defm S : C_COND_MM<"s", FGR32Opnd, 0b00, II_C_CC_S>, - ISA_MIPS1_NOT_32R6_64R6; - defm D32 : C_COND_MM<"d", AFGR64Opnd, 0b01, II_C_CC_D>, - ISA_MIPS1_NOT_32R6_64R6, FGR_32; - let DecoderNamespace = "Mips64" in +defm S : C_COND_MM<"s", FGR32Opnd, 0b00, II_C_CC_S>, + ISA_MICROMIPS32_NOT_MIPS32R6; +defm D32 : C_COND_MM<"d", AFGR64Opnd, 0b01, II_C_CC_D>, + ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; +let DecoderNamespace = "Mips64" in defm D64 : C_COND_MM<"d", FGR64Opnd, 0b01, II_C_CC_D>, - ISA_MIPS1_NOT_32R6_64R6, FGR_64; + ISA_MICROMIPS32_NOT_MIPS32R6, FGR_64; - defm S_MM : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT, - ISA_MIPS1_NOT_32R6_64R6; - defm D32_MM : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT, - ISA_MIPS1_NOT_32R6_64R6, FGR_32; - defm D64_MM : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT, - ISA_MIPS1_NOT_32R6_64R6, FGR_64; +defm S_MM : C_COND_ALIASES<"s", FGR32Opnd>, HARDFLOAT, + ISA_MICROMIPS32_NOT_MIPS32R6; +defm D32_MM : C_COND_ALIASES<"d", AFGR64Opnd>, HARDFLOAT, + ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; +defm D64_MM : C_COND_ALIASES<"d", FGR64Opnd>, HARDFLOAT, + ISA_MICROMIPS32_NOT_MIPS32R6, FGR_64; + +defm : BC1_ALIASES<BC1T_MM, "bc1t", BC1F_MM, "bc1f">, + ISA_MICROMIPS32_NOT_MIPS32R6, HARDFLOAT; - defm : BC1_ALIASES<BC1T_MM, "bc1t", BC1F_MM, "bc1f">, - ISA_MIPS1_NOT_32R6_64R6, HARDFLOAT; -} // To generate NMADD and NMSUB instructions when fneg node is present -let AdditionalPredicates = [NoNaNsFPMath, HasMadd4, InMicroMips, NotMips32r6] in { - defm : NMADD_NMSUB<NMADD_S_MM, NMSUB_S_MM, FGR32Opnd>; - defm : NMADD_NMSUB<NMADD_D32_MM, NMSUB_D32_MM, AFGR64Opnd>; +let AdditionalPredicates = [NoNaNsFPMath, HasMadd4, + InMicroMips, NotMips32r6] in { + defm : NMADD_NMSUB<NMADD_S_MM, NMSUB_S_MM, FGR32Opnd>, + ISA_MICROMIPS32_NOT_MIPS32R6; + defm : NMADD_NMSUB<NMADD_D32_MM, NMSUB_D32_MM, AFGR64Opnd>, +
ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; } //===----------------------------------------------------------------------===// // Floating Point Patterns //===----------------------------------------------------------------------===// -let AdditionalPredicates = [InMicroMips] in { - // Patterns for loads/stores with a reg+imm operand. - let AddedComplexity = 40 in { - def : LoadRegImmPat<LDC1_MM, f64, load>, FGR_32; - def : StoreRegImmPat<SDC1_MM, f64, store>, FGR_32; - def : LoadRegImmPat<LWC1_MM, f32, load>; - def : StoreRegImmPat<SWC1_MM, f32, store>; - } + +// Patterns for loads/stores with a reg+imm operand. +let AddedComplexity = 40 in { + def : LoadRegImmPat<LDC1_MM, f64, load>, ISA_MICROMIPS, FGR_32; + def : StoreRegImmPat<SDC1_MM, f64, store>, ISA_MICROMIPS, FGR_32; + def : LoadRegImmPat<LWC1_MM, f32, load>, ISA_MICROMIPS; + def : StoreRegImmPat<SWC1_MM, f32, store>, ISA_MICROMIPS; } diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td index 774976828a0c8..bc0045dad21e8 100644 --- a/lib/Target/Mips/MicroMipsInstrFormats.td +++ b/lib/Target/Mips/MicroMipsInstrFormats.td @@ -786,13 +786,14 @@ class C_COND_FM_MM<bits<2> fmt, bits<4> c> : CEQS_FM_MM<fmt> { } class BC1F_FM_MM<bits<5> tf> : MMArch { + bits<3> fcc; bits<16> offset; bits<32> Inst; let Inst{31-26} = 0x10; let Inst{25-21} = tf; - let Inst{20-18} = 0x0; // cc + let Inst{20-18} = fcc; // cc let Inst{17-16} = 0x0; let Inst{15-0} = offset; } diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 75a0ca30c1177..1f869db4efee2 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -1006,20 +1006,14 @@ let DecoderNamespace = "MicroMips" in { // MicroMips arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// -def : MipsPat<(i32 immLi16:$imm), - (LI16_MM immLi16:$imm)>; - -let AdditionalPredicates = [InMicroMips] in -defm : MaterializeImms<i32, ZERO, ADDiu_MM, LUi_MM, ORi_MM>; - -let Predicates = [InMicroMips] in { +let AdditionalPredicates = [InMicroMips] in { def : MipsPat<(i32 immLi16:$imm), (LI16_MM immLi16:$imm)>; - def : MipsPat<(i32 immSExt16:$imm), - (ADDiu_MM ZERO, immSExt16:$imm)>; - def : MipsPat<(i32 immZExt16:$imm), - (ORi_MM ZERO, immZExt16:$imm)>; + defm : MaterializeImms<i32, ZERO, ADDiu_MM, LUi_MM, ORi_MM>; +} + +let Predicates = [InMicroMips] in { def : MipsPat<(not GPRMM16:$in), (NOT16_MM GPRMM16:$in)>; def : MipsPat<(not GPR32:$in), diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index 00f890168e65e..76bca3df2bcdc 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -59,7 +59,6 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF, MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - MachineLocation DstML, SrcML; // Adjust stack.
TII.makeFrame(Mips::SP, StackSize, MBB, MBBI); diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 72d2cf56bba4b..f99d0da8d6872 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -417,6 +417,7 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { } void MipsAsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) { + AsmPrinter::EmitBasicBlockEnd(MBB); MipsTargetStreamer &TS = getTargetStreamer(); if (MBB.empty()) TS.emitDirectiveInsn(); @@ -1080,16 +1081,16 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { // be patching over the full 48 bytes (12 instructions) with the following // pattern: // - // ADDIU SP, SP, -8 + // ADDIU SP, SP, -8 // NOP - // SW RA, 4(SP) + // SW RA, 4(SP) // SW T9, 0(SP) // LUI T9, %hi(__xray_FunctionEntry/Exit) // ORI T9, T9, %lo(__xray_FunctionEntry/Exit) // LUI T0, %hi(function_id) - // JALR T9 - // ORI T0, T0, %lo(function_id) - // LW T9, 0(SP) + // JALR T9 + // ORI T0, T0, %lo(function_id) + // LW T9, 0(SP) // LW RA, 4(SP) // ADDIU SP, SP, 8 // diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 69bb374dc209f..a0039d1592485 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -149,7 +149,7 @@ def MOVN_I_D32 : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, II_MOVN_D>, CMov_I_F_FM<19, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_32; -let DecoderNamespace = "Mips64" in { +let DecoderNamespace = "MipsFP64" in { def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64Opnd, II_MOVZ_D>, CMov_I_F_FM<18, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64Opnd, II_MOVN_D>, @@ -188,7 +188,7 @@ def MOVF_D32 : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, II_MOVF_D, MipsCMovFP_F>, CMov_F_F_FM<17, 0>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_32; -let DecoderNamespace = "Mips64" in { +let DecoderNamespace = "MipsFP64" in { def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64Opnd, II_MOVT_D, MipsCMovFP_T>, CMov_F_F_FM<17, 1>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64Opnd, II_MOVF_D, MipsCMovFP_F>, diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index c238a65378e22..2595333188a4a 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -1284,6 +1284,12 @@ let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in { def STORE_CCOND_DSP : Store<"store_ccond_dsp", DSPCC>; } +let DecoderNamespace = "MipsDSP", Arch = "dsp", + AdditionalPredicates = [HasDSP] in { + def LWDSP : Load<"lw", DSPROpnd, null_frag, II_LW>, DspMMRel, LW_FM<0x23>; + def SWDSP : Store<"sw", DSPROpnd, null_frag, II_SW>, DspMMRel, LW_FM<0x2b>; +} + // Pseudo CMP and PICK instructions. class PseudoCMP<Instruction RealInst> : PseudoDSP<(outs DSPCC:$cmp), (ins DSPROpnd:$rs, DSPROpnd:$rt), []>, diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 06874eb979640..38b3c3fb16020 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -3118,7 +3118,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it.
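Editor's note on the MicroMipsInstrFormats.td hunk above: BC1F_FM_MM now routes the new fcc operand into bits 20-18 instead of hard-wiring them to zero, which is what lets bc1f/bc1t name $fcc1..$fcc7. A small stand-alone C++ check of that field packing; encodeBC1MM is a hypothetical helper, not LLVM's emitter.

#include <cstdint>
#include <cstdio>
// Pack the microMIPS BC1F/BC1T word: Inst{31-26}=0x10, Inst{25-21}=tf,
// Inst{20-18}=fcc (previously forced to 0), Inst{17-16}=0, Inst{15-0}=offset.
uint32_t encodeBC1MM(uint32_t tf, uint32_t fcc, uint16_t offset) {
  return (0x10u << 26) | ((tf & 0x1f) << 21) | ((fcc & 0x7) << 18) | offset;
}
int main() {
  // bc1t $fcc2, offset 8: tf = 0x1d selects the "true" form.
  std::printf("0x%08x\n", encodeBC1MM(0x1d, 2, 8)); // 0x43a80008
}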
- SDValue CalleeLo; EVT Ty = Callee.getValueType(); bool GlobalOrExternal = false, IsCallReloc = false; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 999e5fadb817b..c817391153732 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -119,7 +119,7 @@ multiclass ADDS_M<string opstr, InstrItinClass itin, bit isComm, SDPatternOperator OpNode = null_frag> { def _D32 : MMRel, ADDS_FT<opstr, AFGR64Opnd, itin, isComm, OpNode>, FGR_32; def _D64 : ADDS_FT<opstr, FGR64Opnd, itin, isComm, OpNode>, FGR_64 { - string DecoderNamespace = "Mips64"; + string DecoderNamespace = "MipsFP64"; } } @@ -135,14 +135,14 @@ multiclass ABSS_M<string opstr, InstrItinClass itin, SDPatternOperator OpNode = null_frag> { def _D32 : MMRel, ABSS_FT<opstr, AFGR64Opnd, AFGR64Opnd, itin, OpNode>, FGR_32; def _D64 : ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, itin, OpNode>, FGR_64 { - string DecoderNamespace = "Mips64"; + string DecoderNamespace = "MipsFP64"; } } multiclass ROUND_M<string opstr, InstrItinClass itin> { def _D32 : MMRel, ABSS_FT<opstr, FGR32Opnd, AFGR64Opnd, itin>, FGR_32; def _D64 : StdMMR6Rel, ABSS_FT<opstr, FGR32Opnd, FGR64Opnd, itin>, FGR_64 { - let DecoderNamespace = "Mips64"; + let DecoderNamespace = "MipsFP64"; } } @@ -215,14 +225,25 @@ class SWXC1_FT<string opstr, RegisterOperand DRC, class BC1F_FT<string opstr, DAGOperand opnd, InstrItinClass Itin, - SDPatternOperator Op = null_frag, bit DelaySlot = 1> : + SDPatternOperator Op = null_frag> : InstSE<(outs), (ins FCCRegsOpnd:$fcc, opnd:$offset), !strconcat(opstr, "\t$fcc, $offset"), [(MipsFPBrcond Op, FCCRegsOpnd:$fcc, bb:$offset)], Itin, FrmFI, opstr>, HARDFLOAT { let isBranch = 1; let isTerminator = 1; - let hasDelaySlot = DelaySlot; + let hasDelaySlot = 1; + let Defs = [AT]; + let hasFCCRegOperand = 1; +} + +class BC1XL_FT<string opstr, DAGOperand opnd, InstrItinClass Itin> : + InstSE<(outs), (ins FCCRegsOpnd:$fcc, opnd:$offset), + !strconcat(opstr, "\t$fcc, $offset"), [], Itin, + FrmFI, opstr>, HARDFLOAT { + let isBranch = 1; + let isTerminator = 1; + let hasDelaySlot = 1; let Defs = [AT]; let hasFCCRegOperand = 1; } @@ -331,7 +342,7 @@ let AdditionalPredicates = [NotInMicroMips] in { defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6; defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, FGR_32; -let DecoderNamespace = "Mips64" in +let DecoderNamespace = "MipsFP64" in defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, FGR_64; } @@ -358,14 +369,26 @@ defm CVT_W : ROUND_M<"cvt.w.d", II_CVT>, ABSS_FM<0x24, 17>; let AdditionalPredicates = [NotInMicroMips] in { def RECIP_S : MMRel, ABSS_FT<"recip.s", FGR32Opnd, FGR32Opnd, II_RECIP_S>, ABSS_FM<0b010101, 0x10>, INSN_MIPS4_32R2; - def RECIP_D : MMRel, ABSS_FT<"recip.d", FGR64Opnd, FGR64Opnd, II_RECIP_D>, - ABSS_FM<0b010101, 0x11>, INSN_MIPS4_32R2; + def RECIP_D32 : MMRel, ABSS_FT<"recip.d", AFGR64Opnd, AFGR64Opnd, II_RECIP_D>, + ABSS_FM<0b010101, 0x11>, INSN_MIPS4_32R2, FGR_32 { + let BaseOpcode = "RECIP_D32"; + } + let DecoderNamespace = "MipsFP64" in + def RECIP_D64 : MMRel, ABSS_FT<"recip.d", FGR64Opnd, FGR64Opnd, + II_RECIP_D>, ABSS_FM<0b010101, 0x11>, + INSN_MIPS4_32R2, FGR_64; def RSQRT_S : MMRel, ABSS_FT<"rsqrt.s", FGR32Opnd, FGR32Opnd, II_RSQRT_S>, ABSS_FM<0b010110, 0x10>, INSN_MIPS4_32R2; - def RSQRT_D : MMRel, ABSS_FT<"rsqrt.d", FGR64Opnd, FGR64Opnd, II_RSQRT_D>, - ABSS_FM<0b010110, 0x11>, INSN_MIPS4_32R2; + def RSQRT_D32 : MMRel, ABSS_FT<"rsqrt.d", AFGR64Opnd, AFGR64Opnd, II_RSQRT_D>, + ABSS_FM<0b010110, 0x11>, INSN_MIPS4_32R2, FGR_32 { + let BaseOpcode = "RSQRT_D32"; + } + let DecoderNamespace = "MipsFP64" in + def RSQRT_D64 : MMRel, ABSS_FT<"rsqrt.d", FGR64Opnd, FGR64Opnd, + II_RSQRT_D>, ABSS_FM<0b010110, 0x11>, + INSN_MIPS4_32R2, FGR_64; } -let DecoderNamespace = "Mips64" in { +let DecoderNamespace = "MipsFP64" in { let AdditionalPredicates = [NotInMicroMips] in { def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, II_ROUND>, ABSS_FM<0x8, 16>, FGR_64; @@ -402,7 +425,7 @@ def CVT_D32_W : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, II_CVT>, def CVT_D32_S : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd,
II_CVT>, ABSS_FM<0x21, 16>, FGR_32; -let DecoderNamespace = "Mips64" in { +let DecoderNamespace = "MipsFP64" in { def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, II_CVT>, ABSS_FM<0x20, 17>, FGR_64; let AdditionalPredicates = [NotInMicroMips] in{ @@ -425,11 +448,14 @@ let isPseudo = 1, isCodeGenOnly = 1 in { def PseudoCVT_D64_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, II_CVT>; } -def FABS_S : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>, - ABSS_FM<0x5, 16>; +let AdditionalPredicates = [NotInMicroMips] in { + def FABS_S : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>, + ABSS_FM<0x5, 16>; + defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>; +} + def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>, ABSS_FM<0x7, 16>; -defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>; defm FNEG : ABSS_M<"neg.d", II_NEG, fneg>, ABSS_FM<0x7, 17>; def FSQRT_S : MMRel, StdMMR6Rel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, @@ -450,13 +476,13 @@ def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1, bitconvert>, MFC1_FM<0>; def MFC1_D64 : MFC1_FT<"mfc1", GPR32Opnd, FGR64Opnd, II_MFC1>, MFC1_FM<0>, FGR_64 { - let DecoderNamespace = "Mips64"; + let DecoderNamespace = "MipsFP64"; } def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, bitconvert>, MFC1_FM<4>; def MTC1_D64 : MTC1_FT<"mtc1", FGR64Opnd, GPR32Opnd, II_MTC1>, MFC1_FM<4>, FGR_64 { - let DecoderNamespace = "Mips64"; + let DecoderNamespace = "MipsFP64"; } let AdditionalPredicates = [NotInMicroMips] in { @@ -464,7 +490,7 @@ let AdditionalPredicates = [NotInMicroMips] in { MFC1_FM<3>, ISA_MIPS32R2, FGR_32; def MFHC1_D64 : MFC1_FT<"mfhc1", GPR32Opnd, FGR64Opnd, II_MFHC1>, MFC1_FM<3>, ISA_MIPS32R2, FGR_64 { - let DecoderNamespace = "Mips64"; + let DecoderNamespace = "MipsFP64"; } } let AdditionalPredicates = [NotInMicroMips] in { @@ -472,7 +498,7 @@ let AdditionalPredicates = [NotInMicroMips] in { MFC1_FM<7>, ISA_MIPS32R2, FGR_32; def MTHC1_D64 : MTC1_64_FT<"mthc1", FGR64Opnd, GPR32Opnd, II_MTHC1>, MFC1_FM<7>, ISA_MIPS32R2, FGR_64 { - let DecoderNamespace = "Mips64"; + let DecoderNamespace = "MipsFP64"; } } let AdditionalPredicates = [NotInMicroMips] in { @@ -488,7 +514,7 @@ def FMOV_D32 : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>, ABSS_FM<0x6, 17>, FGR_32; def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, II_MOV_D>, ABSS_FM<0x6, 17>, FGR_64 { - let DecoderNamespace = "Mips64"; + let DecoderNamespace = "MipsFP64"; } /// Floating Point Memory Instructions @@ -499,7 +525,7 @@ let AdditionalPredicates = [NotInMicroMips] in { LW_FM<0x39>; } -let DecoderNamespace = "Mips64", AdditionalPredicates = [NotInMicroMips] in { +let DecoderNamespace = "MipsFP64", AdditionalPredicates = [NotInMicroMips] in { def LDC164 : StdMMR6Rel, LW_FT<"ldc1", FGR64Opnd, mem_simm16, II_LDC1, load>, LW_FM<0x35>, ISA_MIPS2, FGR_64 { let BaseOpcode = "LDC164"; @@ -534,7 +560,7 @@ let AdditionalPredicates = [NotInMicroMips, IsNotNaCl] in { INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; } -let DecoderNamespace="Mips64" in { +let DecoderNamespace="MipsFP64" in { def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>, @@ -542,6 +568,7 @@ let DecoderNamespace="Mips64" in { } // Load/store doubleword indexed unaligned. +// FIXME: This instruction should not be defined for FGR_32. 
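Editor's note on the recurring "Mips64" to "MipsFP64" DecoderNamespace renames in this file: each namespace becomes its own TableGen-generated decoder table, so moving the 64-bit-FPU-only encodings out of the generic Mips64 table keeps them from being tried against 32-bit-FPU (FGR_32) configurations. A rough stand-alone C++ sketch of the table-selection idea; the table names are illustrative, not the generated identifiers.

#include <cstdio>
// The disassembler walks one table per active namespace; splitting FP64
// encodings into their own table makes that choice a subtarget decision.
const char *pickDecoderTable(bool IsMicroMips, bool IsFP64) {
  if (IsMicroMips)
    return IsFP64 ? "DecoderTableMicroMipsFP6432" : "DecoderTableMicroMips32";
  return IsFP64 ? "DecoderTableMipsFP6432" : "DecoderTableMips32";
}
int main() {
  std::printf("%s\n", pickDecoderTable(false, true));
}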
let AdditionalPredicates = [IsNotNaCl] in { def LUXC1 : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>, INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_32; @@ -549,7 +576,7 @@ let AdditionalPredicates = [IsNotNaCl] in { INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_32; } -let DecoderNamespace="Mips64" in { +let DecoderNamespace="MipsFP64" in { def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>, INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_64; def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>, @@ -594,7 +621,7 @@ let AdditionalPredicates = [NoNaNsFPMath, HasMadd4] in { MADDS_FM<7, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; } -let DecoderNamespace = "Mips64" in { +let DecoderNamespace = "MipsFP64" in { def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>, MADDS_FM<4, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64, MADD4; def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>, @@ -602,7 +629,7 @@ let DecoderNamespace = "Mips64" in { } let AdditionalPredicates = [NoNaNsFPMath, HasMadd4], - DecoderNamespace = "Mips64" in { + DecoderNamespace = "MipsFP64" in { def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>, MADDS_FM<6, 1>, INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>, @@ -617,17 +644,17 @@ let AdditionalPredicates = [NoNaNsFPMath, HasMadd4], def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; -def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, II_BC1F, MIPS_BRANCH_F>, - BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6; -def BC1FL : MMRel, BC1F_FT<"bc1fl", brtarget, II_BC1FL, MIPS_BRANCH_F, 0>, - BC1F_FM<1, 0>, ISA_MIPS2_NOT_32R6_64R6; -def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, II_BC1T, MIPS_BRANCH_T>, - BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6; -def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>, - BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6; +let AdditionalPredicates = [NotInMicroMips] in { + def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, II_BC1F, MIPS_BRANCH_F>, + BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6; + def BC1FL : MMRel, BC1XL_FT<"bc1fl", brtarget, II_BC1FL>, + BC1F_FM<1, 0>, ISA_MIPS2_NOT_32R6_64R6; + def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, II_BC1T, MIPS_BRANCH_T>, + BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6; + def BC1TL : MMRel, BC1XL_FT<"bc1tl", brtarget, II_BC1TL>, + BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6; /// Floating Point Compare -let AdditionalPredicates = [NotInMicroMips] in { def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>, ISA_MIPS1_NOT_32R6_64R6 { @@ -644,7 +671,7 @@ let AdditionalPredicates = [NotInMicroMips] in { bits<3> fcc = 0; } } -let DecoderNamespace = "Mips64" in +let DecoderNamespace = "MipsFP64" in def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, ISA_MIPS1_NOT_32R6_64R6, FGR_64 { // FIXME: This is required to work around the fact that these instructions diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 82dc442a53d9c..661ead4803bed 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -157,24 +157,23 @@ unsigned MipsInstrInfo::removeBranch(MachineBasicBlock &MBB, assert(!BytesRemoved && "code size not handled"); MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); - unsigned removed; - - // Skip all the debug instructions.
- while (I != REnd && I->isDebugValue()) - ++I; - - if (I == REnd) - return 0; - - MachineBasicBlock::iterator FirstBr = ++I.getReverse(); + unsigned removed = 0; // Up to 2 branches are removed. // Note that indirect branches are not removed. - for (removed = 0; I != REnd && removed < 2; ++I, ++removed) + while (I != REnd && removed < 2) { + // Skip past debug instructions. + if (I->isDebugValue()) { + ++I; + continue; + } if (!getAnalyzableBrOpc(I->getOpcode())) break; - - MBB.erase((--I).getReverse(), FirstBr); + // Remove the branch. + I->eraseFromParent(); + I = MBB.rbegin(); + ++removed; + } return removed; } @@ -218,7 +217,13 @@ MipsInstrInfo::BranchType MipsInstrInfo::analyzeBranch( unsigned SecondLastOpc = 0; MachineInstr *SecondLastInst = nullptr; - if (++I != REnd) { + // Skip past any debug instruction to see if the second last actual + // instruction is a branch. + ++I; + while (I != REnd && I->isDebugValue()) + ++I; + + if (I != REnd) { SecondLastInst = &*I; SecondLastOpc = getAnalyzableBrOpc(SecondLastInst->getOpcode()); @@ -618,3 +623,39 @@ bool MipsInstrInfo::verifyInstruction(const MachineInstr &MI, return true; } +std::pair<unsigned, unsigned> +MipsInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + return std::make_pair(TF, 0u); +} + +ArrayRef<std::pair<unsigned, const char*>> +MipsInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + using namespace MipsII; + + static const std::pair<unsigned, const char*> Flags[] = { + {MO_GOT, "mips-got"}, + {MO_GOT_CALL, "mips-got-call"}, + {MO_GPREL, "mips-gprel"}, + {MO_ABS_HI, "mips-abs-hi"}, + {MO_ABS_LO, "mips-abs-lo"}, + {MO_TLSGD, "mips-tlsgd"}, + {MO_TLSLDM, "mips-tlsldm"}, + {MO_DTPREL_HI, "mips-dtprel-hi"}, + {MO_DTPREL_LO, "mips-dtprel-lo"}, + {MO_GOTTPREL, "mips-gottprel"}, + {MO_TPREL_HI, "mips-tprel-hi"}, + {MO_TPREL_LO, "mips-tprel-lo"}, + {MO_GPOFF_HI, "mips-gpoff-hi"}, + {MO_GPOFF_LO, "mips-gpoff-lo"}, + {MO_GOT_DISP, "mips-got-disp"}, + {MO_GOT_PAGE, "mips-got-page"}, + {MO_GOT_OFST, "mips-got-ofst"}, + {MO_HIGHER, "mips-higher"}, + {MO_HIGHEST, "mips-highest"}, + {MO_GOT_HI16, "mips-got-hi16"}, + {MO_GOT_LO16, "mips-got-lo16"}, + {MO_CALL_HI16, "mips-call-hi16"}, + {MO_CALL_LO16, "mips-call-lo16"} + }; + return makeArrayRef(Flags); +} diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 0830b57cd0523..a5ed1be3bee56 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -152,6 +152,12 @@ class MipsInstrInfo : public MipsGenInstrInfo { bool verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; + std::pair<unsigned, unsigned> + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + + ArrayRef<std::pair<unsigned, const char*>> + getSerializableDirectMachineOperandTargetFlags() const override; + protected: bool isZeroImm(const MachineOperand &op) const; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 50515808376fb..c4c3eb760c57c 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -1222,6 +1222,20 @@ def immZExtRange2To64 : PatLeaf<(imm), [{ (N->getZExtValue() <= 64); }]>; +def ORiPred : PatLeaf<(imm), [{ + return isUInt<16>(N->getZExtValue()) && !isInt<16>(N->getSExtValue()); +}], LO16>; + +def LUiPred : PatLeaf<(imm), [{ + int64_t Val = N->getSExtValue(); + return !isInt<16>(Val) && isInt<32>(Val) && !(Val & 0xffff); +}]>; + +def LUiORiPred : PatLeaf<(imm), [{ + int64_t SVal = N->getSExtValue(); + return isInt<32>(SVal) && (SVal & 0xffff); +}]>; + // Mips Address Mode!
SDNode frameindex could possibly be a match // since load and store instructions from stack used it. def addr : @@ -1399,27 +1413,47 @@ class SW_FT3<string opstr, RegisterOperand RO, InstrItinClass Itin, class CBranch<string opstr, DAGOperand opnd, PatFrag cond_op, - RegisterOperand RO, bit DelaySlot = 1> : + RegisterOperand RO> : InstSE<(outs), (ins RO:$rs, RO:$rt, opnd:$offset), !strconcat(opstr, "\t$rs, $rt, $offset"), [(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], II_BCC, FrmI, opstr> { let isBranch = 1; let isTerminator = 1; - let hasDelaySlot = DelaySlot; + let hasDelaySlot = 1; + let Defs = [AT]; + bit isCTI = 1; +} + +class CBranchLikely<string opstr, DAGOperand opnd, RegisterOperand RO> : + InstSE<(outs), (ins RO:$rs, RO:$rt, opnd:$offset), + !strconcat(opstr, "\t$rs, $rt, $offset"), [], II_BCC, FrmI, opstr> { + let isBranch = 1; + let isTerminator = 1; + let hasDelaySlot = 1; let Defs = [AT]; bit isCTI = 1; } class CBranchZero<string opstr, DAGOperand opnd, PatFrag cond_op, - RegisterOperand RO, bit DelaySlot = 1> : + RegisterOperand RO> : InstSE<(outs), (ins RO:$rs, opnd:$offset), !strconcat(opstr, "\t$rs, $offset"), [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], II_BCCZ, FrmI, opstr> { let isBranch = 1; let isTerminator = 1; - let hasDelaySlot = DelaySlot; + let hasDelaySlot = 1; + let Defs = [AT]; + bit isCTI = 1; +} + +class CBranchZeroLikely<string opstr, DAGOperand opnd, RegisterOperand RO> : + InstSE<(outs), (ins RO:$rs, opnd:$offset), + !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZ, FrmI, opstr> { + let isBranch = 1; + let isTerminator = 1; + let hasDelaySlot = 1; let Defs = [AT]; bit isCTI = 1; } @@ -1495,10 +1529,10 @@ let isCall=1, hasDelaySlot=1, isCTI=1, Defs = [RA] in { [], II_JALR, FrmR, opstr>; class BGEZAL_FT<string opstr, DAGOperand opnd, - RegisterOperand RO, bit DelaySlot = 1> : + RegisterOperand RO> : InstSE<(outs), (ins RO:$rs, opnd:$offset), !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZAL, FrmI, opstr> { - let hasDelaySlot = DelaySlot; + let hasDelaySlot = 1; } } @@ -2011,26 +2045,26 @@ def J : MMRel, JumpFJ<jmptarget, "j", br, bb, "j">, FJ<2>, AdditionalRequires<[RelocNotPIC]>, IsBranch; def JR : MMRel, IndirectBranch<"jr", GPR32Opnd>, MTLO_FM<8>, ISA_MIPS1_NOT_32R6_64R6; def BEQ : MMRel, CBranch<"beq", brtarget, seteq, GPR32Opnd>, BEQ_FM<4>; -def BEQL : MMRel, CBranch<"beql", brtarget, seteq, GPR32Opnd, 0>, +def BEQL : MMRel, CBranchLikely<"beql", brtarget, GPR32Opnd>, BEQ_FM<20>, ISA_MIPS2_NOT_32R6_64R6; def BNE : MMRel, CBranch<"bne", brtarget, setne, GPR32Opnd>, BEQ_FM<5>; -def BNEL : MMRel, CBranch<"bnel", brtarget, setne, GPR32Opnd, 0>, +def BNEL : MMRel, CBranchLikely<"bnel", brtarget, GPR32Opnd>, BEQ_FM<21>, ISA_MIPS2_NOT_32R6_64R6; def BGEZ : MMRel, CBranchZero<"bgez", brtarget, setge, GPR32Opnd>, BGEZ_FM<1, 1>; -def BGEZL : MMRel, CBranchZero<"bgezl", brtarget, setge, GPR32Opnd, 0>, +def BGEZL : MMRel, CBranchZeroLikely<"bgezl", brtarget, GPR32Opnd>, BGEZ_FM<1, 3>, ISA_MIPS2_NOT_32R6_64R6; def BGTZ : MMRel, CBranchZero<"bgtz", brtarget, setgt, GPR32Opnd>, BGEZ_FM<7, 0>; -def BGTZL : MMRel, CBranchZero<"bgtzl", brtarget, setgt, GPR32Opnd, 0>, +def BGTZL : MMRel, CBranchZeroLikely<"bgtzl", brtarget, GPR32Opnd>, BGEZ_FM<23, 0>, ISA_MIPS2_NOT_32R6_64R6; def BLEZ : MMRel, CBranchZero<"blez", brtarget, setle, GPR32Opnd>, BGEZ_FM<6, 0>; -def BLEZL : MMRel, CBranchZero<"blezl", brtarget, setle, GPR32Opnd, 0>, +def BLEZL : MMRel, CBranchZeroLikely<"blezl", brtarget, GPR32Opnd>, BGEZ_FM<22, 0>, ISA_MIPS2_NOT_32R6_64R6; def BLTZ : MMRel, CBranchZero<"bltz", brtarget, setlt, GPR32Opnd>, BGEZ_FM<1, 0>; -def BLTZL : MMRel, CBranchZero<"bltzl", brtarget, setlt, GPR32Opnd, 0>, +def BLTZL : MMRel, CBranchZeroLikely<"bltzl", brtarget, GPR32Opnd>, BGEZ_FM<1, 2>, ISA_MIPS2_NOT_32R6_64R6; def B : UncondBranch<BEQ, brtarget>; @@ -2044,11 +2078,11 @@ def JALX : MMRel, JumpLink<"jalx", calltarget>, FJ<0x1D>, ISA_MIPS32_NOT_32R6_64R6; def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>,
BGEZAL_FM<0x11>, ISA_MIPS1_NOT_32R6_64R6; -def BGEZALL : MMRel, BGEZAL_FT<"bgezall", brtarget, GPR32Opnd, 0>, +def BGEZALL : MMRel, BGEZAL_FT<"bgezall", brtarget, GPR32Opnd>, BGEZAL_FM<0x13>, ISA_MIPS2_NOT_32R6_64R6; def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>, ISA_MIPS1_NOT_32R6_64R6; -def BLTZALL : MMRel, BGEZAL_FT<"bltzall", brtarget, GPR32Opnd, 0>, +def BLTZALL : MMRel, BGEZAL_FT<"bltzall", brtarget, GPR32Opnd>, BGEZAL_FM<0x12>, ISA_MIPS2_NOT_32R6_64R6; def BAL_BR : BAL_BR_Pseudo<BGEZAL, brtarget>; @@ -2696,15 +2730,20 @@ multiclass MaterializeImms<ValueType VT, Register ZEROReg, Instruction ADDiuOp, Instruction LUiOp, Instruction ORiOp> { -// Small immediates -def : MipsPat<(VT immSExt16:$imm), (ADDiuOp ZEROReg, imm:$imm)>; -def : MipsPat<(VT immZExt16:$imm), (ORiOp ZEROReg, imm:$imm)>; +// Constant synthesis previously relied on the ordering of the patterns below. +// By making the predicates they use non-overlapping, the patterns were +// reordered so that the effect of the newly introduced predicates can be +// observed. + +// Arbitrary immediates +def : MipsPat<(VT LUiORiPred:$imm), (ORiOp (LUiOp (HI16 imm:$imm)), (LO16 imm:$imm))>; // Bits 32-16 set, sign/zero extended. -def : MipsPat<(VT immSExt32Low16Zero:$imm), (LUiOp (HI16 imm:$imm))>; +def : MipsPat<(VT LUiPred:$imm), (LUiOp (HI16 imm:$imm))>; -// Arbitrary immediates -def : MipsPat<(VT immSExt32:$imm), (ORiOp (LUiOp (HI16 imm:$imm)), (LO16 imm:$imm))>; +// Small immediates +def : MipsPat<(VT ORiPred:$imm), (ORiOp ZEROReg, imm:$imm)>; +def : MipsPat<(VT immSExt16:$imm), (ADDiuOp ZEROReg, imm:$imm)>; } let AdditionalPredicates = [NotInMicroMips] in diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 4cf6235d3e020..9c64a0ecbb152 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -58,7 +58,7 @@ MipsRegisterInfo::getPointerRegClass(const MachineFunction &MF, : &Mips::GPRMM16RegClass; case MipsPtrClass::StackPointer: return ABI.ArePtrs64bit() ? &Mips::SP64RegClass : &Mips::SP32RegClass; - case MipsPtrClass::GlobalPointer: + case MipsPtrClass::GlobalPointer: return ABI.ArePtrs64bit() ? &Mips::GP64RegClass : &Mips::GP32RegClass; } diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 4c7e25b4b61af..0b19b18449e08 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -424,7 +424,6 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - MachineLocation DstML, SrcML; // Adjust stack. TII.adjustStackPtr(SP, -StackSize, MBB, MBBI); diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 9439c51a3ad46..b1311fbd90e1b 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -226,6 +226,8 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::SW; else if (Mips::HI64RegClass.hasSubClassEq(RC)) Opc = Mips::SD; + else if (Mips::DSPRRegClass.hasSubClassEq(RC)) + Opc = Mips::SWDSP; // Hi, Lo are normally caller save but they are callee save // for interrupt handling.
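Editor's note: a stand-alone C++ model of the three new immediate predicates in the MipsInstrInfo.td hunk above (ORiPred, LUiPred, LUiORiPred) together with the existing immSExt16 case. The enum and the function are illustrative assumptions, not LLVM code; they only demonstrate that the four predicates now partition the 32-bit constants, so the MaterializeImms patterns no longer depend on their ordering.

#include <cassert>
#include <cstdint>
#include <cstdio>

enum class MatKind { ADDiu, ORi, LUi, LUiORi };

// Mirrors the PatLeaf bodies: ADDiu covers int16, ORi covers uint16 values
// that are not int16, LUi covers 32-bit values with a clear low half, and
// LUi+ORi covers every remaining 32-bit constant.
MatKind classifyImm32(int64_t V) {
  assert(V >= INT32_MIN && V <= INT32_MAX && "32-bit constants only");
  if (V >= -32768 && V <= 32767)
    return MatKind::ADDiu;   // immSExt16
  if (V >= 0 && V <= 0xffff)
    return MatKind::ORi;     // ORiPred: uint16 but not int16
  if ((V & 0xffff) == 0)
    return MatKind::LUi;     // LUiPred: low half clear
  return MatKind::LUiORi;    // LUiORiPred: everything else
}

int main() {
  std::printf("%d %d %d %d\n", (int)classifyImm32(42),
              (int)classifyImm32(0x9000), (int)classifyImm32(0x70000),
              (int)classifyImm32(0x12345678)); // prints 0 1 2 3
}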
@@ -302,6 +304,8 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::LW; else if (Mips::LO64RegClass.hasSubClassEq(RC)) Opc = Mips::LD; + else if (Mips::DSPRRegClass.hasSubClassEq(RC)) + Opc = Mips::LWDSP; assert(Opc && "Register class not handled!"); diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 7d9f99ce071e8..b295c16ea81cb 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -77,6 +77,7 @@ class MipsTargetStreamer : public MCTargetStreamer { virtual void emitDirectiveSetMips64R5(); virtual void emitDirectiveSetMips64R6(); virtual void emitDirectiveSetDsp(); + virtual void emitDirectiveSetDspr2(); virtual void emitDirectiveSetNoDsp(); virtual void emitDirectiveSetPop(); virtual void emitDirectiveSetPush(); @@ -244,6 +245,7 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer { void emitDirectiveSetMips64R5() override; void emitDirectiveSetMips64R6() override; void emitDirectiveSetDsp() override; + void emitDirectiveSetDspr2() override; void emitDirectiveSetNoDsp() override; void emitDirectiveSetPop() override; void emitDirectiveSetPush() override; diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 2f389860d142a..a7e58fa9738d9 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -496,8 +496,318 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) { SelectCode(N); } +// Each instruction has four addressing variants. The WMMA_VARIANTS() macro +// below constructs an array indexed by WmmaVariant, which getWmmaLdVariant() +// uses to look up the intrinsic ID of a particular variant. +enum WmmaVariant { + WMMA_VARIANT_ARI64, + WMMA_VARIANT_ARI64_STRIDE, + WMMA_VARIANT_AVAR, + WMMA_VARIANT_AVAR_STRIDE, +}; + +// clang-format off +#define WMMA_VARIANTS(base) \ + {{ base##_ari64, base##_ari64_stride, base##_avar, base##_avar_stride }} +// clang-format on + +static unsigned getWmmaLdVariant(WmmaVariant Variant, bool Stride, + const std::array<unsigned, 4> Variants) { + if (Stride) { + if (Variant == WMMA_VARIANT_ARI64) + Variant = WMMA_VARIANT_ARI64_STRIDE; + else if (Variant == WMMA_VARIANT_AVAR) + Variant = WMMA_VARIANT_AVAR_STRIDE; + } + return Variants[Variant]; +} + +static Optional<unsigned> +getWmmaLdStOpcode(unsigned IntrinsicID, + WmmaVariant Variant = WMMA_VARIANT_ARI64) { + switch (IntrinsicID) { + default: + return None; + // + // WMMA_LOAD_A f16 + // + case Intrinsic::nvvm_wmma_load_a_f16_col: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col)); + case Intrinsic::nvvm_wmma_load_a_f16_row: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row)); + case Intrinsic::nvvm_wmma_load_a_f16_col_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col)); + case Intrinsic::nvvm_wmma_load_a_f16_row_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row)); + case Intrinsic::nvvm_wmma_load_a_f16_col_shared: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col_shared)); + case Intrinsic::nvvm_wmma_load_a_f16_row_shared: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row_shared)); + case Intrinsic::nvvm_wmma_load_a_f16_col_shared_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col_shared)); + case
Intrinsic::nvvm_wmma_load_a_f16_row_shared_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row_shared)); + case Intrinsic::nvvm_wmma_load_a_f16_col_global: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col_global)); + case Intrinsic::nvvm_wmma_load_a_f16_row_global: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row_global)); + case Intrinsic::nvvm_wmma_load_a_f16_col_global_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_col_global)); + case Intrinsic::nvvm_wmma_load_a_f16_row_global_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_A_row_global)); + + // + // WMMA_LOAD_B f16 + // + case Intrinsic::nvvm_wmma_load_b_f16_col: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col)); + case Intrinsic::nvvm_wmma_load_b_f16_row: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row)); + case Intrinsic::nvvm_wmma_load_b_f16_col_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col)); + case Intrinsic::nvvm_wmma_load_b_f16_row_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row)); + case Intrinsic::nvvm_wmma_load_b_f16_col_shared: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col_shared)); + case Intrinsic::nvvm_wmma_load_b_f16_row_shared: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row_shared)); + case Intrinsic::nvvm_wmma_load_b_f16_col_shared_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col_shared)); + case Intrinsic::nvvm_wmma_load_b_f16_row_shared_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row_shared)); + case Intrinsic::nvvm_wmma_load_b_f16_col_global: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col_global)); + case Intrinsic::nvvm_wmma_load_b_f16_row_global: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row_global)); + case Intrinsic::nvvm_wmma_load_b_f16_col_global_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_col_global)); + case Intrinsic::nvvm_wmma_load_b_f16_row_global_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_B_row_global)); + + // + // WMMA_LOAD_C f16 + // + case Intrinsic::nvvm_wmma_load_c_f16_col: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col)); + case Intrinsic::nvvm_wmma_load_c_f16_row: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row)); + case Intrinsic::nvvm_wmma_load_c_f16_col_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col)); + case Intrinsic::nvvm_wmma_load_c_f16_row_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row)); + case Intrinsic::nvvm_wmma_load_c_f16_col_shared: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col_shared)); + case Intrinsic::nvvm_wmma_load_c_f16_row_shared: + return 
getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row_shared)); + case Intrinsic::nvvm_wmma_load_c_f16_col_shared_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col_shared)); + case Intrinsic::nvvm_wmma_load_c_f16_row_shared_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row_shared)); + case Intrinsic::nvvm_wmma_load_c_f16_col_global: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col_global)); + case Intrinsic::nvvm_wmma_load_c_f16_row_global: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row_global)); + case Intrinsic::nvvm_wmma_load_c_f16_col_global_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_col_global)); + case Intrinsic::nvvm_wmma_load_c_f16_row_global_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f16_row_global)); + + // + // WMMA_LOAD_C f32 + // + case Intrinsic::nvvm_wmma_load_c_f32_col: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col)); + case Intrinsic::nvvm_wmma_load_c_f32_row: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row)); + case Intrinsic::nvvm_wmma_load_c_f32_col_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col)); + case Intrinsic::nvvm_wmma_load_c_f32_row_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row)); + case Intrinsic::nvvm_wmma_load_c_f32_col_shared: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col_shared)); + case Intrinsic::nvvm_wmma_load_c_f32_row_shared: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row_shared)); + case Intrinsic::nvvm_wmma_load_c_f32_col_shared_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col_shared)); + case Intrinsic::nvvm_wmma_load_c_f32_row_shared_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row_shared)); + case Intrinsic::nvvm_wmma_load_c_f32_col_global: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col_global)); + case Intrinsic::nvvm_wmma_load_c_f32_row_global: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row_global)); + case Intrinsic::nvvm_wmma_load_c_f32_col_global_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_col_global)); + case Intrinsic::nvvm_wmma_load_c_f32_row_global_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_LOAD_C_f32_row_global)); + + // + // WMMA_STORE_D f16 + // + case Intrinsic::nvvm_wmma_store_d_f16_col: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col)); + case Intrinsic::nvvm_wmma_store_d_f16_row: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row)); + case Intrinsic::nvvm_wmma_store_d_f16_col_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + 
WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col)); + case Intrinsic::nvvm_wmma_store_d_f16_row_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row)); + case Intrinsic::nvvm_wmma_store_d_f16_col_shared: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col_shared)); + case Intrinsic::nvvm_wmma_store_d_f16_row_shared: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row_shared)); + case Intrinsic::nvvm_wmma_store_d_f16_col_shared_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col_shared)); + case Intrinsic::nvvm_wmma_store_d_f16_row_shared_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row_shared)); + case Intrinsic::nvvm_wmma_store_d_f16_col_global: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col_global)); + case Intrinsic::nvvm_wmma_store_d_f16_row_global: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row_global)); + case Intrinsic::nvvm_wmma_store_d_f16_col_global_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_col_global)); + case Intrinsic::nvvm_wmma_store_d_f16_row_global_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f16_row_global)); + + // + // WMMA_STORE_D f32 + // + case Intrinsic::nvvm_wmma_store_d_f32_col: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col)); + case Intrinsic::nvvm_wmma_store_d_f32_row: + return getWmmaLdVariant(Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row)); + case Intrinsic::nvvm_wmma_store_d_f32_col_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col)); + case Intrinsic::nvvm_wmma_store_d_f32_row_stride: + return getWmmaLdVariant(Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row)); + case Intrinsic::nvvm_wmma_store_d_f32_col_shared: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col_shared)); + case Intrinsic::nvvm_wmma_store_d_f32_row_shared: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row_shared)); + case Intrinsic::nvvm_wmma_store_d_f32_col_shared_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col_shared)); + case Intrinsic::nvvm_wmma_store_d_f32_row_shared_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row_shared)); + case Intrinsic::nvvm_wmma_store_d_f32_col_global: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col_global)); + case Intrinsic::nvvm_wmma_store_d_f32_row_global: + return getWmmaLdVariant( + Variant, /*Stride=*/false, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row_global)); + case Intrinsic::nvvm_wmma_store_d_f32_col_global_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_col_global)); + case Intrinsic::nvvm_wmma_store_d_f32_row_global_stride: + return getWmmaLdVariant( + Variant, /*Stride=*/true, + WMMA_VARIANTS(NVPTX::INT_WMMA_STORE_D_f32_row_global)); + } 
+} +#undef WMMA_VARIANTS + bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) { unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + if (getWmmaLdStOpcode(IID)) + return tryWMMA_LDST(N); + switch (IID) { default: return false; @@ -719,6 +1029,39 @@ bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) { case Intrinsic::nvvm_match_all_sync_i64p: SelectMatchAll(N); return true; + case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f16: + case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f16_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f32: + case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f32_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f16: + case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f16_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f32: + case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f32_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f16: + case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f16_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f32: + case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f32_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f16: + case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f16_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f32: + case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f32_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f16: + case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f16_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f32: + case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f32_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f16: + case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f16_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f32: + case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f32_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f16: + case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f16_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f32: + case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f32_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f16: + case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f16_satfinite: + case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f32: + case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f32_satfinite: + return tryWMMA_MMA(N); } } @@ -3725,3 +4068,172 @@ unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy, } } } + +bool NVPTXDAGToDAGISel::tryWMMA_LDST(SDNode *N) { + SDValue Chain = N->getOperand(0); + unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + SDValue Op1 = N->getOperand(2); + SDValue Addr, Offset, Base; + Optional<unsigned> Opcode; + SDLoc DL(N); + MemSDNode *MemSD = cast<MemSDNode>(N); + WmmaVariant Variant; + SmallVector<SDValue, 16> Ops; + bool isStore = N->getNumValues() == 1; // Store ops only return a chain. + + if (SelectDirectAddr(Op1, Addr)) { + Variant = WMMA_VARIANT_AVAR; + Ops.push_back(Addr); + } else if (SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) || + SelectADDRri64(Op1.getNode(), Op1, Base, Offset)) { + Variant = WMMA_VARIANT_ARI64; + Ops.push_back(Base); + Ops.push_back(Offset); + } else { + Variant = WMMA_VARIANT_AVAR; + Ops.push_back(Op1); + } + unsigned NumOps = N->getNumOperands(); + // Pass through the rest of the operands to the machine node.
@@ -3725,3 +4068,172 @@ unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
     }
   }
 }
+
+bool NVPTXDAGToDAGISel::tryWMMA_LDST(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
+  unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+  SDValue Op1 = N->getOperand(2);
+  SDValue Addr, Offset, Base;
+  Optional<unsigned> Opcode;
+  SDLoc DL(N);
+  MemSDNode *MemSD = cast<MemSDNode>(N);
+  WmmaVariant Variant;
+  SmallVector<SDValue, 16> Ops;
+  bool isStore = N->getNumValues() == 1; // Store ops only return a chain.
+
+  if (SelectDirectAddr(Op1, Addr)) {
+    Variant = WMMA_VARIANT_AVAR;
+    Ops.push_back(Addr);
+  } else if (SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) ||
+             SelectADDRri64(Op1.getNode(), Op1, Base, Offset)) {
+    Variant = WMMA_VARIANT_ARI64;
+    Ops.push_back(Base);
+    Ops.push_back(Offset);
+  } else {
+    Variant = WMMA_VARIANT_AVAR;
+    Ops.push_back(Op1);
+  }
+  unsigned NumOps = N->getNumOperands();
+  // Pass through the rest of the operands to the machine node.
+  for (unsigned i = 3; i < NumOps; ++i)
+    Ops.push_back(N->getOperand(i));
+  Ops.push_back(Chain);
+
+  Opcode = getWmmaLdStOpcode(IID, Variant);
+  if (!Opcode) {
+    llvm::errs() << "tryWMMALD - no Opcode.\n";
+    return false;
+  }
+
+  EVT MemVT = MemSD->getMemoryVT();
+  assert(MemVT.isVector() && "Expected vector return type.");
+
+  SDNode *MN;
+  if (isStore) {
+    MN = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
+  } else {
+    SmallVector<EVT, 9> InstVTs(MemVT.getVectorNumElements(),
+                                MemSD->getValueType(0));
+    InstVTs.push_back(MVT::Other);
+    MN = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTs, Ops);
+  }
+
+  ReplaceNode(N, MN);
+  return true;
+}
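
tryWMMA_LDST tells loads and stores apart purely by the node's result list: a store produces only a chain (hence the N->getNumValues() == 1 test), while a load produces one scalar result per element of the memory type plus a chain. That is why the load path replicates the node's scalar result type getVectorNumElements() times before appending MVT::Other. For an f32 c-fragment load, whose memVT is v8f32 (see getTgtMemIntrinsic below), the machine node's value list works out as in this sketch:

    // Value types for the machine node selected from an f32 c-fragment load
    // (memVT = v8f32): {f32 x 8, Other}. This mirrors the code above.
    SmallVector<EVT, 9> InstVTs(/*NumElements=*/8, MVT::f32);
    InstVTs.push_back(MVT::Other); // the chain result comes last
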
+
+bool NVPTXDAGToDAGISel::tryWMMA_MMA(SDNode *N) {
+  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+  SDLoc DL(N);
+  unsigned Opc;
+
+  switch (IID) {
+  default:
+    return false;
+  case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f16:
+    Opc = NVPTX::INT_WMMA_MMA_col_col_f16_f16;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f16_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_col_col_f16_f16_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f32:
+    Opc = NVPTX::INT_WMMA_MMA_col_col_f16_f32;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_col_f16_f32_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_col_col_f16_f32_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f16:
+    Opc = NVPTX::INT_WMMA_MMA_col_col_f32_f16;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f16_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_col_col_f32_f16_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f32:
+    Opc = NVPTX::INT_WMMA_MMA_col_col_f32_f32;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_col_f32_f32_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_col_col_f32_f32_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f16:
+    Opc = NVPTX::INT_WMMA_MMA_col_row_f16_f16;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f16_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_col_row_f16_f16_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f32:
+    Opc = NVPTX::INT_WMMA_MMA_col_row_f16_f32;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_row_f16_f32_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_col_row_f16_f32_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f16:
+    Opc = NVPTX::INT_WMMA_MMA_col_row_f32_f16;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f16_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_col_row_f32_f16_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f32:
+    Opc = NVPTX::INT_WMMA_MMA_col_row_f32_f32;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_col_row_f32_f32_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_col_row_f32_f32_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f16:
+    Opc = NVPTX::INT_WMMA_MMA_row_col_f16_f16;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f16_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_row_col_f16_f16_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f32:
+    Opc = NVPTX::INT_WMMA_MMA_row_col_f16_f32;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_col_f16_f32_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_row_col_f16_f32_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f16:
+    Opc = NVPTX::INT_WMMA_MMA_row_col_f32_f16;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f16_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_row_col_f32_f16_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f32:
+    Opc = NVPTX::INT_WMMA_MMA_row_col_f32_f32;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_col_f32_f32_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_row_col_f32_f32_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f16:
+    Opc = NVPTX::INT_WMMA_MMA_row_row_f16_f16;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f16_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_row_row_f16_f16_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f32:
+    Opc = NVPTX::INT_WMMA_MMA_row_row_f16_f32;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_row_f16_f32_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_row_row_f16_f32_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f16:
+    Opc = NVPTX::INT_WMMA_MMA_row_row_f32_f16;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f16_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_row_row_f32_f16_satfinite;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f32:
+    Opc = NVPTX::INT_WMMA_MMA_row_row_f32_f32;
+    break;
+  case Intrinsic::nvvm_wmma_mma_sync_row_row_f32_f32_satfinite:
+    Opc = NVPTX::INT_WMMA_MMA_row_row_f32_f32_satfinite;
+    break;
+  }
+
+  SmallVector<SDValue, 24> Ops;
+  // Pass through operands and return value types to the machine node.
+  for (unsigned i = 1; i < N->getNumOperands(); ++i)
+    Ops.push_back(N->getOperand(i));
+  SmallVector<EVT, 8> InstVTs(N->getNumValues(), N->getValueType(0));
+  SDNode *MN = CurDAG->getMachineNode(Opc, DL, InstVTs, Ops);
+  ReplaceNode(N, MN);
+  return true;
+}
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 3ce7843b72fa3..b23c27581a174 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -74,6 +74,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
   bool tryConstantFP16(SDNode *N);
   bool SelectSETP_F16X2(SDNode *N);
   bool tryEXTRACT_VECTOR_ELEMENT(SDNode *N);
+  bool tryWMMA_LDST(SDNode *N);
+  bool tryWMMA_MMA(SDNode *N);
 
   inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) {
     return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 150e67a833f12..7b9acb20b759c 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3321,6 +3321,132 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
   switch (Intrinsic) {
   default:
     return false;
+  case Intrinsic::nvvm_wmma_load_a_f16_col:
+  case Intrinsic::nvvm_wmma_load_a_f16_row:
+  case Intrinsic::nvvm_wmma_load_a_f16_col_stride:
+  case Intrinsic::nvvm_wmma_load_a_f16_row_stride:
+  case Intrinsic::nvvm_wmma_load_a_f16_col_shared:
+  case Intrinsic::nvvm_wmma_load_a_f16_row_shared:
+  case Intrinsic::nvvm_wmma_load_a_f16_col_shared_stride:
+  case Intrinsic::nvvm_wmma_load_a_f16_row_shared_stride:
+  case Intrinsic::nvvm_wmma_load_a_f16_col_global:
+  case Intrinsic::nvvm_wmma_load_a_f16_row_global:
+  case Intrinsic::nvvm_wmma_load_a_f16_col_global_stride:
+  case Intrinsic::nvvm_wmma_load_a_f16_row_global_stride:
+  case Intrinsic::nvvm_wmma_load_b_f16_col:
+  case Intrinsic::nvvm_wmma_load_b_f16_row:
+  case Intrinsic::nvvm_wmma_load_b_f16_col_stride:
+  case Intrinsic::nvvm_wmma_load_b_f16_row_stride:
+  case Intrinsic::nvvm_wmma_load_b_f16_col_shared:
+  case Intrinsic::nvvm_wmma_load_b_f16_row_shared:
+  case Intrinsic::nvvm_wmma_load_b_f16_col_shared_stride:
+  case Intrinsic::nvvm_wmma_load_b_f16_row_shared_stride:
+  case Intrinsic::nvvm_wmma_load_b_f16_col_global:
+  case Intrinsic::nvvm_wmma_load_b_f16_row_global:
+  case Intrinsic::nvvm_wmma_load_b_f16_col_global_stride:
+ case Intrinsic::nvvm_wmma_load_b_f16_row_global_stride: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::v8f16; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + + case Intrinsic::nvvm_wmma_load_c_f16_col: + case Intrinsic::nvvm_wmma_load_c_f16_row: + case Intrinsic::nvvm_wmma_load_c_f16_col_stride: + case Intrinsic::nvvm_wmma_load_c_f16_row_stride: + case Intrinsic::nvvm_wmma_load_c_f16_col_shared: + case Intrinsic::nvvm_wmma_load_c_f16_row_shared: + case Intrinsic::nvvm_wmma_load_c_f16_col_shared_stride: + case Intrinsic::nvvm_wmma_load_c_f16_row_shared_stride: + case Intrinsic::nvvm_wmma_load_c_f16_col_global: + case Intrinsic::nvvm_wmma_load_c_f16_row_global: + case Intrinsic::nvvm_wmma_load_c_f16_col_global_stride: + case Intrinsic::nvvm_wmma_load_c_f16_row_global_stride: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::v4f16; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + + case Intrinsic::nvvm_wmma_load_c_f32_col: + case Intrinsic::nvvm_wmma_load_c_f32_row: + case Intrinsic::nvvm_wmma_load_c_f32_col_stride: + case Intrinsic::nvvm_wmma_load_c_f32_row_stride: + case Intrinsic::nvvm_wmma_load_c_f32_col_shared: + case Intrinsic::nvvm_wmma_load_c_f32_row_shared: + case Intrinsic::nvvm_wmma_load_c_f32_col_shared_stride: + case Intrinsic::nvvm_wmma_load_c_f32_row_shared_stride: + case Intrinsic::nvvm_wmma_load_c_f32_col_global: + case Intrinsic::nvvm_wmma_load_c_f32_row_global: + case Intrinsic::nvvm_wmma_load_c_f32_col_global_stride: + case Intrinsic::nvvm_wmma_load_c_f32_row_global_stride: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::v8f32; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = false; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + + case Intrinsic::nvvm_wmma_store_d_f16_col: + case Intrinsic::nvvm_wmma_store_d_f16_row: + case Intrinsic::nvvm_wmma_store_d_f16_col_stride: + case Intrinsic::nvvm_wmma_store_d_f16_row_stride: + case Intrinsic::nvvm_wmma_store_d_f16_col_shared: + case Intrinsic::nvvm_wmma_store_d_f16_row_shared: + case Intrinsic::nvvm_wmma_store_d_f16_col_shared_stride: + case Intrinsic::nvvm_wmma_store_d_f16_row_shared_stride: + case Intrinsic::nvvm_wmma_store_d_f16_col_global: + case Intrinsic::nvvm_wmma_store_d_f16_row_global: + case Intrinsic::nvvm_wmma_store_d_f16_col_global_stride: + case Intrinsic::nvvm_wmma_store_d_f16_row_global_stride: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::v4f16; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = false; + Info.readMem = false; + Info.writeMem = true; + Info.align = 16; + return true; + } + + case Intrinsic::nvvm_wmma_store_d_f32_col: + case Intrinsic::nvvm_wmma_store_d_f32_row: + case Intrinsic::nvvm_wmma_store_d_f32_col_stride: + case Intrinsic::nvvm_wmma_store_d_f32_row_stride: + case Intrinsic::nvvm_wmma_store_d_f32_col_shared: + case Intrinsic::nvvm_wmma_store_d_f32_row_shared: + case Intrinsic::nvvm_wmma_store_d_f32_col_shared_stride: + case Intrinsic::nvvm_wmma_store_d_f32_row_shared_stride: + case Intrinsic::nvvm_wmma_store_d_f32_col_global: + case Intrinsic::nvvm_wmma_store_d_f32_row_global: + case Intrinsic::nvvm_wmma_store_d_f32_col_global_stride: + case Intrinsic::nvvm_wmma_store_d_f32_row_global_stride: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT 
= MVT::v8f32; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = false; + Info.readMem = false; + Info.writeMem = true; + Info.align = 16; + return true; + } case Intrinsic::nvvm_atomic_load_add_f32: case Intrinsic::nvvm_atomic_load_inc_32: diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 11ebaaa5407c8..f745b6f66353f 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -7368,3 +7368,208 @@ def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>; def INT_PTX_SREG_WARPSIZE : NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; + +// +// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] +// +class WMMA_LOAD_ALSTOS + : NVPTXInst, + Requires<[hasPTX60, hasSM70]>; + +multiclass WMMA_LOAD_ALSTO { + def _stride: WMMA_LOAD_ALSTOS; + def NAME: WMMA_LOAD_ALSTOS; +} + +multiclass WMMA_LOAD_ALST { + defm _avar: WMMA_LOAD_ALSTO; + defm _ari64: WMMA_LOAD_ALSTO; +} + +multiclass WMMA_LOAD_ALT { + defm _global: WMMA_LOAD_ALST; + defm _shared: WMMA_LOAD_ALST; + defm NAME: WMMA_LOAD_ALST; +} + +multiclass WMMA_LOAD_AT { + defm _row: WMMA_LOAD_ALT; + defm _col: WMMA_LOAD_ALT; +} + +defm INT_WMMA_LOAD_A: WMMA_LOAD_AT<"a", "f16", Float16x2Regs>; +defm INT_WMMA_LOAD_B: WMMA_LOAD_AT<"b", "f16", Float16x2Regs>; +defm INT_WMMA_LOAD_C_f16: WMMA_LOAD_AT<"c", "f16", Float16x2Regs>; +defm INT_WMMA_LOAD_C_f32: WMMA_LOAD_AT<"c", "f32", Float32Regs>; + +// +// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] +// +class WMMA_STORE_D_LSTOS + : NVPTXInst<(outs), + !if(!eq(Type,"f16"), + !if(WithStride, + !if(WithOffset, + (ins DstOp:$src, i32imm:$offset, + regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3, + Int32Regs:$ldm), + (ins DstOp:$src, + regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3, + Int32Regs:$ldm)), + !if(WithOffset, + (ins DstOp:$src, i32imm:$offset, + regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3), + (ins DstOp:$src, + regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3))), + !if(WithStride, + !if(WithOffset, + (ins DstOp:$src, i32imm:$offset, + regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3, + regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7, + Int32Regs:$ldm), + (ins DstOp:$src, + regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3, + regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7, + Int32Regs:$ldm)), + !if(WithOffset, + (ins DstOp:$src, i32imm:$offset, + regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3, + regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7), + (ins DstOp:$src, + regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3, + regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7)))), + "wmma.store.d.sync."#Layout#".m16n16k16"#Space#"." 
#Type# " \t" + #!if(WithOffset,"[$src+$offset], ", "[$src], ") + #!if(!eq(Type,"f16"), + "{{$r0, $r1, $r2, $r3}}", + "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}") + #!if(WithStride, ", $ldm", "") + #";", + []>, + Requires<[hasPTX60, hasSM70]>; + +multiclass WMMA_STORE_D_LSTO { + def _stride: WMMA_STORE_D_LSTOS; + def NAME: WMMA_STORE_D_LSTOS; +} + +multiclass WMMA_STORE_D_LST { + defm _avar: WMMA_STORE_D_LSTO; + defm _ari64: WMMA_STORE_D_LSTO; +} + +multiclass WMMA_STORE_D_LT { + defm _global: WMMA_STORE_D_LST; + defm _shared: WMMA_STORE_D_LST; + defm NAME: WMMA_STORE_D_LST; +} + +multiclass WMMA_STORE_D_T { + defm _row: WMMA_STORE_D_LT<"row", Type, regclass>; + defm _col: WMMA_STORE_D_LT<"col", Type, regclass>; +} + +defm INT_WMMA_STORE_D_f16: WMMA_STORE_D_T<"f16", Float16x2Regs>; +defm INT_WMMA_STORE_D_f32: WMMA_STORE_D_T<"f32", Float32Regs>; + +// WMMA.MMA +class WMMA_MMA_ABDCS + : NVPTXInst, + Requires<[hasPTX60, hasSM70]>; + +multiclass WMMA_MMA_ABDC { + def _satfinite: WMMA_MMA_ABDCS; + def NAME: WMMA_MMA_ABDCS; +} + +multiclass WMMA_MMA_ABD { + defm _f16: WMMA_MMA_ABDC; + defm _f32: WMMA_MMA_ABDC; +} + +multiclass WMMA_MMA_AB { + defm _f16: WMMA_MMA_ABD; + defm _f32: WMMA_MMA_ABD; +} + +multiclass WMMA_MMA_A { + defm _col: WMMA_MMA_AB; + defm _row: WMMA_MMA_AB; +} + +defm INT_WMMA_MMA_col: WMMA_MMA_A<"col">; +defm INT_WMMA_MMA_row: WMMA_MMA_A<"row">; + diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index f987892ba6758..322e8f4d9a2c3 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -63,6 +63,22 @@ class NVPTXTTIImpl : public BasicTTIImplBase { void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { + // Volatile loads/stores are only supported for shared and global address + // spaces, or for generic AS that maps to them. + if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC || + AddrSpace == llvm::ADDRESS_SPACE_GLOBAL || + AddrSpace == llvm::ADDRESS_SPACE_SHARED)) + return false; + + switch(I->getOpcode()){ + default: + return false; + case Instruction::Load: + case Instruction::Store: + return true; + } + } }; } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXVector.td b/lib/Target/NVPTX/NVPTXVector.td deleted file mode 100644 index e69bbba9f193d..0000000000000 --- a/lib/Target/NVPTX/NVPTXVector.td +++ /dev/null @@ -1,1479 +0,0 @@ -//===- NVPTXVector.td - NVPTX Vector Specific Instruction defs -*- tblgen-*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -//----------------------------------- -// Vector Specific -//----------------------------------- - -// -// All vector instructions derive from NVPTXVecInst -// - -class NVPTXVecInst pattern, - NVPTXInst sInst=NOP> - : NVPTXInst { - NVPTXInst scalarInst=sInst; -} - -let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in { -// Extract v2i16 -def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), - (ins V2I16Regs:$src, i8imm:$c), - "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int16Regs:$dst, (extractelt - (v2i16 V2I16Regs:$src), imm:$c))], - IMOV16rr>; - -// Extract v4i16 -def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), - (ins V4I16Regs:$src, i8imm:$c), - "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int16Regs:$dst, (extractelt - (v4i16 V4I16Regs:$src), imm:$c))], - IMOV16rr>; - -// Extract v2i8 -def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), - (ins V2I8Regs:$src, i8imm:$c), - "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int8Regs:$dst, (extractelt - (v2i8 V2I8Regs:$src), imm:$c))], - IMOV8rr>; - -// Extract v4i8 -def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), - (ins V4I8Regs:$src, i8imm:$c), - "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int8Regs:$dst, (extractelt - (v4i8 V4I8Regs:$src), imm:$c))], - IMOV8rr>; - -// Extract v2i32 -def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), - (ins V2I32Regs:$src, i8imm:$c), - "mov.u32 \t$dst, $src${c:vecelem};", - [(set Int32Regs:$dst, (extractelt - (v2i32 V2I32Regs:$src), imm:$c))], - IMOV32rr>; - -// Extract v2f32 -def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), - (ins V2F32Regs:$src, i8imm:$c), - "mov.f32 \t$dst, $src${c:vecelem};", - [(set Float32Regs:$dst, (extractelt - (v2f32 V2F32Regs:$src), imm:$c))], - FMOV32rr>; - -// Extract v2i64 -def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst), - (ins V2I64Regs:$src, i8imm:$c), - "mov.u64 \t$dst, $src${c:vecelem};", - [(set Int64Regs:$dst, (extractelt - (v2i64 V2I64Regs:$src), imm:$c))], - IMOV64rr>; - -// Extract v2f64 -def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst), - (ins V2F64Regs:$src, i8imm:$c), - "mov.f64 \t$dst, $src${c:vecelem};", - [(set Float64Regs:$dst, (extractelt - (v2f64 V2F64Regs:$src), imm:$c))], - FMOV64rr>; - -// Extract v4i32 -def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), - (ins V4I32Regs:$src, i8imm:$c), - "mov.u32 \t$dst, $src${c:vecelem};", - [(set Int32Regs:$dst, (extractelt - (v4i32 V4I32Regs:$src), imm:$c))], - IMOV32rr>; - -// Extract v4f32 -def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), - (ins V4F32Regs:$src, i8imm:$c), - "mov.f32 \t$dst, $src${c:vecelem};", - [(set Float32Regs:$dst, (extractelt - (v4f32 V4F32Regs:$src), imm:$c))], - FMOV32rr>; -} - -let isAsCheapAsAMove=1, VecInstType=isVecInsert.Value in { -// Insert v2i8 -def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst), - (ins V2I8Regs:$src, Int8Regs:$val, i8imm:$c), - "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" - "\n\tmov.u16 \t$dst${c:vecelem}, $val;", - [(set V2I8Regs:$dst, - (insertelt V2I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>; - -// Insert v4i8 -def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst), - (ins V4I8Regs:$src, Int8Regs:$val, i8imm:$c), - "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" - "\n\tmov.u16 \t$dst${c:vecelem}, $val;", - [(set V4I8Regs:$dst, - (insertelt V4I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>; - -// Insert v2i16 -def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst), - (ins V2I16Regs:$src, Int16Regs:$val, i8imm:$c), - 
"mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" - "\n\tmov.u16 \t$dst${c:vecelem}, $val;", - [(set V2I16Regs:$dst, - (insertelt V2I16Regs:$src, Int16Regs:$val, imm:$c))], - IMOV16rr>; - -// Insert v4i16 -def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst), - (ins V4I16Regs:$src, Int16Regs:$val, i8imm:$c), - "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" - "\n\tmov.u16 \t$dst${c:vecelem}, $val;", - [(set V4I16Regs:$dst, - (insertelt V4I16Regs:$src, Int16Regs:$val, imm:$c))], - IMOV16rr>; - -// Insert v2i32 -def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst), - (ins V2I32Regs:$src, Int32Regs:$val, i8imm:$c), - "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};" - "\n\tmov.u32 \t$dst${c:vecelem}, $val;", - [(set V2I32Regs:$dst, - (insertelt V2I32Regs:$src, Int32Regs:$val, imm:$c))], - IMOV32rr>; - -// Insert v2f32 -def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst), - (ins V2F32Regs:$src, Float32Regs:$val, i8imm:$c), - "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};" - "\n\tmov.f32 \t$dst${c:vecelem}, $val;", - [(set V2F32Regs:$dst, - (insertelt V2F32Regs:$src, Float32Regs:$val, imm:$c))], - FMOV32rr>; - -// Insert v2i64 -def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst), - (ins V2I64Regs:$src, Int64Regs:$val, i8imm:$c), - "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};" - "\n\tmov.u64 \t$dst${c:vecelem}, $val;", - [(set V2I64Regs:$dst, - (insertelt V2I64Regs:$src, Int64Regs:$val, imm:$c))], - IMOV64rr>; - -// Insert v2f64 -def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst), - (ins V2F64Regs:$src, Float64Regs:$val, i8imm:$c), - "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};" - "\n\tmov.f64 \t$dst${c:vecelem}, $val;", - [(set V2F64Regs:$dst, - (insertelt V2F64Regs:$src, Float64Regs:$val, imm:$c))], - FMOV64rr>; - -// Insert v4i32 -def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst), - (ins V4I32Regs:$src, Int32Regs:$val, i8imm:$c), - "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};" - "\n\tmov.u32 \t$dst${c:vecelem}, $val;", - [(set V4I32Regs:$dst, - (insertelt V4I32Regs:$src, Int32Regs:$val, imm:$c))], - IMOV32rr>; - -// Insert v4f32 -def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst), - (ins V4F32Regs:$src, Float32Regs:$val, i8imm:$c), - "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};" - "\n\tmov.f32 \t$dst${c:vecelem}, $val;", - [(set V4F32Regs:$dst, - (insertelt V4F32Regs:$src, Float32Regs:$val, imm:$c))], - FMOV32rr>; -} - -class BinOpAsmString { - string s = c; -} - -class V4AsmStr : BinOpAsmString< - !strconcat(!strconcat(!strconcat(!strconcat( - !strconcat(!strconcat(!strconcat( - opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"), - opcode), " \t${dst}_1, ${a}_1, ${b}_1;\n\t"), - opcode), " \t${dst}_2, ${a}_2, ${b}_2;\n\t"), - opcode), " \t${dst}_3, ${a}_3, ${b}_3;")>; - -class V2AsmStr : BinOpAsmString< - !strconcat(!strconcat(!strconcat( - opcode, " \t${dst}_0, ${a}_0, ${b}_0;\n\t"), - opcode), " \t${dst}_1, ${a}_1, ${b}_1;")>; - -class V4MADStr : BinOpAsmString< - !strconcat(!strconcat(!strconcat(!strconcat( - !strconcat(!strconcat(!strconcat( - opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"), - opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;\n\t"), - opcode), " \t${dst}_2, ${a}_2, ${b}_2, ${c}_2;\n\t"), - opcode), " \t${dst}_3, ${a}_3, ${b}_3, ${c}_3;")>; - -class V2MADStr : BinOpAsmString< - !strconcat(!strconcat(!strconcat( - opcode, " \t${dst}_0, ${a}_0, ${b}_0, ${c}_0;\n\t"), - opcode), " \t${dst}_1, ${a}_1, ${b}_1, ${c}_1;")>; - -class V4UnaryStr : BinOpAsmString< - !strconcat(!strconcat(!strconcat(!strconcat( - !strconcat(!strconcat(!strconcat( - opcode, " \t${dst}_0, 
${a}_0;\n\t"), - opcode), " \t${dst}_1, ${a}_1;\n\t"), - opcode), " \t${dst}_2, ${a}_2;\n\t"), - opcode), " \t${dst}_3, ${a}_3;")>; - -class V2UnaryStr : BinOpAsmString< - !strconcat(!strconcat(!strconcat( - opcode, " \t${dst}_0, ${a}_0;\n\t"), - opcode), " \t${dst}_1, ${a}_1;")>; - -class VecBinaryOp : - NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a, regclass:$b), - asmstr.s, - [(set regclass:$dst, (OpNode regclass:$a, regclass:$b))], - sInst>; - -class VecShiftOp : - NVPTXVecInst<(outs regclass1:$dst), (ins regclass1:$a, regclass2:$b), - asmstr.s, - [(set regclass1:$dst, (OpNode regclass1:$a, regclass2:$b))], - sInst>; - -class VecUnaryOp : - NVPTXVecInst<(outs regclass:$dst), (ins regclass:$a), - asmstr.s, - [(set regclass:$dst, (OpNode regclass:$a))], sInst>; - -multiclass IntBinVOp { - def V2I64 : VecBinaryOp, OpNode, V2I64Regs, - i64op>; - def V4I32 : VecBinaryOp, OpNode, V4I32Regs, - i32op>; - def V2I32 : VecBinaryOp, OpNode, V2I32Regs, - i32op>; - def V4I16 : VecBinaryOp, OpNode, V4I16Regs, - i16op>; - def V2I16 : VecBinaryOp, OpNode, V2I16Regs, - i16op>; - def V4I8 : VecBinaryOp, OpNode, V4I8Regs, - i8op>; - def V2I8 : VecBinaryOp, OpNode, V2I8Regs, - i8op>; -} - -multiclass FloatBinVOp { - def V2F64 : VecBinaryOp, OpNode, - V2F64Regs, f64>; - def V4F32_ftz : VecBinaryOp, OpNode, - V4F32Regs, f32_ftz>, Requires<[doF32FTZ]>; - def V2F32_ftz : VecBinaryOp, OpNode, - V2F32Regs, f32_ftz>, Requires<[doF32FTZ]>; - def V4F32 : VecBinaryOp, OpNode, - V4F32Regs, f32>; - def V2F32 : VecBinaryOp, OpNode, - V2F32Regs, f32>; -} - -multiclass IntUnaryVOp { - def V2I64 : VecUnaryOp, OpNode, - V2I64Regs, i64op>; - def V4I32 : VecUnaryOp, OpNode, - V4I32Regs, i32op>; - def V2I32 : VecUnaryOp, OpNode, - V2I32Regs, i32op>; - def V4I16 : VecUnaryOp, OpNode, - V4I16Regs, i16op>; - def V2I16 : VecUnaryOp, OpNode, - V2I16Regs, i16op>; - def V4I8 : VecUnaryOp, OpNode, - V4I8Regs, i8op>; - def V2I8 : VecUnaryOp, OpNode, - V2I8Regs, i8op>; -} - - -// Integer Arithmetic -let VecInstType=isVecOther.Value in { -defm VAdd : IntBinVOp<"add.s", add, ADDi64rr, ADDi32rr, ADDi16rr, ADDi8rr>; -defm VSub : IntBinVOp<"sub.s", sub, SUBi64rr, SUBi32rr, SUBi16rr, SUBi8rr>; - -def AddCCV4I32 : VecBinaryOp, addc, V4I32Regs, - ADDCCi32rr>; -def AddCCV2I32 : VecBinaryOp, addc, V2I32Regs, - ADDCCi32rr>; -def SubCCV4I32 : VecBinaryOp, subc, V4I32Regs, - SUBCCi32rr>; -def SubCCV2I32 : VecBinaryOp, subc, V2I32Regs, - SUBCCi32rr>; -def AddCCCV4I32 : VecBinaryOp, adde, V4I32Regs, - ADDCCCi32rr>; -def AddCCCV2I32 : VecBinaryOp, adde, V2I32Regs, - ADDCCCi32rr>; -def SubCCCV4I32 : VecBinaryOp, sube, V4I32Regs, - SUBCCCi32rr>; -def SubCCCV2I32 : VecBinaryOp, sube, V2I32Regs, - SUBCCCi32rr>; - -def ShiftLV2I64 : VecShiftOp, shl, V2I64Regs, V2I32Regs, - SHLi64rr>; -def ShiftLV2I32 : VecShiftOp, shl, V2I32Regs, V2I32Regs, - SHLi32rr>; -def ShiftLV4I32 : VecShiftOp, shl, V4I32Regs, V4I32Regs, - SHLi32rr>; -def ShiftLV2I16 : VecShiftOp, shl, V2I16Regs, V2I32Regs, - SHLi16rr>; -def ShiftLV4I16 : VecShiftOp, shl, V4I16Regs, V4I32Regs, - SHLi16rr>; -def ShiftLV2I8 : VecShiftOp, shl, V2I8Regs, V2I32Regs, - SHLi8rr>; -def ShiftLV4I8 : VecShiftOp, shl, V4I8Regs, V4I32Regs, - SHLi8rr>; -} - -// cvt to v*i32, helpers for shift -class CVTtoVeci32 : - NVPTXVecInst<(outs outclass:$d), (ins inclass:$s), asmstr, [], sInst>; - -class VecCVTStrHelper { - string s=!strconcat(op, !strconcat("\t", - !strconcat(dest, !strconcat(", ", !strconcat(src, ";"))))); -} - -class Vec2CVTStr { - string s=!strconcat(VecCVTStrHelper.s, - !strconcat("\n\t", 
VecCVTStrHelper.s)); -} - -class Vec4CVTStr { - string s=!strconcat(VecCVTStrHelper.s, - !strconcat("\n\t", - !strconcat(VecCVTStrHelper.s, - !strconcat("\n\t", - !strconcat(VecCVTStrHelper.s, - !strconcat("\n\t", VecCVTStrHelper.s)))))); -} - -let VecInstType=isVecOther.Value in { -def CVTv2i8tov2i32 : CVTtoVeci32.s, Zint_extendext8to32>; -def CVTv2i16tov2i32 : CVTtoVeci32.s, Zint_extendext16to32>; -def CVTv4i8tov4i32 : CVTtoVeci32.s, Zint_extendext8to32>; -def CVTv4i16tov4i32 : CVTtoVeci32.s, Zint_extendext16to32>; -def CVTv2i64tov2i32 : CVTtoVeci32.s, TRUNC_64to32>; -} - -def : Pat<(shl V2I16Regs:$src1, V2I16Regs:$src2), - (ShiftLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; -def : Pat<(shl V2I8Regs:$src1, V2I8Regs:$src2), - (ShiftLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; -def : Pat<(shl V2I64Regs:$src1, V2I64Regs:$src2), - (ShiftLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; - -def : Pat<(shl V4I16Regs:$src1, V4I16Regs:$src2), - (ShiftLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; -def : Pat<(shl V4I8Regs:$src1, V4I8Regs:$src2), - (ShiftLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; - -let VecInstType=isVecOther.Value in { -def ShiftRAV2I64 : VecShiftOp, sra, V2I64Regs, V2I32Regs, - SRAi64rr>; -def ShiftRAV2I32 : VecShiftOp, sra, V2I32Regs, V2I32Regs, - SRAi32rr>; -def ShiftRAV4I32 : VecShiftOp, sra, V4I32Regs, V4I32Regs, - SRAi32rr>; -def ShiftRAV2I16 : VecShiftOp, sra, V2I16Regs, V2I32Regs, - SRAi16rr>; -def ShiftRAV4I16 : VecShiftOp, sra, V4I16Regs, V4I32Regs, - SRAi16rr>; -def ShiftRAV2I8 : VecShiftOp, sra, V2I8Regs, V2I32Regs, - SRAi8rr>; -def ShiftRAV4I8 : VecShiftOp, sra, V4I8Regs, V4I32Regs, - SRAi8rr>; - -def ShiftRLV2I64 : VecShiftOp, srl, V2I64Regs, V2I32Regs, - SRLi64rr>; -def ShiftRLV2I32 : VecShiftOp, srl, V2I32Regs, V2I32Regs, - SRLi32rr>; -def ShiftRLV4I32 : VecShiftOp, srl, V4I32Regs, V4I32Regs, - SRLi32rr>; -def ShiftRLV2I16 : VecShiftOp, srl, V2I16Regs, V2I32Regs, - SRLi16rr>; -def ShiftRLV4I16 : VecShiftOp, srl, V4I16Regs, V4I32Regs, - SRLi16rr>; -def ShiftRLV2I8 : VecShiftOp, srl, V2I8Regs, V2I32Regs, - SRLi8rr>; -def ShiftRLV4I8 : VecShiftOp, srl, V4I8Regs, V4I32Regs, - SRLi8rr>; - -defm VMult : IntBinVOp<"mul.lo.s", mul, MULTi64rr, MULTi32rr, MULTi16rr, - MULTi8rr>; -defm VMultHS : IntBinVOp<"mul.hi.s", mulhs, MULTHSi64rr, MULTHSi32rr, - MULTHSi16rr, - MULTHSi8rr>; -defm VMultHU : IntBinVOp<"mul.hi.u", mulhu, MULTHUi64rr, MULTHUi32rr, - MULTHUi16rr, - MULTHUi8rr>; -defm VSDiv : IntBinVOp<"div.s", sdiv, SDIVi64rr, SDIVi32rr, SDIVi16rr, - SDIVi8rr>; -defm VUDiv : IntBinVOp<"div.u", udiv, UDIVi64rr, UDIVi32rr, UDIVi16rr, - UDIVi8rr>; -defm VSRem : IntBinVOp<"rem.s", srem, SREMi64rr, SREMi32rr, SREMi16rr, - SREMi8rr>; -defm VURem : IntBinVOp<"rem.u", urem, UREMi64rr, UREMi32rr, UREMi16rr, - UREMi8rr>; -} - -def : Pat<(sra V2I16Regs:$src1, V2I16Regs:$src2), - (ShiftRAV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 V2I16Regs:$src2))>; -def : Pat<(sra V2I8Regs:$src1, V2I8Regs:$src2), - (ShiftRAV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; -def : Pat<(sra V2I64Regs:$src1, V2I64Regs:$src2), - (ShiftRAV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; - -def : Pat<(sra V4I16Regs:$src1, V4I16Regs:$src2), - (ShiftRAV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; -def : Pat<(sra V4I8Regs:$src1, V4I8Regs:$src2), - (ShiftRAV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; - -def : Pat<(srl V2I16Regs:$src1, V2I16Regs:$src2), - (ShiftRLV2I16 V2I16Regs:$src1, (CVTv2i16tov2i32 
V2I16Regs:$src2))>; -def : Pat<(srl V2I8Regs:$src1, V2I8Regs:$src2), - (ShiftRLV2I8 V2I8Regs:$src1, (CVTv2i8tov2i32 V2I8Regs:$src2))>; -def : Pat<(srl V2I64Regs:$src1, V2I64Regs:$src2), - (ShiftRLV2I64 V2I64Regs:$src1, (CVTv2i64tov2i32 V2I64Regs:$src2))>; - -def : Pat<(srl V4I16Regs:$src1, V4I16Regs:$src2), - (ShiftRLV4I16 V4I16Regs:$src1, (CVTv4i16tov4i32 V4I16Regs:$src2))>; -def : Pat<(srl V4I8Regs:$src1, V4I8Regs:$src2), - (ShiftRLV4I8 V4I8Regs:$src1, (CVTv4i8tov4i32 V4I8Regs:$src2))>; - -multiclass VMAD { - def V4 : NVPTXVecInst<(outs regclassv4:$dst), - (ins regclassv4:$a, regclassv4:$b, regclassv4:$c), - V4MADStr.s, - [(set regclassv4:$dst, - (an (mn regclassv4:$a, regclassv4:$b), regclassv4:$c))], - sop>, - Requires<[Pred]>; - def V2 : NVPTXVecInst<(outs regclassv2:$dst), - (ins regclassv2:$a, regclassv2:$b, regclassv2:$c), - V2MADStr.s, - [(set regclassv2:$dst, - (an (mn regclassv2:$a, regclassv2:$b), regclassv2:$c))], - sop>, - Requires<[Pred]>; -} - -multiclass VMADV2Only { - def V2 : NVPTXVecInst<(outs regclass:$dst), - (ins regclass:$a, regclass:$b, regclass:$c), - V2MADStr.s, - [(set regclass:$dst, (add - (mul regclass:$a, regclass:$b), regclass:$c))], sop>, - Requires<[Pred]>; -} -multiclass VFMADV2Only { - def V2 : NVPTXVecInst<(outs regclass:$dst), - (ins regclass:$a, regclass:$b, regclass:$c), - V2MADStr.s, - [(set regclass:$dst, (fadd - (fmul regclass:$a, regclass:$b), regclass:$c))], sop>, - Requires<[Pred]>; -} - -let VecInstType=isVecOther.Value in { -defm I8MAD : VMAD<"mad.lo.s16", V4I8Regs, V2I8Regs, add, mul, MAD8rrr, true>; -defm I16MAD : VMAD<"mad.lo.s16", V4I16Regs, V2I16Regs, add, mul, MAD16rrr, - true>; -defm I32MAD : VMAD<"mad.lo.s32", V4I32Regs, V2I32Regs, add, mul, MAD32rrr, - true>; -defm I64MAD : VMADV2Only<"mad.lo.s64", V2I64Regs, MAD64rrr, true>; - -defm VNeg : IntUnaryVOp<"neg.s", ineg, INEG64, INEG32, INEG16, INEG8>; - -defm VAddf : FloatBinVOp<"add.", fadd, FADDf64rr, FADDf32rr, FADDf32rr_ftz>; -defm VSubf : FloatBinVOp<"sub.", fsub, FSUBf64rr, FSUBf32rr, FSUBf32rr_ftz>; -defm VMulf : FloatBinVOp<"mul.", fmul, FMULf64rr, FMULf32rr, FMULf32rr_ftz>; - -defm F32MAD_ftz : VMAD<"mad.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul, - FMAD32_ftzrrr, doFMADF32_ftz>; -defm F32FMA_ftz : VMAD<"fma.rn.ftz.f32", V4F32Regs, V2F32Regs, fadd, fmul, - FMA32_ftzrrr, doFMAF32_ftz>; -defm F32MAD : VMAD<"mad.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMAD32rrr, - doFMADF32>; -defm F32FMA : VMAD<"fma.rn.f32", V4F32Regs, V2F32Regs, fadd, fmul, FMA32rrr, - doFMAF32>; -defm F64FMA : VFMADV2Only<"fma.rn.f64", V2F64Regs, FMA64rrr, doFMAF64>; -} - -let VecInstType=isVecOther.Value in { -def V4F32Div_prec_ftz : VecBinaryOp, fdiv, V4F32Regs, - FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>; -def V2F32Div_prec_ftz : VecBinaryOp, fdiv, V2F32Regs, - FDIV32rr_prec_ftz>, Requires<[doF32FTZ, reqPTX20]>; -def V4F32Div_prec : VecBinaryOp, fdiv, V4F32Regs, - FDIV32rr_prec>, Requires<[reqPTX20]>; -def V2F32Div_prec : VecBinaryOp, fdiv, V2F32Regs, - FDIV32rr_prec>, Requires<[reqPTX20]>; -def V2F32Div_ftz : VecBinaryOp, fdiv, V2F32Regs, - FDIV32rr_ftz>, Requires<[doF32FTZ]>; -def V4F32Div_ftz : VecBinaryOp, fdiv, V4F32Regs, - FDIV32rr_ftz>, Requires<[doF32FTZ]>; -def V2F32Div : VecBinaryOp, fdiv, V2F32Regs, FDIV32rr>; -def V4F32Div : VecBinaryOp, fdiv, V4F32Regs, FDIV32rr>; -def V2F64Div : VecBinaryOp, fdiv, V2F64Regs, FDIV64rr>; -} - -def fnegpat : PatFrag<(ops node:$in), (fneg node:$in)>; - -let VecInstType=isVecOther.Value in { -def VNegv2f32_ftz : VecUnaryOp, fnegpat, V2F32Regs, - 
FNEGf32_ftz>, Requires<[doF32FTZ]>; -def VNegv4f32_ftz : VecUnaryOp, fnegpat, V4F32Regs, - FNEGf32_ftz>, Requires<[doF32FTZ]>; -def VNegv2f32 : VecUnaryOp, fnegpat, V2F32Regs, FNEGf32>; -def VNegv4f32 : VecUnaryOp, fnegpat, V4F32Regs, FNEGf32>; -def VNegv2f64 : VecUnaryOp, fnegpat, V2F64Regs, FNEGf64>; - -// Logical Arithmetic -defm VAnd : IntBinVOp<"and.b", and, ANDb64rr, ANDb32rr, ANDb16rr, ANDb8rr>; -defm VOr : IntBinVOp<"or.b", or, ORb64rr, ORb32rr, ORb16rr, ORb8rr>; -defm VXor : IntBinVOp<"xor.b", xor, XORb64rr, XORb32rr, XORb16rr, XORb8rr>; - -defm VNot : IntUnaryVOp<"not.b", not, NOT64, NOT32, NOT16, NOT8>; -} - - -multiclass V2FPCONTRACT32_SUB_PAT { - def : Pat<(fsub V2F32Regs:$a, (fmul V2F32Regs:$b, V2F32Regs:$c)), - (Inst (VNegv2f32 V2F32Regs:$b), V2F32Regs:$c, V2F32Regs:$a)>, - Requires<[Pred]>; - - def : Pat<(fsub (fmul V2F32Regs:$a, V2F32Regs:$b), V2F32Regs:$c), - (Inst V2F32Regs:$a, V2F32Regs:$b, (VNegv2f32 V2F32Regs:$c))>, - Requires<[Pred]>; -} - -defm V2FMAF32ext_ftz : V2FPCONTRACT32_SUB_PAT; -defm V2FMADF32ext_ftz : V2FPCONTRACT32_SUB_PAT; -defm V2FMAF32ext : V2FPCONTRACT32_SUB_PAT; -defm V2FMADF32ext : V2FPCONTRACT32_SUB_PAT; - -multiclass V4FPCONTRACT32_SUB_PAT { - def : Pat<(fsub V4F32Regs:$a, (fmul V4F32Regs:$b, V4F32Regs:$c)), - (Inst (VNegv4f32 V4F32Regs:$b), V4F32Regs:$c, V4F32Regs:$a)>, - Requires<[Pred]>; - - def : Pat<(fsub (fmul V4F32Regs:$a, V4F32Regs:$b), V4F32Regs:$c), - (Inst V4F32Regs:$a, V4F32Regs:$b, (VNegv4f32 V4F32Regs:$c))>, - Requires<[Pred]>; -} - -defm V4FMAF32ext_ftz : V4FPCONTRACT32_SUB_PAT; -defm V4FMADF32ext_ftz : V4FPCONTRACT32_SUB_PAT; -defm V4FMAF32ext : V4FPCONTRACT32_SUB_PAT; -defm V4FMADF32ext : V4FPCONTRACT32_SUB_PAT; - -multiclass V2FPCONTRACT64_SUB_PAT { - def : Pat<(fsub V2F64Regs:$a, (fmul V2F64Regs:$b, V2F64Regs:$c)), - (Inst (VNegv2f64 V2F64Regs:$b), V2F64Regs:$c, V2F64Regs:$a)>, - Requires<[Pred]>; - - def : Pat<(fsub (fmul V2F64Regs:$a, V2F64Regs:$b), V2F64Regs:$c), - (Inst V2F64Regs:$a, V2F64Regs:$b, (VNegv2f64 V2F64Regs:$c))>, - Requires<[Pred]>; -} - -defm V2FMAF64ext : V2FPCONTRACT64_SUB_PAT; - -class VecModStr -{ - string t1 = !strconcat("${c", elem); - string t2 = !strconcat(t1, ":vecv"); - string t3 = !strconcat(t2, vecsize); - string t4 = !strconcat(t3, extra); - string t5 = !strconcat(t4, l); - string s = !strconcat(t5, "}"); -} -class ShuffleOneLine -{ - string t1 = VecModStr.s; - string t2 = !strconcat(t1, "mov."); - string t3 = !strconcat(t2, type); - string t4 = !strconcat(t3, " \t${dst}_"); - string t5 = !strconcat(t4, elem); - string t6 = !strconcat(t5, ", $src1"); - string t7 = !strconcat(t6, VecModStr.s); - string t8 = !strconcat(t7, ";\n\t"); - string t9 = !strconcat(t8, VecModStr.s); - string t10 = !strconcat(t9, "mov."); - string t11 = !strconcat(t10, type); - string t12 = !strconcat(t11, " \t${dst}_"); - string t13 = !strconcat(t12, elem); - string t14 = !strconcat(t13, ", $src2"); - string t15 = !strconcat(t14, VecModStr.s); - string s = !strconcat(t15, ";"); -} -class ShuffleAsmStr2 -{ - string t1 = ShuffleOneLine<"2", "0", type>.s; - string t2 = !strconcat(t1, "\n\t"); - string s = !strconcat(t2, ShuffleOneLine<"2", "1", type>.s); -} -class ShuffleAsmStr4 -{ - string t1 = ShuffleOneLine<"4", "0", type>.s; - string t2 = !strconcat(t1, "\n\t"); - string t3 = !strconcat(t2, ShuffleOneLine<"4", "1", type>.s); - string t4 = !strconcat(t3, "\n\t"); - string t5 = !strconcat(t4, ShuffleOneLine<"4", "2", type>.s); - string t6 = !strconcat(t5, "\n\t"); - string s = !strconcat(t6, ShuffleOneLine<"4", "3", type>.s); 
-} - -let hasSideEffects=0, VecInstType=isVecShuffle.Value in { -def VecShuffle_v4f32 : NVPTXVecInst<(outs V4F32Regs:$dst), - (ins V4F32Regs:$src1, V4F32Regs:$src2, - i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), - !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", - ShuffleAsmStr4<"f32">.s), - [], FMOV32rr>; - -def VecShuffle_v4i32 : NVPTXVecInst<(outs V4I32Regs:$dst), - (ins V4I32Regs:$src1, V4I32Regs:$src2, - i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), - !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", - ShuffleAsmStr4<"u32">.s), - [], IMOV32rr>; - -def VecShuffle_v4i16 : NVPTXVecInst<(outs V4I16Regs:$dst), - (ins V4I16Regs:$src1, V4I16Regs:$src2, - i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), - !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", - ShuffleAsmStr4<"u16">.s), - [], IMOV16rr>; - -def VecShuffle_v4i8 : NVPTXVecInst<(outs V4I8Regs:$dst), - (ins V4I8Regs:$src1, V4I8Regs:$src2, - i8imm:$c0, i8imm:$c1, i8imm:$c2, i8imm:$c3), - !strconcat("//Mov $dst, $src1, $src2, $c0, $c1, $c2, $c3;\n\t", - ShuffleAsmStr4<"u16">.s), - [], IMOV8rr>; - -def VecShuffle_v2f32 : NVPTXVecInst<(outs V2F32Regs:$dst), - (ins V2F32Regs:$src1, V2F32Regs:$src2, - i8imm:$c0, i8imm:$c1), - !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", - ShuffleAsmStr2<"f32">.s), - [], FMOV32rr>; - -def VecShuffle_v2i32 : NVPTXVecInst<(outs V2I32Regs:$dst), - (ins V2I32Regs:$src1, V2I32Regs:$src2, - i8imm:$c0, i8imm:$c1), - !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", - ShuffleAsmStr2<"u32">.s), - [], IMOV32rr>; - -def VecShuffle_v2i8 : NVPTXVecInst<(outs V2I8Regs:$dst), - (ins V2I8Regs:$src1, V2I8Regs:$src2, - i8imm:$c0, i8imm:$c1), - !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", - ShuffleAsmStr2<"u16">.s), - [], IMOV8rr>; - -def VecShuffle_v2i16 : NVPTXVecInst<(outs V2I16Regs:$dst), - (ins V2I16Regs:$src1, V2I16Regs:$src2, - i8imm:$c0, i8imm:$c1), - !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", - ShuffleAsmStr2<"u16">.s), - [], IMOV16rr>; - -def VecShuffle_v2f64 : NVPTXVecInst<(outs V2F64Regs:$dst), - (ins V2F64Regs:$src1, V2F64Regs:$src2, - i8imm:$c0, i8imm:$c1), - !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", - ShuffleAsmStr2<"f64">.s), - [], FMOV64rr>; - -def VecShuffle_v2i64 : NVPTXVecInst<(outs V2I64Regs:$dst), - (ins V2I64Regs:$src1, V2I64Regs:$src2, - i8imm:$c0, i8imm:$c1), - !strconcat("//Mov $dst, $src1, $src2, $c0, $c1;\n\t", - ShuffleAsmStr2<"u64">.s), - [], IMOV64rr>; -} - -def ShuffleMask0 : SDNodeXForm(N); - return CurDAG->getTargetConstant(SVOp->getMaskElt(0), SDLoc(N), MVT::i32); -}]>; -def ShuffleMask1 : SDNodeXForm(N); - return CurDAG->getTargetConstant(SVOp->getMaskElt(1), SDLoc(N), MVT::i32); -}]>; -def ShuffleMask2 : SDNodeXForm(N); - return CurDAG->getTargetConstant(SVOp->getMaskElt(2), SDLoc(N), MVT::i32); -}]>; -def ShuffleMask3 : SDNodeXForm(N); - return CurDAG->getTargetConstant(SVOp->getMaskElt(3), SDLoc(N), MVT::i32); -}]>; - -// The spurious call is here to silence a compiler warning about N being -// unused. 
-def vec_shuf : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), - [{ N->getGluedNode(); return true; }]>; - -def : Pat<(v2f64 (vec_shuf:$op V2F64Regs:$src1, V2F64Regs:$src2)), - (VecShuffle_v2f64 V2F64Regs:$src1, V2F64Regs:$src2, - (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; - -def : Pat<(v4f32 (vec_shuf:$op V4F32Regs:$src1, V4F32Regs:$src2)), - (VecShuffle_v4f32 V4F32Regs:$src1, V4F32Regs:$src2, - (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), - (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; - -def : Pat<(v2f32 (vec_shuf:$op V2F32Regs:$src1, V2F32Regs:$src2)), - (VecShuffle_v2f32 V2F32Regs:$src1, V2F32Regs:$src2, - (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; - -def : Pat<(v2i64 (vec_shuf:$op V2I64Regs:$src1, V2I64Regs:$src2)), - (VecShuffle_v2i64 V2I64Regs:$src1, V2I64Regs:$src2, - (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; - -def : Pat<(v4i32 (vec_shuf:$op V4I32Regs:$src1, V4I32Regs:$src2)), - (VecShuffle_v4i32 V4I32Regs:$src1, V4I32Regs:$src2, - (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), - (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; - -def : Pat<(v2i32 (vec_shuf:$op V2I32Regs:$src1, V2I32Regs:$src2)), - (VecShuffle_v2i32 V2I32Regs:$src1, V2I32Regs:$src2, - (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; - -def : Pat<(v4i16 (vec_shuf:$op V4I16Regs:$src1, V4I16Regs:$src2)), - (VecShuffle_v4i16 V4I16Regs:$src1, V4I16Regs:$src2, - (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), - (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; - -def : Pat<(v2i16 (vec_shuf:$op V2I16Regs:$src1, V2I16Regs:$src2)), - (VecShuffle_v2i16 V2I16Regs:$src1, V2I16Regs:$src2, - (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; - -def : Pat<(v4i8 (vec_shuf:$op V4I8Regs:$src1, V4I8Regs:$src2)), - (VecShuffle_v4i8 V4I8Regs:$src1, V4I8Regs:$src2, - (ShuffleMask0 node:$op), (ShuffleMask1 node:$op), - (ShuffleMask2 node:$op), (ShuffleMask3 node:$op))>; - -def : Pat<(v2i8 (vec_shuf:$op V2I8Regs:$src1, V2I8Regs:$src2)), - (VecShuffle_v2i8 V2I8Regs:$src1, V2I8Regs:$src2, - (ShuffleMask0 node:$op), (ShuffleMask1 node:$op))>; - -class Build_Vector2 - : NVPTXVecInst<(outs vclass:$dst), - (ins sclass:$a1, sclass:$a2), - !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2}};"), - [(set vclass:$dst, (build_vector sclass:$a1, sclass:$a2))], - si>; -class Build_Vector4 - : NVPTXVecInst<(outs vclass:$dst), - (ins sclass:$a1, sclass:$a2, sclass:$a3, sclass:$a4), - !strconcat(asmstr, "\t${dst:vecfull}, {{$a1, $a2, $a3, $a4}};"), - [(set vclass:$dst, - (build_vector sclass:$a1, sclass:$a2, - sclass:$a3, sclass:$a4))], si>; - -let isAsCheapAsAMove=1, VecInstType=isVecBuild.Value in { -def Build_Vector2_f32 : Build_Vector2<"mov.v2.f32", V2F32Regs, Float32Regs, - FMOV32rr>; -def Build_Vector2_f64 : Build_Vector2<"mov.v2.f64", V2F64Regs, Float64Regs, - FMOV64rr>; - -def Build_Vector2_i32 : Build_Vector2<"mov.v2.u32", V2I32Regs, Int32Regs, - IMOV32rr>; -def Build_Vector2_i64 : Build_Vector2<"mov.v2.u64", V2I64Regs, Int64Regs, - IMOV64rr>; -def Build_Vector2_i16 : Build_Vector2<"mov.v2.u16", V2I16Regs, Int16Regs, - IMOV16rr>; -def Build_Vector2_i8 : Build_Vector2<"mov.v2.u16", V2I8Regs, Int8Regs, - IMOV8rr>; - -def Build_Vector4_f32 : Build_Vector4<"mov.v4.f32", V4F32Regs, Float32Regs, - FMOV32rr>; - -def Build_Vector4_i32 : Build_Vector4<"mov.v4.u32", V4I32Regs, Int32Regs, - IMOV32rr>; -def Build_Vector4_i16 : Build_Vector4<"mov.v4.u16", V4I16Regs, Int16Regs, - IMOV16rr>; -def Build_Vector4_i8 : Build_Vector4<"mov.v4.u16", V4I8Regs, Int8Regs, - IMOV8rr>; -} - 
-class Vec_Move - : NVPTXVecInst<(outs vclass:$dst), (ins vclass:$src), - !strconcat(asmstr, "\t${dst:vecfull}, ${src:vecfull};"), - [], sop>; - -let isAsCheapAsAMove=1, hasSideEffects=0, IsSimpleMove=1, - VecInstType=isVecOther.Value in { -def V4f32Mov : Vec_Move<"mov.v4.f32", V4F32Regs, FMOV32rr>; -def V2f32Mov : Vec_Move<"mov.v2.f32", V2F32Regs, FMOV32rr>; - -def V4i32Mov : Vec_Move<"mov.v4.u32", V4I32Regs, IMOV32rr>; -def V2i32Mov : Vec_Move<"mov.v2.u32", V2I32Regs, IMOV32rr>; - -def V4i16Mov : Vec_Move<"mov.v4.u16", V4I16Regs, IMOV16rr>; -def V2i16Mov : Vec_Move<"mov.v2.u16", V2I16Regs, IMOV16rr>; - -def V4i8Mov : Vec_Move<"mov.v4.u16", V4I8Regs, IMOV8rr>; -def V2i8Mov : Vec_Move<"mov.v2.u16", V2I8Regs, IMOV8rr>; - -def V2f64Mov : Vec_Move<"mov.v2.f64", V2F64Regs, FMOV64rr>; -def V2i64Mov : Vec_Move<"mov.v2.u64", V2I64Regs, IMOV64rr>; -} - -// extract subvector patterns -def extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR", - SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>>; - -def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 0)), - (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 0), - (V4f32Extract V4F32Regs:$src, 1))>; -def : Pat<(v2f32 (extract_subvec V4F32Regs:$src, 2)), - (Build_Vector2_f32 (V4f32Extract V4F32Regs:$src, 2), - (V4f32Extract V4F32Regs:$src, 3))>; -def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 0)), - (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 0), - (V4i32Extract V4I32Regs:$src, 1))>; -def : Pat<(v2i32 (extract_subvec V4I32Regs:$src, 2)), - (Build_Vector2_i32 (V4i32Extract V4I32Regs:$src, 2), - (V4i32Extract V4I32Regs:$src, 3))>; -def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 0)), - (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 0), - (V4i16Extract V4I16Regs:$src, 1))>; -def : Pat<(v2i16 (extract_subvec V4I16Regs:$src, 2)), - (Build_Vector2_i16 (V4i16Extract V4I16Regs:$src, 2), - (V4i16Extract V4I16Regs:$src, 3))>; -def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 0)), - (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 0), - (V4i8Extract V4I8Regs:$src, 1))>; -def : Pat<(v2i8 (extract_subvec V4I8Regs:$src, 2)), - (Build_Vector2_i8 (V4i8Extract V4I8Regs:$src, 2), - (V4i8Extract V4I8Regs:$src, 3))>; - -// Select instructions -class Select_OneLine { - string t1 = !strconcat("selp.", type); - string t2 = !strconcat(t1, " \t${dst}_"); - string t3 = !strconcat(t2, pos); - string t4 = !strconcat(t3, ", ${src1}_"); - string t5 = !strconcat(t4, pos); - string t6 = !strconcat(t5, ", ${src2}_"); - string t7 = !strconcat(t6, pos); - string s = !strconcat(t7, ", $p;"); -} - -class Select_Str2 { - string t1 = Select_OneLine.s; - string t2 = !strconcat(t1, "\n\t"); - string s = !strconcat(t2, Select_OneLine.s); -} - -class Select_Str4 { - string t1 = Select_OneLine.s; - string t2 = !strconcat(t1, "\n\t"); - string t3 = !strconcat(t2, Select_OneLine.s); - string t4 = !strconcat(t3, "\n\t"); - string t5 = !strconcat(t4, Select_OneLine.s); - string t6 = !strconcat(t5, "\n\t"); - string s = !strconcat(t6, Select_OneLine.s); - -} - -class Vec_Select - : NVPTXVecInst<(outs vclass:$dst), - (ins vclass:$src1, vclass:$src2, Int1Regs:$p), - asmstr, - [(set vclass:$dst, (select Int1Regs:$p, vclass:$src1, - vclass:$src2))], - sop>; - -let VecInstType=isVecOther.Value in { -def V2I64_Select : Vec_Select.s, SELECTi64rr>; -def V4I32_Select : Vec_Select.s, SELECTi32rr>; -def V2I32_Select : Vec_Select.s, SELECTi32rr>; -def V4I16_Select : Vec_Select.s, SELECTi16rr>; -def V2I16_Select : Vec_Select.s, SELECTi16rr>; -def V4I8_Select : Vec_Select.s, SELECTi8rr>; -def V2I8_Select : Vec_Select.s, 
SELECTi8rr>; - -def V2F64_Select : Vec_Select.s, SELECTf64rr>; -def V4F32_Select : Vec_Select.s, SELECTf32rr>; -def V2F32_Select : Vec_Select.s, SELECTf32rr>; -} - -// Comparison instructions - -// setcc convenience fragments. -def vsetoeq : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETOEQ)>; -def vsetogt : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETOGT)>; -def vsetoge : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETOGE)>; -def vsetolt : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETOLT)>; -def vsetole : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETOLE)>; -def vsetone : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETONE)>; -def vseto : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETO)>; -def vsetuo : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETUO)>; -def vsetueq : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETUEQ)>; -def vsetugt : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETUGT)>; -def vsetuge : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETUGE)>; -def vsetult : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETULT)>; -def vsetule : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETULE)>; -def vsetune : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETUNE)>; -def vseteq : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETEQ)>; -def vsetgt : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETGT)>; -def vsetge : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETGE)>; -def vsetlt : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETLT)>; -def vsetle : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETLE)>; -def vsetne : PatFrag<(ops node:$lhs, node:$rhs), - (setcc node:$lhs, node:$rhs, SETNE)>; - -class Vec_Compare - : NVPTXVecInst<(outs outrclass:$dst), - (ins inrclass:$a, inrclass:$b), - "Unsupported", - [(set outrclass:$dst, (op inrclass:$a, inrclass:$b))], - sop>; - -multiclass Vec_Compare_All -{ - def V2I8 : Vec_Compare; - def V4I8 : Vec_Compare; - def V2I16 : Vec_Compare; - def V4I16 : Vec_Compare; - def V2I32 : Vec_Compare; - def V4I32 : Vec_Compare; - def V2I64 : Vec_Compare; -} - -let VecInstType=isVecOther.Value in { - defm VecSGT : Vec_Compare_All; - defm VecUGT : Vec_Compare_All; - defm VecSLT : Vec_Compare_All; - defm VecULT : Vec_Compare_All; - defm VecSGE : Vec_Compare_All; - defm VecUGE : Vec_Compare_All; - defm VecSLE : Vec_Compare_All; - defm VecULE : Vec_Compare_All; - defm VecSEQ : Vec_Compare_All; - defm VecUEQ : Vec_Compare_All; - defm VecSNE : Vec_Compare_All; - defm VecUNE : Vec_Compare_All; -} - -multiclass FVec_Compare_All -{ - def V2F32 : Vec_Compare; - def V4F32 : Vec_Compare; - def V2F64 : Vec_Compare; -} - -let VecInstType=isVecOther.Value in { - defm FVecGT : FVec_Compare_All; - defm FVecLT : FVec_Compare_All; - defm FVecGE : FVec_Compare_All; - defm FVecLE : FVec_Compare_All; - defm FVecEQ : FVec_Compare_All; - defm FVecNE : FVec_Compare_All; - - defm FVecUGT : FVec_Compare_All; - defm FVecULT : FVec_Compare_All; - defm FVecUGE : FVec_Compare_All; - defm FVecULE : FVec_Compare_All; - defm FVecUEQ : FVec_Compare_All; - defm FVecUNE : FVec_Compare_All; - - defm FVecNUM : FVec_Compare_All; - defm FVecNAN : FVec_Compare_All; -} - -class 
LoadParamScalar4Inst : - NVPTXInst<(outs regclass:$d1, regclass:$d2, regclass:$d3, regclass:$d4), - (ins i32imm:$a, i32imm:$b), - !strconcat(!strconcat("ld.param", opstr), - "\t{{$d1, $d2, $d3, $d4}}, [retval0+$b];"), []>; - -class LoadParamScalar2Inst : - NVPTXInst<(outs regclass:$d1, regclass:$d2), - (ins i32imm:$a, i32imm:$b), - !strconcat(!strconcat("ld.param", opstr), - "\t{{$d1, $d2}}, [retval0+$b];"), []>; - - -class StoreParamScalar4Inst : - NVPTXInst<(outs), - (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4, - i32imm:$a, i32imm:$b), - !strconcat(!strconcat("st.param", opstr), - "\t[param$a+$b], {{$s1, $s2, $s3, $s4}};"), []>; - -class StoreParamScalar2Inst : - NVPTXInst<(outs), - (ins regclass:$s1, regclass:$s2, i32imm:$a, i32imm:$b), - !strconcat(!strconcat("st.param", opstr), - "\t[param$a+$b], {{$s1, $s2}};"), []>; - -class StoreRetvalScalar4Inst : - NVPTXInst<(outs), - (ins regclass:$s1, regclass:$s2, regclass:$s3, regclass:$s4, - i32imm:$a), - !strconcat(!strconcat("st.param", opstr), - "\t[func_retval+$a], {{$s1, $s2, $s3, $s4}};"), []>; - -class StoreRetvalScalar2Inst : - NVPTXInst<(outs), - (ins regclass:$s1, regclass:$s2, i32imm:$a), - !strconcat(!strconcat("st.param", opstr), - "\t[func_retval+$a], {{$s1, $s2}};"), []>; - -def LoadParamScalar4I32 : LoadParamScalar4Inst; -def LoadParamScalar4I16 : LoadParamScalar4Inst; -def LoadParamScalar4I8 : LoadParamScalar4Inst; - -def LoadParamScalar2I64 : LoadParamScalar2Inst; -def LoadParamScalar2I32 : LoadParamScalar2Inst; -def LoadParamScalar2I16 : LoadParamScalar2Inst; -def LoadParamScalar2I8 : LoadParamScalar2Inst; - -def LoadParamScalar4F32 : LoadParamScalar4Inst; -def LoadParamScalar2F32 : LoadParamScalar2Inst; -def LoadParamScalar2F64 : LoadParamScalar2Inst; - -def StoreParamScalar4I32 : StoreParamScalar4Inst; -def StoreParamScalar4I16 : StoreParamScalar4Inst; -def StoreParamScalar4I8 : StoreParamScalar4Inst; - -def StoreParamScalar2I64 : StoreParamScalar2Inst; -def StoreParamScalar2I32 : StoreParamScalar2Inst; -def StoreParamScalar2I16 : StoreParamScalar2Inst; -def StoreParamScalar2I8 : StoreParamScalar2Inst; - -def StoreParamScalar4F32 : StoreParamScalar4Inst; -def StoreParamScalar2F32 : StoreParamScalar2Inst; -def StoreParamScalar2F64 : StoreParamScalar2Inst; - -def StoreRetvalScalar4I32 : StoreRetvalScalar4Inst; -def StoreRetvalScalar4I16 : StoreRetvalScalar4Inst; -def StoreRetvalScalar4I8 : StoreRetvalScalar4Inst; - -def StoreRetvalScalar2I64 : StoreRetvalScalar2Inst; -def StoreRetvalScalar2I32 : StoreRetvalScalar2Inst; -def StoreRetvalScalar2I16 : StoreRetvalScalar2Inst; -def StoreRetvalScalar2I8 : StoreRetvalScalar2Inst; - -def StoreRetvalScalar4F32 : StoreRetvalScalar4Inst; -def StoreRetvalScalar2F32 : StoreRetvalScalar2Inst; -def StoreRetvalScalar2F64 : StoreRetvalScalar2Inst; - -class LoadParamVecInst: - NVPTXVecInst<(outs regclass:$dst), (ins i32imm:$a, i32imm:$b), - "loadparam : $dst <- [$a, $b]", - [(set regclass:$dst, (LoadParam (i32 imm:$a), (i32 imm:$b)))], - sop>; - -class StoreParamVecInst - : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b), - "storeparam : [$a, $b] <- $val", - [(StoreParam (i32 imm:$a), (i32 imm:$b), regclass:$val)], sop>; - -class StoreRetvalVecInst - : NVPTXVecInst<(outs), (ins regclass:$val, i32imm:$a), - "storeretval : retval[$a] <- $val", - [(StoreRetval (i32 imm:$a), regclass:$val)], sop>; - -let VecInstType=isVecLD.Value in { -def LoadParamV4I32 : LoadParamVecInst; -def LoadParamV4I16 : LoadParamVecInst; -def LoadParamV4I8 : LoadParamVecInst; - -def 
LoadParamV2I64 : LoadParamVecInst; -def LoadParamV2I32 : LoadParamVecInst; -def LoadParamV2I16 : LoadParamVecInst; -def LoadParamV2I8 : LoadParamVecInst; - -def LoadParamV4F32 : LoadParamVecInst; -def LoadParamV2F32 : LoadParamVecInst; -def LoadParamV2F64 : LoadParamVecInst; -} - -let VecInstType=isVecST.Value in { -def StoreParamV4I32 : StoreParamVecInst; -def StoreParamV4I16 : StoreParamVecInst; -def StoreParamV4I8 : StoreParamVecInst; - -def StoreParamV2I64 : StoreParamVecInst; -def StoreParamV2I32 : StoreParamVecInst; -def StoreParamV2I16 : StoreParamVecInst; -def StoreParamV2I8 : StoreParamVecInst; - -def StoreParamV4F32 : StoreParamVecInst; -def StoreParamV2F32 : StoreParamVecInst; -def StoreParamV2F64 : StoreParamVecInst; - -def StoreRetvalV4I32 : StoreRetvalVecInst; -def StoreRetvalV4I16 : StoreRetvalVecInst; -def StoreRetvalV4I8 : StoreRetvalVecInst; - -def StoreRetvalV2I64 : StoreRetvalVecInst; -def StoreRetvalV2I32 : StoreRetvalVecInst; -def StoreRetvalV2I16 : StoreRetvalVecInst; -def StoreRetvalV2I8 : StoreRetvalVecInst; - -def StoreRetvalV4F32 : StoreRetvalVecInst; -def StoreRetvalV2F32 : StoreRetvalVecInst; -def StoreRetvalV2F64 : StoreRetvalVecInst; - -} - - -// Int vector to int scalar bit convert -// v4i8 -> i32 -def : Pat<(i32 (bitconvert V4I8Regs:$s)), - (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1), - (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3))>; -// v4i16 -> i64 -def : Pat<(i64 (bitconvert V4I16Regs:$s)), - (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), - (V4i16Extract V4I16Regs:$s,1), - (V4i16Extract V4I16Regs:$s,2), - (V4i16Extract V4I16Regs:$s,3))>; -// v2i8 -> i16 -def : Pat<(i16 (bitconvert V2I8Regs:$s)), - (V2I8toI16 (V2i8Extract V2I8Regs:$s,0), (V2i8Extract V2I8Regs:$s,1))>; -// v2i16 -> i32 -def : Pat<(i32 (bitconvert V2I16Regs:$s)), - (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), - (V2i16Extract V2I16Regs:$s,1))>; -// v2i32 -> i64 -def : Pat<(i64 (bitconvert V2I32Regs:$s)), - (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), - (V2i32Extract V2I32Regs:$s,1))>; - -// Int scalar to int vector bit convert -let VecInstType=isVecDest.Value in { -// i32 -> v4i8 -def VecI32toV4I8 : NVPTXVecInst<(outs V4I8Regs:$d), (ins Int32Regs:$s), - "Error!", - [(set V4I8Regs:$d, (bitconvert Int32Regs:$s))], - I32toV4I8>; -// i64 -> v4i16 -def VecI64toV4I16 : NVPTXVecInst<(outs V4I16Regs:$d), (ins Int64Regs:$s), - "Error!", - [(set V4I16Regs:$d, (bitconvert Int64Regs:$s))], - I64toV4I16>; -// i16 -> v2i8 -def VecI16toV2I8 : NVPTXVecInst<(outs V2I8Regs:$d), (ins Int16Regs:$s), - "Error!", - [(set V2I8Regs:$d, (bitconvert Int16Regs:$s))], - I16toV2I8>; -// i32 -> v2i16 -def VecI32toV2I16 : NVPTXVecInst<(outs V2I16Regs:$d), (ins Int32Regs:$s), - "Error!", - [(set V2I16Regs:$d, (bitconvert Int32Regs:$s))], - I32toV2I16>; -// i64 -> v2i32 -def VecI64toV2I32 : NVPTXVecInst<(outs V2I32Regs:$d), (ins Int64Regs:$s), - "Error!", - [(set V2I32Regs:$d, (bitconvert Int64Regs:$s))], - I64toV2I32>; -} - -// Int vector to int vector bit convert -// v4i8 -> v2i16 -def : Pat<(v2i16 (bitconvert V4I8Regs:$s)), - (VecI32toV2I16 - (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1), - (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>; -// v4i16 -> v2i32 -def : Pat<(v2i32 (bitconvert V4I16Regs:$s)), - (VecI64toV2I32 - (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1), - (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>; -// v2i16 -> v4i8 -def : Pat<(v4i8 (bitconvert V2I16Regs:$s)), - (VecI32toV4I8 - (V2I16toI32 
(V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>; -// v2i32 -> v4i16 -def : Pat<(v4i16 (bitconvert V2I32Regs:$s)), - (VecI64toV4I16 - (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>; -// v2i64 -> v4i32 -def : Pat<(v4i32 (bitconvert V2I64Regs:$s)), - (Build_Vector4_i32 - (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 0), - (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 0)), 1), - (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 0), - (V2i32Extract (VecI64toV2I32 (V2i64Extract V2I64Regs:$s, 1)), 1))>; -// v4i32 -> v2i64 -def : Pat<(v2i64 (bitconvert V4I32Regs:$s)), - (Build_Vector2_i64 - (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), (V4i32Extract V4I32Regs:$s,1)), - (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), (V4i32Extract V4I32Regs:$s,3)))>; - -// Fp scalar to fp vector convert -// f64 -> v2f32 -let VecInstType=isVecDest.Value in { -def VecF64toV2F32 : NVPTXVecInst<(outs V2F32Regs:$d), (ins Float64Regs:$s), - "Error!", - [(set V2F32Regs:$d, (bitconvert Float64Regs:$s))], - F64toV2F32>; -} - -// Fp vector to fp scalar convert -// v2f32 -> f64 -def : Pat<(f64 (bitconvert V2F32Regs:$s)), - (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1))>; - -// Fp scalar to int vector convert -// f32 -> v4i8 -def : Pat<(v4i8 (bitconvert Float32Regs:$s)), - (VecI32toV4I8 (BITCONVERT_32_F2I Float32Regs:$s))>; -// f32 -> v2i16 -def : Pat<(v2i16 (bitconvert Float32Regs:$s)), - (VecI32toV2I16 (BITCONVERT_32_F2I Float32Regs:$s))>; -// f64 -> v4i16 -def : Pat<(v4i16 (bitconvert Float64Regs:$s)), - (VecI64toV4I16 (BITCONVERT_64_F2I Float64Regs:$s))>; -// f64 -> v2i32 -def : Pat<(v2i32 (bitconvert Float64Regs:$s)), - (VecI64toV2I32 (BITCONVERT_64_F2I Float64Regs:$s))>; - -// Int vector to fp scalar convert -// v4i8 -> f32 -def : Pat<(f32 (bitconvert V4I8Regs:$s)), - (BITCONVERT_32_I2F - (V4I8toI32 (V4i8Extract V4I8Regs:$s,0), (V4i8Extract V4I8Regs:$s,1), - (V4i8Extract V4I8Regs:$s,2), (V4i8Extract V4I8Regs:$s,3)))>; -// v4i16 -> f64 -def : Pat<(f64 (bitconvert V4I16Regs:$s)), - (BITCONVERT_64_I2F - (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), (V4i16Extract V4I16Regs:$s,1), - (V4i16Extract V4I16Regs:$s,2), (V4i16Extract V4I16Regs:$s,3)))>; -// v2i16 -> f32 -def : Pat<(f32 (bitconvert V2I16Regs:$s)), - (BITCONVERT_32_I2F - (V2I16toI32 (V2i16Extract V2I16Regs:$s,0), (V2i16Extract V2I16Regs:$s,1)))>; -// v2i32 -> f64 -def : Pat<(f64 (bitconvert V2I32Regs:$s)), - (BITCONVERT_64_I2F - (V2I32toI64 (V2i32Extract V2I32Regs:$s,0), (V2i32Extract V2I32Regs:$s,1)))>; - -// Int scalar to fp vector convert -// i64 -> v2f32 -def : Pat<(v2f32 (bitconvert Int64Regs:$s)), - (VecF64toV2F32 (BITCONVERT_64_I2F Int64Regs:$s))>; - -// Fp vector to int scalar convert -// v2f32 -> i64 -def : Pat<(i64 (bitconvert V2F32Regs:$s)), - (BITCONVERT_64_F2I - (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), (V2f32Extract V2F32Regs:$s,1)))>; - -// Int vector to fp vector convert -// v2i64 -> v4f32 -def : Pat<(v4f32 (bitconvert V2I64Regs:$s)), - (Build_Vector4_f32 - (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 - (V2i64Extract V2I64Regs:$s, 0)), 0)), - (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 - (V2i64Extract V2I64Regs:$s, 0)), 1)), - (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 - (V2i64Extract V2I64Regs:$s, 1)), 0)), - (BITCONVERT_32_I2F (V2i32Extract (VecI64toV2I32 - (V2i64Extract V2I64Regs:$s, 1)), 1)))>; -// v2i64 -> v2f64 -def : Pat<(v2f64 (bitconvert V2I64Regs:$s)), - (Build_Vector2_f64 - (BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,0)), - 
(BITCONVERT_64_I2F (V2i64Extract V2I64Regs:$s,1)))>; -// v2i32 -> v2f32 -def : Pat<(v2f32 (bitconvert V2I32Regs:$s)), - (Build_Vector2_f32 - (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,0)), - (BITCONVERT_32_I2F (V2i32Extract V2I32Regs:$s,1)))>; -// v4i32 -> v2f64 -def : Pat<(v2f64 (bitconvert V4I32Regs:$s)), - (Build_Vector2_f64 - (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,0), - (V4i32Extract V4I32Regs:$s,1))), - (BITCONVERT_64_I2F (V2I32toI64 (V4i32Extract V4I32Regs:$s,2), - (V4i32Extract V4I32Regs:$s,3))))>; -// v4i32 -> v4f32 -def : Pat<(v4f32 (bitconvert V4I32Regs:$s)), - (Build_Vector4_f32 - (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,0)), - (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,1)), - (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,2)), - (BITCONVERT_32_I2F (V4i32Extract V4I32Regs:$s,3)))>; -// v4i16 -> v2f32 -def : Pat<(v2f32 (bitconvert V4I16Regs:$s)), - (VecF64toV2F32 (BITCONVERT_64_I2F - (V4I16toI64 (V4i16Extract V4I16Regs:$s,0), - (V4i16Extract V4I16Regs:$s,1), - (V4i16Extract V4I16Regs:$s,2), - (V4i16Extract V4I16Regs:$s,3))))>; - -// Fp vector to int vector convert -// v2i64 <- v4f32 -def : Pat<(v2i64 (bitconvert V4F32Regs:$s)), - (Build_Vector2_i64 - (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,0), - (V4f32Extract V4F32Regs:$s,1))), - (BITCONVERT_64_F2I (V2F32toF64 (V4f32Extract V4F32Regs:$s,2), - (V4f32Extract V4F32Regs:$s,3))))>; -// v2i64 <- v2f64 -def : Pat<(v2i64 (bitconvert V2F64Regs:$s)), - (Build_Vector2_i64 - (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,0)), - (BITCONVERT_64_F2I (V2f64Extract V2F64Regs:$s,1)))>; -// v2i32 <- v2f32 -def : Pat<(v2i32 (bitconvert V2F32Regs:$s)), - (Build_Vector2_i32 - (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,0)), - (BITCONVERT_32_F2I (V2f32Extract V2F32Regs:$s,1)))>; -// v4i32 <- v2f64 -def : Pat<(v4i32 (bitconvert V2F64Regs:$s)), - (Build_Vector4_i32 - (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 - (V2f64Extract V2F64Regs:$s, 0)), 0)), - (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 - (V2f64Extract V2F64Regs:$s, 0)), 1)), - (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 - (V2f64Extract V2F64Regs:$s, 1)), 0)), - (BITCONVERT_32_F2I (V2f32Extract (VecF64toV2F32 - (V2f64Extract V2F64Regs:$s, 1)), 1)))>; -// v4i32 <- v4f32 -def : Pat<(v4i32 (bitconvert V4F32Regs:$s)), - (Build_Vector4_i32 - (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,0)), - (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,1)), - (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,2)), - (BITCONVERT_32_F2I (V4f32Extract V4F32Regs:$s,3)))>; -// v4i16 <- v2f32 -def : Pat<(v4i16 (bitconvert V2F32Regs:$s)), - (VecI64toV4I16 (BITCONVERT_64_F2I - (V2F32toF64 (V2f32Extract V2F32Regs:$s,0), - (V2f32Extract V2F32Regs:$s,1))))>; diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 98be18b10998f..2fbf51007c3df 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -251,7 +251,6 @@ namespace { struct PPCOperand; class PPCAsmParser : public MCTargetAsmParser { - const MCInstrInfo &MII; bool IsPPC64; bool IsDarwin; @@ -298,7 +297,7 @@ class PPCAsmParser : public MCTargetAsmParser { public: PPCAsmParser(const MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, STI), MII(MII) { + : MCTargetAsmParser(Options, STI, MII) { // Check for 64-bit vs. 32-bit pointer mode. 
     const Triple &TheTriple = STI.getTargetTriple();
     IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index bdad2fe8714fd..2a1de244da923 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -204,7 +204,8 @@ namespace {
   public:
     DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { }

-    MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+    std::unique_ptr<MCObjectWriter>
+    createObjectWriter(raw_pwrite_stream &OS) const override {
       bool is64 = getPointerSize() == 8;
       return createPPCMachObjectWriter(
           OS,
@@ -220,7 +221,8 @@ namespace {
     ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) :
       PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { }

-    MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+    std::unique_ptr<MCObjectWriter>
+    createObjectWriter(raw_pwrite_stream &OS) const override {
       bool is64 = getPointerSize() == 8;
       return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI);
     }
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 1488bd5b0be61..44ee9733b16e1 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -13,6 +13,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -416,10 +417,9 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
   }
 }

-MCObjectWriter *llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS,
-                                               bool Is64Bit,
-                                               bool IsLittleEndian,
-                                               uint8_t OSABI) {
-  MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI);
-  return createELFObjectWriter(MOTW, OS, IsLittleEndian);
+std::unique_ptr<MCObjectWriter>
+llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+                               bool IsLittleEndian, uint8_t OSABI) {
+  auto MOTW = llvm::make_unique<PPCELFObjectWriter>(Is64Bit, OSABI);
+  return createELFObjectWriter(std::move(MOTW), OS, IsLittleEndian);
 }
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index d30bf1a56e8aa..8ac461b96b88c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -24,6 +24,7 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
   }

   IsLittleEndian = false;
+  SeparatorString = "@";
   CommentString = ";";
   ExceptionsType = ExceptionHandling::DwarfCFI;

diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 893233ee2300f..99fec6c554b06 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -19,6 +19,7 @@
 #include "llvm/Support/MathExtras.h"
 #include <cstdint>
+#include <memory>

 namespace llvm {

@@ -47,12 +48,15 @@ MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
                                   const MCTargetOptions &Options);

 /// Construct a PPC ELF object writer.
-MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
-                                         bool IsLittleEndian, uint8_t OSABI);
+std::unique_ptr<MCObjectWriter> createPPCELFObjectWriter(raw_pwrite_stream &OS,
+                                                         bool Is64Bit,
+                                                         bool IsLittleEndian,
+                                                         uint8_t OSABI);

 /// Construct a PPC Mach-O object writer.
-MCObjectWriter *createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
-                                          uint32_t CPUType,
-                                          uint32_t CPUSubtype);
+std::unique_ptr<MCObjectWriter> createPPCMachObjectWriter(raw_pwrite_stream &OS,
+                                                          bool Is64Bit,
+                                                          uint32_t CPUType,
+                                                          uint32_t CPUSubtype);

 /// Returns true iff Val consists of one contiguous run of 1s with any number of
 /// 0s on either side.  The 1s are allowed to wrap from LSB to MSB, so
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index d5506277ca880..4b9055ec70419 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -374,10 +374,10 @@ void PPCMachObjectWriter::RecordPPCRelocation(
   Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
 }

-MCObjectWriter *llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS,
-                                                bool Is64Bit, uint32_t CPUType,
-                                                uint32_t CPUSubtype) {
+std::unique_ptr<MCObjectWriter>
+llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+                                uint32_t CPUType, uint32_t CPUSubtype) {
   return createMachObjectWriter(
-      new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS,
+      llvm::make_unique<PPCMachObjectWriter>(Is64Bit, CPUType, CPUSubtype), OS,
       /*IsLittleEndian=*/false);
 }
diff --git a/lib/Target/PowerPC/P9InstrResources.td b/lib/Target/PowerPC/P9InstrResources.td
index f7310b54448a2..510352d5a9b24 100644
--- a/lib/Target/PowerPC/P9InstrResources.td
+++ b/lib/Target/PowerPC/P9InstrResources.td
@@ -495,6 +495,18 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
     XSNMSUBMSP
 )>;

+// 7 cycle Restricted DP operation and one 2 cycle ALU operation.
+// The DP is restricted so we need a full 5 dispatches.
+def : InstRW<[P9_DPOpAndALUOp_9C, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    FMULo,
+    FMADDo,
+    FMSUBo,
+    FNMADDo,
+    FNMSUBo
+)>;
+
 // 7 cycle DP operation. One DP unit, one EXEC pipeline and two dispatch units.
 def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
     (instrs
@@ -823,6 +835,8 @@ def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C,
 def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
               DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
     (instrs
+    DIVDo,
+    DIVDUo,
     DIVWEo,
     DIVWEUo
 )>;
@@ -872,6 +886,13 @@ def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
     FDIV
 )>;

+// 33 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
+def : InstRW<[P9_DPOpAndALUOp_35C_8, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    FDIVo
+)>;
+
 // 33 Cycle DP Instruction. Takes one slice and 2 dispatches.
 def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C],
     (instrs
@@ -884,6 +905,13 @@ def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
     FDIVS
 )>;

+// 22 Cycle DP Instruction Restricted and Cracked with 2 Cycle ALU.
+def : InstRW<[P9_DPOpAndALUOp_24C_5, IP_EXEC_1C, IP_EXEC_1C,
+              DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+    (instrs
+    FDIVSo
+)>;
+
 // 22 Cycle DP Instruction. Takes one slice and 2 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C], (instrs diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 8d61e81b1fc7c..8784a83190292 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -26,12 +26,17 @@ #include "PPC.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" +#include "PPCTargetTransformInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" @@ -64,6 +69,13 @@ using namespace llvm; static cl::opt CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); #endif +// The latency of mtctr is only justified if there are more than 4 +// comparisons that will be removed as a result. +static cl::opt +SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, + cl::desc("Loops with a constant trip count smaller than " + "this value will not use the count register.")); + STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); namespace llvm { @@ -95,6 +107,8 @@ namespace { AU.addRequired(); AU.addPreserved(); AU.addRequired(); + AU.addRequired(); + AU.addRequired(); } private: @@ -107,10 +121,12 @@ namespace { const PPCTargetLowering *TLI; const DataLayout *DL; const TargetLibraryInfo *LibInfo; + const TargetTransformInfo *TTI; LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; bool PreserveLCSSA; + TargetSchedModel SchedModel; }; char PPCCTRLoops::ID = 0; @@ -179,6 +195,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) { LI = &getAnalysis().getLoopInfo(); SE = &getAnalysis().getSE(); DT = &getAnalysis().getDomTree(); + TTI = &getAnalysis().getTTI(F); DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable(); LibInfo = TLIP ? &TLIP->getTLI() : nullptr; @@ -462,10 +479,24 @@ bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) { return false; } - bool PPCCTRLoops::convertToCTRLoop(Loop *L) { bool MadeChange = false; + // Do not convert small short loops to CTR loop. + unsigned ConstTripCount = SE->getSmallConstantTripCount(L); + if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) { + SmallPtrSet EphValues; + auto AC = getAnalysis().getAssumptionCache( + *L->getHeader()->getParent()); + CodeMetrics::collectEphemeralValues(L, &AC, EphValues); + CodeMetrics Metrics; + for (BasicBlock *BB : L->blocks()) + Metrics.analyzeBasicBlock(BB, *TTI, EphValues); + // 6 is an approximate latency for the mtctr instruction. + if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) + return false; + } + // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 756e35a6e6c60..0a01fdf9e6764 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -312,11 +312,9 @@ static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { // Live in and live out values already must be in the mask, so don't bother // marking them. 
- for (MachineRegisterInfo::livein_iterator - I = MF->getRegInfo().livein_begin(), - E = MF->getRegInfo().livein_end(); I != E; ++I) { - unsigned RegNo = TRI->getEncodingValue(I->first); - if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. + for (std::pair LI : MF->getRegInfo().liveins()) { + unsigned RegNo = TRI->getEncodingValue(LI.first); + if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 945e764a2d23f..8ea3689b08e66 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -579,8 +579,6 @@ bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) { unsigned MB, ME; if (isRunOfOnes(InsertMask, MB, ME)) { - SDValue Tmp1, Tmp2; - if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && isInt32Immediate(Op1.getOperand(1), Value)) { Op1 = Op1.getOperand(0); @@ -1063,6 +1061,25 @@ class BitPermutationSelector { return std::make_pair(Interesting = true, &Bits); } + case ISD::ZERO_EXTEND: { + // We support only the case with zero extension from i32 to i64 so far. + if (V.getValueType() != MVT::i64 || + V.getOperand(0).getValueType() != MVT::i32) + break; + + const SmallVector *LHSBits; + const unsigned NumOperandBits = 32; + std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), + NumOperandBits); + + for (unsigned i = 0; i < NumOperandBits; ++i) + Bits[i] = (*LHSBits)[i]; + + for (unsigned i = NumOperandBits; i < NumBits; ++i) + Bits[i] = ValueBit(ValueBit::ConstZero); + + return std::make_pair(Interesting, &Bits); + } } for (unsigned i = 0; i < NumBits; ++i) @@ -1324,6 +1341,24 @@ class BitPermutationSelector { return ~Mask; } + // This method extends an input value to 64 bit if input is 32-bit integer. + // While selecting instructions in BitPermutationSelector in 64-bit mode, + // an input value can be a 32-bit integer if a ZERO_EXTEND node is included. + // In such case, we extend it to 64 bit to be consistent with other values. + SDValue ExtendToInt64(SDValue V, const SDLoc &dl) { + if (V.getValueSizeInBits() == 64) + return V; + + assert(V.getValueSizeInBits() == 32); + SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); + SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, + MVT::i64), 0); + SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, + MVT::i64, ImDef, V, + SubRegIdx), 0); + return ExtVal; + } + // Depending on the number of groups for a particular value, it might be // better to rotate, mask explicitly (using andi/andis), and then or the // result. Select this part of the result first. 
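Every rotate-and-mask emitter in the hunks that follow funnels its operands through the ExtendToInt64 helper added above. The underlying SelectionDAG idiom is worth seeing in isolation; this is a minimal sketch assuming the surrounding PPCDAGToDAGISel context (the free-function shape and the name widenTo64 are illustrative, not part of the patch):

    // Widen an i32 value to i64 by inserting it into the low 32 bits of an
    // undefined 64-bit register. IMPLICIT_DEF emits no machine code, so the
    // only cost is INSERT_SUBREG bookkeeping; the high 32 bits stay
    // unspecified until a following rotate or mask defines them.
    SDValue widenTo64(SelectionDAG *CurDAG, SDValue V32, const SDLoc &dl) {
      SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
      SDValue ImpDef =
          SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
      return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
                                            ImpDef, V32, SubRegIdx), 0);
    }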
@@ -1540,27 +1575,30 @@ class BitPermutationSelector { assert(InstMaskStart >= 32 && "Mask cannot start out of range"); assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); SDValue Ops[] = - { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), - getI32Imm(InstMaskEnd - 32, dl) }; + { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), + getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63) { SDValue Ops[] = - { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; + { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), + getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0); } if (InstMaskStart == 0) { SDValue Ops[] = - { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskEnd, dl) }; + { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), + getI32Imm(InstMaskEnd, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63 - RLAmt) { SDValue Ops[] = - { V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; + { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), + getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0); } @@ -1601,15 +1639,16 @@ class BitPermutationSelector { assert(InstMaskStart >= 32 && "Mask cannot start out of range"); assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); SDValue Ops[] = - { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart - 32, dl), - getI32Imm(InstMaskEnd - 32, dl) }; + { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), + getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops), 0); } if (InstMaskEnd == 63 - RLAmt) { SDValue Ops[] = - { Base, V, getI32Imm(RLAmt, dl), getI32Imm(InstMaskStart, dl) }; + { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), + getI32Imm(InstMaskStart, dl) }; return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0); } @@ -1759,10 +1798,14 @@ class BitPermutationSelector { SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, - VRot, getI32Imm(ANDIMask, dl)), 0); + ExtendToInt64(VRot, dl), + getI32Imm(ANDIMask, dl)), + 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, - VRot, getI32Imm(ANDISMask, dl)), 0); + ExtendToInt64(VRot, dl), + getI32Imm(ANDISMask, dl)), + 0); if (!ANDIVal) TotalVal = ANDISVal; @@ -1770,19 +1813,21 @@ class BitPermutationSelector { TotalVal = ANDIVal; else TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, - ANDIVal, ANDISVal), 0); + ExtendToInt64(ANDIVal, dl), ANDISVal), 0); } else { TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); TotalVal = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, - VRot, TotalVal), 0); + ExtendToInt64(VRot, dl), TotalVal), + 0); } if (!Res) Res = TotalVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, - Res, TotalVal), 0); + ExtendToInt64(Res, dl), TotalVal), + 0); // Now, remove all groups with this underlying value and rotation // factor. 
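The ANDIMask/ANDISMask pair threaded through the hunks above exists because the PPC immediate-AND instructions each carry only a 16-bit immediate: andi. (ANDIo8) tests the low half of a 32-bit mask and andis. (ANDISo8) the high half. A hedged sketch of the split, with made-up example values:

    uint64_t Mask      = 0x00FF00FF;             // example mask only
    uint16_t ANDIMask  = Mask & 0xFFFF;          // low 16 bits,  andi. immediate
    uint16_t ANDISMask = (Mask >> 16) & 0xFFFF;  // high 16 bits, andis. immediate
    // Only when both halves are non-zero do the two CR-setting partial
    // results need to be recombined, which is the OR8 node built above.
    bool NeedsOR8 = ANDIMask != 0 && ANDISMask != 0;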
@@ -1902,10 +1947,10 @@ class BitPermutationSelector { SDValue ANDIVal, ANDISVal; if (ANDIMask != 0) ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, - Res, getI32Imm(ANDIMask, dl)), 0); + ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0); if (ANDISMask != 0) ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, - Res, getI32Imm(ANDISMask, dl)), 0); + ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0); if (!ANDIVal) Res = ANDISVal; @@ -1913,14 +1958,14 @@ class BitPermutationSelector { Res = ANDIVal; else Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, - ANDIVal, ANDISVal), 0); + ExtendToInt64(ANDIVal, dl), ANDISVal), 0); } else { if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1; SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, - Res, MaskVal), 0); + ExtendToInt64(Res, dl), MaskVal), 0); } } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 6295693ffff26..030aa49086783 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3618,6 +3618,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store; @@ -3652,6 +3653,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( break; unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Addr = FIN; if (j) { @@ -3688,6 +3690,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) @@ -3733,6 +3736,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( // since otherwise we never run out of FPRs before running out // of GPRs. unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); + FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::f32) { @@ -13273,8 +13277,9 @@ bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return TargetLowering::isZExtFree(Val, VT2); } -bool PPCTargetLowering::isFPExtFree(EVT VT) const { - assert(VT.isFloatingPoint()); +bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const { + assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && + "invalid fpext types"); return true; } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 591f2ee1c4612..eac9dc53dc453 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -758,7 +758,7 @@ namespace llvm { bool isZExtFree(SDValue Val, EVT VT2) const override; - bool isFPExtFree(EVT VT) const override; + bool isFPExtFree(EVT DestVT, EVT SrcVT) const override; /// \brief Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. 
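Before the patch moves into PPCInstrInfo.cpp, note that the addLiveInAttr calls added to LowerFormalArguments_64SVR4 above only record the ABI's SExt/ZExt argument flags; nothing in this file reads them back. The consumer is the extension-elimination logic introduced later in this patch (PPCInstrInfo::isSignOrZeroExtended). A minimal sketch of the query side, with an illustrative helper name:

    // If the ABI already guarantees that the argument arrived sign-extended,
    // a later EXTSW of VReg adds nothing and can be dropped.
    static bool isRedundantSExt32To64(const MachineFunction &MF, unsigned VReg) {
      const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
      return MF.getRegInfo().isLiveIn(VReg) && FuncInfo->isLiveInSExt(VReg);
    }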
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 1e4cc4a3c80d0..70920294aea2a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -260,6 +260,7 @@ bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, switch (MI.getOpcode()) { default: return false; case PPC::EXTSW: + case PPC::EXTSW_32: case PPC::EXTSW_32_64: SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); @@ -281,7 +282,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, case PPC::RESTORE_CRBIT: case PPC::LVX: case PPC::LXVD2X: - case PPC::LXVX: + case PPC::LXV: case PPC::QVLFDX: case PPC::QVLFSXs: case PPC::QVLFDXb: @@ -335,7 +336,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI, case PPC::SPILL_CRBIT: case PPC::STVX: case PPC::STXVD2X: - case PPC::STXVX: + case PPC::STXV: case PPC::QVSTFDX: case PPC::QVSTFSXs: case PPC::QVSTFDXb: @@ -1048,7 +1049,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); NonRI = true; } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { - unsigned Op = Subtarget.hasP9Vector() ? PPC::STXVX : PPC::STXVD2X; + unsigned Op = Subtarget.hasP9Vector() ? PPC::STXV : PPC::STXVD2X; NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op)) .addReg(SrcReg, getKillRegState(isKill)), @@ -1186,7 +1187,7 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL, FrameIdx)); NonRI = true; } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { - unsigned Op = Subtarget.hasP9Vector() ? PPC::LXVX : PPC::LXVD2X; + unsigned Op = Subtarget.hasP9Vector() ? PPC::LXV : PPC::LXVD2X; NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op), DestReg), FrameIdx)); NonRI = true; @@ -1633,37 +1634,20 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, // Get the unique definition of SrcReg. MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); if (!MI) return false; - int MIOpC = MI->getOpcode(); bool equalityOnly = false; bool noSub = false; if (isPPC64) { if (is32BitSignedCompare) { // We can perform this optimization only if MI is sign-extending. - if (MIOpC == PPC::SRAW || MIOpC == PPC::SRAWo || - MIOpC == PPC::SRAWI || MIOpC == PPC::SRAWIo || - MIOpC == PPC::EXTSB || MIOpC == PPC::EXTSBo || - MIOpC == PPC::EXTSH || MIOpC == PPC::EXTSHo || - MIOpC == PPC::EXTSW || MIOpC == PPC::EXTSWo) { + if (isSignExtended(*MI)) noSub = true; - } else + else return false; } else if (is32BitUnsignedCompare) { - // 32-bit rotate and mask instructions are zero extending only if MB <= ME - bool isZeroExtendingRotate = - (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINMo || - MIOpC == PPC::RLWNM || MIOpC == PPC::RLWNMo) - && MI->getOperand(3).getImm() <= MI->getOperand(4).getImm(); - // We can perform this optimization, equality only, if MI is // zero-extending. - // FIXME: Other possible target instructions include ANDISo and - // RLWINM aliases, such as ROTRWI, EXTLWI, SLWI and SRWI. - if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo || - MIOpC == PPC::SLW || MIOpC == PPC::SLWo || - MIOpC == PPC::SRW || MIOpC == PPC::SRWo || - MIOpC == PPC::ANDIo || - isZeroExtendingRotate) { + if (isZeroExtended(*MI)) { noSub = true; equalityOnly = true; } else @@ -1731,38 +1715,47 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg, else if (MI->getParent() != CmpInstr.getParent()) return false; else if (Value != 0) { - // The record-form instructions set CR bit based on signed comparison against 0. 
-    // We try to convert a compare against 1 or -1 into a compare against 0.
-    bool Success = false;
-    if (!equalityOnly && MRI->hasOneUse(CRReg)) {
-      MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
-      if (UseMI->getOpcode() == PPC::BCC) {
-        PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
-        unsigned PredCond = PPC::getPredicateCondition(Pred);
-        unsigned PredHint = PPC::getPredicateHint(Pred);
-        int16_t Immed = (int16_t)Value;
-
-        // When modyfing the condition in the predicate, we propagate hint bits
-        // from the original predicate to the new one.
-        if (Immed == -1 && PredCond == PPC::PRED_GT) {
-          // We convert "greater than -1" into "greater than or equal to 0",
-          // since we are assuming signed comparison by !equalityOnly
-          PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
-                                  PPC::getPredicate(PPC::PRED_GE, PredHint)));
-          Success = true;
-        }
-        else if (Immed == 1 && PredCond == PPC::PRED_LT) {
-          // We convert "less than 1" into "less than or equal to 0".
-          PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
-                                  PPC::getPredicate(PPC::PRED_LE, PredHint)));
-          Success = true;
-        }
-      }
-    }
+    // The record-form instructions set the CR bit based on signed comparison
+    // against 0. We try to convert a compare against 1 or -1 into a compare
+    // against 0 to exploit record-form instructions. For example, we change
+    // the condition "greater than -1" into "greater than or equal to 0"
+    // and "less than 1" into "less than or equal to 0".
+
+    // Since we optimize comparison based on a specific branch condition,
+    // we don't optimize if the condition code is used more than once.
+    if (equalityOnly || !MRI->hasOneUse(CRReg))
+      return false;

-    // PPC does not have a record-form SUBri.
-    if (!Success)
+    MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);
+    if (UseMI->getOpcode() != PPC::BCC)
       return false;
+
+    PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();
+    PPC::Predicate NewPred = Pred;
+    unsigned PredCond = PPC::getPredicateCondition(Pred);
+    unsigned PredHint = PPC::getPredicateHint(Pred);
+    int16_t Immed = (int16_t)Value;
+
+    // When modifying the condition in the predicate, we propagate hint bits
+    // from the original predicate to the new one.
+    if (Immed == -1 && PredCond == PPC::PRED_GT)
+      // We convert "greater than -1" into "greater than or equal to 0",
+      // since we are assuming signed comparison by !equalityOnly
+      NewPred = PPC::getPredicate(PPC::PRED_GE, PredHint);
+    else if (Immed == -1 && PredCond == PPC::PRED_LE)
+      // We convert "less than or equal to -1" into "less than 0".
+      NewPred = PPC::getPredicate(PPC::PRED_LT, PredHint);
+    else if (Immed == 1 && PredCond == PPC::PRED_LT)
+      // We convert "less than 1" into "less than or equal to 0".
+      NewPred = PPC::getPredicate(PPC::PRED_LE, PredHint);
+    else if (Immed == 1 && PredCond == PPC::PRED_GE)
+      // We convert "greater than or equal to 1" into "greater than 0".
+      NewPred = PPC::getPredicate(PPC::PRED_GT, PredHint);
+    else
+      return false;
+
+    PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
+                                           NewPred));
   }

   // Search for Sub.
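The chain of else-ifs above encodes four identities on signed comparisons against an immediate. Collected in one place, as a restatement for exposition only (the patch itself updates the branch's predicate operand through PredsToUpdate rather than returning a value; Optional is llvm::Optional):

    // x >  -1  becomes  x >= 0   (PRED_GT with Imm == -1, rewritten to PRED_GE)
    // x <= -1  becomes  x <  0   (PRED_LE with Imm == -1, rewritten to PRED_LT)
    // x <   1  becomes  x <= 0   (PRED_LT with Imm ==  1, rewritten to PRED_LE)
    // x >=  1  becomes  x >  0   (PRED_GE with Imm ==  1, rewritten to PRED_GT)
    static Optional<PPC::Predicate>
    rewriteForZeroCompare(unsigned PredCond, unsigned PredHint, int16_t Immed) {
      if (Immed == -1 && PredCond == PPC::PRED_GT)
        return PPC::getPredicate(PPC::PRED_GE, PredHint);
      if (Immed == -1 && PredCond == PPC::PRED_LE)
        return PPC::getPredicate(PPC::PRED_LT, PredHint);
      if (Immed == 1 && PredCond == PPC::PRED_LT)
        return PPC::getPredicate(PPC::PRED_LE, PredHint);
      if (Immed == 1 && PredCond == PPC::PRED_GE)
        return PPC::getPredicate(PPC::PRED_GT, PredHint);
      return None; // no record-form subtract-immediate exists to fall back on
    }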
@@ -1810,7 +1803,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
   if (!MI) MI = Sub;

   int NewOpC = -1;
-  MIOpC = MI->getOpcode();
+  int MIOpC = MI->getOpcode();
   if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
     NewOpC = MIOpC;
   else {
@@ -2103,3 +2096,248 @@ PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
 int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
   return PPC::getRecordFormOpcode(Opcode);
 }
+
+// This function returns true if the machine instruction
+// always outputs a value by sign-extending a 32-bit value,
+// i.e. bits 0 to 31 are the same as bit 32.
+static bool isSignExtendingOp(const MachineInstr &MI) {
+  int Opcode = MI.getOpcode();
+  if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
+      Opcode == PPC::LIS || Opcode == PPC::LIS8 ||
+      Opcode == PPC::SRAW || Opcode == PPC::SRAWo ||
+      Opcode == PPC::SRAWI || Opcode == PPC::SRAWIo ||
+      Opcode == PPC::LWA || Opcode == PPC::LWAX ||
+      Opcode == PPC::LWA_32 || Opcode == PPC::LWAX_32 ||
+      Opcode == PPC::LHA || Opcode == PPC::LHAX ||
+      Opcode == PPC::LHA8 || Opcode == PPC::LHAX8 ||
+      Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
+      Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
+      Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
+      Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
+      Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
+      Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
+      Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
+      Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 ||
+      Opcode == PPC::EXTSB || Opcode == PPC::EXTSBo ||
+      Opcode == PPC::EXTSH || Opcode == PPC::EXTSHo ||
+      Opcode == PPC::EXTSB8 || Opcode == PPC::EXTSH8 ||
+      Opcode == PPC::EXTSW || Opcode == PPC::EXTSWo ||
+      Opcode == PPC::EXTSH8_32_64 || Opcode == PPC::EXTSW_32_64 ||
+      Opcode == PPC::EXTSB8_32_64)
+    return true;
+
+  if (Opcode == PPC::RLDICL && MI.getOperand(3).getImm() >= 33)
+    return true;
+
+  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
+       Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo) &&
+      MI.getOperand(3).getImm() > 0 &&
+      MI.getOperand(3).getImm() <= MI.getOperand(4).getImm())
+    return true;
+
+  return false;
+}
+
+// This function returns true if the machine instruction
+// always outputs zeros in the higher 32 bits.
+static bool isZeroExtendingOp(const MachineInstr &MI) {
+  int Opcode = MI.getOpcode();
+  // The 16-bit immediate is sign-extended in li/lis.
+  // If the most significant bit is zero, all higher bits are zero.
+  if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
+      Opcode == PPC::LIS || Opcode == PPC::LIS8) {
+    int64_t Imm = MI.getOperand(1).getImm();
+    if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
+      return true;
+  }
+
+  // We have some variations of rotate-and-mask instructions
+  // that clear the higher 32 bits.
+  if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
+       Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo ||
+       Opcode == PPC::RLDICL_32_64) &&
+      MI.getOperand(3).getImm() >= 32)
+    return true;
+
+  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
+      MI.getOperand(3).getImm() >= 32 &&
+      MI.getOperand(3).getImm() <= 63 - MI.getOperand(2).getImm())
+    return true;
+
+  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
+       Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
+       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
+      MI.getOperand(3).getImm() <= MI.getOperand(4).getImm())
+    return true;
+
+  // There are other instructions that clear the higher 32 bits.
+  if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
+      Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
+      Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8 ||
+      Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
+      Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo ||
+      Opcode == PPC::POPCNTD || Opcode == PPC::POPCNTW ||
+      Opcode == PPC::SLW || Opcode == PPC::SLWo ||
+      Opcode == PPC::SRW || Opcode == PPC::SRWo ||
+      Opcode == PPC::SLW8 || Opcode == PPC::SRW8 ||
+      Opcode == PPC::SLWI || Opcode == PPC::SLWIo ||
+      Opcode == PPC::SRWI || Opcode == PPC::SRWIo ||
+      Opcode == PPC::LWZ || Opcode == PPC::LWZX ||
+      Opcode == PPC::LWZU || Opcode == PPC::LWZUX ||
+      Opcode == PPC::LWBRX || Opcode == PPC::LHBRX ||
+      Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
+      Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
+      Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
+      Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
+      Opcode == PPC::LWZ8 || Opcode == PPC::LWZX8 ||
+      Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8 ||
+      Opcode == PPC::LWBRX8 || Opcode == PPC::LHBRX8 ||
+      Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
+      Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8 ||
+      Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
+      Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8 ||
+      Opcode == PPC::ANDIo || Opcode == PPC::ANDISo ||
+      Opcode == PPC::ROTRWI || Opcode == PPC::ROTRWIo ||
+      Opcode == PPC::EXTLWI || Opcode == PPC::EXTLWIo ||
+      Opcode == PPC::MFVSRWZ)
+    return true;
+
+  return false;
+}
+
+// We limit the max depth to track incoming values of PHIs or binary ops
+// (e.g. AND) to avoid excessive cost.
+const unsigned MAX_DEPTH = 1;
+
+bool
+PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
+                                   const unsigned Depth) const {
+  const MachineFunction *MF = MI.getParent()->getParent();
+  const MachineRegisterInfo *MRI = &MF->getRegInfo();
+
+  // If we know this instruction returns a sign- or zero-extended result,
+  // return true.
+  if (SignExt ? isSignExtendingOp(MI) : isZeroExtendingOp(MI))
+    return true;
+
+  switch (MI.getOpcode()) {
+  case PPC::COPY: {
+    unsigned SrcReg = MI.getOperand(1).getReg();
+
+    // In both the ELFv1 and v2 ABI, method parameters and the return value
+    // are sign- or zero-extended.
+    if (MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
+      const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
+      // We check the ZExt/SExt flags for a method parameter.
+      if (MI.getParent()->getBasicBlock() ==
+          &MF->getFunction()->getEntryBlock()) {
+        unsigned VReg = MI.getOperand(0).getReg();
+        if (MF->getRegInfo().isLiveIn(VReg))
+          return SignExt ? FuncInfo->isLiveInSExt(VReg) :
+                           FuncInfo->isLiveInZExt(VReg);
+      }
+
+      // For a method return value, we check the ZExt/SExt flags in the
+      // attribute. We assume the following code sequence for a method call.
+      //   ADJCALLSTACKDOWN 32, %R1<imp-def,dead>, %R1<imp-use>
+      //   BL8_NOP <ga:@func>,...
+      //   ADJCALLSTACKUP 32, 0, %R1<imp-def,dead>, %R1<imp-use>
+      //   %vreg5<def> = COPY %X3; G8RC:%vreg5
+      if (SrcReg == PPC::X3) {
+        const MachineBasicBlock *MBB = MI.getParent();
+        MachineBasicBlock::const_instr_iterator II =
+          MachineBasicBlock::const_instr_iterator(&MI);
+        if (II != MBB->instr_begin() &&
+            (--II)->getOpcode() == PPC::ADJCALLSTACKUP) {
+          const MachineInstr &CallMI = *(--II);
+          if (CallMI.isCall() && CallMI.getOperand(0).isGlobal()) {
+            const Function *CalleeFn =
+              dyn_cast<Function>(CallMI.getOperand(0).getGlobal());
+            if (!CalleeFn)
+              return false;
+            const IntegerType *IntTy =
+              dyn_cast<IntegerType>(CalleeFn->getReturnType());
+            const AttributeSet &Attrs =
+              CalleeFn->getAttributes().getRetAttributes();
+            if (IntTy && IntTy->getBitWidth() <= 32)
+              return Attrs.hasAttribute(SignExt ? Attribute::SExt :
+                                                  Attribute::ZExt);
+          }
+        }
+      }
+    }
+
+    // If this is a copy from another register, we recursively check the
+    // source.
+    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+      return false;
+    const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+    if (SrcMI != NULL)
+      return isSignOrZeroExtended(*SrcMI, SignExt, Depth);
+
+    return false;
+  }
+
+  case PPC::ANDIo:
+  case PPC::ANDISo:
+  case PPC::ORI:
+  case PPC::ORIS:
+  case PPC::XORI:
+  case PPC::XORIS:
+  case PPC::ANDIo8:
+  case PPC::ANDISo8:
+  case PPC::ORI8:
+  case PPC::ORIS8:
+  case PPC::XORI8:
+  case PPC::XORIS8: {
+    // A logical operation with a 16-bit immediate does not change the upper
+    // bits. So, we track the operand register as we do for a register copy.
+    unsigned SrcReg = MI.getOperand(1).getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+      return false;
+    const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+    if (SrcMI != NULL)
+      return isSignOrZeroExtended(*SrcMI, SignExt, Depth);
+
+    return false;
+  }
+
+  // If all incoming values are sign-/zero-extended,
+  // the output of AND, OR, ISEL or PHI is also sign-/zero-extended.
+  case PPC::AND:
+  case PPC::AND8:
+  case PPC::OR:
+  case PPC::OR8:
+  case PPC::ISEL:
+  case PPC::PHI: {
+    if (Depth >= MAX_DEPTH)
+      return false;
+
+    // The input registers for PHI are operand 1, 3, ...
+    // The input registers for others are operand 1 and 2.
+    unsigned E = 3, D = 1;
+    if (MI.getOpcode() == PPC::PHI) {
+      E = MI.getNumOperands();
+      D = 2;
+    }
+
+    for (unsigned I = 1; I != E; I += D) {
+      if (MI.getOperand(I).isReg()) {
+        unsigned SrcReg = MI.getOperand(I).getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+          return false;
+        const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+        if (SrcMI == NULL || !isSignOrZeroExtended(*SrcMI, SignExt, Depth+1))
+          return false;
+      }
+      else
+        return false;
+    }
+    return true;
+  }
+
+  default:
+    break;
+  }
+  return false;
+}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index b0629c88cf57b..ab86a54f6fea7 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -293,6 +293,21 @@ class PPCInstrInfo : public PPCGenInstrInfo {
   }
   const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const;
   static int getRecordFormOpcode(unsigned Opcode);
+
+  bool isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
+                            const unsigned PhiDepth) const;
+
+  /// Return true if the output of the instruction is always sign-extended,
+  /// i.e. bits 0 to 31 are the same as bit 32.
+  bool isSignExtended(const MachineInstr &MI, const unsigned depth = 0) const {
+    return isSignOrZeroExtended(MI, true, depth);
+  }
+
+  /// Return true if the output of the instruction is always zero-extended,
+  /// i.e. bits 0 to 31 are all zeros.
+  bool isZeroExtended(const MachineInstr &MI, const unsigned depth = 0) const {
+    return isSignOrZeroExtended(MI, false, depth);
+  }
 };
 }
diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp
index 20921f4019e93..beb4099290042 100644
--- a/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -29,14 +29,27 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
 #include "MCTargetDesc/PPCPredicates.h"

 using namespace llvm;

 #define DEBUG_TYPE "ppc-mi-peepholes"

+STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
+STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
 STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");

+static cl::opt<bool>
+    EnableSExtElimination("ppc-eliminate-signext",
+                          cl::desc("enable elimination of sign-extensions"),
+                          cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+    EnableZExtElimination("ppc-eliminate-zeroext",
+                          cl::desc("enable elimination of zero-extensions"),
+                          cl::init(false), cl::Hidden);
+
 namespace llvm {
 void initializePPCMIPeepholePass(PassRegistry&);
 }
@@ -110,6 +123,59 @@ static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
   return MRI->getVRegDef(Reg);
 }

+// This function returns the number of known zero bits in the output of MI,
+// starting from the most significant bit.
+static unsigned
+getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
+  unsigned Opcode = MI->getOpcode();
+  if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
+      Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo)
+    return MI->getOperand(3).getImm();
+
+  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
+      MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
+    return MI->getOperand(3).getImm();
+
+  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
+       Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
+       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
+      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
+    return 32 + MI->getOperand(3).getImm();
+
+  if (Opcode == PPC::ANDIo) {
+    uint16_t Imm = MI->getOperand(2).getImm();
+    return 48 + countLeadingZeros(Imm);
+  }
+
+  if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
+      Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
+      Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
+    // The result ranges from 0 to 32.
+    return 58;
+
+  if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
+      Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo)
+    // The result ranges from 0 to 64.
+    return 57;
+
+  if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
+      Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
+      Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
+      Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
+    return 48;
+
+  if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
+      Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
+      Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
+      Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
+    return 56;
+
+  if (TII->isZeroExtended(*MI))
+    return 32;
+
+  return 0;
+}
+
 // Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; @@ -309,6 +375,53 @@ bool PPCMIPeephole::simplifyCode(void) { MI.getOperand(2).setImm(NewElem); } } + + // Splat is fed by a SWAP which is a permute of this form + // XXPERMDI %VA, %VA, 2 + // Since the splat instruction can use any of the vector elements to do + // the splat we do not have to rearrange the elements in the vector + // with a swap before we do the splat. We can simply do the splat from + // a different index. + // If the swap has only one use (the splat) then we can completely + // remove the swap too. + if (DefOpcode == PPC::XXPERMDI && MI.getOperand(1).isImm()) { + unsigned SwapRes = DefMI->getOperand(0).getReg(); + unsigned SwapOp1 = DefMI->getOperand(1).getReg(); + unsigned SwapOp2 = DefMI->getOperand(2).getReg(); + unsigned SwapImm = DefMI->getOperand(3).getImm(); + unsigned SplatImm = MI.getOperand(1).getImm(); + + // Break if this permute is not a swap. + if (SwapOp1 != SwapOp2 || SwapImm != 2) + break; + + unsigned NewElem = 0; + // Compute the new index to use for the splat. + if (MI.getOpcode() == PPC::VSPLTB) + NewElem = (SplatImm + 8) & 0xF; + else if (MI.getOpcode() == PPC::VSPLTH) + NewElem = (SplatImm + 4) & 0x7; + else if (MI.getOpcode() == PPC::XXSPLTW) + NewElem = (SplatImm + 2) & 0x3; + else { + DEBUG(dbgs() << "Unknown splat opcode."); + DEBUG(MI.dump()); + break; + } + + if (MRI->hasOneNonDBGUse(SwapRes)) { + DEBUG(dbgs() << "Removing redundant swap: "); + DEBUG(DefMI->dump()); + ToErase = DefMI; + } + Simplified = true; + DEBUG(dbgs() << "Changing splat immediate from " << SplatImm << + " to " << NewElem << " in instruction: "); + DEBUG(MI.dump()); + MI.getOperand(1).setImm(NewElem); + MI.getOperand(2).setReg(SwapOp1); + } + break; } case PPC::XVCVDPSP: { @@ -367,6 +480,156 @@ bool PPCMIPeephole::simplifyCode(void) { } break; } + case PPC::EXTSH: + case PPC::EXTSH8: + case PPC::EXTSH8_32_64: { + if (!EnableSExtElimination) break; + unsigned NarrowReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(NarrowReg)) + break; + + MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg); + // If we've used a zero-extending load that we will sign-extend, + // just do a sign-extending load. 
+    if (SrcMI->getOpcode() == PPC::LHZ ||
+        SrcMI->getOpcode() == PPC::LHZX) {
+      if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
+        break;
+      auto is64Bit = [] (unsigned Opcode) {
+        return Opcode == PPC::EXTSH8;
+      };
+      auto isXForm = [] (unsigned Opcode) {
+        return Opcode == PPC::LHZX;
+      };
+      auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
+        if (is64Bit)
+          if (isXForm) return PPC::LHAX8;
+          else         return PPC::LHA8;
+        else
+          if (isXForm) return PPC::LHAX;
+          else         return PPC::LHA;
+      };
+      unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
+                                   isXForm(SrcMI->getOpcode()));
+      DEBUG(dbgs() << "Zero-extending load\n");
+      DEBUG(SrcMI->dump());
+      DEBUG(dbgs() << "and sign-extension\n");
+      DEBUG(MI.dump());
+      DEBUG(dbgs() << "are merged into sign-extending load\n");
+      SrcMI->setDesc(TII->get(Opc));
+      SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+      ToErase = &MI;
+      Simplified = true;
+      NumEliminatedSExt++;
+    }
+    break;
+  }
+  case PPC::EXTSW:
+  case PPC::EXTSW_32:
+  case PPC::EXTSW_32_64: {
+    if (!EnableSExtElimination) break;
+    unsigned NarrowReg = MI.getOperand(1).getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
+      break;
+
+    MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
+    // If we've used a zero-extending load that we will sign-extend,
+    // just do a sign-extending load.
+    if (SrcMI->getOpcode() == PPC::LWZ ||
+        SrcMI->getOpcode() == PPC::LWZX) {
+      if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
+        break;
+      auto is64Bit = [] (unsigned Opcode) {
+        return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
+      };
+      auto isXForm = [] (unsigned Opcode) {
+        return Opcode == PPC::LWZX;
+      };
+      auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
+        if (is64Bit)
+          if (isXForm) return PPC::LWAX;
+          else         return PPC::LWA;
+        else
+          if (isXForm) return PPC::LWAX_32;
+          else         return PPC::LWA_32;
+      };
+      unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
+                                   isXForm(SrcMI->getOpcode()));
+      DEBUG(dbgs() << "Zero-extending load\n");
+      DEBUG(SrcMI->dump());
+      DEBUG(dbgs() << "and sign-extension\n");
+      DEBUG(MI.dump());
+      DEBUG(dbgs() << "are merged into sign-extending load\n");
+      SrcMI->setDesc(TII->get(Opc));
+      SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+      ToErase = &MI;
+      Simplified = true;
+      NumEliminatedSExt++;
+    } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
+               TII->isSignExtended(*SrcMI)) {
+      // We can eliminate EXTSW if the input is known to be already
+      // sign-extended.
+      DEBUG(dbgs() << "Removing redundant sign-extension\n");
+      unsigned TmpReg =
+        MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
+      BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
+              TmpReg);
+      BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
+              MI.getOperand(0).getReg())
+          .addReg(TmpReg)
+          .addReg(NarrowReg)
+          .addImm(PPC::sub_32);
+      ToErase = &MI;
+      Simplified = true;
+      NumEliminatedSExt++;
+    }
+    break;
+  }
+  case PPC::RLDICL: {
+    // We can eliminate RLDICL (e.g. for zero-extension)
+    // if all bits to clear are already zero in the input.
+    // This code assumes the following code sequence for zero-extension.
+ // %vreg6 = COPY %vreg5:sub_32; (optional) + // %vreg8 = IMPLICIT_DEF; + // %vreg7 = INSERT_SUBREG %vreg8, %vreg6, sub_32; + if (!EnableZExtElimination) break; + + if (MI.getOperand(2).getImm() != 0) + break; + + unsigned SrcReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + break; + + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG && + SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg())) + break; + + MachineInstr *ImpDefMI, *SubRegMI; + ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); + SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg()); + if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break; + + SrcMI = SubRegMI; + if (SubRegMI->getOpcode() == PPC::COPY) { + unsigned CopyReg = SubRegMI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(CopyReg)) + SrcMI = MRI->getVRegDef(CopyReg); + } + + unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII); + if (MI.getOperand(3).getImm() <= KnownZeroCount) { + DEBUG(dbgs() << "Removing redundant zero-extension\n"); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), + MI.getOperand(0).getReg()) + .addReg(SrcReg); + ToErase = &MI; + Simplified = true; + NumEliminatedZExt++; + } + break; + } // TODO: Any instruction that has an immediate form fed only by a PHI // whose operands are all load immediate can be folded away. We currently @@ -394,9 +657,10 @@ bool PPCMIPeephole::simplifyCode(void) { for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) { MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI); - if (!LiMI || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) || - !MDT->dominates(DefDomMI, LiMI) || - (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)) + if (!LiMI || + (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8) + || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) || + !MDT->dominates(DefDomMI, LiMI)) return false; } @@ -784,7 +1048,7 @@ bool PPCMIPeephole::eliminateRedundantCompare(void) { } else continue; } - else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()){ + else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) { // In case of comparisons between a register and an immediate, // the operand register must be same for two compare instructions. 
     unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index bc2d9a08b5e86..3923417257e8c 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -43,3 +43,17 @@ MCSymbol *PPCFunctionInfo::getTOCOffsetSymbol() const {
     "func_toc" + Twine(MF.getFunctionNumber()));
 }
+
+bool PPCFunctionInfo::isLiveInSExt(unsigned VReg) const {
+  for (const std::pair<unsigned, ISD::ArgFlagsTy> &LiveIn : LiveInAttrs)
+    if (LiveIn.first == VReg)
+      return LiveIn.second.isSExt();
+  return false;
+}
+
+bool PPCFunctionInfo::isLiveInZExt(unsigned VReg) const {
+  for (const std::pair<unsigned, ISD::ArgFlagsTy> &LiveIn : LiveInAttrs)
+    if (LiveIn.first == VReg)
+      return LiveIn.second.isZExt();
+  return false;
+}
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 202e10058b733..34371f7bede7e 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -16,6 +16,7 @@

 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetCallingConv.h"

 namespace llvm {

@@ -113,6 +114,10 @@ class PPCFunctionInfo : public MachineFunctionInfo {
   /// copies
   bool IsSplitCSR = false;

+  /// We keep track of attributes for each live-in virtual register
+  /// to use the SExt/ZExt flags in later optimizations.
+  std::vector<std::pair<unsigned, ISD::ArgFlagsTy>> LiveInAttrs;
+
 public:
   explicit PPCFunctionInfo(MachineFunction &MF) : MF(MF) {}

@@ -175,6 +180,19 @@ class PPCFunctionInfo : public MachineFunctionInfo {
   unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
   void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }

+  /// This function associates attributes with each live-in virtual register.
+  void addLiveInAttr(unsigned VReg, ISD::ArgFlagsTy Flags) {
+    LiveInAttrs.push_back(std::make_pair(VReg, Flags));
+  }
+
+  /// This function returns true if the specified vreg is
+  /// a live-in register and sign-extended.
+  bool isLiveInSExt(unsigned VReg) const;
+
+  /// This function returns true if the specified vreg is
+  /// a live-in register and zero-extended.
+  bool isLiveInZExt(unsigned VReg) const;
+
   int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
   void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }

diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index af62066a17415..d46c1383297ff 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -933,11 +933,16 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     SReg = MF.getRegInfo().createVirtualRegister(RC);

   // Insert a set of rA with the full offset value before the ld, st, or add
-  BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
-    .addImm(Offset >> 16);
-  BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
-    .addReg(SRegHi, RegState::Kill)
-    .addImm(Offset);
+  if (isInt<16>(Offset))
+    BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LI8 : PPC::LI), SReg)
+      .addImm(Offset);
+  else {
+    BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
+      .addImm(Offset >> 16);
+    BuildMI(MBB, II, dl, TII.get(is64Bit ?
PPC::ORI8 : PPC::ORI), SReg) + .addReg(SRegHi, RegState::Kill) + .addImm(Offset); + } // Convert into indexed form of the instruction: // diff --git a/lib/Target/PowerPC/PPCScheduleP9.td b/lib/Target/PowerPC/PPCScheduleP9.td index 6830488deb206..b24f4fc603a15 100644 --- a/lib/Target/PowerPC/PPCScheduleP9.td +++ b/lib/Target/PowerPC/PPCScheduleP9.td @@ -301,6 +301,9 @@ let SchedModel = P9Model in { def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>; def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>; def P9_ALUOpAndALUOp_4C : WriteSequence<[P9_ALU_2C, P9_ALU_2C]>; + def P9_DPOpAndALUOp_9C : WriteSequence<[P9_DP_7C, P9_ALU_2C]>; + def P9_DPOpAndALUOp_24C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_2C]>; + def P9_DPOpAndALUOp_35C_8 : WriteSequence<[P9_DP_33C_8, P9_ALU_2C]>; // ***************** Defining Itinerary Class Resources ***************** diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index d3295a9d22e84..52c5b688d3568 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -189,6 +189,17 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, return PPCTTIImpl::getIntImmCost(Imm, Ty); } +unsigned PPCTTIImpl::getUserCost(const User *U, + ArrayRef Operands) { + if (U->getType()->isVectorTy()) { + // Instructions that need to be split should cost more. + std::pair LT = TLI->getTypeLegalizationCost(DL, U->getType()); + return LT.first * BaseT::getUserCost(U, Operands); + } + + return BaseT::getUserCost(U, Operands); +} + void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { if (ST->getDarwinDirective() == PPC::DIR_A2) { diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h index b6b93ba9379e8..60dea0b022636 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -51,6 +51,8 @@ class PPCTTIImpl : public BasicTTIImplBase { int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty); + unsigned getUserCost(const User *U, ArrayRef Operands); + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); diff --git a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index b0db5f4d8fbd0..486784859bd71 100644 --- a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -72,7 +72,7 @@ class RISCVAsmParser : public MCTargetAsmParser { RISCVAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, STI) { + : MCTargetAsmParser(Options, STI, MII) { setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } }; @@ -146,6 +146,8 @@ struct RISCVOperand : public MCParsedAsmOperand { template bool isBareSimmNLsb0() const { int64_t Imm; RISCVMCExpr::VariantKind VK; + if (!isImm()) + return false; bool IsConstantImm = evaluateConstantImm(Imm, VK); bool IsValid; if (!IsConstantImm) @@ -185,6 +187,8 @@ struct RISCVOperand : public MCParsedAsmOperand { bool isUImm5() const { int64_t Imm; RISCVMCExpr::VariantKind VK; + if (!isImm()) + return false; bool IsConstantImm = evaluateConstantImm(Imm, VK); return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; } @@ -193,6 +197,8 @@ 
struct RISCVOperand : public MCParsedAsmOperand { RISCVMCExpr::VariantKind VK; int64_t Imm; bool IsValid; + if (!isImm()) + return false; bool IsConstantImm = evaluateConstantImm(Imm, VK); if (!IsConstantImm) IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm); @@ -205,6 +211,8 @@ struct RISCVOperand : public MCParsedAsmOperand { bool isUImm12() const { int64_t Imm; RISCVMCExpr::VariantKind VK; + if (!isImm()) + return false; bool IsConstantImm = evaluateConstantImm(Imm, VK); return IsConstantImm && isUInt<12>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; } @@ -215,6 +223,8 @@ struct RISCVOperand : public MCParsedAsmOperand { RISCVMCExpr::VariantKind VK; int64_t Imm; bool IsValid; + if (!isImm()) + return false; bool IsConstantImm = evaluateConstantImm(Imm, VK); if (!IsConstantImm) IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK, Imm); @@ -280,7 +290,7 @@ struct RISCVOperand : public MCParsedAsmOperand { } static std::unique_ptr createImm(const MCExpr *Val, SMLoc S, - SMLoc E, MCContext &Ctx) { + SMLoc E) { auto Op = make_unique(Immediate); Op->Imm.Val = Val; Op->StartLoc = S; @@ -470,7 +480,7 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) { return parseOperandWithModifier(Operands); } - Operands.push_back(RISCVOperand::createImm(Res, S, E, getContext())); + Operands.push_back(RISCVOperand::createImm(Res, S, E)); return MatchOperand_Success; } @@ -510,7 +520,7 @@ RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) { } const MCExpr *ModExpr = RISCVMCExpr::create(SubExpr, VK, getContext()); - Operands.push_back(RISCVOperand::createImm(ModExpr, S, E, getContext())); + Operands.push_back(RISCVOperand::createImm(ModExpr, S, E)); return MatchOperand_Success; } diff --git a/lib/Target/RISCV/CMakeLists.txt b/lib/Target/RISCV/CMakeLists.txt index b9f3fc110c746..bac4d4c353d26 100644 --- a/lib/Target/RISCV/CMakeLists.txt +++ b/lib/Target/RISCV/CMakeLists.txt @@ -3,14 +3,25 @@ set(LLVM_TARGET_DEFINITIONS RISCV.td) tablegen(LLVM RISCVGenRegisterInfo.inc -gen-register-info) tablegen(LLVM RISCVGenInstrInfo.inc -gen-instr-info) tablegen(LLVM RISCVGenMCCodeEmitter.inc -gen-emitter) +tablegen(LLVM RISCVGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM RISCVGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM RISCVGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM RISCVGenCallingConv.inc -gen-callingconv) +tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel) tablegen(LLVM RISCVGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler) add_public_tablegen_target(RISCVCommonTableGen) add_llvm_target(RISCVCodeGen + RISCVAsmPrinter.cpp + RISCVFrameLowering.cpp + RISCVInstrInfo.cpp + RISCVISelDAGToDAG.cpp + RISCVISelLowering.cpp + RISCVMCInstLower.cpp + RISCVRegisterInfo.cpp + RISCVSubtarget.cpp RISCVTargetMachine.cpp ) diff --git a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index e64d875a567fa..003686ac2f312 100644 --- a/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -56,14 +56,14 @@ extern "C" void LLVMInitializeRISCVDisassembler() { } static const unsigned GPRDecoderTable[] = { - RISCV::X0_32, RISCV::X1_32, RISCV::X2_32, RISCV::X3_32, - RISCV::X4_32, RISCV::X5_32, RISCV::X6_32, RISCV::X7_32, - RISCV::X8_32, RISCV::X9_32, RISCV::X10_32, RISCV::X11_32, - RISCV::X12_32, RISCV::X13_32, RISCV::X14_32, RISCV::X15_32, - RISCV::X16_32, RISCV::X17_32, RISCV::X18_32, RISCV::X19_32, - 
RISCV::X20_32, RISCV::X21_32, RISCV::X22_32, RISCV::X23_32, - RISCV::X24_32, RISCV::X25_32, RISCV::X26_32, RISCV::X27_32, - RISCV::X28_32, RISCV::X29_32, RISCV::X30_32, RISCV::X31_32 + RISCV::X0, RISCV::X1, RISCV::X2, RISCV::X3, + RISCV::X4, RISCV::X5, RISCV::X6, RISCV::X7, + RISCV::X8, RISCV::X9, RISCV::X10, RISCV::X11, + RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X16, RISCV::X17, RISCV::X18, RISCV::X19, + RISCV::X20, RISCV::X21, RISCV::X22, RISCV::X23, + RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27, + RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31 }; static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo, diff --git a/lib/Target/RISCV/LLVMBuild.txt b/lib/Target/RISCV/LLVMBuild.txt index e15963b5bd7b4..ab21565b0c2e9 100644 --- a/lib/Target/RISCV/LLVMBuild.txt +++ b/lib/Target/RISCV/LLVMBuild.txt @@ -30,5 +30,6 @@ has_disassembler = 1 type = Library name = RISCVCodeGen parent = RISCV -required_libraries = AsmPrinter Core CodeGen MC RISCVAsmPrinter RISCVDesc RISCVInfo Support Target +required_libraries = AsmPrinter Core CodeGen MC RISCVAsmPrinter RISCVDesc + RISCVInfo SelectionDAG Support Target add_to_library_groups = RISCV diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 692a179e927d0..add63b6e77f77 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -39,7 +39,8 @@ class RISCVAsmBackend : public MCAsmBackend { const MCValue &Target, MutableArrayRef Data, uint64_t Value, bool IsResolved) const override; - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + std::unique_ptr + createObjectWriter(raw_pwrite_stream &OS) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, @@ -182,7 +183,7 @@ void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, return; } -MCObjectWriter * +std::unique_ptr RISCVAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createRISCVELFObjectWriter(OS, OSABI, Is64Bit); } diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index cfb124262c61f..9fafbb0a95ac6 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -26,9 +26,10 @@ enum { InstFormatR = 1, InstFormatI = 2, InstFormatS = 3, - InstFormatSB = 4, + InstFormatB = 4, InstFormatU = 5, - InstFormatOther = 6, + InstFormatJ = 6, + InstFormatOther = 7, InstFormatMask = 15 }; diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index 95d4242e40423..e256156dc9628 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/RISCVMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -61,8 +62,10 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, } } -MCObjectWriter *llvm::createRISCVELFObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI, bool Is64Bit) { - MCELFObjectTargetWriter *MOTW = new RISCVELFObjectWriter(OSABI, Is64Bit); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true); +std::unique_ptr +llvm::createRISCVELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, + bool Is64Bit) { + 
return createELFObjectWriter( + llvm::make_unique(OSABI, Is64Bit), OS, + /*IsLittleEndian=*/true); } diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index f821215933170..f94c37aae8f4c 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -159,7 +159,7 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo, cast(Expr)->getKind() == MCSymbolRefExpr::VK_None) { if (Desc.getOpcode() == RISCV::JAL) { FixupKind = RISCV::fixup_riscv_jal; - } else if (MIFrm == RISCVII::InstFormatSB) { + } else if (MIFrm == RISCVII::InstFormatB) { FixupKind = RISCV::fixup_riscv_branch; } } diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp index 2b35eab577bfa..45de976ec6c2e 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp @@ -42,7 +42,7 @@ static MCInstrInfo *createRISCVMCInstrInfo() { static MCRegisterInfo *createRISCVMCRegisterInfo(const Triple &TT) { MCRegisterInfo *X = new MCRegisterInfo(); - InitRISCVMCRegisterInfo(X, RISCV::X1_32); + InitRISCVMCRegisterInfo(X, RISCV::X1); return X; } @@ -51,6 +51,14 @@ static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI, return new RISCVMCAsmInfo(TT); } +static MCSubtargetInfo *createRISCVMCSubtargetInfo(const Triple &TT, + StringRef CPU, StringRef FS) { + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = TT.isArch64Bit() ? "generic-rv64" : "generic-rv32"; + return createRISCVMCSubtargetInfoImpl(TT, CPUName, FS); +} + static MCInstPrinter *createRISCVMCInstPrinter(const Triple &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, @@ -67,6 +75,6 @@ extern "C" void LLVMInitializeRISCVTargetMC() { TargetRegistry::RegisterMCAsmBackend(*T, createRISCVAsmBackend); TargetRegistry::RegisterMCCodeEmitter(*T, createRISCVMCCodeEmitter); TargetRegistry::RegisterMCInstPrinter(*T, createRISCVMCInstPrinter); - TargetRegistry::RegisterMCSubtargetInfo(*T, createRISCVMCSubtargetInfoImpl); + TargetRegistry::RegisterMCSubtargetInfo(*T, createRISCVMCSubtargetInfo); } } diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h index 9891fd52b2f4e..bea2f8800fa69 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h +++ b/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h @@ -17,6 +17,7 @@ #include "llvm/Config/config.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/DataTypes.h" +#include namespace llvm { class MCAsmBackend; @@ -43,8 +44,8 @@ MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU, const MCTargetOptions &Options); -MCObjectWriter *createRISCVELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, - bool Is64Bit); +std::unique_ptr +createRISCVELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, bool Is64Bit); } // Defines symbolic names for RISC-V registers. diff --git a/lib/Target/RISCV/RISCV.h b/lib/Target/RISCV/RISCV.h new file mode 100644 index 0000000000000..1b6140203c870 --- /dev/null +++ b/lib/Target/RISCV/RISCV.h @@ -0,0 +1,31 @@ +//===-- RISCV.h - Top-level interface for RISCV -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
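Editorial aside: createRISCVMCSubtargetInfo above defaults an empty CPU name by pointer width. A tiny standalone stand-in follows; defaultCPU is hypothetical (the real code operates on a Triple and a StringRef), and "rocket" is just an arbitrary example string:

#include <cassert>
#include <string>

static std::string defaultCPU(std::string CPU, bool IsArch64Bit) {
  if (CPU.empty())
    CPU = IsArch64Bit ? "generic-rv64" : "generic-rv32";
  return CPU;
}

int main() {
  assert(defaultCPU("", false) == "generic-rv32");
  assert(defaultCPU("", true) == "generic-rv64");
  assert(defaultCPU("rocket", true) == "rocket"); // explicit CPUs pass through
  return 0;
}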
+// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// RISC-V back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCV_H +#define LLVM_LIB_TARGET_RISCV_RISCV_H + +#include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class RISCVTargetMachine; +class MCInst; +class MachineInstr; + +void LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI); + +FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM); +} + +#endif diff --git a/lib/Target/RISCV/RISCV.td b/lib/Target/RISCV/RISCV.td index 19e11839ac3a6..54aa570e13b08 100644 --- a/lib/Target/RISCV/RISCV.td +++ b/lib/Target/RISCV/RISCV.td @@ -9,19 +9,42 @@ include "llvm/Target/Target.td" -include "RISCVRegisterInfo.td" -include "RISCVInstrInfo.td" +//===----------------------------------------------------------------------===// +// RISC-V subtarget features and instruction predicates. +//===----------------------------------------------------------------------===// +def Feature64Bit : SubtargetFeature<"64bit", "HasRV64", "true", + "Implements RV64">; -def RISCVInstrInfo : InstrInfo; +def RV64 : HwMode<"+64bit">; +def RV32 : HwMode<"-64bit">; -def Feature64Bit : SubtargetFeature<"64bit", "HasRV64", "true", - "Implements RV64">; +//===----------------------------------------------------------------------===// +// Registers, calling conventions, instruction descriptions. +//===----------------------------------------------------------------------===// + +include "RISCVRegisterInfo.td" +include "RISCVCallingConv.td" +include "RISCVInstrInfo.td" + +//===----------------------------------------------------------------------===// +// RISC-V processors supported. +//===----------------------------------------------------------------------===// def : ProcessorModel<"generic-rv32", NoSchedModel, []>; def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>; +//===----------------------------------------------------------------------===// +// Define the RISC-V target. +//===----------------------------------------------------------------------===// + +def RISCVInstrInfo : InstrInfo { + // TODO: disable guessInstructionProperties when + // https://reviews.llvm.org/D37065 lands. + let guessInstructionProperties = 1; +} + def RISCVAsmParser : AsmParser { let ShouldEmitMatchRegisterAltName = 1; } diff --git a/lib/Target/RISCV/RISCVAsmPrinter.cpp b/lib/Target/RISCV/RISCVAsmPrinter.cpp new file mode 100644 index 0000000000000..1c213b6c7e999 --- /dev/null +++ b/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -0,0 +1,67 @@ +//===-- RISCVAsmPrinter.cpp - RISCV LLVM assembly writer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to the RISCV assembly language. 
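Editorial aside: RISCV.td above keys RV32/RV64 selection off a single "+64bit" subtarget feature (Feature64Bit sets HasRV64; the HwMode pair RV64/RV32 tests its presence). A toy stand-in for that flag check, assuming the usual "+feature" spelling; real feature-string parsing in LLVM is more involved:

#include <cassert>
#include <string>

static bool hasRV64Feature(const std::string &FS) {
  return FS.find("+64bit") != std::string::npos;
}

int main() {
  assert(hasRV64Feature("+64bit"));
  assert(!hasRV64Feature(""));
  return 0;
}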
+// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "InstPrinter/RISCVInstPrinter.h" +#include "RISCVTargetMachine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +namespace { +class RISCVAsmPrinter : public AsmPrinter { +public: + explicit RISCVAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : AsmPrinter(TM, std::move(Streamer)) {} + + StringRef getPassName() const override { return "RISCV Assembly Printer"; } + + void EmitInstruction(const MachineInstr *MI) override; + + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); +}; +} + +// Simple pseudo-instructions have their lowering (with expansion to real +// instructions) auto-generated. +#include "RISCVGenMCPseudoLowering.inc" + +void RISCVAsmPrinter::EmitInstruction(const MachineInstr *MI) { + // Do any auto-generated pseudo lowerings. + if (emitPseudoExpansionLowering(*OutStreamer, MI)) + return; + + MCInst TmpInst; + LowerRISCVMachineInstrToMCInst(MI, TmpInst); + EmitToStreamer(*OutStreamer, TmpInst); +} + +// Force static initialization. +extern "C" void LLVMInitializeRISCVAsmPrinter() { + RegisterAsmPrinter X(getTheRISCV32Target()); + RegisterAsmPrinter Y(getTheRISCV64Target()); +} diff --git a/lib/Target/RISCV/RISCVCallingConv.td b/lib/Target/RISCV/RISCVCallingConv.td new file mode 100644 index 0000000000000..e0c25e32e0125 --- /dev/null +++ b/lib/Target/RISCV/RISCVCallingConv.td @@ -0,0 +1,29 @@ +//===-- RISCVCallingConv.td - Calling Conventions RISCV ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the RISCV architecture. +// +//===----------------------------------------------------------------------===// + +// RISCV 32-bit C return-value convention. +def RetCC_RISCV32 : CallingConv<[CCIfType<[i32], CCAssignToReg<[X10, X11]>>]>; + +// RISCV 32-bit C Calling convention. +def CC_RISCV32 : CallingConv<[ + // Promote i8/i16 args to i32 + CCIfType<[ i8, i16 ], CCPromoteToType>, + + // All arguments get passed in integer registers if there is space. + CCIfType<[i32], CCAssignToReg<[ X10, X11, X12, X13, X14, X15, X16, X17]>>, + + // Could be assigned to the stack in 8-byte aligned units, but unsupported + CCAssignToStack<8, 8> +]>; + +def CSR : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; diff --git a/lib/Target/RISCV/RISCVFrameLowering.cpp b/lib/Target/RISCV/RISCVFrameLowering.cpp new file mode 100644 index 0000000000000..fd3b258e26cc1 --- /dev/null +++ b/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -0,0 +1,29 @@ +//===-- RISCVFrameLowering.cpp - RISCV Frame Information ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
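Editorial aside: a worked example of the argument assignment CC_RISCV32 above performs. Small integers are promoted to i32, and each i32 takes the next register from x10..x17 (a0..a7); only once all eight are exhausted would the stack come into play, which this patch still rejects. Standalone illustration, not LLVM code:

#include <cstdio>

int main() {
  const char *ArgRegs[] = {"x10", "x11", "x12", "x13",
                           "x14", "x15", "x16", "x17"};
  const char *ArgTypes[] = {"i8", "i16", "i32", "i32"};
  for (unsigned I = 0; I != 4; ++I)
    printf("arg%u (%s -> i32) in %s\n", I, ArgTypes[I], ArgRegs[I]);
  return 0;
}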
+// +//===----------------------------------------------------------------------===// +// +// This file contains the RISCV implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "RISCVFrameLowering.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const { return true; } + +void RISCVFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const {} + +void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const {} diff --git a/lib/Target/RISCV/RISCVFrameLowering.h b/lib/Target/RISCV/RISCVFrameLowering.h new file mode 100644 index 0000000000000..14772ddac4acd --- /dev/null +++ b/lib/Target/RISCV/RISCVFrameLowering.h @@ -0,0 +1,35 @@ +//===-- RISCVFrameLowering.h - Define frame lowering for RISCV -*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements RISCV-specific bits of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVFRAMELOWERING_H +#define LLVM_LIB_TARGET_RISCV_RISCVFRAMELOWERING_H + +#include "llvm/Target/TargetFrameLowering.h" + +namespace llvm { +class RISCVSubtarget; + +class RISCVFrameLowering : public TargetFrameLowering { +public: + explicit RISCVFrameLowering(const RISCVSubtarget &STI) + : TargetFrameLowering(StackGrowsDown, + /*StackAlignment=*/16, + /*LocalAreaOffset=*/0) {} + + void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + + bool hasFP(const MachineFunction &MF) const override; +}; +} +#endif diff --git a/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp new file mode 100644 index 0000000000000..78f61fa41847d --- /dev/null +++ b/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -0,0 +1,63 @@ +//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the RISCV target. +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "RISCVTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "riscv-isel" + +// RISCV-specific code to select RISCV machine instructions for +// SelectionDAG operations. 
+namespace { +class RISCVDAGToDAGISel final : public SelectionDAGISel { +public: + explicit RISCVDAGToDAGISel(RISCVTargetMachine &TargetMachine) + : SelectionDAGISel(TargetMachine) {} + + StringRef getPassName() const override { + return "RISCV DAG->DAG Pattern Instruction Selection"; + } + + void Select(SDNode *Node) override; + +// Include the pieces autogenerated from the target description. +#include "RISCVGenDAGISel.inc" +}; +} + +void RISCVDAGToDAGISel::Select(SDNode *Node) { + // Dump information about the Node being selected. + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); + + // If we have a custom node, we have already selected + if (Node->isMachineOpcode()) { + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n"); + Node->setNodeId(-1); + return; + } + + // Select the default instruction. + SelectCode(Node); +} + +// This pass converts a legalized DAG into a RISCV-specific DAG, ready +// for instruction scheduling. +FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) { + return new RISCVDAGToDAGISel(TM); +} diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp new file mode 100644 index 0000000000000..d76170b7b7876 --- /dev/null +++ b/lib/Target/RISCV/RISCVISelLowering.cpp @@ -0,0 +1,170 @@ +//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that RISCV uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#include "RISCVISelLowering.h" +#include "RISCV.h" +#include "RISCVRegisterInfo.h" +#include "RISCVSubtarget.h" +#include "RISCVTargetMachine.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-lower" + +RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, + const RISCVSubtarget &STI) + : TargetLowering(TM), Subtarget(STI) { + + MVT XLenVT = Subtarget.getXLenVT(); + + // Set up the register classes. + addRegisterClass(XLenVT, &RISCV::GPRRegClass); + + // Compute derived properties from the register classes. + computeRegisterProperties(STI.getRegisterInfo()); + + setStackPointerRegisterToSaveRestore(RISCV::X2); + + // TODO: add all necessary setOperationAction calls. + + setBooleanContents(ZeroOrOneBooleanContent); + + // Function alignments (log2). + setMinFunctionAlignment(3); + setPrefFunctionAlignment(3); +} + +SDValue RISCVTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: + report_fatal_error("unimplemented operand"); + } +} + +// Calling Convention Implementation. +#include "RISCVGenCallingConv.inc" + +// Transform physical registers into virtual registers. 
+SDValue RISCVTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Ins, const SDLoc &DL, + SelectionDAG &DAG, SmallVectorImpl &InVals) const { + + switch (CallConv) { + default: + report_fatal_error("Unsupported calling convention"); + case CallingConv::C: + break; + } + + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + MVT XLenVT = Subtarget.getXLenVT(); + + if (IsVarArg) + report_fatal_error("VarArg not supported"); + + // Assign locations to all of the incoming arguments. + SmallVector ArgLocs; + CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV32); + + for (auto &VA : ArgLocs) { + if (!VA.isRegLoc()) + report_fatal_error("Defined with too many args"); + + // Arguments passed in registers. + EVT RegVT = VA.getLocVT(); + if (RegVT != XLenVT) { + DEBUG(dbgs() << "LowerFormalArguments Unhandled argument type: " + << RegVT.getEVTString() << "\n"); + report_fatal_error("unhandled argument type"); + } + const unsigned VReg = + RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + SDValue ArgIn = DAG.getCopyFromReg(Chain, DL, VReg, RegVT); + + InVals.push_back(ArgIn); + } + return Chain; +} + +SDValue +RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &DL, SelectionDAG &DAG) const { + if (IsVarArg) { + report_fatal_error("VarArg not supported"); + } + + // Stores the assignment of the return value to a location. + SmallVector RVLocs; + + // Info about the registers and stack slot. + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + CCInfo.AnalyzeReturn(Outs, RetCC_RISCV32); + + SDValue Flag; + SmallVector RetOps(1, Chain); + + // Copy the result values into the output registers. + for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag); + + // Guarantee that all emitted copies are stuck together. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) { + RetOps.push_back(Flag); + } + + return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps); +} + +const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch ((RISCVISD::NodeType)Opcode) { + case RISCVISD::FIRST_NUMBER: + break; + case RISCVISD::RET_FLAG: + return "RISCVISD::RET_FLAG"; + } + return nullptr; +} diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h new file mode 100644 index 0000000000000..9fed48fc04e5b --- /dev/null +++ b/lib/Target/RISCV/RISCVISelLowering.h @@ -0,0 +1,62 @@ +//===-- RISCVISelLowering.h - RISCV DAG Lowering Interface ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that RISCV uses to lower LLVM code into a +// selection DAG. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H +#define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H + +#include "RISCV.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { +class RISCVSubtarget; +namespace RISCVISD { +enum NodeType : unsigned { + FIRST_NUMBER = ISD::BUILTIN_OP_END, + RET_FLAG +}; +} + +class RISCVTargetLowering : public TargetLowering { + const RISCVSubtarget &Subtarget; + +public: + explicit RISCVTargetLowering(const TargetMachine &TM, + const RISCVSubtarget &STI); + + // Provide custom lowering hooks for some operations. + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + + // This method returns the name of a target specific DAG node. + const char *getTargetNodeName(unsigned Opcode) const override; + +private: + // Lower incoming arguments, copy physregs into vregs + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + const SDLoc &DL, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, + SelectionDAG &DAG) const override; + bool shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const override { + return true; + } +}; +} + +#endif diff --git a/lib/Target/RISCV/RISCVInstrFormats.td b/lib/Target/RISCV/RISCVInstrFormats.td index 383b73cf4e011..48f6cf8762df6 100644 --- a/lib/Target/RISCV/RISCVInstrFormats.td +++ b/lib/Target/RISCV/RISCVInstrFormats.td @@ -35,12 +35,40 @@ def InstFormatPseudo : InstFormat<0>; def InstFormatR : InstFormat<1>; def InstFormatI : InstFormat<2>; def InstFormatS : InstFormat<3>; -def InstFormatSB : InstFormat<4>; +def InstFormatB : InstFormat<4>; def InstFormatU : InstFormat<5>; -def InstFormatOther : InstFormat<6>; +def InstFormatJ : InstFormat<6>; +def InstFormatOther : InstFormat<7>; -class RISCVInst<dag outs, dag ins, string asmstr, list<dag> pattern, - InstFormat format> +// The following opcode names match those given in Table 19.1 in the +// RISC-V User-level ISA specification ("RISC-V base opcode map").
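Editorial aside: one property of the base opcode map worth noting is that every major opcode defined just below is 7 bits wide and ends in 0b11, the RISC-V marker for uncompressed 32-bit instructions. A quick standalone check over a few of the Table 19.1 values:

#include <cassert>

int main() {
  const unsigned Opcodes[] = {0b0000011 /*LOAD*/,  0b0010011 /*OP-IMM*/,
                              0b0100011 /*STORE*/, 0b1100011 /*BRANCH*/,
                              0b1101111 /*JAL*/,   0b1110011 /*SYSTEM*/};
  for (unsigned Opc : Opcodes)
    assert((Opc & 0b11) == 0b11); // low two bits mark a 32-bit instruction
  return 0;
}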
+class RISCVOpcode val> { + bits<7> Value = val; +} +def OPC_LOAD : RISCVOpcode<0b0000011>; +def OPC_LOAD_FP : RISCVOpcode<0b0000111>; +def OPC_MISC_MEM : RISCVOpcode<0b0001111>; +def OPC_OP_IMM : RISCVOpcode<0b0010011>; +def OPC_AUIPC : RISCVOpcode<0b0010111>; +def OPC_OP_IMM_32 : RISCVOpcode<0b0011011>; +def OPC_STORE : RISCVOpcode<0b0100011>; +def OPC_STORE_FP : RISCVOpcode<0b0100111>; +def OPC_AMO : RISCVOpcode<0b0101111>; +def OPC_OP : RISCVOpcode<0b0110011>; +def OPC_LUI : RISCVOpcode<0b0110111>; +def OPC_OP_32 : RISCVOpcode<0b0111011>; +def OPC_MADD : RISCVOpcode<0b1000011>; +def OPC_MSUB : RISCVOpcode<0b1000111>; +def OPC_NMSUB : RISCVOpcode<0b1001011>; +def OPC_NMADD : RISCVOpcode<0b1001111>; +def OPC_OP_FP : RISCVOpcode<0b1010011>; +def OPC_BRANCH : RISCVOpcode<0b1100011>; +def OPC_JALR : RISCVOpcode<0b1100111>; +def OPC_JAL : RISCVOpcode<0b1101111>; +def OPC_SYSTEM : RISCVOpcode<0b1110011>; + +class RVInst pattern, InstFormat format> : Instruction { field bits<32> Inst; // SoftFail is a field the disassembler can use to provide a way for @@ -58,7 +86,7 @@ class RISCVInst pattern, dag OutOperandList = outs; dag InOperandList = ins; - let AsmString = asmstr; + let AsmString = opcodestr # "\t" # argstr; let Pattern = pattern; let TSFlags{3-0} = format.Value; @@ -66,14 +94,18 @@ class RISCVInst pattern, // Pseudo instructions class Pseudo pattern> - : RISCVInst { + : RVInst { let isPseudo = 1; let isCodeGenOnly = 1; } -class FR funct7, bits<3> funct3, bits<7> opcode, dag outs, dag ins, - string asmstr, list pattern> : RISCVInst -{ +// Instruction formats are listed in the order they appear in the RISC-V +// instruction set manual (R, I, S, B, U, J) with sub-formats (e.g. RVInstR4, +// RVInstRAtomic) sorted alphabetically. + +class RVInstR funct7, bits<3> funct3, RISCVOpcode opcode, dag outs, + dag ins, string opcodestr, string argstr> + : RVInst { bits<5> rs2; bits<5> rs1; bits<5> rd; @@ -83,12 +115,12 @@ class FR funct7, bits<3> funct3, bits<7> opcode, dag outs, dag ins, let Inst{19-15} = rs1; let Inst{14-12} = funct3; let Inst{11-7} = rd; - let Opcode = opcode; + let Opcode = opcode.Value; } -class FI funct3, bits<7> opcode, dag outs, dag ins, string asmstr, list pattern> - : RISCVInst -{ +class RVInstI funct3, RISCVOpcode opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst { bits<12> imm12; bits<5> rs1; bits<5> rd; @@ -97,12 +129,12 @@ class FI funct3, bits<7> opcode, dag outs, dag ins, string asmstr, list< let Inst{19-15} = rs1; let Inst{14-12} = funct3; let Inst{11-7} = rd; - let Opcode = opcode; + let Opcode = opcode.Value; } -class FI32Shift funct3, bits<7> opcode, dag outs, dag ins, string asmstr, list pattern> - : RISCVInst -{ +class RVInstIShift funct3, RISCVOpcode opcode, + dag outs, dag ins, string opcodestr, string argstr> + : RVInst { bits<5> shamt; bits<5> rs1; bits<5> rd; @@ -114,12 +146,12 @@ class FI32Shift funct3, bits<7> opcode, dag outs, dag in let Inst{19-15} = rs1; let Inst{14-12} = funct3; let Inst{11-7} = rd; - let Opcode = opcode; + let Opcode = opcode.Value; } -class FS funct3, bits<7> opcode, dag outs, dag ins, string asmstr, list pattern> - : RISCVInst -{ +class RVInstS funct3, RISCVOpcode opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst { bits<12> imm12; bits<5> rs2; bits<5> rs1; @@ -129,12 +161,12 @@ class FS funct3, bits<7> opcode, dag outs, dag ins, string asmstr, list< let Inst{19-15} = rs1; let Inst{14-12} = funct3; let Inst{11-7} = imm12{4-0}; - let Opcode = opcode; + let Opcode = opcode.Value; } -class 
FSB funct3, bits<7> opcode, dag outs, dag ins, string asmstr, list pattern> - : RISCVInst -{ +class RVInstB funct3, RISCVOpcode opcode, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst { bits<12> imm12; bits<5> rs2; bits<5> rs1; @@ -146,23 +178,23 @@ class FSB funct3, bits<7> opcode, dag outs, dag ins, string asmstr, list let Inst{14-12} = funct3; let Inst{11-8} = imm12{3-0}; let Inst{7} = imm12{10}; - let Opcode = opcode; + let Opcode = opcode.Value; } -class FU opcode, dag outs, dag ins, string asmstr, list pattern> - : RISCVInst -{ +class RVInstU + : RVInst { bits<20> imm20; bits<5> rd; let Inst{31-12} = imm20; let Inst{11-7} = rd; - let Opcode = opcode; + let Opcode = opcode.Value; } -class FUJ opcode, dag outs, dag ins, string asmstr, list pattern> - : RISCVInst -{ +class RVInstJ + : RVInst { bits<20> imm20; bits<5> rd; @@ -171,5 +203,5 @@ class FUJ opcode, dag outs, dag ins, string asmstr, list pattern> let Inst{20} = imm20{10}; let Inst{19-12} = imm20{18-11}; let Inst{11-7} = rd; - let Opcode = opcode; + let Opcode = opcode.Value; } diff --git a/lib/Target/RISCV/RISCVInstrInfo.cpp b/lib/Target/RISCV/RISCVInstrInfo.cpp new file mode 100644 index 0000000000000..92db5358ce4d0 --- /dev/null +++ b/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -0,0 +1,31 @@ +//===-- RISCVInstrInfo.cpp - RISCV Instruction Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the RISCV implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "RISCVInstrInfo.h" +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "RISCVTargetMachine.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_CTOR_DTOR +#include "RISCVGenInstrInfo.inc" + +using namespace llvm; + +RISCVInstrInfo::RISCVInstrInfo() : RISCVGenInstrInfo() {} diff --git a/lib/Target/RISCV/RISCVInstrInfo.h b/lib/Target/RISCV/RISCVInstrInfo.h new file mode 100644 index 0000000000000..50404d5554dec --- /dev/null +++ b/lib/Target/RISCV/RISCVInstrInfo.h @@ -0,0 +1,32 @@ +//===-- RISCVInstrInfo.h - RISCV Instruction Information --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the RISCV implementation of the TargetInstrInfo class. 
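Editorial aside: a standalone cross-check of the I-type field layout RVInstI uses above (imm[31:20], rs1[19:15], funct3[14:12], rd[11:7], opcode[6:0]). Hand-packing "addi x1, x2, 5" (OPC_OP_IMM = 0b0010011, funct3 = 0b000) should yield the well-known word 0x00510093. encodeIType is a hypothetical helper, not LLVM's emitter:

#include <cassert>
#include <cstdint>

static uint32_t encodeIType(uint32_t Imm12, uint32_t Rs1, uint32_t Funct3,
                            uint32_t Rd, uint32_t Opcode) {
  return (Imm12 & 0xfff) << 20 | (Rs1 & 0x1f) << 15 | (Funct3 & 0x7) << 12 |
         (Rd & 0x1f) << 7 | (Opcode & 0x7f);
}

int main() {
  assert(encodeIType(/*Imm12=*/5, /*Rs1=*/2, /*Funct3=*/0b000, /*Rd=*/1,
                     /*Opcode=*/0b0010011) == 0x00510093);
  return 0;
}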
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVINSTRINFO_H +#define LLVM_LIB_TARGET_RISCV_RISCVINSTRINFO_H + +#include "RISCVRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "RISCVGenInstrInfo.inc" + +namespace llvm { + +class RISCVInstrInfo : public RISCVGenInstrInfo { + +public: + RISCVInstrInfo(); +}; +} + +#endif diff --git a/lib/Target/RISCV/RISCVInstrInfo.td b/lib/Target/RISCV/RISCVInstrInfo.td index 1a5f32ecabe13..23adf1eda9d09 100644 --- a/lib/Target/RISCV/RISCVInstrInfo.td +++ b/lib/Target/RISCV/RISCVInstrInfo.td @@ -13,6 +13,17 @@ include "RISCVInstrFormats.td" +//===----------------------------------------------------------------------===// +// RISC-V specific DAG Nodes. +//===----------------------------------------------------------------------===// + +def RetFlag : SDNode<"RISCVISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +//===----------------------------------------------------------------------===// +// Operand and SDNode transformation definitions. +//===----------------------------------------------------------------------===// + class ImmAsmOperand : AsmOperandClass { let Name = prefix # "Imm" # width # suffix; let RenderMethod = "addImmOperands"; @@ -20,11 +31,11 @@ class ImmAsmOperand : AsmOperandClass { } class SImmAsmOperand - : ImmAsmOperand<"S", width, suffix> { + : ImmAsmOperand<"S", width, suffix> { } class UImmAsmOperand - : ImmAsmOperand<"U", width, suffix> { + : ImmAsmOperand<"U", width, suffix> { } def FenceArg : AsmOperandClass { @@ -33,107 +44,139 @@ def FenceArg : AsmOperandClass { let DiagnosticType = "InvalidFenceArg"; } -def fencearg : Operand { +def fencearg : Operand { let ParserMatchClass = FenceArg; let PrintMethod = "printFenceArg"; let DecoderMethod = "decodeUImmOperand<4>"; } -def uimm5 : Operand { +def uimm5 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5>; let DecoderMethod = "decodeUImmOperand<5>"; } -def simm12 : Operand { +def simm12 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = SImmAsmOperand<12>; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeSImmOperand<12>"; } -def uimm12 : Operand { +def uimm12 : Operand { let ParserMatchClass = UImmAsmOperand<12>; let DecoderMethod = "decodeUImmOperand<12>"; } // A 13-bit signed immediate where the least significant bit is zero. -def simm13_lsb0 : Operand { +def simm13_lsb0 : Operand { let ParserMatchClass = SImmAsmOperand<13, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<13>"; } -def uimm20 : Operand { +def uimm20 : Operand { let ParserMatchClass = UImmAsmOperand<20>; let EncoderMethod = "getImmOpValue"; let DecoderMethod = "decodeUImmOperand<20>"; } // A 21-bit signed immediate where the least significant bit is zero. -def simm21_lsb0 : Operand { +def simm21_lsb0 : Operand { let ParserMatchClass = SImmAsmOperand<21, "Lsb0">; let EncoderMethod = "getImmOpValueAsr1"; let DecoderMethod = "decodeSImmOperandAndLsl1<21>"; } -// As noted in RISCVRegisterInfo.td, the hope is that support for -// variable-sized register classes will mean that instruction definitions do -// not need to be duplicated for 32-bit and 64-bit register classes. For now -// we use 'GPR', which is 32-bit. When codegen for both RV32 and RV64 is -// added, we will need to duplicate instruction definitions unless a proposal -// like -// is adopted. 
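Editorial aside: the simm12 ImmLeaf above restricts matchable immediates to 12 signed bits, i.e. [-2048, 2047]; larger constants need separate materialization (LUI plus an add-immediate). A standalone range check mirroring isInt<12>:

#include <cassert>
#include <cstdint>

static bool isInt12(int64_t X) { return X >= -2048 && X <= 2047; }

int main() {
  assert(isInt12(2047) && isInt12(-2048));   // the extremes still fit
  assert(!isInt12(2048) && !isInt12(-2049)); // one past either end does not
  return 0;
}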
+//===----------------------------------------------------------------------===// +// Instruction Class Templates +//===----------------------------------------------------------------------===// -def LUI : FU<0b0110111, (outs GPR:$rd), (ins uimm20:$imm20), - "lui\t$rd, $imm20", []>; +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class BranchCC_rri funct3, string opcodestr> + : RVInstB { + let isBranch = 1; + let isTerminator = 1; +} -def AUIPC : FU<0b0010111, (outs GPR:$rd), (ins uimm20:$imm20), - "auipc\t$rd, $imm20", []>; +let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in +class Load_ri funct3, string opcodestr> + : RVInstI; + +// Operands for stores are in the order srcreg, base, offset rather than +// reflecting the order these fields are specified in the instruction +// encoding. +let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in +class Store_rri funct3, string opcodestr> + : RVInstS; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class ALU_ri funct3, string opcodestr> + : RVInstI; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class Shift_ri funct3, string opcodestr> + : RVInstIShift; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class ALU_rr funct7, bits<3> funct3, string opcodestr> + : RVInstR; + +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +class CSR_ir funct3, string opcodestr> : + RVInstI; + +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +class CSR_ii funct3, string opcodestr> : + RVInstI; -def JAL : FUJ<0b1101111, (outs GPR:$rd), (ins simm21_lsb0:$imm20), - "jal\t$rd, $imm20", []>; +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// -def JALR : FI<0b000, 0b1100111, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12), - "jalr\t$rd, $rs1, $imm12", []>; +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +def LUI : RVInstU; -class Bcc funct3, string OpcodeStr> : - FSB { -} +def AUIPC : RVInstU; -def BEQ : Bcc<0b000, "beq">; -def BNE : Bcc<0b001, "bne">; -def BLT : Bcc<0b100, "blt">; -def BGE : Bcc<0b101, "bge">; -def BLTU : Bcc<0b110, "bltu">; -def BGEU : Bcc<0b111, "bgeu">; +let isCall = 1 in +def JAL : RVInstJ; -class LD_ri funct3, string OpcodeStr> : - FI { - let mayLoad = 1; -} +let isCall = 1 in +def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd), + (ins GPR:$rs1, simm12:$imm12), + "jalr", "$rd, $rs1, $imm12">; +} // hasSideEffects = 0, mayLoad = 0, mayStore = 0 -def LB : LD_ri<0b000, "lb">; -def LH : LD_ri<0b001, "lh">; -def LW : LD_ri<0b010, "lw">; -def LBU : LD_ri<0b100, "lbu">; -def LHU : LD_ri<0b101, "lhu">; +def BEQ : BranchCC_rri<0b000, "beq">; +def BNE : BranchCC_rri<0b001, "bne">; +def BLT : BranchCC_rri<0b100, "blt">; +def BGE : BranchCC_rri<0b101, "bge">; +def BLTU : BranchCC_rri<0b110, "bltu">; +def BGEU : BranchCC_rri<0b111, "bgeu">; -class ST_ri funct3, string OpcodeStr> : - FS { - let mayStore = 1; -} +def LB : Load_ri<0b000, "lb">; +def LH : Load_ri<0b001, "lh">; +def LW : Load_ri<0b010, "lw">; +def LBU : Load_ri<0b100, "lbu">; +def LHU : Load_ri<0b101, "lhu">; -def SB : ST_ri<0b000, "sb">; -def SH : ST_ri<0b001, "sh">; -def SW : ST_ri<0b010, "sw">; - -class ALU_ri funct3, string OpcodeStr> : - FI -{ -} +def SB : Store_rri<0b000, "sb">; +def SH : Store_rri<0b001, "sh">; +def SW : Store_rri<0b010, "sw">; def ADDI : ALU_ri<0b000, "addi">; def SLTI : ALU_ri<0b010, "slti">; @@ -142,21 +185,9 @@ def XORI : ALU_ri<0b100, "xori">; def ORI : ALU_ri<0b110, "ori">; def ANDI : 
ALU_ri<0b111, "andi">; -class SHIFT32_ri funct3, string OpcodeStr> : - FI32Shift -{ -} - -def SLLI : SHIFT32_ri<0, 0b001, "slli">; -def SRLI : SHIFT32_ri<0, 0b101, "srli">; -def SRAI : SHIFT32_ri<1, 0b101, "srai">; - -class ALU_rr funct7, bits<3> funct3, string OpcodeStr> : - FR -{ -} +def SLLI : Shift_ri<0, 0b001, "slli">; +def SRLI : Shift_ri<0, 0b101, "srli">; +def SRAI : Shift_ri<1, 0b101, "srai">; def ADD : ALU_rr<0b0000000, 0b000, "add">; def SUB : ALU_rr<0b0100000, 0b000, "sub">; @@ -169,8 +200,10 @@ def SRA : ALU_rr<0b0100000, 0b101, "sra">; def OR : ALU_rr<0b0000000, 0b110, "or">; def AND : ALU_rr<0b0000000, 0b111, "and">; -def FENCE : FI<0b000, 0b0001111, (outs), (ins fencearg:$pred, fencearg:$succ), - "fence\t$pred, $succ", []> { +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { +def FENCE : RVInstI<0b000, OPC_MISC_MEM, (outs), + (ins fencearg:$pred, fencearg:$succ), + "fence", "$pred, $succ"> { bits<4> pred; bits<4> succ; @@ -179,37 +212,78 @@ def FENCE : FI<0b000, 0b0001111, (outs), (ins fencearg:$pred, fencearg:$succ), let imm12 = {0b0000,pred,succ}; } -def FENCEI : FI<0b001, 0b0001111, (outs), (ins), "fence.i", []> { +def FENCE_I : RVInstI<0b001, OPC_MISC_MEM, (outs), (ins), "fence.i", ""> { let rs1 = 0; let rd = 0; let imm12 = 0; } -let rs1=0, rd=0 in { - def ECALL : FI<0b000, 0b1110011, (outs), (ins), "ecall", []> { - let imm12=0; - } - def EBREAK : FI<0b000, 0b1110011, (outs), (ins), "ebreak", []> { - let imm12=1; - } +def ECALL : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ecall", ""> { + let rs1 = 0; + let rd = 0; + let imm12 = 0; } -class CSR_rr funct3, string OpcodeStr> : - FI -{ +def EBREAK : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "ebreak", ""> { + let rs1 = 0; + let rd = 0; + let imm12 = 1; } +} // hasSideEffects = 1, mayLoad = 0, mayStore = 0 -def CSRRW : CSR_rr<0b001, "csrrw">; -def CSRRS : CSR_rr<0b010, "csrrs">; -def CSRRC : CSR_rr<0b011, "csrrc">; +def CSRRW : CSR_ir<0b001, "csrrw">; +def CSRRS : CSR_ir<0b010, "csrrs">; +def CSRRC : CSR_ir<0b011, "csrrc">; -class CSR_ri funct3, string OpcodeStr> : - FI -{ -} +def CSRRWI : CSR_ii<0b101, "csrrwi">; +def CSRRSI : CSR_ii<0b110, "csrrsi">; +def CSRRCI : CSR_ii<0b111, "csrrci">; + +//===----------------------------------------------------------------------===// +// Pseudo-instructions and codegen patterns +// +// Naming convention: For 'generic' pattern classes, we use the naming +// convention PatTy1Ty2. For pattern classes which offer a more complex +// expansion, prefix the class name, e.g. BccPat.
+//===----------------------------------------------------------------------===// -def CSRRWI : CSR_ri<0b101, "csrrwi">; -def CSRRSI : CSR_ri<0b110, "csrrsi">; -def CSRRCI : CSR_ri<0b111, "csrrci">; +/// Generic pattern classes + +class PatGprGpr + : Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>; +class PatGprSimm12 + : Pat<(OpNode GPR:$rs1, simm12:$imm12), (Inst GPR:$rs1, simm12:$imm12)>; +class PatGprUimm5 + : Pat<(OpNode GPR:$rs1, uimm5:$shamt), + (Inst GPR:$rs1, uimm5:$shamt)>; + +/// Simple arithmetic operations + +def : PatGprGpr; +def : PatGprSimm12; +def : PatGprGpr; +def : PatGprGpr; +def : PatGprSimm12; +def : PatGprGpr; +def : PatGprSimm12; +def : PatGprGpr; +def : PatGprSimm12; +def : PatGprGpr; +def : PatGprUimm5; +def : PatGprGpr; +def : PatGprUimm5; +def : PatGprGpr; +def : PatGprUimm5; + +/// Setcc + +def : PatGprGpr; +def : PatGprSimm12; +def : PatGprGpr; +def : PatGprSimm12; + +/// Branches and jumps + +let isBarrier = 1, isReturn = 1, isTerminator = 1 in +def PseudoRET : Pseudo<(outs), (ins), [(RetFlag)]>, + PseudoInstExpansion<(JALR X0, X1, 0)>; diff --git a/lib/Target/RISCV/RISCVMCInstLower.cpp b/lib/Target/RISCV/RISCVMCInstLower.cpp new file mode 100644 index 0000000000000..1ac8d982ff983 --- /dev/null +++ b/lib/Target/RISCV/RISCVMCInstLower.cpp @@ -0,0 +1,50 @@ +//===-- RISCVMCInstLower.cpp - Convert RISCV MachineInstr to an MCInst ------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower RISCV MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void llvm::LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, + MCInst &OutMI) { + OutMI.setOpcode(MI->getOpcode()); + + for (const MachineOperand &MO : MI->operands()) { + MCOperand MCOp; + switch (MO.getType()) { + default: + report_fatal_error( + "LowerRISCVMachineInstrToMCInst: unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) + continue; + MCOp = MCOperand::createReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + } + + OutMI.addOperand(MCOp); + } +} diff --git a/lib/Target/RISCV/RISCVRegisterInfo.cpp b/lib/Target/RISCV/RISCVRegisterInfo.cpp new file mode 100644 index 0000000000000..4f6c528061cb4 --- /dev/null +++ b/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -0,0 +1,61 @@ +//===-- RISCVRegisterInfo.cpp - RISCV Register Information ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the RISCV implementation of the TargetRegisterInfo class. 
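Editorial aside: PseudoRET above expands to JALR X0, X1, 0, i.e. jump to the return address in x1/ra without writing a link register. Packing those fields into the I-type layout reproduces the canonical RISC-V "ret" word 0x00008067; a standalone check:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Imm = 0, Rs1 = 1 /*x1/ra*/, Funct3 = 0, Rd = 0 /*x0*/;
  uint32_t Opc = 0b1100111; // OPC_JALR
  uint32_t Word = Imm << 20 | Rs1 << 15 | Funct3 << 12 | Rd << 7 | Opc;
  assert(Word == 0x00008067);
  return 0;
}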
+// +//===----------------------------------------------------------------------===// + +#include "RISCVRegisterInfo.h" +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define GET_REGINFO_TARGET_DESC +#include "RISCVGenRegisterInfo.inc" + +using namespace llvm; + +RISCVRegisterInfo::RISCVRegisterInfo(unsigned HwMode) + : RISCVGenRegisterInfo(RISCV::X1, /*DwarfFlavour*/0, /*EHFlavor*/0, + /*PC*/0, HwMode) {} + +const MCPhysReg * +RISCVRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + return CSR_SaveList; +} + +BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + + // Use markSuperRegs to ensure any register aliases are also reserved + markSuperRegs(Reserved, RISCV::X0); // zero + markSuperRegs(Reserved, RISCV::X1); // ra + markSuperRegs(Reserved, RISCV::X2); // sp + markSuperRegs(Reserved, RISCV::X3); // gp + markSuperRegs(Reserved, RISCV::X4); // tp + markSuperRegs(Reserved, RISCV::X8); // fp + assert(checkAllSuperRegsMarked(Reserved)); + return Reserved; +} + +void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + report_fatal_error("Subroutines not supported yet"); +} + +unsigned RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + return RISCV::X8; +} diff --git a/lib/Target/RISCV/RISCVRegisterInfo.h b/lib/Target/RISCV/RISCVRegisterInfo.h new file mode 100644 index 0000000000000..94af9f44ecde1 --- /dev/null +++ b/lib/Target/RISCV/RISCVRegisterInfo.h @@ -0,0 +1,40 @@ +//===-- RISCVRegisterInfo.h - RISCV Register Information Impl ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the RISCV implementation of the TargetRegisterInfo class. 
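Editorial aside: the reservations in getReservedRegs above pin down fixed ABI roles. The table below restates the patch's own comments for quick reference (the s0 alias for x8 comes from the RISC-V ABI, not from the patch):

#include <cstdio>

int main() {
  const char *Reserved[][2] = {{"x0", "zero (hardwired 0)"}, {"x1", "ra"},
                               {"x2", "sp"}, {"x3", "gp"}, {"x4", "tp"},
                               {"x8", "fp/s0"}};
  for (auto &R : Reserved)
    printf("%s reserved as %s\n", R[0], R[1]);
  return 0;
}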
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVREGISTERINFO_H +#define LLVM_LIB_TARGET_RISCV_RISCVREGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "RISCVGenRegisterInfo.inc" + +namespace llvm { + +struct RISCVRegisterInfo : public RISCVGenRegisterInfo { + + RISCVRegisterInfo(unsigned HwMode); + + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + + BitVector getReservedRegs(const MachineFunction &MF) const override; + + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + unsigned getFrameRegister(const MachineFunction &MF) const override; +}; +} + +#endif diff --git a/lib/Target/RISCV/RISCVRegisterInfo.td b/lib/Target/RISCV/RISCVRegisterInfo.td index f04de217bf0d6..78c036a37b904 100644 --- a/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/lib/Target/RISCV/RISCVRegisterInfo.td @@ -8,83 +8,67 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Declarations that describe the RISC-V register file +// Declarations that describe the RISC-V register files //===----------------------------------------------------------------------===// let Namespace = "RISCV" in { - def sub_32 : SubRegIndex<32>; - - class RISCVReg32 Enc, string n, list alt = []> : Register { - let HWEncoding{4-0} = Enc; - let AltNames = alt; - } - - // RISCV64 registers don't define an AsmName or AltName. If they specified - // names aliasing the RISCVReg32 registers, the generation of the default - // MatchRegisterName/MatchRegisterAltName would fail. When necessary, - // RISCVAsmParser will need to convert a register number from a RISCVReg32 - // to the equivalent RISCVReg64. 
- class RISCVReg64<RISCVReg32 subreg> : Register<""> { - let HWEncoding{4-0} = subreg.HWEncoding{4-0}; - let SubRegs = [subreg]; - let SubRegIndices = [sub_32]; - } - - def ABIRegAltName : RegAltNameIndex; +class RISCVReg<bits<5> Enc, string n, list<string> alt = []> : Register<n> { + let HWEncoding{4-0} = Enc; + let AltNames = alt; } +def ABIRegAltName : RegAltNameIndex; +} // Namespace = "RISCV" // Integer registers let RegAltNameIndices = [ABIRegAltName] in { - def X0_32 : RISCVReg32<0, "x0", ["zero"]>, DwarfRegNum<[0]>; - def X1_32 : RISCVReg32<1, "x1", ["ra"]>, DwarfRegNum<[1]>; - def X2_32 : RISCVReg32<2, "x2", ["sp"]>, DwarfRegNum<[2]>; - def X3_32 : RISCVReg32<3, "x3", ["gp"]>, DwarfRegNum<[3]>; - def X4_32 : RISCVReg32<4, "x4", ["tp"]>, DwarfRegNum<[4]>; - def X5_32 : RISCVReg32<5, "x5", ["t0"]>, DwarfRegNum<[5]>; - def X6_32 : RISCVReg32<6, "x6", ["t1"]>, DwarfRegNum<[6]>; - def X7_32 : RISCVReg32<7, "x7", ["t2"]>, DwarfRegNum<[7]>; - def X8_32 : RISCVReg32<8, "x8", ["s0"]>, DwarfRegNum<[8]>; - def X9_32 : RISCVReg32<9, "x9", ["s1"]>, DwarfRegNum<[9]>; - def X10_32 : RISCVReg32<10,"x10", ["a0"]>, DwarfRegNum<[10]>; - def X11_32 : RISCVReg32<11,"x11", ["a1"]>, DwarfRegNum<[11]>; - def X12_32 : RISCVReg32<12,"x12", ["a2"]>, DwarfRegNum<[12]>; - def X13_32 : RISCVReg32<13,"x13", ["a3"]>, DwarfRegNum<[13]>; - def X14_32 : RISCVReg32<14,"x14", ["a4"]>, DwarfRegNum<[14]>; - def X15_32 : RISCVReg32<15,"x15", ["a5"]>, DwarfRegNum<[15]>; - def X16_32 : RISCVReg32<16,"x16", ["a6"]>, DwarfRegNum<[16]>; - def X17_32 : RISCVReg32<17,"x17", ["a7"]>, DwarfRegNum<[17]>; - def X18_32 : RISCVReg32<18,"x18", ["s2"]>, DwarfRegNum<[18]>; - def X19_32 : RISCVReg32<19,"x19", ["s3"]>, DwarfRegNum<[19]>; - def X20_32 : RISCVReg32<20,"x20", ["s4"]>, DwarfRegNum<[20]>; - def X21_32 : RISCVReg32<21,"x21", ["s5"]>, DwarfRegNum<[21]>; - def X22_32 : RISCVReg32<22,"x22", ["s6"]>, DwarfRegNum<[22]>; - def X23_32 : RISCVReg32<23,"x23", ["s7"]>, DwarfRegNum<[23]>; - def X24_32 : RISCVReg32<24,"x24", ["s8"]>, DwarfRegNum<[24]>; - def X25_32 : RISCVReg32<25,"x25", ["s9"]>, DwarfRegNum<[25]>; - def X26_32 : RISCVReg32<26,"x26", ["s10"]>, DwarfRegNum<[26]>; - def X27_32 : RISCVReg32<27,"x27", ["s11"]>, DwarfRegNum<[27]>; - def X28_32 : RISCVReg32<28,"x28", ["t3"]>, DwarfRegNum<[28]>; - def X29_32 : RISCVReg32<29,"x29", ["t4"]>, DwarfRegNum<[29]>; - def X30_32 : RISCVReg32<30,"x30", ["t5"]>, DwarfRegNum<[30]>; - def X31_32 : RISCVReg32<31,"x31", ["t6"]>, DwarfRegNum<[31]>; + def X0 : RISCVReg<0, "x0", ["zero"]>, DwarfRegNum<[0]>; + def X1 : RISCVReg<1, "x1", ["ra"]>, DwarfRegNum<[1]>; + def X2 : RISCVReg<2, "x2", ["sp"]>, DwarfRegNum<[2]>; + def X3 : RISCVReg<3, "x3", ["gp"]>, DwarfRegNum<[3]>; + def X4 : RISCVReg<4, "x4", ["tp"]>, DwarfRegNum<[4]>; + def X5 : RISCVReg<5, "x5", ["t0"]>, DwarfRegNum<[5]>; + def X6 : RISCVReg<6, "x6", ["t1"]>, DwarfRegNum<[6]>; + def X7 : RISCVReg<7, "x7", ["t2"]>, DwarfRegNum<[7]>; + def X8 : RISCVReg<8, "x8", ["s0"]>, DwarfRegNum<[8]>; + def X9 : RISCVReg<9, "x9", ["s1"]>, DwarfRegNum<[9]>; + def X10 : RISCVReg<10,"x10", ["a0"]>, DwarfRegNum<[10]>; + def X11 : RISCVReg<11,"x11", ["a1"]>, DwarfRegNum<[11]>; + def X12 : RISCVReg<12,"x12", ["a2"]>, DwarfRegNum<[12]>; + def X13 : RISCVReg<13,"x13", ["a3"]>, DwarfRegNum<[13]>; + def X14 : RISCVReg<14,"x14", ["a4"]>, DwarfRegNum<[14]>; + def X15 : RISCVReg<15,"x15", ["a5"]>, DwarfRegNum<[15]>; + def X16 : RISCVReg<16,"x16", ["a6"]>, DwarfRegNum<[16]>; + def X17 : RISCVReg<17,"x17", ["a7"]>, DwarfRegNum<[17]>; + def X18 : RISCVReg<18,"x18", ["s2"]>, DwarfRegNum<[18]>; + def X19
: RISCVReg<19,"x19", ["s3"]>, DwarfRegNum<[19]>; + def X20 : RISCVReg<20,"x20", ["s4"]>, DwarfRegNum<[20]>; + def X21 : RISCVReg<21,"x21", ["s5"]>, DwarfRegNum<[21]>; + def X22 : RISCVReg<22,"x22", ["s6"]>, DwarfRegNum<[22]>; + def X23 : RISCVReg<23,"x23", ["s7"]>, DwarfRegNum<[23]>; + def X24 : RISCVReg<24,"x24", ["s8"]>, DwarfRegNum<[24]>; + def X25 : RISCVReg<25,"x25", ["s9"]>, DwarfRegNum<[25]>; + def X26 : RISCVReg<26,"x26", ["s10"]>, DwarfRegNum<[26]>; + def X27 : RISCVReg<27,"x27", ["s11"]>, DwarfRegNum<[27]>; + def X28 : RISCVReg<28,"x28", ["t3"]>, DwarfRegNum<[28]>; + def X29 : RISCVReg<29,"x29", ["t4"]>, DwarfRegNum<[29]>; + def X30 : RISCVReg<30,"x30", ["t5"]>, DwarfRegNum<[30]>; + def X31 : RISCVReg<31,"x31", ["t6"]>, DwarfRegNum<[31]>; } -foreach Index = 0-31 in { - def X#Index#_64 : RISCVReg64<!cast<RISCVReg32>("X"#Index#"_32")>, DwarfRegNum<[Index]>; -} +def XLenVT : ValueTypeByHwMode<[RV32, RV64, DefaultMode], + [i32, i64, i32]>; -// We currently define separate register classes for the 32-bit and 64-bit -// GPRs. Once variable-sized register classes -// or -// similar are implemented, we can just use one 'GPR' class for most -// instruction definitions. - -// TODO: once codegen is implemented, registers should be listed in an order -// reflecting the preferred register allocation sequence. -def GPR : RegisterClass<"RISCV", [i32], 32, (add - (sequence "X%u_32", 0, 31) -)>; - -def GPR64 : RegisterClass<"RISCV", [i64], 64, (add - (sequence "X%u_64", 0, 31) -)>; +// The order of registers represents the preferred allocation sequence. +// Registers are listed in the order caller-save, callee-save, specials. +def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add + (sequence "X%u", 10, 17), + (sequence "X%u", 5, 7), + (sequence "X%u", 28, 31), + (sequence "X%u", 8, 9), + (sequence "X%u", 18, 27), + (sequence "X%u", 0, 4) + )> { + let RegInfos = RegInfoByHwMode< + [RV32, RV64, DefaultMode], + [RegInfo<32,32,32>, RegInfo<64,64,64>, RegInfo<32,32,32>]>; +} diff --git a/lib/Target/RISCV/RISCVSubtarget.cpp b/lib/Target/RISCV/RISCVSubtarget.cpp new file mode 100644 index 0000000000000..b221ea84a33c2 --- /dev/null +++ b/lib/Target/RISCV/RISCVSubtarget.cpp @@ -0,0 +1,48 @@ +//===-- RISCVSubtarget.cpp - RISCV Subtarget Information ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RISCV specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#include "RISCVSubtarget.h" +#include "RISCV.h" +#include "RISCVFrameLowering.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-subtarget" + +#define GET_SUBTARGETINFO_TARGET_DESC +#define GET_SUBTARGETINFO_CTOR +#include "RISCVGenSubtargetInfo.inc" + +void RISCVSubtarget::anchor() {} + +RISCVSubtarget &RISCVSubtarget::initializeSubtargetDependencies(StringRef CPU, + StringRef FS, + bool Is64Bit) { + // Determine default and user-specified characteristics + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = Is64Bit ?
"generic-rv64" : "generic-rv32"; + ParseSubtargetFeatures(CPUName, FS); + if (Is64Bit) { + XLenVT = MVT::i64; + XLen = 64; + } + return *this; +} + +RISCVSubtarget::RISCVSubtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM) + : RISCVGenSubtargetInfo(TT, CPU, FS), + FrameLowering(initializeSubtargetDependencies(CPU, FS, TT.isArch64Bit())), + InstrInfo(), RegInfo(getHwMode()), TLInfo(TM, *this) {} diff --git a/lib/Target/RISCV/RISCVSubtarget.h b/lib/Target/RISCV/RISCVSubtarget.h new file mode 100644 index 0000000000000..657b0e656202e --- /dev/null +++ b/lib/Target/RISCV/RISCVSubtarget.h @@ -0,0 +1,75 @@ +//===-- RISCVSubtarget.h - Define Subtarget for the RISCV -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the RISCV specific subclass of TargetSubtargetInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVSUBTARGET_H +#define LLVM_LIB_TARGET_RISCV_RISCVSUBTARGET_H + +#include "RISCVFrameLowering.h" +#include "RISCVISelLowering.h" +#include "RISCVInstrInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define GET_SUBTARGETINFO_HEADER +#include "RISCVGenSubtargetInfo.inc" + +namespace llvm { +class StringRef; + +class RISCVSubtarget : public RISCVGenSubtargetInfo { + virtual void anchor(); + bool HasRV64 = false; + unsigned XLen = 32; + MVT XLenVT = MVT::i32; + RISCVFrameLowering FrameLowering; + RISCVInstrInfo InstrInfo; + RISCVRegisterInfo RegInfo; + RISCVTargetLowering TLInfo; + SelectionDAGTargetInfo TSInfo; + + /// Initializes using the passed in CPU and feature strings so that we can + /// use initializer lists for subtarget initialization. + RISCVSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS, + bool Is64Bit); + +public: + // Initializes the data members to match that of the specified triple. + RISCVSubtarget(const Triple &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM); + + // Parses features string setting specified subtarget options. The + // definition of this function is auto-generated by tblgen. 
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + const RISCVFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + const RISCVInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const RISCVRegisterInfo *getRegisterInfo() const override { + return &RegInfo; + } + const RISCVTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + bool is64Bit() const { return HasRV64; } + MVT getXLenVT() const { return XLenVT; } + unsigned getXLen() const { return XLen; } +}; +} // End llvm namespace + +#endif diff --git a/lib/Target/RISCV/RISCVTargetMachine.cpp b/lib/Target/RISCV/RISCVTargetMachine.cpp index 78d9cf53b5d6e..34da6de504df0 100644 --- a/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "RISCV.h" #include "RISCVTargetMachine.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Passes.h" @@ -58,10 +59,31 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveCodeModel(CM), OL), - TLOF(make_unique<TargetLoweringObjectFileELF>()) { + TLOF(make_unique<TargetLoweringObjectFileELF>()), + Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } +namespace { +class RISCVPassConfig : public TargetPassConfig { +public: + RISCVPassConfig(RISCVTargetMachine &TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + RISCVTargetMachine &getRISCVTargetMachine() const { + return getTM<RISCVTargetMachine>(); + } + + bool addInstSelector() override; +}; +} + TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) { - return new TargetPassConfig(*this, PM); + return new RISCVPassConfig(*this, PM); +} + +bool RISCVPassConfig::addInstSelector() { + addPass(createRISCVISelDag(getRISCVTargetMachine())); + + return false; } diff --git a/lib/Target/RISCV/RISCVTargetMachine.h b/lib/Target/RISCV/RISCVTargetMachine.h index 5c2ec956ee29b..02361dddebf7b 100644 --- a/lib/Target/RISCV/RISCVTargetMachine.h +++ b/lib/Target/RISCV/RISCVTargetMachine.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_RISCV_RISCVTARGETMACHINE_H #include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "RISCVSubtarget.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" @@ -22,6 +23,7 @@ namespace llvm { class RISCVTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; + RISCVSubtarget Subtarget; public: RISCVTargetMachine(const Target &T, const Triple &TT, StringRef CPU, @@ -29,6 +31,10 @@ class RISCVTargetMachine : public LLVMTargetMachine { Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT); + const RISCVSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; TargetLoweringObjectFile *getObjFileLowering() const override {
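The RISCV hunks above follow the stock LLVM bring-up pattern: the TargetMachine owns one RISCVSubtarget, getSubtargetImpl returns it for every function, and a TargetPassConfig subclass installs the SelectionDAG instruction selector. A minimal sketch of that wiring for a hypothetical Foo backend (the Foo names are illustrative only, not part of this patch):

namespace {
// Hypothetical pass config mirroring RISCVPassConfig above; the only
// hook overridden is instruction selection.
class FooPassConfig : public TargetPassConfig {
public:
  FooPassConfig(FooTargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  bool addInstSelector() override {
    // createFooISelDag is the hypothetical analogue of createRISCVISelDag.
    addPass(createFooISelDag(getTM<FooTargetMachine>()));
    return false; // false tells the pipeline a selector was installed
  }
};
} // end anonymous namespace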
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 087c037614a9d..05f78a48badfc 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -108,7 +108,7 @@ class SparcAsmParser : public MCTargetAsmParser { SparcAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, sti), Parser(parser) { + : MCTargetAsmParser(Options, sti, MII), Parser(parser) { // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 0a72a4438218d..a38545ecf4303 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -291,7 +291,8 @@ namespace { } } - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType); return createSparcELFObjectWriter(OS, Is64Bit, IsLittleEndian, OSABI); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp index d35e45e034665..a204036a0975d 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" @@ -131,10 +132,9 @@ bool SparcELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym, } } -MCObjectWriter *llvm::createSparcELFObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit, - bool IsLittleEndian, - uint8_t OSABI) { - MCELFObjectTargetWriter *MOTW = new SparcELFObjectWriter(Is64Bit, OSABI); - return createELFObjectWriter(MOTW, OS, IsLittleEndian); +std::unique_ptr<MCObjectWriter> +llvm::createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, + bool IsLittleEndian, uint8_t OSABI) { + auto MOTW = llvm::make_unique<SparcELFObjectWriter>(Is64Bit, OSABI); + return createELFObjectWriter(std::move(MOTW), OS, IsLittleEndian); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h index 4e754c132d11a..563e6f4efbe6e 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h @@ -16,6 +16,8 @@ #include "llvm/Support/DataTypes.h" +#include <memory> + namespace llvm { class MCAsmBackend; class MCCodeEmitter; @@ -41,8 +43,9 @@ MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII, MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU, const MCTargetOptions &Options); -MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, - bool IsLIttleEndian, uint8_t OSABI); +std::unique_ptr<MCObjectWriter> +createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, + bool IsLittleEndian, uint8_t OSABI); } // End llvm namespace // Defines symbolic names for Sparc registers.
This defines a mapping from diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td index ee37b7227db28..9e0a297c88123 100644 --- a/lib/Target/Sparc/Sparc.td +++ b/lib/Target/Sparc/Sparc.td @@ -98,9 +98,18 @@ def : Proc<"tsc701", []>; def : Proc<"myriad2", [FeatureLeon, LeonCASA]>; def : Proc<"myriad2.1", [FeatureLeon, LeonCASA]>; def : Proc<"myriad2.2", [FeatureLeon, LeonCASA]>; +def : Proc<"myriad2.3", [FeatureLeon, LeonCASA]>; def : Proc<"ma2100", [FeatureLeon, LeonCASA]>; def : Proc<"ma2150", [FeatureLeon, LeonCASA]>; +def : Proc<"ma2155", [FeatureLeon, LeonCASA]>; def : Proc<"ma2450", [FeatureLeon, LeonCASA]>; +def : Proc<"ma2455", [FeatureLeon, LeonCASA]>; +def : Proc<"ma2x5x", [FeatureLeon, LeonCASA]>; +def : Proc<"ma2080", [FeatureLeon, LeonCASA]>; +def : Proc<"ma2085", [FeatureLeon, LeonCASA]>; +def : Proc<"ma2480", [FeatureLeon, LeonCASA]>; +def : Proc<"ma2485", [FeatureLeon, LeonCASA]>; +def : Proc<"ma2x8x", [FeatureLeon, LeonCASA]>; def : Proc<"v9", [FeatureV9]>; def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>; def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS, diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 33680789ee082..bde067d6c1294 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -425,7 +425,7 @@ class SystemZAsmParser : public MCTargetAsmParser { SystemZAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, sti), Parser(parser) { + : MCTargetAsmParser(Options, sti, MII), Parser(parser) { MCAsmParserExtension::Initialize(Parser); // Alias the .word directive to .short. 
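Worth noting in the SystemZAsmParser hunk above (the same change lands for Sparc and, later, X86 in this patch): MCTargetAsmParser now receives the MCInstrInfo in its base constructor, so target parsers no longer need to keep a private reference to it. Schematically, with a made-up MyAsmParser standing in for the per-target classes:

// Before: each target parser stored "const MCInstrInfo &MII;" itself.
// After: the base class owns it and subclasses simply forward the argument.
class MyAsmParser : public MCTargetAsmParser {
public:
  MyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
              const MCInstrInfo &MII, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII) {
    // Same feature-bit setup as the constructors shown in this patch;
    // ComputeAvailableFeatures is the tblgen-generated per-target helper.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }
};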
@@ -543,6 +543,7 @@ class SystemZAsmParser : public MCTargetAsmParser { #define GET_REGISTER_MATCHER #define GET_SUBTARGET_FEATURE_NAME #define GET_MATCHER_IMPLEMENTATION +#define GET_MNEMONIC_SPELL_CHECKER #include "SystemZGenAsmMatcher.inc" // Used for the .insn directives; contains information needed to parse the @@ -1168,7 +1169,8 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands, return false; } -std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS); +static std::string SystemZMnemonicSpellCheck(StringRef S, uint64_t FBS, + unsigned VariantID = 0); bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 51ac410a9c819..e035c3b87a408 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -66,7 +66,8 @@ class SystemZMCAsmBackend : public MCAsmBackend { llvm_unreachable("SystemZ does not have assembler relaxation"); } bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override { return createSystemZObjectWriter(OS, OSABI); } }; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index df0a8161e6e7c..238926d6c8e01 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include <cassert> @@ -160,8 +161,8 @@ unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx, } } -MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI) { - MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false); +std::unique_ptr<MCObjectWriter> +llvm::createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) { + return createELFObjectWriter(llvm::make_unique<SystemZObjectWriter>(OSABI), + OS, /*IsLittleEndian=*/false); } diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h index dbca3485290aa..99b157e37275d 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h @@ -12,6 +12,8 @@ #include "llvm/Support/DataTypes.h" +#include <memory> + namespace llvm { class MCAsmBackend; @@ -91,7 +93,8 @@ MCAsmBackend *createSystemZMCAsmBackend(const Target &T, const Triple &TT, StringRef CPU, const MCTargetOptions &Options); -MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI); +std::unique_ptr<MCObjectWriter> createSystemZObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI); } // end namespace llvm // Defines symbolic names for SystemZ registers.
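All of the MC-layer churn in the Sparc and SystemZ files (and, below, WebAssembly) is one mechanical migration: object-writer factories now return std::unique_ptr<MCObjectWriter>, making ownership explicit instead of handing out raw pointers. Reduced to its essentials, the before/after looks like this sketch, where TargetObjectWriter stands in for the concrete ELF writer classes:

// Before: MCObjectWriter *create(raw_pwrite_stream &OS, uint8_t OSABI);
// the caller was implicitly responsible for deleting the result.

// After: ownership is visible in the signature and transferred by move.
std::unique_ptr<MCObjectWriter> create(raw_pwrite_stream &OS, uint8_t OSABI) {
  auto MOTW = llvm::make_unique<TargetObjectWriter>(OSABI);
  return createELFObjectWriter(std::move(MOTW), OS, /*IsLittleEndian=*/false);
}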
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 9e24a3b954834..d49d7316e682b 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -221,13 +221,17 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); // Even though i128 is not a legal type, we still need to custom lower // the atomic operations in order to exploit SystemZ instructions. setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); + + // We can use the CC result of compare-and-swap to implement + // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS. + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); @@ -3483,25 +3487,38 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); } -// Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation. Lower the first two -// into a fullword ATOMIC_CMP_SWAPW operation. +// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node. SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast<AtomicSDNode>(Op.getNode()); - - // We have native support for 32-bit compare and swap. - EVT NarrowVT = Node->getMemoryVT(); - EVT WideVT = MVT::i32; - if (NarrowVT == WideVT) - return Op; - - int64_t BitSize = NarrowVT.getSizeInBits(); SDValue ChainIn = Node->getOperand(0); SDValue Addr = Node->getOperand(1); SDValue CmpVal = Node->getOperand(2); SDValue SwapVal = Node->getOperand(3); MachineMemOperand *MMO = Node->getMemOperand(); SDLoc DL(Node); + + // We have native support for 32-bit and 64-bit compare and swap, but we + // still need to expand extracting the "success" result from the CC. + EVT NarrowVT = Node->getMemoryVT(); + EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32; + if (NarrowVT == WideVT) { + SDVTList Tys = DAG.getVTList(WideVT, MVT::Other, MVT::Glue); + SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal }; + SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP, + DL, Tys, Ops, NarrowVT, MMO); + SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2), + SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); + + DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1)); + return SDValue(); + } + + // Convert 8-bit and 16-bit compare and swap to a loop, implemented + // via a fullword ATOMIC_CMP_SWAPW operation. + int64_t BitSize = NarrowVT.getSizeInBits(); EVT PtrVT = Addr.getValueType(); // Get the address of the containing word.
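For orientation before the subword case that follows: ATOMIC_CMP_SWAP_WITH_SUCCESS is the DAG node behind compare-exchange's boolean result, and the point of this change is to read that boolean straight out of the condition code that CS/CSG already set, rather than re-comparing the loaded value. In source terms it is the return value here:

#include <atomic>

// The bool below is the "success" value that lowerATOMIC_CMP_SWAP now
// produces with emitSETCC on the CC output (CCMASK_CS_EQ); i8/i16 widths
// go through the fullword ATOMIC_CMP_SWAPW loop instead.
bool tryUpdate(std::atomic<int> &V, int Expected, int Desired) {
  return V.compare_exchange_strong(Expected, Desired);
}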
@@ -3520,12 +3537,18 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, DAG.getConstant(0, DL, WideVT), BitShift); // Construct the ATOMIC_CMP_SWAPW node. - SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); + SDVTList VTList = DAG.getVTList(WideVT, MVT::Other, MVT::Glue); SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, VTList, Ops, NarrowVT, MMO); - return AtomicOp; + SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(2), + SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ); + + DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(1)); + return SDValue(); } SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, @@ -4753,7 +4776,7 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); case ISD::ATOMIC_LOAD_UMAX: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); - case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return lowerATOMIC_CMP_SWAP(Op, DAG); case ISD::STACKSAVE: return lowerSTACKSAVE(Op, DAG); @@ -4847,16 +4870,20 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N, Results.push_back(Res); break; } - case ISD::ATOMIC_CMP_SWAP: { + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { SDLoc DL(N); - SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other); + SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other, MVT::Glue); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), lowerI128ToGR128(DAG, N->getOperand(2)), lowerI128ToGR128(DAG, N->getOperand(3)) }; MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand(); SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128, DL, Tys, Ops, MVT::i128, MMO); + SDValue Success = emitSETCC(DAG, DL, Res.getValue(2), + SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); + Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); Results.push_back(lowerGR128ToI128(DAG, Res)); + Results.push_back(Success); Results.push_back(Res.getValue(1)); break; } @@ -4972,6 +4999,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(ATOMIC_LOADW_UMIN); OPCODE(ATOMIC_LOADW_UMAX); OPCODE(ATOMIC_CMP_SWAPW); + OPCODE(ATOMIC_CMP_SWAP); OPCODE(ATOMIC_LOAD_128); OPCODE(ATOMIC_STORE_128); OPCODE(ATOMIC_CMP_SWAP_128); @@ -5999,6 +6027,12 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); + // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in + // to the block after the loop. At this point, CC may have been defined + // either by the CR in LoopMBB or by the CS in SetMBB. + if (!MI.registerDefIsDead(SystemZ::CC)) + DoneMBB->addLiveIn(SystemZ::CC); + MI.eraseFromParent(); return DoneMBB; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 92e03c3b8b0cd..e2e27d9598d1a 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -308,6 +308,10 @@ enum NodeType : unsigned { // Operand 5: the width of the field in bits (8 or 16) ATOMIC_CMP_SWAPW, + // Atomic compare-and-swap returning glue (condition code). + // Val, OUTCHAIN, glue = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) + ATOMIC_CMP_SWAP, + // 128-bit atomic load. // Val, OUTCHAIN = ATOMIC_LOAD_128(INCHAIN, ptr) ATOMIC_LOAD_128, @@ -317,7 +321,7 @@ enum NodeType : unsigned { ATOMIC_STORE_128, // 128-bit atomic compare-and-swap.
- // Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) + // Val, OUTCHAIN, glue = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) ATOMIC_CMP_SWAP_128, // Byte swapping load. diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 766d07e8d89af..55a796cddf437 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1717,8 +1717,8 @@ let mayLoad = 1, Defs = [CC] in // Compare and swap. let Defs = [CC] in { - defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>; - def CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>; + defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, z_atomic_cmp_swap, GR32>; + def CSG : CmpSwapRSY<"csg", 0xEB30, z_atomic_cmp_swap, GR64>; } // Compare double and swap. diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 570218254f8b5..d067f331f677e 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -55,6 +55,11 @@ def SDT_ZAtomicCmpSwapW : SDTypeProfile<1, 6, SDTCisVT<4, i32>, SDTCisVT<5, i32>, SDTCisVT<6, i32>]>; +def SDT_ZAtomicCmpSwap : SDTypeProfile<1, 3, + [SDTCisInt<0>, + SDTCisPtrTy<1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; def SDT_ZAtomicLoad128 : SDTypeProfile<1, 1, [SDTCisVT<0, untyped>, SDTCisPtrTy<1>]>; @@ -296,7 +301,15 @@ def z_atomic_loadw_min : AtomicWOp<"ATOMIC_LOADW_MIN">; def z_atomic_loadw_max : AtomicWOp<"ATOMIC_LOADW_MAX">; def z_atomic_loadw_umin : AtomicWOp<"ATOMIC_LOADW_UMIN">; def z_atomic_loadw_umax : AtomicWOp<"ATOMIC_LOADW_UMAX">; -def z_atomic_cmp_swapw : AtomicWOp<"ATOMIC_CMP_SWAPW", SDT_ZAtomicCmpSwapW>; + +def z_atomic_cmp_swap : SDNode<"SystemZISD::ATOMIC_CMP_SWAP", + SDT_ZAtomicCmpSwap, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPOutGlue, SDNPMemOperand]>; +def z_atomic_cmp_swapw : SDNode<"SystemZISD::ATOMIC_CMP_SWAPW", + SDT_ZAtomicCmpSwapW, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, + SDNPOutGlue, SDNPMemOperand]>; def z_atomic_load_128 : SDNode<"SystemZISD::ATOMIC_LOAD_128", SDT_ZAtomicLoad128, @@ -307,7 +320,7 @@ def z_atomic_store_128 : SDNode<"SystemZISD::ATOMIC_STORE_128", def z_atomic_cmp_swap_128 : SDNode<"SystemZISD::ATOMIC_CMP_SWAP_128", SDT_ZAtomicCmpSwap128, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, - SDNPMemOperand]>; + SDNPOutGlue, SDNPMemOperand]>; def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index d14a0fb0b0b2b..05f93ce516210 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -10,6 +10,7 @@ #include "SystemZRegisterInfo.h" #include "SystemZInstrInfo.h" #include "SystemZSubtarget.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" @@ -152,6 +153,72 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MI->getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } +bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const { + assert (MI->isCopy() && "Only expecting COPY instructions"); + + // Coalesce anything which is not a COPY involving a subreg to/from GR128. 
+ if (!(NewRC->hasSuperClassEq(&SystemZ::GR128BitRegClass) && + (getRegSizeInBits(*SrcRC) <= 64 || getRegSizeInBits(*DstRC) <= 64))) + return true; + + // Allow coalescing of a GR128 subreg COPY only if the live ranges are small + // and local to one MBB with not too many interfering registers. Otherwise + // regalloc may run out of registers. + + unsigned WideOpNo = (getRegSizeInBits(*SrcRC) == 128 ? 1 : 0); + unsigned GR128Reg = MI->getOperand(WideOpNo).getReg(); + unsigned GRNarReg = MI->getOperand((WideOpNo == 1) ? 0 : 1).getReg(); + LiveInterval &IntGR128 = LIS.getInterval(GR128Reg); + LiveInterval &IntGRNar = LIS.getInterval(GRNarReg); + + // Check that the two virtual registers are local to MBB. + MachineBasicBlock *MBB = MI->getParent(); + if (LIS.isLiveInToMBB(IntGR128, MBB) || LIS.isLiveOutOfMBB(IntGR128, MBB) || + LIS.isLiveInToMBB(IntGRNar, MBB) || LIS.isLiveOutOfMBB(IntGRNar, MBB)) + return false; + + // Find the first and last MIs of the registers. + MachineInstr *FirstMI = nullptr, *LastMI = nullptr; + if (WideOpNo == 1) { + FirstMI = LIS.getInstructionFromIndex(IntGR128.beginIndex()); + LastMI = LIS.getInstructionFromIndex(IntGRNar.endIndex()); + } else { + FirstMI = LIS.getInstructionFromIndex(IntGRNar.beginIndex()); + LastMI = LIS.getInstructionFromIndex(IntGR128.endIndex()); + } + assert (FirstMI && LastMI && "No instruction from index?"); + + // Check if coalescing seems safe by finding the set of clobbered physreg + // pairs in the region. + BitVector PhysClobbered(getNumRegs()); + MachineBasicBlock::iterator MII = FirstMI, MEE = LastMI; + MEE++; + for (; MII != MEE; ++MII) { + for (const MachineOperand &MO : MII->operands()) + if (MO.isReg() && isPhysicalRegister(MO.getReg())) { + for (MCSuperRegIterator SI(MO.getReg(), this, true/*IncludeSelf*/); + SI.isValid(); ++SI) + if (NewRC->contains(*SI)) { + PhysClobbered.set(*SI); + break; + } + } + } + + // Demand an arbitrary margin of free regs. + unsigned const DemandedFreeGR128 = 3; + if (PhysClobbered.count() > (NewRC->getNumRegs() - DemandedFreeGR128)) + return false; + + return true; +} + unsigned SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const SystemZFrameLowering *TFI = getFrameLowering(MF);
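A word on why shouldCoalesce is this conservative: GR128 virtual registers, such as the operands of the 128-bit compare-and-swap handled earlier in this patch, are allocated to even/odd pairs of 64-bit GPRs, so every live GR128 costs two registers and over-eager coalescing can leave the allocator without a free pair. A source-level way to produce such GR128 traffic, assuming a build where 128-bit atomics are lock-free:

#include <atomic>

// The cmpxchg below becomes ATOMIC_CMP_SWAP_128 and lives in a GR128
// even/odd register pair; the heuristic above keeps a margin of free
// pairs (DemandedFreeGR128) in any region it agrees to coalesce across.
bool cas128(std::atomic<__int128> &V, __int128 &Expected, __int128 Desired) {
  return V.compare_exchange_strong(Expected, Desired);
}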
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index e41c06c98af29..8b690e6da9f90 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -18,6 +18,8 @@ namespace llvm { +class LiveIntervals; + namespace SystemZ { // Return the subreg to use for referring to the even and odd registers // in a GR128 pair. Is32Bit says whether we want a GR32 or GR64. @@ -59,6 +61,16 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override; + + /// \brief SrcRC and DstRC will be morphed into NewRC if this returns true. + bool shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, + unsigned SubReg, + const TargetRegisterClass *DstRC, + unsigned DstSubReg, + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; + unsigned getFrameRegister(const MachineFunction &MF) const override; }; diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index 4829f73e080e2..52dc4cda02fbc 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -91,6 +91,11 @@ class SystemZSubtarget : public SystemZGenSubtargetInfo { return &TSInfo; } + // True if the subtarget should run MachineScheduler after aggressive + // coalescing. This currently replaces the SelectionDAG scheduler with the + // "source" order scheduler. + bool enableMachineScheduler() const override { return true; } + // This is important for reducing register pressure in vector code. bool useAA() const override { return true; } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 1357cb5735f8a..226a3b35f2cf8 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -41,7 +41,8 @@ class WebAssemblyAsmBackendELF final : public MCAsmBackend { const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, bool IsPCRel) const override; - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override; // No instruction requires relaxation bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -82,7 +83,8 @@ class WebAssemblyAsmBackend final : public MCAsmBackend { const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, bool IsPCRel) const override; - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override; // No instruction requires relaxation bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -131,7 +133,7 @@ void WebAssemblyAsmBackendELF::applyFixup(const MCAssembler &Asm, Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); } -MCObjectWriter * +std::unique_ptr<MCObjectWriter> WebAssemblyAsmBackendELF::createObjectWriter(raw_pwrite_stream &OS) const { return createWebAssemblyELFObjectWriter(OS, Is64Bit, 0); } @@ -191,7 +193,7 @@ void WebAssemblyAsmBackend::applyFixup(const MCAssembler &Asm, Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); } -MCObjectWriter * +std::unique_ptr<MCObjectWriter> WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { return createWebAssemblyWasmObjectWriter(OS, Is64Bit); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp index 2146f67959b82..b67ecfa455b36 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp @@ -16,6 +16,7 @@ #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -58,10 +59,10 @@ unsigned WebAssemblyELFObjectWriter::getRelocType(MCContext &Ctx, } } -MCObjectWriter *llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit, - uint8_t OSABI) { -
MCELFObjectTargetWriter *MOTW = - new WebAssemblyELFObjectWriter(Is64Bit, OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +std::unique_ptr<MCObjectWriter> +llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, + uint8_t OSABI) { + auto MOTW = llvm::make_unique<WebAssemblyELFObjectWriter>(Is64Bit, OSABI); + return createELFObjectWriter(std::move(MOTW), OS, /*IsLittleEndian=*/true); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 6a1bd8d0ddb4b..7dca89ab822d8 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -18,6 +18,7 @@ #include "llvm/BinaryFormat/Wasm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/DataTypes.h" +#include <memory> namespace llvm { @@ -39,11 +40,13 @@ MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII); MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT); -MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit, uint8_t OSABI); +std::unique_ptr<MCObjectWriter> +createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint8_t OSABI); -MCObjectWriter *createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit); +std::unique_ptr<MCObjectWriter> +createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit); namespace WebAssembly { enum OperandType { @@ -111,6 +114,8 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::LOAD8_U_I32: case WebAssembly::LOAD8_S_I64: case WebAssembly::LOAD8_U_I64: + case WebAssembly::ATOMIC_LOAD8_U_I32: + case WebAssembly::ATOMIC_LOAD8_U_I64: case WebAssembly::STORE8_I32: case WebAssembly::STORE8_I64: return 0; @@ -118,6 +123,8 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::LOAD16_U_I32: case WebAssembly::LOAD16_S_I64: case WebAssembly::LOAD16_U_I64: + case WebAssembly::ATOMIC_LOAD16_U_I32: + case WebAssembly::ATOMIC_LOAD16_U_I64: case WebAssembly::STORE16_I32: case WebAssembly::STORE16_I64: return 1; @@ -129,11 +136,13 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) { case WebAssembly::LOAD32_U_I64: case WebAssembly::STORE32_I64: case WebAssembly::ATOMIC_LOAD_I32: + case WebAssembly::ATOMIC_LOAD32_U_I64: return 2; case WebAssembly::LOAD_I64: case WebAssembly::LOAD_F64: case WebAssembly::STORE_I64: case WebAssembly::STORE_F64: + case WebAssembly::ATOMIC_LOAD_I64: return 3; default: llvm_unreachable("Only loads and stores have p2align values"); diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index 00bf02469bdd9..c82a64d58246e 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -219,8 +219,8 @@ void WebAssemblyTargetWasmStreamer::emitGlobal( // section. This will later be decoded and turned into contents for the // Globals Section.
Streamer.PushSection(); - Streamer.SwitchSection(Streamer.getContext() - .getWasmSection(".global_variables", 0, 0)); + Streamer.SwitchSection(Streamer.getContext().getWasmSection( + ".global_variables", SectionKind::getMetadata())); for (const wasm::Global &G : Globals) { Streamer.EmitIntValue(int32_t(G.Type), 1); Streamer.EmitIntValue(G.Mutable, 1); @@ -240,8 +240,8 @@ void WebAssemblyTargetWasmStreamer::emitGlobal( void WebAssemblyTargetWasmStreamer::emitStackPointer(uint32_t Index) { Streamer.PushSection(); - Streamer.SwitchSection(Streamer.getContext() - .getWasmSection(".stack_pointer", 0, 0)); + Streamer.SwitchSection(Streamer.getContext().getWasmSection( + ".stack_pointer", SectionKind::getMetadata())); Streamer.EmitIntValue(Index, 4); Streamer.PopSection(); } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index 995984b036164..39abde26df7fa 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MCWasmObjectWriter.h" #include "llvm/MC/MCValue.h" @@ -93,8 +94,9 @@ WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, } } -MCObjectWriter *llvm::createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit) { - MCWasmObjectTargetWriter *MOTW = new WebAssemblyWasmObjectWriter(Is64Bit); - return createWasmObjectWriter(MOTW, OS); +std::unique_ptr<MCObjectWriter> +llvm::createWebAssemblyWasmObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit) { + auto MOTW = llvm::make_unique<WebAssemblyWasmObjectWriter>(Is64Bit); + return createWasmObjectWriter(std::move(MOTW), OS); } diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h index c8917b8d7e48a..a37f8bcf6ba59 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYASMPRINTER_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYASMPRINTER_H +#include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCStreamer.h" @@ -17,7 +18,6 @@ namespace llvm { class MCSymbol; -class WebAssemblyFunctionInfo; class WebAssemblyTargetStreamer; class WebAssemblyMCInstLower; diff --git a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp index 76a2ff3f9803b..19df75c7091bf 100644 --- a/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp +++ b/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp @@ -24,6 +24,7 @@ //===----------------------------------------------------------------------===// #include "WebAssembly.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -68,10 +69,19 @@ static void FindUses(Value *V, Function &F, if (BitCastOperator *BC = dyn_cast<BitCastOperator>(U.getUser())) FindUses(BC, F, Uses, ConstantBCs); else if (U.get()->getType() != F.getType()) { + CallSite CS(U.getUser()); + if (!CS) + // Skip uses that aren't immediately called + continue; + Value *Callee = CS.getCalledValue(); + if (Callee != V) + // Skip calls where the function isn't the callee + continue; if (isa<Constant>(U.get())) { //
Only add constant bitcasts to the list once; they get RAUW'd auto c = ConstantBCs.insert(cast<Constant>(U.get())); - if (!c.second) continue; + if (!c.second) + continue; } Uses.push_back(std::make_pair(&U, &F)); } } diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index 355802f760b9e..a49172df158f6 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -17,19 +17,180 @@ //===----------------------------------------------------------------------===// let Defs = [ARGUMENTS] in { -// TODO: add the rest of the atomic loads -def ATOMIC_LOAD_I32 : CLoadI32<"i32.atomic.load", 0xfe10>; -def ATOMIC_LOAD_I64 : CLoadI64<"i64.atomic.load", 0xfe11>; +def ATOMIC_LOAD_I32 : WebAssemblyLoad<I32, "i32.atomic.load", 0xfe10>; +def ATOMIC_LOAD_I64 : WebAssemblyLoad<I64, "i64.atomic.load", 0xfe11>; } // Defs = [ARGUMENTS] // Select loads with no constant offset. let Predicates = [HasAtomics] in { -class ALoadPatNoOffset<ValueType ty, PatFrag node, I inst> : - Pat<(ty (node I32:$addr)), (inst 0, 0, $addr)>; -def : ALoadPatNoOffset<i32, atomic_load_32, ATOMIC_LOAD_I32>; -def : ALoadPatNoOffset<i64, atomic_load_64, ATOMIC_LOAD_I64>; +def : LoadPatNoOffset<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatNoOffset<i64, atomic_load_64, ATOMIC_LOAD_I64>; -} +// Select loads with a constant offset. + +// Pattern with address + immediate offset +def : LoadPatImmOff<i32, atomic_load_32, regPlusImm, ATOMIC_LOAD_I32>; +def : LoadPatImmOff<i64, atomic_load_64, regPlusImm, ATOMIC_LOAD_I64>; +def : LoadPatImmOff<i32, atomic_load_32, or_is_add, ATOMIC_LOAD_I32>; +def : LoadPatImmOff<i64, atomic_load_64, or_is_add, ATOMIC_LOAD_I64>; + +def : LoadPatGlobalAddr<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatGlobalAddr<i64, atomic_load_64, ATOMIC_LOAD_I64>; + +def : LoadPatExternalSym<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatExternalSym<i64, atomic_load_64, ATOMIC_LOAD_I64>; + + +// Select loads with just a constant offset. +def : LoadPatOffsetOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatOffsetOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>; + +def : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>; + +def : LoadPatExternSymOffOnly<i32, atomic_load_32, ATOMIC_LOAD_I32>; +def : LoadPatExternSymOffOnly<i64, atomic_load_64, ATOMIC_LOAD_I64>; + +} // Predicates = [HasAtomics] + +// Extending loads. Note that there are only zero-extending atomic loads, no +// sign-extending loads. +let Defs = [ARGUMENTS] in { +def ATOMIC_LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load8_u", 0xfe12>; +def ATOMIC_LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.atomic.load16_u", 0xfe13>; +def ATOMIC_LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load8_u", 0xfe14>; +def ATOMIC_LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load16_u", 0xfe15>; +def ATOMIC_LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.atomic.load32_u", 0xfe16>; +} // Defs = [ARGUMENTS] + +// Fragments for extending loads. These are different from regular loads because +// the SDNodes are derived from AtomicSDNode rather than LoadSDNode and +// therefore don't have the extension type field. So instead of matching that, +// we match the patterns that the type legalizer expands them to. + +// We directly match zext patterns and select the zext atomic loads. +// i32 (zext (i8 (atomic_load_8))) gets legalized to +// i32 (and (i32 (atomic_load_8)), 255) +// These can be selected to a single zero-extending atomic load instruction. +def zext_aload_8 : PatFrag<(ops node:$addr), + (and (i32 (atomic_load_8 node:$addr)), 255)>; +def zext_aload_16 : PatFrag<(ops node:$addr), + (and (i32 (atomic_load_16 node:$addr)), 65535)>; +// Unlike regular loads, extension to i64 is handled differently than i32. +// i64 (zext (i8 (atomic_load_8))) gets legalized to +// i64 (and (i64 (anyext (i32 (atomic_load_8)))), 255) +def zext_aload_8_64 : + PatFrag<(ops node:$addr), + (and (i64 (anyext (i32 (atomic_load_8 node:$addr)))), 255)>; +def zext_aload_16_64 : + PatFrag<(ops node:$addr), + (and (i64 (anyext (i32 (atomic_load_16 node:$addr)))), 65535)>; +def zext_aload_32_64 : + PatFrag<(ops node:$addr), + (zext (i32 (atomic_load node:$addr)))>; +
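To make the legalization these fragments describe concrete: the zext never survives to instruction selection, so the selector has to recognize the and/anyext shape instead. For instance, assuming a wasm target with the atomics feature enabled:

#include <atomic>
#include <cstdint>

// i64 (zext (i8 (atomic_load_8 P))) legalizes to
// i64 (and (anyext (i32 (atomic_load_8 P))), 255),
// which zext_aload_8_64 above matches, so this compiles to a single
// i64.atomic.load8_u rather than a load plus an explicit mask.
uint64_t load8ZextTo64(std::atomic<uint8_t> &P) {
  return P.load();
}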
+// We don't have single sext atomic load instructions. So for sext loads, we +// match bare subword loads (for 32-bit results) and anyext loads (for 64-bit +// results) and select a zext load; the next instruction will be sext_inreg +// which is selected by itself. +def anyext_aload_8_64 : + PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_8 node:$addr)))>; +def anyext_aload_16_64 : + PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>; + +let Predicates = [HasAtomics] in { +// Select zero-extending loads with no constant offset. +def : LoadPatNoOffset<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatNoOffset<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatNoOffset<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatNoOffset<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatNoOffset<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; + +// Select sign-extending loads with no constant offset +def : LoadPatNoOffset<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatNoOffset<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatNoOffset<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatNoOffset<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; +// 32->64 sext load gets selected as i32.atomic.load, i64.extend_s/i64 + + +// Zero-extending loads with constant offset +def : LoadPatImmOff<i32, zext_aload_8, regPlusImm, ATOMIC_LOAD8_U_I32>; +def : LoadPatImmOff<i32, zext_aload_16, regPlusImm, ATOMIC_LOAD16_U_I32>; +def : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>; +def : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>; +def : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, ATOMIC_LOAD32_U_I64>; +def : LoadPatImmOff<i32, zext_aload_8, or_is_add, ATOMIC_LOAD8_U_I32>; +def : LoadPatImmOff<i32, zext_aload_16, or_is_add, ATOMIC_LOAD16_U_I32>; +def : LoadPatImmOff<i64, zext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>; +def : LoadPatImmOff<i64, zext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>; +def : LoadPatImmOff<i64, zext_aload_32_64, or_is_add, ATOMIC_LOAD32_U_I64>; + +// Sign-extending loads with constant offset +def : LoadPatImmOff<i32, atomic_load_8, regPlusImm, ATOMIC_LOAD8_U_I32>; +def : LoadPatImmOff<i32, atomic_load_16, regPlusImm, ATOMIC_LOAD16_U_I32>; +def : LoadPatImmOff<i64, anyext_aload_8_64, regPlusImm, ATOMIC_LOAD8_U_I64>; +def : LoadPatImmOff<i64, anyext_aload_16_64, regPlusImm, ATOMIC_LOAD16_U_I64>; +def : LoadPatImmOff<i32, atomic_load_8, or_is_add, ATOMIC_LOAD8_U_I32>; +def : LoadPatImmOff<i32, atomic_load_16, or_is_add, ATOMIC_LOAD16_U_I32>; +def : LoadPatImmOff<i64, anyext_aload_8_64, or_is_add, ATOMIC_LOAD8_U_I64>; +def : LoadPatImmOff<i64, anyext_aload_16_64, or_is_add, ATOMIC_LOAD16_U_I64>; +// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64 + +def : LoadPatGlobalAddr<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatGlobalAddr<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatGlobalAddr<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatGlobalAddr<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatGlobalAddr<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; +def : LoadPatGlobalAddr<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatGlobalAddr<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatGlobalAddr<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatGlobalAddr<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; + +def : LoadPatExternalSym<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatExternalSym<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatExternalSym<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatExternalSym<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatExternalSym<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; +def : LoadPatExternalSym<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatExternalSym<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatExternalSym<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatExternalSym<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; + + +// Extending loads with just a constant offset +def : LoadPatOffsetOnly<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatOffsetOnly<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatOffsetOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatOffsetOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatOffsetOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; +def : LoadPatOffsetOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatOffsetOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatOffsetOnly<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatOffsetOnly<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; + +def : LoadPatGlobalAddrOffOnly<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatGlobalAddrOffOnly<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; +def : LoadPatGlobalAddrOffOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatGlobalAddrOffOnly<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatGlobalAddrOffOnly<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; + +def : LoadPatExternSymOffOnly<i32, zext_aload_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatExternSymOffOnly<i32, zext_aload_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatExternSymOffOnly<i64, zext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatExternSymOffOnly<i64, zext_aload_16_64, ATOMIC_LOAD16_U_I64>; +def : LoadPatExternSymOffOnly<i64, zext_aload_32_64, ATOMIC_LOAD32_U_I64>; +def : LoadPatExternSymOffOnly<i32, atomic_load_8, ATOMIC_LOAD8_U_I32>; +def : LoadPatExternSymOffOnly<i32, atomic_load_16, ATOMIC_LOAD16_U_I32>; +def : LoadPatExternSymOffOnly<i64, anyext_aload_8_64, ATOMIC_LOAD8_U_I64>; +def : LoadPatExternSymOffOnly<i64, anyext_aload_16_64, ATOMIC_LOAD16_U_I64>; + + +} // Predicates = [HasAtomics] //===----------------------------------------------------------------------===// // Atomic stores diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 1897027b57f44..9d58895ca5a69 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -55,28 +55,19 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off), let Defs = [ARGUMENTS] in { -// Classes to define both atomic and non-atomic integer loads -class CLoadI32<string Name, int Opcode> : - I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), Opcode>; - -class CLoadI64<string Name, int Opcode> : - I<(outs I64:$dst), +// Defines atomic
and non-atomic loads, regular and extending. +class WebAssemblyLoad<RegisterClass rc, string Name, int Opcode> : + I<(outs rc:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr), [], !strconcat(Name, "\t$dst, ${off}(${addr})${p2align}"), Opcode>; // Basic load. // FIXME: When we can break syntax compatibility, reorder the fields in the // asmstrings to match the binary encoding. -def LOAD_I32 : CLoadI32<"i32.load", 0x28>; -def LOAD_I64 : CLoadI64<"i64.load", 0x29>; -def LOAD_F32 : I<(outs F32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "f32.load\t$dst, ${off}(${addr})${p2align}", 0x2a>; -def LOAD_F64 : I<(outs F64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "f64.load\t$dst, ${off}(${addr})${p2align}", 0x2b>; +def LOAD_I32 : WebAssemblyLoad<I32, "i32.load", 0x28>; +def LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29>; +def LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a>; +def LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b>; } // Defs = [ARGUMENTS] @@ -153,36 +144,16 @@ def : LoadPatExternSymOffOnly; let Defs = [ARGUMENTS] in { // Extending load. -def LOAD8_S_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load8_s\t$dst, ${off}(${addr})${p2align}", 0x2c>; -def LOAD8_U_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load8_u\t$dst, ${off}(${addr})${p2align}", 0x2d>; -def LOAD16_S_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load16_s\t$dst, ${off}(${addr})${p2align}", 0x2e>; -def LOAD16_U_I32 : I<(outs I32:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i32.load16_u\t$dst, ${off}(${addr})${p2align}", 0x2f>; -def LOAD8_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load8_s\t$dst, ${off}(${addr})${p2align}", 0x30>; -def LOAD8_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load8_u\t$dst, ${off}(${addr})${p2align}", 0x31>; -def LOAD16_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load16_s\t$dst, ${off}(${addr})${p2align}", 0x32>; -def LOAD16_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load16_u\t$dst, ${off}(${addr})${p2align}", 0x33>; -def LOAD32_S_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load32_s\t$dst, ${off}(${addr})${p2align}", 0x34>; -def LOAD32_U_I64 : I<(outs I64:$dst), - (ins P2Align:$p2align, offset32_op:$off, I32:$addr), - [], "i64.load32_u\t$dst, ${off}(${addr})${p2align}", 0x35>; +def LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c>; +def LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d>; +def LOAD16_S_I32 : WebAssemblyLoad<I32, "i32.load16_s", 0x2e>; +def LOAD16_U_I32 : WebAssemblyLoad<I32, "i32.load16_u", 0x2f>; +def LOAD8_S_I64 : WebAssemblyLoad<I64, "i64.load8_s", 0x30>; +def LOAD8_U_I64 : WebAssemblyLoad<I64, "i64.load8_u", 0x31>; +def LOAD16_S_I64 : WebAssemblyLoad<I64, "i64.load16_s", 0x32>; +def LOAD16_U_I64 : WebAssemblyLoad<I64, "i64.load16_u", 0x33>; +def LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34>; +def LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35>; } // Defs = [ARGUMENTS]
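As a reminder of what the _s/_u suffixes in these definitions encode: the subword value is sign- or zero-extended into the full result register, which in C corresponds to the signedness of the loaded type:

#include <cstdint>

// i32.load8_s: load one byte, sign-extend to 32 bits.
int32_t loadS8(const int8_t *P) { return *P; }
// i32.load8_u: load one byte, zero-extend to 32 bits.
uint32_t loadU8(const uint8_t *P) { return *P; }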
@@ -290,7 +261,6 @@ def : LoadPatNoOffset; def : LoadPatNoOffset; def : LoadPatNoOffset; - // Select "don't care" extending loads with a constant offset. def : LoadPatImmOff; def : LoadPatImmOff; @@ -313,7 +283,6 @@ def : LoadPatExternalSym; def : LoadPatExternalSym; def : LoadPatExternalSym; - // Select "don't care" extending loads with just a constant offset. def : LoadPatOffsetOnly; def : LoadPatOffsetOnly; diff --git a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp index a418f65e0ee4e..c4b9e915b41e4 100644 --- a/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp +++ b/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -97,6 +97,12 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::LOAD32_S_I64: case WebAssembly::LOAD32_U_I64: case WebAssembly::ATOMIC_LOAD_I32: + case WebAssembly::ATOMIC_LOAD8_U_I32: + case WebAssembly::ATOMIC_LOAD16_U_I32: + case WebAssembly::ATOMIC_LOAD_I64: + case WebAssembly::ATOMIC_LOAD8_U_I64: + case WebAssembly::ATOMIC_LOAD16_U_I64: + case WebAssembly::ATOMIC_LOAD32_U_I64: RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo); break; case WebAssembly::STORE_I32: diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index bd176bac4c4d0..896c50a93287f 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -7,11 +7,12 @@ // //===----------------------------------------------------------------------===// +#include "InstPrinter/X86IntelInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86TargetStreamer.h" #include "X86AsmInstrumentation.h" #include "X86AsmParserCommon.h" #include "X86Operand.h" -#include "InstPrinter/X86IntelInstPrinter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -68,7 +69,6 @@ static const char OpPrecedence[] = { }; class X86AsmParser : public MCTargetAsmParser { - const MCInstrInfo &MII; ParseInstructionInfo *InstInfo; std::unique_ptr<X86AsmInstrumentation> Instrumentation; bool Code16GCC; @@ -81,6 +81,13 @@ class X86AsmParser : public MCTargetAsmParser { return Result; } + X86TargetStreamer &getTargetStreamer() { + assert(getParser().getStreamer().getTargetStreamer() && + "do not have a target streamer"); + MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); + return static_cast<X86TargetStreamer &>(TS); + } + unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst, uint64_t &ErrorInfo, bool matchingInlineAsm, unsigned VariantID = 0) { @@ -339,9 +346,7 @@ class X86AsmParser : public MCTargetAsmParser { IntelExprStateMachine() : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), Scale(1), Imm(0), Sym(nullptr), BracCount(0), - MemExpr(false) { - Info.clear(); - } + MemExpr(false) {} void addImm(int64_t imm) { Imm += imm; } short getBracCount() { return BracCount; } @@ -580,7 +585,15 @@ class X86AsmParser : public MCTargetAsmParser { return false; } bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName, - StringRef &ErrMsg) { + const InlineAsmIdentifierInfo &IDInfo, + bool ParsingInlineAsm, StringRef &ErrMsg) { + // InlineAsm: Treat an enum value as an integer + if (ParsingInlineAsm) + if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) + return onInteger(IDInfo.Enum.EnumVal, ErrMsg); + // Treat a symbolic constant like an integer + if (auto *CE = dyn_cast<MCConstantExpr>(SymRef)) + return onInteger(CE->getValue(), ErrMsg); PrevState = State; bool HasSymbol = Sym != nullptr; switch (State) { @@ -592,11 +605,13 @@ case IES_PLUS: case IES_MINUS: case IES_NOT: case IES_INIT: case IES_LBRAC: - MemExpr = !(SymRef->getKind() == MCExpr::Constant); + MemExpr = true; State = IES_INTEGER; Sym = SymRef; SymName = SymRefName; IC.pushOperand(IC_IMM); + if (ParsingInlineAsm) +
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index bd176bac4c4d0..896c50a93287f 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -7,11 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
+#include "InstPrinter/X86IntelInstPrinter.h"
 #include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86TargetStreamer.h"
 #include "X86AsmInstrumentation.h"
 #include "X86AsmParserCommon.h"
 #include "X86Operand.h"
-#include "InstPrinter/X86IntelInstPrinter.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
@@ -68,7 +69,6 @@ static const char OpPrecedence[] = {
 };
 
 class X86AsmParser : public MCTargetAsmParser {
-  const MCInstrInfo &MII;
   ParseInstructionInfo *InstInfo;
   std::unique_ptr<X86AsmInstrumentation> Instrumentation;
   bool Code16GCC;
@@ -81,6 +81,13 @@ class X86AsmParser : public MCTargetAsmParser {
     return Result;
   }
 
+  X86TargetStreamer &getTargetStreamer() {
+    assert(getParser().getStreamer().getTargetStreamer() &&
+           "do not have a target streamer");
+    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+    return static_cast<X86TargetStreamer &>(TS);
+  }
+
   unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
                             uint64_t &ErrorInfo, bool matchingInlineAsm,
                             unsigned VariantID = 0) {
@@ -339,9 +346,7 @@ class X86AsmParser : public MCTargetAsmParser {
     IntelExprStateMachine()
         : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
           TmpReg(0), Scale(1), Imm(0), Sym(nullptr), BracCount(0),
-          MemExpr(false) {
-      Info.clear();
-    }
+          MemExpr(false) {}
 
     void addImm(int64_t imm) { Imm += imm; }
     short getBracCount() { return BracCount; }
@@ -580,7 +585,15 @@ class X86AsmParser : public MCTargetAsmParser {
       return false;
     }
     bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
-                          StringRef &ErrMsg) {
+                          const InlineAsmIdentifierInfo &IDInfo,
+                          bool ParsingInlineAsm, StringRef &ErrMsg) {
+      // InlineAsm: Treat an enum value as an integer
+      if (ParsingInlineAsm)
+        if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
+          return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
+      // Treat a symbolic constant like an integer
+      if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
+        return onInteger(CE->getValue(), ErrMsg);
       PrevState = State;
       bool HasSymbol = Sym != nullptr;
       switch (State) {
@@ -592,11 +605,13 @@ class X86AsmParser : public MCTargetAsmParser {
       case IES_NOT:
       case IES_INIT:
       case IES_LBRAC:
-        MemExpr = !(SymRef->getKind() == MCExpr::Constant);
+        MemExpr = true;
         State = IES_INTEGER;
         Sym = SymRef;
         SymName = SymRefName;
         IC.pushOperand(IC_IMM);
+        if (ParsingInlineAsm)
+          Info = IDInfo;
         break;
       }
       if (HasSymbol)
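The onIdentifierExpr change in the hunk above is what lets an enumerator named in MS-style inline assembly behave as a plain immediate: an IK_EnumVal lookup result is routed to onInteger instead of becoming a symbol reference. A hedged illustration of the kind of input it targets (MSVC-style __asm, so 32-bit cl or clang-cl only; the enum and function are invented for the example):

    // Build with cl /c or clang-cl /c -fasm-blocks, targeting x86.
    enum Flag { FlagBit = 16 };

    int addFlag(int X) {
      int R;
      __asm {
        mov eax, X
        add eax, FlagBit   // folded to "add eax, 16" via onInteger()
        mov R, eax
      }
      return R;
    }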
@@ -832,6 +847,15 @@ class X86AsmParser : public MCTargetAsmParser {
   bool ParseDirectiveWord(unsigned Size, SMLoc L);
   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
 
+  /// CodeView FPO data directives.
+  bool parseDirectiveFPOProc(SMLoc L);
+  bool parseDirectiveFPOSetFrame(SMLoc L);
+  bool parseDirectiveFPOPushReg(SMLoc L);
+  bool parseDirectiveFPOStackAlloc(SMLoc L);
+  bool parseDirectiveFPOEndPrologue(SMLoc L);
+  bool parseDirectiveFPOEndProc(SMLoc L);
+  bool parseDirectiveFPOData(SMLoc L);
+
   bool processInstruction(MCInst &Inst, const OperandVector &Ops);
 
   /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
@@ -885,7 +909,7 @@ class X86AsmParser : public MCTargetAsmParser {
     MCSubtargetInfo &STI = copySTI();
     FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
     FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
-    unsigned FB = ComputeAvailableFeatures(
+    uint64_t FB = ComputeAvailableFeatures(
       STI.ToggleFeature(OldMode.flip(mode)));
     setAvailableFeatures(FB);
 
@@ -915,7 +939,7 @@ class X86AsmParser : public MCTargetAsmParser {
 
   X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
                const MCInstrInfo &mii, const MCTargetOptions &Options)
-      : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr),
+      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
         Code16GCC(false) {
 
     // Initialize the set of available features.
@@ -1261,38 +1285,43 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
     const InlineAsmIdentifierInfo &Info) {
   // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
   // some other label reference.
-  if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
+  if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
     // Insert an explicit size if the user didn't have one.
     if (!Size) {
       Size = getPointerWidth();
       InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
                                           /*Len=*/0, Size);
     }
-
     // Create an absolute memory reference in order to match against
     // instructions taking a PC relative operand.
     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
-                                 Identifier, Info.OpDecl);
+                                 Identifier, Info.Label.Decl);
   }
-
   // We either have a direct symbol reference, or an offset from a symbol.  The
   // parser always puts the symbol on the LHS, so look there for size
   // calculation purposes.
   unsigned FrontendSize = 0;
-  const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
-  bool IsSymRef =
-      isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
-  if (IsSymRef && !Size && Info.Type)
-    FrontendSize = Info.Type * 8; // Size is in terms of bits in this context.
-
-  // When parsing inline assembly we set the base register to a non-zero value
+  void *Decl = nullptr;
+  bool IsGlobalLV = false;
+  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
+    // Size is in terms of bits in this context.
+    FrontendSize = Info.Var.Type * 8;
+    Decl = Info.Var.Decl;
+    IsGlobalLV = Info.Var.IsGlobalLV;
+  }
+  // It is widely common for MS InlineAsm to use a global variable and one/two
+  // registers in a memory expression, though it is inaccessible via rip/eip.
+  if (IsGlobalLV && (BaseReg || IndexReg)) {
+    return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End);
+  // Otherwise, we set the base register to a non-zero value
   // if we don't know the actual value at this time.  This is necessary to
   // get the matching correct in some cases.
-  BaseReg = BaseReg ? BaseReg : 1;
-  return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
-                               IndexReg, Scale, Start, End, Size, Identifier,
-                               Info.OpDecl, FrontendSize);
+  } else {
+    BaseReg = BaseReg ? BaseReg : 1;
+    return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
+                                 IndexReg, Scale, Start, End, Size, Identifier,
+                                 Decl, FrontendSize);
+  }
 }
 
 // Some binary bitwise operators have a named synonymous
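Distilled control flow of the rewritten CreateMemForInlineAsm, as a standalone sketch (struct and names invented; the real function builds X86Operands): a global lvalue combined with any base or index register keeps only the displacement, since such a compound expression cannot be addressed rip/eip-relative, while an unknown base register is otherwise pinned to a dummy non-zero value so instruction matching still sees a memory form.

    struct MemChoice {
      bool DispOnly;    // emit only the displacement
      unsigned BaseReg; // possibly the dummy register 1
    };

    static MemChoice chooseMemForm(bool IsGlobalLV, unsigned BaseReg,
                                   unsigned IndexReg) {
      if (IsGlobalLV && (BaseReg || IndexReg))
        return {true, 0};                      // global + registers: disp only
      return {false, BaseReg ? BaseReg : 1};   // pin unknown base to non-zero
    }

    int main() { return chooseMemForm(true, 0, 3).DispOnly ? 0 : 1; }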
@@ -1348,44 +1377,53 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
       break;
     case AsmToken::String:
     case AsmToken::Identifier: {
-      // This could be a register or a symbolic displacement.
-      unsigned TmpReg;
-      const MCExpr *Val;
       SMLoc IdentLoc = Tok.getLoc();
       StringRef Identifier = Tok.getString();
       UpdateLocLex = false;
-      if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
-        if (SM.onRegister(TmpReg, ErrMsg))
+      // Register
+      unsigned Reg;
+      if (Tok.isNot(AsmToken::String) && !ParseRegister(Reg, IdentLoc, End)) {
+        if (SM.onRegister(Reg, ErrMsg))
           return Error(Tok.getLoc(), ErrMsg);
-      } else if (ParseIntelNamedOperator(Identifier, SM)) {
-        UpdateLocLex = true;
-      } else if (!isParsingInlineAsm()) {
-        if (getParser().parsePrimaryExpr(Val, End))
+        break;
+      }
+      // Operator synonymous ("not", "or" etc.)
+      if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM)))
+        break;
+      // Symbol reference, when parsing assembly content
+      InlineAsmIdentifierInfo Info;
+      const MCExpr *Val;
+      if (!isParsingInlineAsm()) {
+        if (getParser().parsePrimaryExpr(Val, End)) {
           return Error(Tok.getLoc(), "Unexpected identifier!");
-        if (auto *CE = dyn_cast<MCConstantExpr>(Val)) {
-          if (SM.onInteger(CE->getValue(), ErrMsg))
-            return Error(IdentLoc, ErrMsg);
-        } else if (SM.onIdentifierExpr(Val, Identifier, ErrMsg))
+        } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
           return Error(IdentLoc, ErrMsg);
-      } else if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
+        } else
+          break;
+      }
+      // MS InlineAsm operators (TYPE/LENGTH/SIZE)
+      if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
         if (OpKind == IOK_OFFSET)
           return Error(IdentLoc, "Dealing OFFSET operator as part of"
                                  "a compound immediate expression is yet to be supported");
-        int64_t Val = ParseIntelInlineAsmOperator(OpKind);
-        if (!Val)
+        if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
+          if (SM.onInteger(Val, ErrMsg))
+            return Error(IdentLoc, ErrMsg);
+        } else
           return true;
-        if (SM.onInteger(Val, ErrMsg))
-          return Error(IdentLoc, ErrMsg);
-      } else if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
-        if (ParseIntelDotOperator(SM, End))
-          return true;
-      } else if (ParseIntelInlineAsmIdentifier(Val, Identifier,
-                                               SM.getIdentifierInfo(),
-                                               /*Unevaluated=*/false, End)) {
+        break;
+      }
+      // MS Dot Operator expression
+      if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
+        if (ParseIntelDotOperator(SM, End))
+          return true;
+        break;
+      }
+      // MS InlineAsm identifier
+      if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
        return true;
-      } else if (SM.onIdentifierExpr(Val, Identifier, ErrMsg)) {
+      else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
        return Error(IdentLoc, ErrMsg);
-      }
       break;
     }
     case AsmToken::Integer: {
@@ -1405,7 +1443,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
         if (IDVal == "b" && Sym->isUndefined())
           return Error(Loc, "invalid reference to undefined symbol");
         StringRef Identifier = Sym->getName();
-        if (SM.onIdentifierExpr(Val, Identifier, ErrMsg))
+        InlineAsmIdentifierInfo Info;
+        if (SM.onIdentifierExpr(Val, Identifier, Info,
+                                isParsingInlineAsm(), ErrMsg))
           return Error(Loc, ErrMsg);
         End = consumeToken();
       } else {
@@ -1500,8 +1540,7 @@ bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
   Val = nullptr;
 
   StringRef LineBuf(Identifier.data());
-  void *Result =
-      SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
+  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
 
   const AsmToken &Tok = Parser.getTok();
   SMLoc Loc = Tok.getLoc();
@@ -1517,12 +1556,13 @@ bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
 
   // The frontend should end parsing on an assembler token boundary, unless it
   // failed parsing.
-  assert((End.getPointer() == EndPtr || !Result) &&
-         "frontend claimed part of a token?");
+  assert((End.getPointer() == EndPtr ||
+          Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
+         "frontend claimed part of a token?");
 
   // If the identifier lookup was unsuccessful, assume that we are dealing with
   // a label.
-  if (!Result) {
+  if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
     StringRef InternalName =
       SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
                                          Loc, false);
@@ -1530,8 +1570,8 @@ bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
     // Push a rewrite for replacing the identifier name with the internal name.
     InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
                                         InternalName);
-  }
-
+  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
+    return false;
   // Create the symbol reference.
   MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
@@ -1625,6 +1665,12 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
                                     /*Unevaluated=*/false, End))
     return nullptr;
 
+  void *Decl = nullptr;
+  // FIXME: MS evaluates "offset <Constant>" to the underlying integral
+  if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
+    return ErrorOperand(Start, "offset operator cannot yet handle constants");
+  else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
+    Decl = Info.Var.Decl;
   // Don't emit the offset operator.
   InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
 
@@ -1635,7 +1681,7 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
   unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
   return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
-                               OffsetOfLoc, Identifier, Info.OpDecl);
+                               OffsetOfLoc, Identifier, Decl);
 }
 
 // Query a candidate string for being an Intel assembly operator
@@ -1668,7 +1714,7 @@ unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
                                     /*Unevaluated=*/true, End))
     return 0;
 
-  if (!Info.OpDecl) {
+  if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
     Error(Start, "unable to lookup expression");
     return 0;
   }
@@ -1676,9 +1722,9 @@ unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
   unsigned CVal = 0;
   switch(OpKind) {
   default: llvm_unreachable("Unexpected operand kind!");
-  case IOK_LENGTH: CVal = Info.Length; break;
-  case IOK_SIZE: CVal = Info.Size; break;
-  case IOK_TYPE: CVal = Info.Type; break;
+  case IOK_LENGTH: CVal = Info.Var.Length; break;
+  case IOK_SIZE: CVal = Info.Var.Size; break;
+  case IOK_TYPE: CVal = Info.Var.Type; break;
  }
 
   return CVal;
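For reference, the MASM semantics that Info.Var.Type/Length/Size mirror in the hunk above: TYPE is the element size in bytes, LENGTH the element count, and SIZE their product. The C++ equivalents for a concrete variable (standalone example, not parser code):

    #include <cstdio>

    int main() {
      int Arr[10];
      // TYPE Arr -> 4, LENGTH Arr -> 10, SIZE Arr -> 40 on common targets.
      std::printf("TYPE=%zu LENGTH=%zu SIZE=%zu\n", sizeof Arr[0],
                  sizeof Arr / sizeof Arr[0], sizeof Arr);
      return 0;
    }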
@@ -2284,7 +2330,6 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
     }
   }
 
-  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
 
   // Determine whether this is an instruction prefix.
   // FIXME:
@@ -2294,22 +2339,48 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   // lock addq %rax, %rbx ; Destination operand must be of memory type
   // xacquire ; xacquire must be accompanied by 'lock'
   bool isPrefix = StringSwitch<bool>(Name)
-                      .Cases("lock",
-                             "rep", "repe",
-                             "repz", "repne",
-                             "repnz", "rex64",
-                             "data32", "data16", true)
-                      .Cases("xacquire", "xrelease", true)
-                      .Cases("acquire", "release", isParsingIntelSyntax())
-                      .Default(false);
+                      .Cases("rex64", "data32", "data16", true)
+                      .Cases("xacquire", "xrelease", true)
+                      .Cases("acquire", "release", isParsingIntelSyntax())
+                      .Default(false);
+
+  auto isLockRepeatPrefix = [](StringRef N) {
+    return StringSwitch<bool>(N)
+        .Cases("lock", "rep", "repe", "repz", "repne", "repnz", true)
+        .Default(false);
+  };
 
   bool CurlyAsEndOfStatement = false;
+
+  unsigned Flags = X86::IP_NO_PREFIX;
+  while (isLockRepeatPrefix(Name.lower())) {
+    unsigned Prefix =
+        StringSwitch<unsigned>(Name)
+            .Cases("lock", "lock", X86::IP_HAS_LOCK)
+            .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
+            .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
+            .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
+    Flags |= Prefix;
+    Name = Parser.getTok().getString();
+    Parser.Lex(); // eat the prefix
+    // Hack: we could have something like
+    //   "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
+    while (Name.startswith(";") || Name.startswith("\n") ||
+           Name.startswith("\t") || Name.startswith("/")) {
+      Name = Parser.getTok().getString();
+      Parser.Lex(); // go to next prefix or instr
+    }
+  }
+
+  if (Flags)
+    PatchedName = Name;
+  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
+
   // This does the actual operand parsing.  Don't parse any more if we have a
   // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
   // just want to parse the "lock" as the first instruction and the "incl" as
   // the next one.
   if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
-    // Parse '*' modifier.
     if (getLexer().is(AsmToken::Star))
       Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
@@ -2547,6 +2618,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
     }
   }
 
+  if (Flags)
+    Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
   return false;
 }
@@ -2614,6 +2687,16 @@ bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
   return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
 }
 
+static unsigned getPrefixes(OperandVector &Operands) {
+  unsigned Result = 0;
+  X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
+  if (Prefix.isPrefix()) {
+    Result = Prefix.getPrefix();
+    Operands.pop_back();
+  }
+  return Result;
+}
+
 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                               OperandVector &Operands,
                                               MCStreamer &Out,
@@ -2628,8 +2711,13 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
   MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
 
   bool WasOriginallyInvalidOperand = false;
+  unsigned Prefixes = getPrefixes(Operands);
+
   MCInst Inst;
 
+  if (Prefixes)
+    Inst.setFlags(Prefixes);
+
   // First, try a direct match.
   switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
                            isParsingIntelSyntax())) {
@@ -2794,12 +2882,16 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
   StringRef Mnemonic = Op.getToken();
   SMRange EmptyRange = None;
   StringRef Base = Op.getToken();
+  unsigned Prefixes = getPrefixes(Operands);
 
   // First, handle aliases that expand to multiple instructions.
   MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
 
   MCInst Inst;
 
+  if (Prefixes)
+    Inst.setFlags(Prefixes);
+
   // Find one unsized memory operand, if present.
   X86Operand *UnsizedMemOp = nullptr;
   for (const auto &Op : Operands) {
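The flag word built in ParseInstruction and re-attached to the MCInst in both matchers above is just an OR of the IPREFIXES bits defined later in this patch (X86BaseInfo.h). A minimal standalone model of that round trip, with the enum values copied from the patch and a stand-in struct in place of MCInst:

    #include <cstdio>

    enum IPrefixes {
      IP_NO_PREFIX = 0,
      IP_HAS_OP_SIZE = 1,
      IP_HAS_AD_SIZE = 2,
      IP_HAS_REPEAT_NE = 4,
      IP_HAS_REPEAT = 8,
      IP_HAS_LOCK = 16
    };

    struct Inst { unsigned Flags = IP_NO_PREFIX; };

    int main() {
      Inst Lock, Rep;
      Lock.Flags |= IP_HAS_LOCK;   // e.g. "lock incl (%rax)"
      Rep.Flags |= IP_HAS_REPEAT;  // e.g. "rep movsb"
      std::printf("%u %u\n", Lock.Flags, Rep.Flags);
      return 0;
    }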
Parser.parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_setframe' directive"); + return getTargetStreamer().emitFPOSetFrame(Reg, L); +} + +// .cv_fpo_pushreg ebx +bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) { + MCAsmParser &Parser = getParser(); + unsigned Reg; + SMLoc DummyLoc; + if (ParseRegister(Reg, DummyLoc, DummyLoc) || + Parser.parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_pushreg' directive"); + return getTargetStreamer().emitFPOPushReg(Reg, L); +} + +// .cv_fpo_stackalloc 20 +bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) { + MCAsmParser &Parser = getParser(); + int64_t Offset; + if (Parser.parseIntToken(Offset, "expected offset") || + Parser.parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_stackalloc' directive"); + return getTargetStreamer().emitFPOStackAlloc(Offset, L); +} + +// .cv_fpo_endprologue +bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) { + MCAsmParser &Parser = getParser(); + if (Parser.parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_endprologue' directive"); + return getTargetStreamer().emitFPOEndPrologue(L); +} + +// .cv_fpo_endproc +bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) { + MCAsmParser &Parser = getParser(); + if (Parser.parseEOL("unexpected tokens")) + return addErrorSuffix(" in '.cv_fpo_endproc' directive"); + return getTargetStreamer().emitFPOEndProc(L); +} + // Force static initialization. extern "C" void LLVMInitializeX86AsmParser() { RegisterMCAsmParser X(getTheX86_32Target()); diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index 0fba15cc692ca..43a0561e769b2 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H #define LLVM_LIB_TARGET_X86_ASMPARSER_X86OPERAND_H +#include "MCTargetDesc/X86MCTargetDesc.h" #include "X86AsmParserCommon.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -28,12 +29,7 @@ namespace llvm { /// X86Operand - Instances of this class represent a parsed X86 machine /// instruction. 
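Taken together, the handlers above accept CodeView FPO annotations interleaved with a 32-bit prologue. A hypothetical input, held in a C++ string for illustration only — the function body, register choices, and byte counts are invented; only the directive spellings come from the parsers above, and this sketch is untested:

    // Shaped for llvm-mc-style consumption on an i686 Windows target.
    static const char *const FPOExample =
        ".cv_fpo_proc _foo 8\n"
        "pushl %ebp\n"
        ".cv_fpo_pushreg ebp\n"
        "movl %esp, %ebp\n"
        ".cv_fpo_setframe ebp\n"
        "subl $20, %esp\n"
        ".cv_fpo_stackalloc 20\n"
        ".cv_fpo_endprologue\n"
        "retl\n"
        ".cv_fpo_endproc\n";

    int main() { return FPOExample[0] == '.' ? 0 : 1; }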
struct X86Operand : public MCParsedAsmOperand {
-  enum KindTy {
-    Token,
-    Register,
-    Immediate,
-    Memory
-  } Kind;
+  enum KindTy { Token, Register, Immediate, Memory, Prefix } Kind;
 
   SMLoc StartLoc, EndLoc;
   SMLoc OffsetOfLoc;
@@ -50,6 +46,10 @@ struct X86Operand : public MCParsedAsmOperand {
     unsigned RegNo;
   };
 
+  struct PrefOp {
+    unsigned Prefixes;
+  };
+
   struct ImmOp {
     const MCExpr *Val;
   };
@@ -73,6 +73,7 @@ struct X86Operand : public MCParsedAsmOperand {
     struct RegOp Reg;
     struct ImmOp Imm;
     struct MemOp Mem;
+    struct PrefOp Pref;
   };
 
   X86Operand(KindTy K, SMLoc Start, SMLoc End)
@@ -111,6 +112,11 @@ struct X86Operand : public MCParsedAsmOperand {
     return Reg.RegNo;
   }
 
+  unsigned getPrefix() const {
+    assert(Kind == Prefix && "Invalid access!");
+    return Pref.Prefixes;
+  }
+
   const MCExpr *getImm() const {
     assert(Kind == Immediate && "Invalid access!");
     return Imm.Val;
@@ -387,6 +393,7 @@ struct X86Operand : public MCParsedAsmOperand {
     return isMemOffs() && Mem.ModeSize == 64 && (!Mem.Size || Mem.Size == 64);
   }
 
+  bool isPrefix() const { return Kind == Prefix; }
   bool isReg() const override { return Kind == Register; }
 
   bool isGR32orGR64() const {
@@ -509,6 +516,13 @@ struct X86Operand : public MCParsedAsmOperand {
     return Res;
   }
 
+  static std::unique_ptr<X86Operand>
+  CreatePrefix(unsigned Prefixes, SMLoc StartLoc, SMLoc EndLoc) {
+    auto Res = llvm::make_unique<X86Operand>(Prefix, StartLoc, EndLoc);
+    Res->Pref.Prefixes = Prefixes;
+    return Res;
+  }
+
   static std::unique_ptr<X86Operand> CreateImm(const MCExpr *Val,
                                                SMLoc StartLoc, SMLoc EndLoc) {
     auto Res = llvm::make_unique<X86Operand>(Immediate, StartLoc, EndLoc);
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 3966581d93524..7e0df29414677 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -14,6 +14,10 @@ tablegen(LLVM X86GenEVEX2VEXTables.inc -gen-x86-EVEX2VEX-tables)
 tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank)
 tablegen(LLVM X86GenGlobalISel.inc -gen-global-isel)
 
+if (X86_GEN_FOLD_TABLES)
+  tablegen(LLVM X86GenFoldTables.inc -gen-x86-fold-tables)
+endif()
+
 add_public_tablegen_target(X86CommonTableGen)
 
 set(sources
@@ -21,6 +25,7 @@ set(sources
   X86CallFrameOptimization.cpp
   X86CallLowering.cpp
   X86CmovConversion.cpp
+  X86DomainReassignment.cpp
   X86ExpandPseudo.cpp
   X86FastISel.cpp
   X86FixupBWInsts.cpp
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 4ce908b1da64e..c58254ae38c19 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -74,6 +74,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "X86DisassemblerDecoder.h"
 #include "llvm/MC/MCContext.h"
@@ -232,7 +233,24 @@ MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
     return Fail;
   } else {
     Size = InternalInstr.length;
-    return (!translateInstruction(Instr, InternalInstr, this)) ?
Success : Fail; + bool Ret = translateInstruction(Instr, InternalInstr, this); + if (!Ret) { + unsigned Flags = X86::IP_NO_PREFIX; + if (InternalInstr.hasAdSize) + Flags |= X86::IP_HAS_AD_SIZE; + if (!InternalInstr.mandatoryPrefix) { + if (InternalInstr.hasOpSize) + Flags |= X86::IP_HAS_OP_SIZE; + if (InternalInstr.repeatPrefix == 0xf2) + Flags |= X86::IP_HAS_REPEAT_NE; + else if (InternalInstr.repeatPrefix == 0xf3 && + // It should not be 'pause' f3 90 + InternalInstr.opcode != 0x90) + Flags |= X86::IP_HAS_REPEAT; + } + Instr.setFlags(Flags); + } + return (!Ret) ? Success : Fail; } } @@ -315,12 +333,12 @@ static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { unsigned baseRegNo; if (insn.mode == MODE_64BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI; + baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI; else if (insn.mode == MODE_32BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI; + baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI; else { assert(insn.mode == MODE_16BIT); - baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI; + baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI; } MCOperand baseReg = MCOperand::createReg(baseRegNo); mcInst.addOperand(baseReg); @@ -340,12 +358,12 @@ static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { unsigned baseRegNo; if (insn.mode == MODE_64BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI; + baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI; else if (insn.mode == MODE_32BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI; + baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI; else { assert(insn.mode == MODE_16BIT); - baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI; + baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI; } MCOperand baseReg = MCOperand::createReg(baseRegNo); mcInst.addOperand(baseReg); @@ -746,102 +764,6 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, baseReg = MCOperand::createReg(0); } - // Check whether we are handling VSIB addressing mode for GATHER. - // If sibIndex was set to SIB_INDEX_NONE, index offset is 4 and - // we should use SIB_INDEX_XMM4|YMM4 for VSIB. - // I don't see a way to get the correct IndexReg in readSIB: - // We can tell whether it is VSIB or SIB after instruction ID is decoded, - // but instruction ID may not be decoded yet when calling readSIB. 
- uint32_t Opcode = mcInst.getOpcode(); - bool IndexIs128 = (Opcode == X86::VGATHERDPDrm || - Opcode == X86::VGATHERDPDYrm || - Opcode == X86::VGATHERQPDrm || - Opcode == X86::VGATHERDPSrm || - Opcode == X86::VGATHERQPSrm || - Opcode == X86::VPGATHERDQrm || - Opcode == X86::VPGATHERDQYrm || - Opcode == X86::VPGATHERQQrm || - Opcode == X86::VPGATHERDDrm || - Opcode == X86::VPGATHERQDrm || - Opcode == X86::VGATHERDPDZ128rm || - Opcode == X86::VGATHERDPDZ256rm || - Opcode == X86::VGATHERDPSZ128rm || - Opcode == X86::VGATHERQPDZ128rm || - Opcode == X86::VGATHERQPSZ128rm || - Opcode == X86::VPGATHERDDZ128rm || - Opcode == X86::VPGATHERDQZ128rm || - Opcode == X86::VPGATHERDQZ256rm || - Opcode == X86::VPGATHERQDZ128rm || - Opcode == X86::VPGATHERQQZ128rm || - Opcode == X86::VSCATTERDPDZ128mr || - Opcode == X86::VSCATTERDPDZ256mr || - Opcode == X86::VSCATTERDPSZ128mr || - Opcode == X86::VSCATTERQPDZ128mr || - Opcode == X86::VSCATTERQPSZ128mr || - Opcode == X86::VPSCATTERDDZ128mr || - Opcode == X86::VPSCATTERDQZ128mr || - Opcode == X86::VPSCATTERDQZ256mr || - Opcode == X86::VPSCATTERQDZ128mr || - Opcode == X86::VPSCATTERQQZ128mr); - bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm || - Opcode == X86::VGATHERDPSYrm || - Opcode == X86::VGATHERQPSYrm || - Opcode == X86::VGATHERDPDZrm || - Opcode == X86::VPGATHERDQZrm || - Opcode == X86::VPGATHERQQYrm || - Opcode == X86::VPGATHERDDYrm || - Opcode == X86::VPGATHERQDYrm || - Opcode == X86::VGATHERDPSZ256rm || - Opcode == X86::VGATHERQPDZ256rm || - Opcode == X86::VGATHERQPSZ256rm || - Opcode == X86::VPGATHERDDZ256rm || - Opcode == X86::VPGATHERQQZ256rm || - Opcode == X86::VPGATHERQDZ256rm || - Opcode == X86::VSCATTERDPDZmr || - Opcode == X86::VPSCATTERDQZmr || - Opcode == X86::VSCATTERDPSZ256mr || - Opcode == X86::VSCATTERQPDZ256mr || - Opcode == X86::VSCATTERQPSZ256mr || - Opcode == X86::VPSCATTERDDZ256mr || - Opcode == X86::VPSCATTERQQZ256mr || - Opcode == X86::VPSCATTERQDZ256mr || - Opcode == X86::VGATHERPF0DPDm || - Opcode == X86::VGATHERPF1DPDm || - Opcode == X86::VSCATTERPF0DPDm || - Opcode == X86::VSCATTERPF1DPDm); - bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm || - Opcode == X86::VGATHERDPSZrm || - Opcode == X86::VGATHERQPSZrm || - Opcode == X86::VPGATHERQQZrm || - Opcode == X86::VPGATHERDDZrm || - Opcode == X86::VPGATHERQDZrm || - Opcode == X86::VSCATTERQPDZmr || - Opcode == X86::VSCATTERDPSZmr || - Opcode == X86::VSCATTERQPSZmr || - Opcode == X86::VPSCATTERQQZmr || - Opcode == X86::VPSCATTERDDZmr || - Opcode == X86::VPSCATTERQDZmr || - Opcode == X86::VGATHERPF0DPSm || - Opcode == X86::VGATHERPF0QPDm || - Opcode == X86::VGATHERPF0QPSm || - Opcode == X86::VGATHERPF1DPSm || - Opcode == X86::VGATHERPF1QPDm || - Opcode == X86::VGATHERPF1QPSm || - Opcode == X86::VSCATTERPF0DPSm || - Opcode == X86::VSCATTERPF0QPDm || - Opcode == X86::VSCATTERPF0QPSm || - Opcode == X86::VSCATTERPF1DPSm || - Opcode == X86::VSCATTERPF1QPDm || - Opcode == X86::VSCATTERPF1QPSm); - if (IndexIs128 || IndexIs256 || IndexIs512) { - unsigned IndexOffset = insn.sibIndex - - (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); - SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 : - IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0; - insn.sibIndex = (SIBIndex)(IndexBase + - (insn.sibIndex == SIB_INDEX_NONE ? 
4 : IndexOffset)); - } - if (insn.sibIndex != SIB_INDEX_NONE) { switch (insn.sibIndex) { default: @@ -969,6 +891,9 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_BNDR: return translateRMRegister(mcInst, insn); case TYPE_M: + case TYPE_MVSIBX: + case TYPE_MVSIBY: + case TYPE_MVSIBZ: return translateRMMemory(mcInst, insn, Dis); } } @@ -1034,6 +959,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, insn, Dis); return false; + case ENCODING_IRC: + mcInst.addOperand(MCOperand::createImm(insn.RC)); + return false; case ENCODING_SI: return translateSrcIndex(mcInst, insn); case ENCODING_DI: diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index 577b7a776c6df..709fc630633a7 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -277,38 +277,44 @@ static void dbgprintf(struct InternalInstruction* insn, insn->dlog(insn->dlogArg, buffer); } -/* - * setPrefixPresent - Marks that a particular prefix is present at a particular - * location. - * - * @param insn - The instruction to be marked as having the prefix. - * @param prefix - The prefix that is present. - * @param location - The location where the prefix is located (in the address - * space of the instruction's reader). - */ -static void setPrefixPresent(struct InternalInstruction* insn, - uint8_t prefix, - uint64_t location) -{ - insn->prefixPresent[prefix] = 1; - insn->prefixLocations[prefix] = location; +static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { + if (insn->mode == MODE_64BIT) + return prefix >= 0x40 && prefix <= 0x4f; + return false; } /* - * isPrefixAtLocation - Queries an instruction to determine whether a prefix is - * present at a given location. + * setPrefixPresent - Marks that a particular prefix is present as mandatory * - * @param insn - The instruction to be queried. - * @param prefix - The prefix. - * @param location - The location to query. - * @return - Whether the prefix is at that location. + * @param insn - The instruction to be marked as having the prefix. + * @param prefix - The prefix that is present. */ -static bool isPrefixAtLocation(struct InternalInstruction* insn, - uint8_t prefix, - uint64_t location) -{ - return insn->prefixPresent[prefix] == 1 && - insn->prefixLocations[prefix] == location; +static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix) { + uint8_t nextByte; + switch (prefix) { + case 0xf2: + case 0xf3: + if (lookAtByte(insn, &nextByte)) + break; + // TODO: + // 1. There could be several 0x66 + // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then + // it's not mandatory prefix + // 3. 
if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need + // 0x0f exactly after it to be mandatory prefix + if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66) + // The last of 0xf2 /0xf3 is mandatory prefix + insn->mandatoryPrefix = prefix; + insn->repeatPrefix = prefix; + break; + case 0x66: + if (lookAtByte(insn, &nextByte)) + break; + // 0x66 can't overwrite existing mandatory prefix and should be ignored + if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte))) + insn->mandatoryPrefix = prefix; + break; + } } /* @@ -322,19 +328,12 @@ static bool isPrefixAtLocation(struct InternalInstruction* insn, */ static int readPrefixes(struct InternalInstruction* insn) { bool isPrefix = true; - bool prefixGroups[4] = { false }; - uint64_t prefixLocation; uint8_t byte = 0; uint8_t nextByte; - bool hasAdSize = false; - bool hasOpSize = false; - dbgprintf(insn, "readPrefixes()"); while (isPrefix) { - prefixLocation = insn->readerCursor; - /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ if (consumeByte(insn, &byte)) break; @@ -343,13 +342,10 @@ static int readPrefixes(struct InternalInstruction* insn) { * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then * break and let it be disassembled as a normal "instruction". */ - if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) + if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK break; - if (insn->readerCursor - 1 == insn->startLocation - && (byte == 0xf2 || byte == 0xf3) - && !lookAtByte(insn, &nextByte)) - { + if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) { /* * If the byte is 0xf2 or 0xf3, and any of the following conditions are * met: @@ -357,39 +353,41 @@ static int readPrefixes(struct InternalInstruction* insn) { * - it is followed by an xchg instruction * then it should be disassembled as a xacquire/xrelease not repne/rep. */ - if ((byte == 0xf2 || byte == 0xf3) && - ((nextByte == 0xf0) || - ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) + if (((nextByte == 0xf0) || + ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) { insn->xAcquireRelease = true; + if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support + break; + } /* * Also if the byte is 0xf3, and the following condition is met: * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or * "mov mem, imm" (opcode 0xc6/0xc7) instructions. * then it should be disassembled as an xrelease not rep. 
*/ - if (byte == 0xf3 && - (nextByte == 0x88 || nextByte == 0x89 || - nextByte == 0xc6 || nextByte == 0xc7)) + if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 || + nextByte == 0xc6 || nextByte == 0xc7)) { insn->xAcquireRelease = true; - if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { - if (consumeByte(insn, &nextByte)) + if (nextByte != 0x90) // PAUSE instruction support + break; + } + if (isREX(insn, nextByte)) { + uint8_t nnextByte; + // Go to REX prefix after the current one + if (consumeByte(insn, &nnextByte)) return -1; - if (lookAtByte(insn, &nextByte)) + // We should be able to read next byte after REX prefix + if (lookAtByte(insn, &nnextByte)) return -1; unconsumeByte(insn); } - if (nextByte != 0x0f && nextByte != 0x90) - break; } switch (byte) { case 0xf0: /* LOCK */ case 0xf2: /* REPNE/REPNZ */ case 0xf3: /* REP or REPE/REPZ */ - if (prefixGroups[0]) - dbgprintf(insn, "Redundant Group 1 prefix"); - prefixGroups[0] = true; - setPrefixPresent(insn, byte, prefixLocation); + setPrefixPresent(insn, byte); break; case 0x2e: /* CS segment override -OR- Branch not taken */ case 0x36: /* SS segment override -OR- Branch taken */ @@ -420,24 +418,15 @@ static int readPrefixes(struct InternalInstruction* insn) { debug("Unhandled override"); return -1; } - if (prefixGroups[1]) - dbgprintf(insn, "Redundant Group 2 prefix"); - prefixGroups[1] = true; - setPrefixPresent(insn, byte, prefixLocation); + setPrefixPresent(insn, byte); break; case 0x66: /* Operand-size override */ - if (prefixGroups[2]) - dbgprintf(insn, "Redundant Group 3 prefix"); - prefixGroups[2] = true; - hasOpSize = true; - setPrefixPresent(insn, byte, prefixLocation); + insn->hasOpSize = true; + setPrefixPresent(insn, byte); break; case 0x67: /* Address-size override */ - if (prefixGroups[3]) - dbgprintf(insn, "Redundant Group 4 prefix"); - prefixGroups[3] = true; - hasAdSize = true; - setPrefixPresent(insn, byte, prefixLocation); + insn->hasAdSize = true; + setPrefixPresent(insn, byte); break; default: /* Not a prefix byte */ isPrefix = false; @@ -469,7 +458,6 @@ static int readPrefixes(struct InternalInstruction* insn) { } else { unconsumeByte(insn); /* unconsume byte1 */ unconsumeByte(insn); /* unconsume byte */ - insn->necessaryPrefixLocation = insn->readerCursor - 2; } if (insn->vectorExtensionType == TYPE_EVEX) { @@ -505,13 +493,10 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) insn->vectorExtensionType = TYPE_VEX_3B; - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } else { + else unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } if (insn->vectorExtensionType == TYPE_VEX_3B) { insn->vectorExtensionPrefix[0] = byte; @@ -520,13 +505,12 @@ static int readPrefixes(struct InternalInstruction* insn) { /* We simulate the REX prefix for simplicity's sake */ - if (insn->mode == MODE_64BIT) { + if (insn->mode == MODE_64BIT) insn->rexPrefix = 0x40 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); - } dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], @@ -540,26 +524,24 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if (insn->mode == MODE_64BIT || (byte1 & 
0xc0) == 0xc0) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) insn->vectorExtensionType = TYPE_VEX_2B; - } else { + else unconsumeByte(insn); - } if (insn->vectorExtensionType == TYPE_VEX_2B) { insn->vectorExtensionPrefix[0] = byte; consumeByte(insn, &insn->vectorExtensionPrefix[1]); - if (insn->mode == MODE_64BIT) { + if (insn->mode == MODE_64BIT) insn->rexPrefix = 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); - } switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { default: break; case VEX_PREFIX_66: - hasOpSize = true; + insn->hasOpSize = true; break; } @@ -575,13 +557,10 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */ + if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */ insn->vectorExtensionType = TYPE_XOP; - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } else { + else unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } if (insn->vectorExtensionType == TYPE_XOP) { insn->vectorExtensionPrefix[0] = byte; @@ -590,19 +569,18 @@ static int readPrefixes(struct InternalInstruction* insn) { /* We simulate the REX prefix for simplicity's sake */ - if (insn->mode == MODE_64BIT) { + if (insn->mode == MODE_64BIT) insn->rexPrefix = 0x40 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); - } switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { default: break; case VEX_PREFIX_66: - hasOpSize = true; + insn->hasOpSize = true; break; } @@ -610,51 +588,35 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], insn->vectorExtensionPrefix[2]); } - } else { - if (insn->mode == MODE_64BIT) { - if ((byte & 0xf0) == 0x40) { - uint8_t opcodeByte; - - if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { - dbgprintf(insn, "Redundant REX prefix"); - return -1; - } - - insn->rexPrefix = byte; - insn->necessaryPrefixLocation = insn->readerCursor - 2; - - dbgprintf(insn, "Found REX prefix 0x%hhx", byte); - } else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - } else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - } + } else if (isREX(insn, byte)) { + if (lookAtByte(insn, &nextByte)) + return -1; + insn->rexPrefix = byte; + dbgprintf(insn, "Found REX prefix 0x%hhx", byte); + } else + unconsumeByte(insn); if (insn->mode == MODE_16BIT) { - insn->registerSize = (hasOpSize ? 4 : 2); - insn->addressSize = (hasAdSize ? 4 : 2); - insn->displacementSize = (hasAdSize ? 4 : 2); - insn->immediateSize = (hasOpSize ? 4 : 2); + insn->registerSize = (insn->hasOpSize ? 4 : 2); + insn->addressSize = (insn->hasAdSize ? 4 : 2); + insn->displacementSize = (insn->hasAdSize ? 4 : 2); + insn->immediateSize = (insn->hasOpSize ? 4 : 2); } else if (insn->mode == MODE_32BIT) { - insn->registerSize = (hasOpSize ? 2 : 4); - insn->addressSize = (hasAdSize ? 2 : 4); - insn->displacementSize = (hasAdSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); + insn->registerSize = (insn->hasOpSize ? 2 : 4); + insn->addressSize = (insn->hasAdSize ? 2 : 4); + insn->displacementSize = (insn->hasAdSize ? 2 : 4); + insn->immediateSize = (insn->hasOpSize ? 
2 : 4); } else if (insn->mode == MODE_64BIT) { if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { insn->registerSize = 8; - insn->addressSize = (hasAdSize ? 4 : 8); + insn->addressSize = (insn->hasAdSize ? 4 : 8); insn->displacementSize = 4; insn->immediateSize = 4; } else { - insn->registerSize = (hasOpSize ? 2 : 4); - insn->addressSize = (hasAdSize ? 4 : 8); - insn->displacementSize = (hasOpSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); + insn->registerSize = (insn->hasOpSize ? 2 : 4); + insn->addressSize = (insn->hasAdSize ? 4 : 8); + insn->displacementSize = (insn->hasOpSize ? 2 : 4); + insn->immediateSize = (insn->hasOpSize ? 2 : 4); } } @@ -758,7 +720,10 @@ static int readOpcode(struct InternalInstruction* insn) { insn->opcodeType = TWOBYTE; } - } + } else if (insn->mandatoryPrefix) + // The opcode with mandatory prefix must start with opcode escape. + // If not it's legacy repeat prefix + insn->mandatoryPrefix = 0; /* * At this point we have consumed the full opcode. @@ -950,15 +915,38 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { } else { return -1; } - } else { - if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) + } else if (!insn->mandatoryPrefix) { + // If we don't have mandatory prefix we should use legacy prefixes here + if (insn->hasOpSize && (insn->mode != MODE_16BIT)) attrMask |= ATTR_OPSIZE; - else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) + if (insn->hasAdSize) attrMask |= ATTR_ADSIZE; - else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) - attrMask |= ATTR_XS; - else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) + if (insn->opcodeType == ONEBYTE) { + if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90)) + // Special support for PAUSE + attrMask |= ATTR_XS; + } else { + if (insn->repeatPrefix == 0xf2) + attrMask |= ATTR_XD; + else if (insn->repeatPrefix == 0xf3) + attrMask |= ATTR_XS; + } + } else { + switch (insn->mandatoryPrefix) { + case 0xf2: attrMask |= ATTR_XD; + break; + case 0xf3: + attrMask |= ATTR_XS; + break; + case 0x66: + if (insn->mode != MODE_16BIT) + attrMask |= ATTR_OPSIZE; + break; + case 0x67: + attrMask |= ATTR_ADSIZE; + break; + } } if (insn->rexPrefix & 0x08) @@ -977,8 +965,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes */ - if (insn->mode == MODE_64BIT && - isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) { + if ((insn->mode == MODE_64BIT) && insn->hasOpSize) { switch (insn->opcode) { case 0xE8: case 0xE9: @@ -1058,9 +1045,9 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { */ if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) { /* Make sure we observed the prefixes in any position. */ - if (insn->prefixPresent[0x67]) + if (insn->hasAdSize) attrMask |= ATTR_ADSIZE; - if (insn->prefixPresent[0x66]) + if (insn->hasOpSize) attrMask |= ATTR_OPSIZE; /* In 16-bit, invert the attributes. 
*/ @@ -1075,7 +1062,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { return 0; } - if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) && + if ((insn->mode == MODE_16BIT || insn->hasOpSize) && !(attrMask & ATTR_OPSIZE)) { /* * The instruction tables make no distinction between instructions that @@ -1108,7 +1095,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg); if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) && - (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) { + (insn->mode == MODE_16BIT) ^ insn->hasOpSize) { insn->instructionID = instructionIDWithOpsize; insn->spec = specifierForUID(instructionIDWithOpsize); } else { @@ -1169,7 +1156,6 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { * @return - 0 if the SIB byte was successfully read; nonzero otherwise. */ static int readSIB(struct InternalInstruction* insn) { - SIBIndex sibIndexBase = SIB_INDEX_NONE; SIBBase sibBaseBase = SIB_BASE_NONE; uint8_t index, base; @@ -1185,11 +1171,11 @@ static int readSIB(struct InternalInstruction* insn) { dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); return -1; case 4: - sibIndexBase = SIB_INDEX_EAX; + insn->sibIndexBase = SIB_INDEX_EAX; sibBaseBase = SIB_BASE_EAX; break; case 8: - sibIndexBase = SIB_INDEX_RAX; + insn->sibIndexBase = SIB_INDEX_RAX; sibBaseBase = SIB_BASE_RAX; break; } @@ -1199,26 +1185,10 @@ static int readSIB(struct InternalInstruction* insn) { index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); - // FIXME: The fifth bit (bit index 4) is only to be used for instructions - // that understand VSIB indexing. ORing the bit in here is mildy dangerous - // because performing math on an 'enum SIBIndex' can produce garbage. - // Excluding the "none" value, it should cover 6 spaces of register names: - // - 16 possibilities for 16-bit GPR starting at SIB_INDEX_BX_SI - // - 16 possibilities for 32-bit GPR starting at SIB_INDEX_EAX - // - 16 possibilities for 64-bit GPR starting at SIB_INDEX_RAX - // - 32 possibilities for each of XMM, YMM, ZMM registers - // When sibIndexBase gets assigned SIB_INDEX_RAX as it does in 64-bit mode, - // summing in a fully decoded index between 0 and 31 can end up with a value - // that looks like something in the low half of the XMM range. 
-  // translateRMMemory() tries to reverse the damage, with only partial success,
-  // as evidenced by known bugs in "test/MC/Disassembler/X86/x86-64.txt"
-  if (insn->vectorExtensionType == TYPE_EVEX)
-    index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;
-
   if (index == 0x4) {
     insn->sibIndex = SIB_INDEX_NONE;
   } else {
-    insn->sibIndex = (SIBIndex)(sibIndexBase + index);
+    insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
   }
 
   insn->sibScale = 1 << scaleFromSIB(insn->sib);
@@ -1483,9 +1453,9 @@ static int readModRM(struct InternalInstruction* insn) {
     case TYPE_MM64:                                       \
       return prefix##_MM0 + (index & 0x7);                \
     case TYPE_SEGMENTREG:                                 \
-      if (index > 5)                                      \
+      if ((index & 7) > 5)                                \
        *valid = 0;                                        \
-      return prefix##_ES + index;                         \
+      return prefix##_ES + (index & 7);                   \
     case TYPE_DEBUGREG:                                   \
       return prefix##_DR0 + index;                        \
     case TYPE_CONTROLREG:                                 \
@@ -1494,6 +1464,12 @@ static int readModRM(struct InternalInstruction* insn) {
       if (index > 3)                                      \
        *valid = 0;                                        \
       return prefix##_BND0 + index;                       \
+    case TYPE_MVSIBX:                                     \
+      return prefix##_XMM0 + index;                       \
+    case TYPE_MVSIBY:                                     \
+      return prefix##_YMM0 + index;                       \
+    case TYPE_MVSIBZ:                                     \
+      return prefix##_ZMM0 + index;                       \
    }                                                      \
  }
@@ -1549,7 +1525,6 @@ static int fixupReg(struct InternalInstruction *insn,
       return -1;
     break;
   CASE_ENCODING_RM:
-  CASE_ENCODING_VSIB:
     if (insn->eaBase >= insn->eaRegBase) {
       insn->eaBase = (EABase)fixupRMValue(insn,
                                           (OperandType)op->type,
@@ -1747,8 +1722,39 @@ static int readOperands(struct InternalInstruction* insn) {
         needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
       if (readModRM(insn))
         return -1;
-      if (fixupReg(insn, &Op))
+
+      // Reject if SIB wasn't used.
+      if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
         return -1;
+
+      // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
+      if (insn->sibIndex == SIB_INDEX_NONE)
+        insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
+
+      // If EVEX.v2 is set this is one of the 16-31 registers.
+      if (insn->vectorExtensionType == TYPE_EVEX &&
+          v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
+        insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
+
+      // Adjust the index register to the correct size.
+      switch ((OperandType)Op.type) {
+      default:
+        debug("Unhandled VSIB index type");
+        return -1;
+      case TYPE_MVSIBX:
+        insn->sibIndex = (SIBIndex)(SIB_INDEX_XMM0 +
+                                    (insn->sibIndex - insn->sibIndexBase));
+        break;
+      case TYPE_MVSIBY:
+        insn->sibIndex = (SIBIndex)(SIB_INDEX_YMM0 +
+                                    (insn->sibIndex - insn->sibIndexBase));
+        break;
+      case TYPE_MVSIBZ:
+        insn->sibIndex = (SIBIndex)(SIB_INDEX_ZMM0 +
+                                    (insn->sibIndex - insn->sibIndexBase));
+        break;
+      }
+
       // Apply the AVX512 compressed displacement scaling factor.
if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB); @@ -1797,6 +1803,10 @@ static int readOperands(struct InternalInstruction* insn) { if (readImmediate(insn, insn->addressSize)) return -1; break; + case ENCODING_IRC: + insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) | + lFromEVEX4of4(insn->vectorExtensionPrefix[3]); + break; case ENCODING_RB: if (readOpcodeRegister(insn, 1)) return -1; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index b07fd0b17d352..ecd9d8dccafaa 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -546,24 +546,26 @@ struct InternalInstruction { // Prefix state - // 1 if the prefix byte corresponding to the entry is present; 0 if not - uint8_t prefixPresent[0x100]; - // contains the location (for use with the reader) of the prefix byte - uint64_t prefixLocations[0x100]; + // The possible mandatory prefix + uint8_t mandatoryPrefix; // The value of the vector extension prefix(EVEX/VEX/XOP), if present uint8_t vectorExtensionPrefix[4]; // The type of the vector extension prefix VectorExtensionType vectorExtensionType; // The value of the REX prefix, if present uint8_t rexPrefix; - // The location where a mandatory prefix would have to be (i.e., right before - // the opcode, or right before the REX prefix if one is present). - uint64_t necessaryPrefixLocation; // The segment override type SegmentOverride segmentOverride; // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease bool xAcquireRelease; + // Address-size override + bool hasAdSize; + // Operand-size override + bool hasOpSize; + // The repeat prefix if any + uint8_t repeatPrefix; + // Sizes of various critical pieces of data, in bytes uint8_t registerSize; uint8_t addressSize; @@ -637,10 +639,14 @@ struct InternalInstruction { Reg reg; // SIB state + SIBIndex sibIndexBase; SIBIndex sibIndex; uint8_t sibScale; SIBBase sibBase; + // Embedded rounding control. 
+  uint8_t RC;
+
   ArrayRef<OperandSpecifier> operands;
 };
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index e0f4399b3687e..ad1404860fb6b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -382,6 +382,7 @@ enum ModRMDecisionType {                                     \
   ENUM_ENTRY(ENCODING_Iv,    "Immediate of operand size")                     \
   ENUM_ENTRY(ENCODING_Ia,    "Immediate of address size")                     \
+  ENUM_ENTRY(ENCODING_IRC,   "Immediate for static rounding control")         \
   ENUM_ENTRY(ENCODING_Rv,    "Register code of operand size added to the "    \
                              "opcode byte")                                   \
   ENUM_ENTRY(ENCODING_DUP,   "Duplicate of another operand; ID is encoded "   \
@@ -410,6 +411,9 @@ enum OperandEncoding {
   ENUM_ENTRY(TYPE_AVX512ICC,  "1-byte immediate operand for AVX512 icmp")     \
   ENUM_ENTRY(TYPE_UIMM8,      "1-byte unsigned immediate operand")            \
   ENUM_ENTRY(TYPE_M,          "Memory operand")                               \
+  ENUM_ENTRY(TYPE_MVSIBX,     "Memory operand using XMM index")               \
+  ENUM_ENTRY(TYPE_MVSIBY,     "Memory operand using YMM index")               \
+  ENUM_ENTRY(TYPE_MVSIBZ,     "Memory operand using ZMM index")               \
   ENUM_ENTRY(TYPE_SRCIDX,     "memory at source index")                       \
   ENUM_ENTRY(TYPE_DSTIDX,     "memory at destination index")                  \
   ENUM_ENTRY(TYPE_MOFFS,      "memory offset (relative to segment base)")     \
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 4d91300c7edec..6ff1136cd85a8 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -50,8 +50,16 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
     HasCustomInstComment =
         EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
 
+  unsigned Flags = MI->getFlags();
   if (TSFlags & X86II::LOCK)
     OS << "\tlock\t";
+  if (!(TSFlags & X86II::LOCK) && Flags & X86::IP_HAS_LOCK)
+    OS << "\tlock\n";
+
+  if (Flags & X86::IP_HAS_REPEAT_NE)
+    OS << "\trepne\n";
+  else if (Flags & X86::IP_HAS_REPEAT)
+    OS << "\trep\n";
 
   // Output CALLpcrel32 as "callq" in 64-bit mode.
   // In Intel annotation it's always emitted as "call".
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index f5f3a4cc83dc9..2890fd6156e11 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -205,16 +205,14 @@ static MVT getZeroExtensionResultType(const MCInst *MI) {
 }
 
 /// Wraps the destination register name with AVX512 mask/maskz filtering.
-static std::string getMaskName(const MCInst *MI, const char *DestName, - const char *(*getRegName)(unsigned)) { - std::string OpMaskName(DestName); - +static void printMasking(raw_ostream &OS, const MCInst *MI, + const char *(*getRegName)(unsigned)) { bool MaskWithZero = false; const char *MaskRegName = nullptr; switch (MI->getOpcode()) { default: - return OpMaskName; + return; CASE_MASKZ_MOVDUP(MOVDDUP, m) CASE_MASKZ_MOVDUP(MOVDDUP, r) CASE_MASKZ_MOVDUP(MOVSHDUP, m) @@ -293,6 +291,8 @@ static std::string getMaskName(const MCInst *MI, const char *DestName, CASE_MASKZ_INS_COMMON(BROADCASTI32X4, , rm) CASE_MASKZ_INS_COMMON(BROADCASTF32X8, , rm) CASE_MASKZ_INS_COMMON(BROADCASTI32X8, , rm) + CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z128, r) + CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z128, m) CASE_MASKZ_INS_COMMON(BROADCASTF32X2, Z256, r) CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z256, r) CASE_MASKZ_INS_COMMON(BROADCASTF32X2, Z256, m) @@ -382,6 +382,8 @@ static std::string getMaskName(const MCInst *MI, const char *DestName, CASE_MASK_INS_COMMON(BROADCASTI32X4, , rm) CASE_MASK_INS_COMMON(BROADCASTF32X8, , rm) CASE_MASK_INS_COMMON(BROADCASTI32X8, , rm) + CASE_MASK_INS_COMMON(BROADCASTI32X2, Z128, r) + CASE_MASK_INS_COMMON(BROADCASTI32X2, Z128, m) CASE_MASK_INS_COMMON(BROADCASTF32X2, Z256, r) CASE_MASK_INS_COMMON(BROADCASTI32X2, Z256, r) CASE_MASK_INS_COMMON(BROADCASTF32X2, Z256, m) @@ -395,15 +397,11 @@ static std::string getMaskName(const MCInst *MI, const char *DestName, } // MASK: zmmX {%kY} - OpMaskName += " {%"; - OpMaskName += MaskRegName; - OpMaskName += "}"; + OS << " {%" << MaskRegName << "}"; // MASKZ: zmmX {%kY} {z} if (MaskWithZero) - OpMaskName += " {z}"; - - return OpMaskName; + OS << " {z}"; } //===----------------------------------------------------------------------===// @@ -1090,6 +1088,13 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeSubVectorBroadcast(MVT::v16f32, MVT::v8f32, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; + CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, r) + Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); + LLVM_FALLTHROUGH; + CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z128, m) + DecodeSubVectorBroadcast(MVT::v4f32, MVT::v2f32, ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); + break; CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r) CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, r) Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg()); @@ -1149,7 +1154,13 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, return false; if (!DestName) DestName = Src1Name; - OS << (DestName ? getMaskName(MI, DestName, getRegName) : "mem") << " = "; + if (DestName) { + OS << DestName; + printMasking(OS, MI, getRegName); + } else + OS << "mem"; + + OS << " = "; // If the two sources are the same, canonicalize the input elements to be // from the first src so that we get larger element spans. 
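The printMasking rewrite above streams the "{%kN}"/"{z}" decoration straight into the comment stream instead of building a temporary std::string. Its contract, reduced to a standalone sketch (function and parameter names invented):

    #include <iostream>
    #include <string>

    // Append AVX512 mask decoration to a destination operand name:
    //   MASK:  zmmX {%kY}      MASKZ: zmmX {%kY} {z}
    static void printMasked(std::ostream &OS, const std::string &Dest,
                            int MaskReg, bool Zeroing) {
      OS << Dest;
      if (MaskReg >= 0) {
        OS << " {%k" << MaskReg << "}";
        if (Zeroing)
          OS << " {z}";
      }
    }

    int main() {
      printMasked(std::cout, "zmm0", 1, true); // prints "zmm0 {%k1} {z}"
      std::cout << '\n';
      return 0;
    }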
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 72593878e4473..464941a1bab6b 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -43,6 +43,12 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
   if (TSFlags & X86II::LOCK)
     OS << "\tlock\n";
 
+  unsigned Flags = MI->getFlags();
+  if (Flags & X86::IP_HAS_REPEAT_NE)
+    OS << "\trepne\n";
+  else if (Flags & X86::IP_HAS_REPEAT)
+    OS << "\trep\n";
+
   printInstruction(MI, OS);
 
   // Next always print the annotation.
diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
index 33df9ec7dcde7..8d0d9fa1215c7 100644
--- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMX86Desc
   X86MCCodeEmitter.cpp
   X86MachObjectWriter.cpp
   X86ELFObjectWriter.cpp
-  X86WinCOFFStreamer.cpp
   X86WinCOFFObjectWriter.cpp
+  X86WinCOFFStreamer.cpp
+  X86WinCOFFTargetStreamer.cpp
   )
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 733eac7c03212..a5cecf0370054 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -389,7 +389,8 @@ class ELFX86_32AsmBackend : public ELFX86AsmBackend {
   ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
     : ELFX86AsmBackend(T, OSABI, CPU) {}
 
-  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+  std::unique_ptr<MCObjectWriter>
+  createObjectWriter(raw_pwrite_stream &OS) const override {
     return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386);
   }
 };
@@ -399,7 +400,8 @@ class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
   ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
     : ELFX86AsmBackend(T, OSABI, CPU) {}
 
-  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+  std::unique_ptr<MCObjectWriter>
+  createObjectWriter(raw_pwrite_stream &OS) const override {
     return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI,
                                     ELF::EM_X86_64);
   }
@@ -410,7 +412,8 @@ class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
   ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
     : ELFX86AsmBackend(T, OSABI, CPU) {}
 
-  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+  std::unique_ptr<MCObjectWriter>
+  createObjectWriter(raw_pwrite_stream &OS) const override {
     return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI,
                                     ELF::EM_IAMCU);
   }
@@ -421,7 +424,8 @@ class ELFX86_64AsmBackend : public ELFX86AsmBackend {
   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
     : ELFX86AsmBackend(T, OSABI, CPU) {}
 
-  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+  std::unique_ptr<MCObjectWriter>
+  createObjectWriter(raw_pwrite_stream &OS) const override {
     return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI,
                                     ELF::EM_X86_64);
   }
 };
@@ -443,7 +447,8 @@ class WindowsX86AsmBackend : public X86AsmBackend {
       .Default(MCAsmBackend::getFixupKind(Name));
   }
 
-  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+  std::unique_ptr<MCObjectWriter>
+  createObjectWriter(raw_pwrite_stream &OS) const override {
     return createX86WinCOFFObjectWriter(OS, Is64Bit);
   }
 };
@@ -804,7 +809,8 @@ class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
                          StringRef CPU)
       : DarwinX86AsmBackend(T, MRI, CPU, false) {}
 
-  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+  std::unique_ptr<MCObjectWriter>
+  createObjectWriter(raw_pwrite_stream &OS) const override {
     return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
                                      MachO::CPU_TYPE_I386,
                                      MachO::CPU_SUBTYPE_I386_ALL);
@@ -824,7 +830,8 @@ class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
                          StringRef CPU, MachO::CPUSubTypeX86 st)
       : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {}
 
-  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+  std::unique_ptr<MCObjectWriter>
+  createObjectWriter(raw_pwrite_stream &OS) const override {
     return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
                                      MachO::CPU_TYPE_X86_64, Subtype);
   }
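The createObjectWriter signature change repeated through X86AsmBackend.cpp above is one mechanical pattern: factories that used to hand back a raw owning pointer now return std::unique_ptr, making the transfer of ownership explicit at every call site. The pattern in isolation, with stand-in types rather than the LLVM classes:

    #include <memory>

    struct Writer { virtual ~Writer() = default; };
    struct ELFWriter final : Writer {};

    // Was: Writer *createWriter() { return new ELFWriter; }
    static std::unique_ptr<Writer> createWriter() {
      return std::make_unique<ELFWriter>();
    }

    int main() {
      auto W = createWriter(); // ownership is unambiguous; freed automatically
      return W ? 0 : 1;
    }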
{ + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_I386, MachO::CPU_SUBTYPE_I386_ALL); @@ -824,7 +830,8 @@ class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { StringRef CPU, MachO::CPUSubTypeX86 st) : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {} - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/true, MachO::CPU_TYPE_X86_64, Subtype); } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index d8953da4abb2d..7c6444ba58a53 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -51,6 +51,16 @@ namespace X86 { TO_ZERO = 3, CUR_DIRECTION = 4 }; + + /// The constants to describe instr prefixes if there are + enum IPREFIXES { + IP_NO_PREFIX = 0, + IP_HAS_OP_SIZE = 1, + IP_HAS_AD_SIZE = 2, + IP_HAS_REPEAT_NE = 4, + IP_HAS_REPEAT = 8, + IP_HAS_LOCK = 16 + }; } // end namespace X86; /// X86II - This namespace holds all of the target specific flags that diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index 4da4eebec0386..4cdbae4d0d96a 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -15,6 +15,7 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include <cstdint> @@ -297,10 +298,9 @@ unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, return getRelocType32(Ctx, Modifier, getType32(Type), IsPCRel, Kind); } -MCObjectWriter *llvm::createX86ELFObjectWriter(raw_pwrite_stream &OS, - bool IsELF64, uint8_t OSABI, - uint16_t EMachine) { - MCELFObjectTargetWriter *MOTW = - new X86ELFObjectWriter(IsELF64, OSABI, EMachine); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +std::unique_ptr<MCObjectWriter> +llvm::createX86ELFObjectWriter(raw_pwrite_stream &OS, bool IsELF64, + uint8_t OSABI, uint16_t EMachine) { + auto MOTW = llvm::make_unique<X86ELFObjectWriter>(IsELF64, OSABI, EMachine); + return createELFObjectWriter(std::move(MOTW), OS, /*IsLittleEndian=*/true); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 10e2bbc64d3cf..272c6f2301459 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -380,7 +380,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, return X86::reloc_riprel_4byte_movq_load; case X86::CALL64m: case X86::JMP64m: - case X86::TEST64rm: + case X86::TEST64mr: case X86::ADC64rm: case X86::ADD64rm: case X86::AND64rm: @@ -1108,7 +1108,7 @@ bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, EmitByte(0x66, CurByte, OS); // Emit the LOCK opcode prefix.
- if (TSFlags & X86II::LOCK) + if (TSFlags & X86II::LOCK || MI.getFlags() & X86::IP_HAS_LOCK) EmitByte(0xF0, CurByte, OS); switch (TSFlags & X86II::OpPrefixMask) { @@ -1159,6 +1159,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned Opcode = MI.getOpcode(); const MCInstrDesc &Desc = MCII.get(Opcode); uint64_t TSFlags = Desc.TSFlags; + unsigned Flags = MI.getFlags(); // Pseudo instructions don't get encoded. if ((TSFlags & X86II::FormMask) == X86II::Pseudo) @@ -1194,8 +1195,10 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, MI, OS); // Emit the repeat opcode prefix as needed. - if (TSFlags & X86II::REP) + if (TSFlags & X86II::REP || Flags & X86::IP_HAS_REPEAT) EmitByte(0xF3, CurByte, OS); + if (Flags & X86::IP_HAS_REPEAT_NE) + EmitByte(0xF2, CurByte, OS); // Emit the address size opcode prefix as needed. bool need_address_override; diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index b33d895520b4c..cdd43478baedc 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -16,6 +16,7 @@ #include "InstPrinter/X86IntelInstPrinter.h" #include "X86MCAsmInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -72,52 +73,128 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) { MRI->mapLLVMRegToSEHReg(Reg, SEH); } - // These CodeView registers are numbered sequentially starting at value 1. - static const MCPhysReg LowCVRegs[] = { - X86::AL, X86::CL, X86::DL, X86::BL, X86::AH, X86::CH, - X86::DH, X86::BH, X86::AX, X86::CX, X86::DX, X86::BX, - X86::SP, X86::BP, X86::SI, X86::DI, X86::EAX, X86::ECX, - X86::EDX, X86::EBX, X86::ESP, X86::EBP, X86::ESI, X86::EDI, + // Mapping from CodeView to MC register id. 
+ static const struct { + codeview::RegisterId CVReg; + MCPhysReg Reg; + } RegMap[] = { + { codeview::RegisterId::AL, X86::AL}, + { codeview::RegisterId::CL, X86::CL}, + { codeview::RegisterId::DL, X86::DL}, + { codeview::RegisterId::BL, X86::BL}, + { codeview::RegisterId::AH, X86::AH}, + { codeview::RegisterId::CH, X86::CH}, + { codeview::RegisterId::DH, X86::DH}, + { codeview::RegisterId::BH, X86::BH}, + { codeview::RegisterId::AX, X86::AX}, + { codeview::RegisterId::CX, X86::CX}, + { codeview::RegisterId::DX, X86::DX}, + { codeview::RegisterId::BX, X86::BX}, + { codeview::RegisterId::SP, X86::SP}, + { codeview::RegisterId::BP, X86::BP}, + { codeview::RegisterId::SI, X86::SI}, + { codeview::RegisterId::DI, X86::DI}, + { codeview::RegisterId::EAX, X86::EAX}, + { codeview::RegisterId::ECX, X86::ECX}, + { codeview::RegisterId::EDX, X86::EDX}, + { codeview::RegisterId::EBX, X86::EBX}, + { codeview::RegisterId::ESP, X86::ESP}, + { codeview::RegisterId::EBP, X86::EBP}, + { codeview::RegisterId::ESI, X86::ESI}, + { codeview::RegisterId::EDI, X86::EDI}, + + { codeview::RegisterId::EFLAGS, X86::EFLAGS}, + + { codeview::RegisterId::ST0, X86::FP0}, + { codeview::RegisterId::ST1, X86::FP1}, + { codeview::RegisterId::ST2, X86::FP2}, + { codeview::RegisterId::ST3, X86::FP3}, + { codeview::RegisterId::ST4, X86::FP4}, + { codeview::RegisterId::ST5, X86::FP5}, + { codeview::RegisterId::ST6, X86::FP6}, + { codeview::RegisterId::ST7, X86::FP7}, + + { codeview::RegisterId::XMM0, X86::XMM0}, + { codeview::RegisterId::XMM1, X86::XMM1}, + { codeview::RegisterId::XMM2, X86::XMM2}, + { codeview::RegisterId::XMM3, X86::XMM3}, + { codeview::RegisterId::XMM4, X86::XMM4}, + { codeview::RegisterId::XMM5, X86::XMM5}, + { codeview::RegisterId::XMM6, X86::XMM6}, + { codeview::RegisterId::XMM7, X86::XMM7}, + + { codeview::RegisterId::XMM8, X86::XMM8}, + { codeview::RegisterId::XMM9, X86::XMM9}, + { codeview::RegisterId::XMM10, X86::XMM10}, + { codeview::RegisterId::XMM11, X86::XMM11}, + { codeview::RegisterId::XMM12, X86::XMM12}, + { codeview::RegisterId::XMM13, X86::XMM13}, + { codeview::RegisterId::XMM14, X86::XMM14}, + { codeview::RegisterId::XMM15, X86::XMM15}, + + { codeview::RegisterId::SIL, X86::SIL}, + { codeview::RegisterId::DIL, X86::DIL}, + { codeview::RegisterId::BPL, X86::BPL}, + { codeview::RegisterId::SPL, X86::SPL}, + { codeview::RegisterId::RAX, X86::RAX}, + { codeview::RegisterId::RBX, X86::RBX}, + { codeview::RegisterId::RCX, X86::RCX}, + { codeview::RegisterId::RDX, X86::RDX}, + { codeview::RegisterId::RSI, X86::RSI}, + { codeview::RegisterId::RDI, X86::RDI}, + { codeview::RegisterId::RBP, X86::RBP}, + { codeview::RegisterId::RSP, X86::RSP}, + { codeview::RegisterId::R8, X86::R8}, + { codeview::RegisterId::R9, X86::R9}, + { codeview::RegisterId::R10, X86::R10}, + { codeview::RegisterId::R11, X86::R11}, + { codeview::RegisterId::R12, X86::R12}, + { codeview::RegisterId::R13, X86::R13}, + { codeview::RegisterId::R14, X86::R14}, + { codeview::RegisterId::R15, X86::R15}, + { codeview::RegisterId::R8B, X86::R8B}, + { codeview::RegisterId::R9B, X86::R9B}, + { codeview::RegisterId::R10B, X86::R10B}, + { codeview::RegisterId::R11B, X86::R11B}, + { codeview::RegisterId::R12B, X86::R12B}, + { codeview::RegisterId::R13B, X86::R13B}, + { codeview::RegisterId::R14B, X86::R14B}, + { codeview::RegisterId::R15B, X86::R15B}, + { codeview::RegisterId::R8W, X86::R8W}, + { codeview::RegisterId::R9W, X86::R9W}, + { codeview::RegisterId::R10W, X86::R10W}, + { codeview::RegisterId::R11W, X86::R11W}, + { 
codeview::RegisterId::R12W, X86::R12W}, + { codeview::RegisterId::R13W, X86::R13W}, + { codeview::RegisterId::R14W, X86::R14W}, + { codeview::RegisterId::R15W, X86::R15W}, + { codeview::RegisterId::R8D, X86::R8D}, + { codeview::RegisterId::R9D, X86::R9D}, + { codeview::RegisterId::R10D, X86::R10D}, + { codeview::RegisterId::R11D, X86::R11D}, + { codeview::RegisterId::R12D, X86::R12D}, + { codeview::RegisterId::R13D, X86::R13D}, + { codeview::RegisterId::R14D, X86::R14D}, + { codeview::RegisterId::R15D, X86::R15D}, + { codeview::RegisterId::AMD64_YMM0, X86::YMM0}, + { codeview::RegisterId::AMD64_YMM1, X86::YMM1}, + { codeview::RegisterId::AMD64_YMM2, X86::YMM2}, + { codeview::RegisterId::AMD64_YMM3, X86::YMM3}, + { codeview::RegisterId::AMD64_YMM4, X86::YMM4}, + { codeview::RegisterId::AMD64_YMM5, X86::YMM5}, + { codeview::RegisterId::AMD64_YMM6, X86::YMM6}, + { codeview::RegisterId::AMD64_YMM7, X86::YMM7}, + { codeview::RegisterId::AMD64_YMM8, X86::YMM8}, + { codeview::RegisterId::AMD64_YMM9, X86::YMM9}, + { codeview::RegisterId::AMD64_YMM10, X86::YMM10}, + { codeview::RegisterId::AMD64_YMM11, X86::YMM11}, + { codeview::RegisterId::AMD64_YMM12, X86::YMM12}, + { codeview::RegisterId::AMD64_YMM13, X86::YMM13}, + { codeview::RegisterId::AMD64_YMM14, X86::YMM14}, + { codeview::RegisterId::AMD64_YMM15, X86::YMM15}, }; - unsigned CVLowRegStart = 1; - for (unsigned I = 0; I < array_lengthof(LowCVRegs); ++I) - MRI->mapLLVMRegToCVReg(LowCVRegs[I], I + CVLowRegStart); - - MRI->mapLLVMRegToCVReg(X86::EFLAGS, 34); - - // The x87 registers start at 128 and are numbered sequentially. - unsigned FP0Start = 128; - for (unsigned I = 0; I < 8; ++I) - MRI->mapLLVMRegToCVReg(X86::FP0 + I, FP0Start + I); - - // The low 8 XMM registers start at 154 and are numbered sequentially. - unsigned CVXMM0Start = 154; - for (unsigned I = 0; I < 8; ++I) - MRI->mapLLVMRegToCVReg(X86::XMM0 + I, CVXMM0Start + I); - - // The high 8 XMM registers start at 252 and are numbered sequentially. - unsigned CVXMM8Start = 252; - for (unsigned I = 0; I < 8; ++I) - MRI->mapLLVMRegToCVReg(X86::XMM8 + I, CVXMM8Start + I); - - // FIXME: XMM16 and above from AVX512 not yet documented. - - // AMD64 registers start at 324 and count up. - unsigned CVX64RegStart = 324; - static const MCPhysReg CVX64Regs[] = { - X86::SIL, X86::DIL, X86::BPL, X86::SPL, X86::RAX, X86::RBX, - X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBP, X86::RSP, - X86::R8, X86::R9, X86::R10, X86::R11, X86::R12, X86::R13, - X86::R14, X86::R15, X86::R8B, X86::R9B, X86::R10B, X86::R11B, - X86::R12B, X86::R13B, X86::R14B, X86::R15B, X86::R8W, X86::R9W, - X86::R10W, X86::R11W, X86::R12W, X86::R13W, X86::R14W, X86::R15W, - X86::R8D, X86::R9D, X86::R10D, X86::R11D, X86::R12D, X86::R13D, - X86::R14D, X86::R15D, X86::YMM0, X86::YMM1, X86::YMM2, X86::YMM3, - X86::YMM4, X86::YMM5, X86::YMM6, X86::YMM7, X86::YMM8, X86::YMM9, - X86::YMM10, X86::YMM11, X86::YMM12, X86::YMM13, X86::YMM14, X86::YMM15, - }; - for (unsigned I = 0; I < array_lengthof(CVX64Regs); ++I) - MRI->mapLLVMRegToCVReg(CVX64Regs[I], CVX64RegStart + I); + for (unsigned I = 0; I < array_lengthof(RegMap); ++I) + MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg)); } MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT, @@ -242,7 +319,13 @@ extern "C" void LLVMInitializeX86TargetMC() { // Register the code emitter. TargetRegistry::RegisterMCCodeEmitter(*T, createX86MCCodeEmitter); - // Register the object streamer. + // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T, + createX86ObjectTargetStreamer); + + // Register the asm target streamer. + TargetRegistry::RegisterAsmTargetStreamer(*T, createX86AsmTargetStreamer); + TargetRegistry::RegisterCOFFStreamer(*T, createX86WinCOFFStreamer); // Register the MCInstPrinter. diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index f73e734b9b0e6..c5859b600ad2b 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -77,25 +77,41 @@ MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU, const MCTargetOptions &Options); +/// Implements X86-only directives for assembly emission. +MCTargetStreamer *createX86AsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrint, + bool isVerboseAsm); + +/// Implements X86-only directives for object files. +MCTargetStreamer *createX86ObjectTargetStreamer(MCStreamer &OS, + const MCSubtargetInfo &STI); + /// Construct an X86 Windows COFF machine code streamer which will generate /// PE/COFF format object files. /// /// Takes ownership of \p AB and \p CE. -MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, - raw_pwrite_stream &OS, MCCodeEmitter *CE, - bool RelaxAll, bool IncrementalLinkerCompatible); +MCStreamer *createX86WinCOFFStreamer(MCContext &C, + std::unique_ptr<MCAsmBackend> &&AB, + raw_pwrite_stream &OS, + std::unique_ptr<MCCodeEmitter> &&CE, + bool RelaxAll, + bool IncrementalLinkerCompatible); /// Construct an X86 Mach-O object writer. -MCObjectWriter *createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, - uint32_t CPUType, - uint32_t CPUSubtype); +std::unique_ptr<MCObjectWriter> createX86MachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype); /// Construct an X86 ELF object writer. -MCObjectWriter *createX86ELFObjectWriter(raw_pwrite_stream &OS, bool IsELF64, - uint8_t OSABI, uint16_t EMachine); +std::unique_ptr<MCObjectWriter> createX86ELFObjectWriter(raw_pwrite_stream &OS, + bool IsELF64, + uint8_t OSABI, + uint16_t EMachine); /// Construct an X86 Win COFF object writer. -MCObjectWriter *createX86WinCOFFObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit); +std::unique_ptr<MCObjectWriter> +createX86WinCOFFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit);  /// Returns the sub or super register of a specific X86 register. /// e.g. getX86SubSuperRegister(X86::EAX, 16) returns X86::AX.
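Taken together, the MCObjectWriter changes in this patch move every X86 object-writer factory from returning a raw owning pointer to returning std::unique_ptr<MCObjectWriter>, so the ownership handoff is visible in the signature at each call site. Below is a minimal, self-contained sketch of that convention; Writer, TargetWriter, and makeWriter are illustrative stand-ins, not LLVM APIs:

#include <memory>
#include <utility>

struct TargetWriter {
  explicit TargetWriter(bool Is64Bit) : Is64Bit(Is64Bit) {}
  bool Is64Bit;
};

struct Writer {
  // The Writer becomes the sole owner of its target-specific helper.
  explicit Writer(std::unique_ptr<TargetWriter> TW) : TW(std::move(TW)) {}
  std::unique_ptr<TargetWriter> TW;
};

// Returning unique_ptr documents the ownership transfer in the type system,
// where the old raw-pointer return left it implicit.
std::unique_ptr<Writer> makeWriter(bool Is64Bit) {
  auto TW = std::make_unique<TargetWriter>(Is64Bit);
  return std::make_unique<Writer>(std::move(TW));
}

int main() {
  std::unique_ptr<Writer> W = makeWriter(/*Is64Bit=*/true);
  return W->TW->Is64Bit ? 0 : 1;
}

The same shape appears throughout the patch: each createX86*ObjectWriter builds the target writer with llvm::make_unique and hands it to the generic create*ObjectWriter exactly once via std::move.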
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 8f2017e990c51..965f7de809b31 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -597,11 +597,10 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createX86MachObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit, uint32_t CPUType, - uint32_t CPUSubtype) { - return createMachObjectWriter(new X86MachObjectWriter(Is64Bit, - CPUType, - CPUSubtype), - OS, /*IsLittleEndian=*/true); +std::unique_ptr<MCObjectWriter> +llvm::createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, + uint32_t CPUType, uint32_t CPUSubtype) { + return createMachObjectWriter( + llvm::make_unique<X86MachObjectWriter>(Is64Bit, CPUType, CPUSubtype), OS, + /*IsLittleEndian=*/true); } diff --git a/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h b/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h new file mode 100644 index 0000000000000..8d38cd32b82c9 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h @@ -0,0 +1,34 @@ +//===- X86TargetStreamer.h ------------------------------*- C++ -*---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_X86_MCTARGETDESC_X86TARGETSTREAMER_H +#define LLVM_LIB_TARGET_X86_MCTARGETDESC_X86TARGETSTREAMER_H + +#include "llvm/MC/MCStreamer.h" + +namespace llvm { + +/// X86 target streamer implementing x86-only assembly directives. +class X86TargetStreamer : public MCTargetStreamer { +public: + X86TargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} + + virtual bool emitFPOProc(const MCSymbol *ProcSym, unsigned ParamsSize, + SMLoc L = {}) = 0; + virtual bool emitFPOEndPrologue(SMLoc L = {}) = 0; + virtual bool emitFPOEndProc(SMLoc L = {}) = 0; + virtual bool emitFPOData(const MCSymbol *ProcSym, SMLoc L = {}) = 0; + virtual bool emitFPOPushReg(unsigned Reg, SMLoc L = {}) = 0; + virtual bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L = {}) = 0; + virtual bool emitFPOSetFrame(unsigned Reg, SMLoc L = {}) = 0; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index 807f7a6ddb198..5139bb46b5612 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/Support/ErrorHandling.h" @@ -104,8 +105,8 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx, llvm_unreachable("Unsupported COFF machine type."); } -MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit) { - MCWinCOFFObjectTargetWriter *MOTW = new X86WinCOFFObjectWriter(Is64Bit); - return createWinCOFFObjectWriter(MOTW, OS); +std::unique_ptr<MCObjectWriter> +llvm::createX86WinCOFFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit) { + auto MOTW = llvm::make_unique<X86WinCOFFObjectWriter>(Is64Bit); + return createWinCOFFObjectWriter(std::move(MOTW), OS); } diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index d04511873b46a..5b1357ae4a7be 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -8,6 +8,9 @@ //===----------------------------------------------------------------------===// #include "X86MCTargetDesc.h" +#include "X86TargetStreamer.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCWin64EH.h" #include "llvm/MC/MCWinCOFFStreamer.h" @@ -17,17 +20,18 @@ namespace { class X86WinCOFFStreamer : public MCWinCOFFStreamer { Win64EH::UnwindEmitter EHStreamer; public: - X86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter *CE, - raw_pwrite_stream &OS) - : MCWinCOFFStreamer(C, AB, *CE, OS) {} + X86WinCOFFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> AB, + std::unique_ptr<MCCodeEmitter> CE, raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), OS) {} - void EmitWinEHHandlerData() override; + void EmitWinEHHandlerData(SMLoc Loc) override; void EmitWindowsUnwindTables() override; + void EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) override; void FinishImpl() override; }; -void X86WinCOFFStreamer::EmitWinEHHandlerData() { - MCStreamer::EmitWinEHHandlerData(); +void X86WinCOFFStreamer::EmitWinEHHandlerData(SMLoc Loc) { + MCStreamer::EmitWinEHHandlerData(Loc); // We have to emit the unwind info now, because this directive // actually switches to the .xdata section! @@ -40,6 +44,12 @@ void X86WinCOFFStreamer::EmitWindowsUnwindTables() { EHStreamer.Emit(*this); } +void X86WinCOFFStreamer::EmitCVFPOData(const MCSymbol *ProcSym, SMLoc Loc) { + X86TargetStreamer *XTS = + static_cast<X86TargetStreamer *>(getTargetStreamer()); + XTS->emitFPOData(ProcSym, Loc); +} + void X86WinCOFFStreamer::FinishImpl() { EmitFrames(nullptr); EmitWindowsUnwindTables(); @@ -48,11 +58,14 @@ void X86WinCOFFStreamer::FinishImpl() { } } -MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, +MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, + std::unique_ptr<MCAsmBackend> &&AB, raw_pwrite_stream &OS, - MCCodeEmitter *CE, bool RelaxAll, + std::unique_ptr<MCCodeEmitter> &&CE, + bool RelaxAll, bool IncrementalLinkerCompatible) { - X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS); + X86WinCOFFStreamer *S = + new X86WinCOFFStreamer(C, std::move(AB), std::move(CE), OS); S->getAssembler().setRelaxAll(RelaxAll); S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); return S; diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp new file mode 100644 index 0000000000000..093dab4f2f964 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp @@ -0,0 +1,415 @@ +//===-- X86WinCOFFTargetStreamer.cpp ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "X86MCTargetDesc.h" +#include "X86TargetStreamer.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/MC/MCCodeView.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/FormattedStream.h" + +using namespace llvm; +using namespace llvm::codeview; + +namespace { +/// Implements Windows x86-only directives for assembly emission.
+class X86WinCOFFAsmTargetStreamer : public X86TargetStreamer { + formatted_raw_ostream &OS; + MCInstPrinter &InstPrinter; + +public: + X86WinCOFFAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, + MCInstPrinter &InstPrinter) + : X86TargetStreamer(S), OS(OS), InstPrinter(InstPrinter) {} + + bool emitFPOProc(const MCSymbol *ProcSym, unsigned ParamsSize, + SMLoc L) override; + bool emitFPOEndPrologue(SMLoc L) override; + bool emitFPOEndProc(SMLoc L) override; + bool emitFPOData(const MCSymbol *ProcSym, SMLoc L) override; + bool emitFPOPushReg(unsigned Reg, SMLoc L) override; + bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) override; + bool emitFPOSetFrame(unsigned Reg, SMLoc L) override; +}; + +/// Represents a single FPO directive. +struct FPOInstruction { + MCSymbol *Label; + enum Operation { + PushReg, + StackAlloc, + SetFrame, + } Op; + unsigned RegOrOffset; +}; + +struct FPOData { + const MCSymbol *Function = nullptr; + MCSymbol *Begin = nullptr; + MCSymbol *PrologueEnd = nullptr; + MCSymbol *End = nullptr; + unsigned ParamsSize = 0; + + SmallVector<FPOInstruction, 5> Instructions; +}; + +/// Implements Windows x86-only directives for object emission. +class X86WinCOFFTargetStreamer : public X86TargetStreamer { + /// Map from function symbol to its FPO data. + DenseMap<const MCSymbol *, std::unique_ptr<FPOData>> AllFPOData; + + /// Current FPO data created by .cv_fpo_proc. + std::unique_ptr<FPOData> CurFPOData; + + bool haveOpenFPOData() { return !!CurFPOData; } + + /// Diagnoses an error at L if we are not in an FPO prologue. Return true on + /// error. + bool checkInFPOPrologue(SMLoc L); + + MCSymbol *emitFPOLabel(); + + MCContext &getContext() { return getStreamer().getContext(); } + +public: + X86WinCOFFTargetStreamer(MCStreamer &S) : X86TargetStreamer(S) {} + + bool emitFPOProc(const MCSymbol *ProcSym, unsigned ParamsSize, + SMLoc L) override; + bool emitFPOEndPrologue(SMLoc L) override; + bool emitFPOEndProc(SMLoc L) override; + bool emitFPOData(const MCSymbol *ProcSym, SMLoc L) override; + bool emitFPOPushReg(unsigned Reg, SMLoc L) override; + bool emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) override; + bool emitFPOSetFrame(unsigned Reg, SMLoc L) override; +}; +} // end namespace + +bool X86WinCOFFAsmTargetStreamer::emitFPOProc(const MCSymbol *ProcSym, + unsigned ParamsSize, SMLoc L) { + OS << "\t.cv_fpo_proc\t"; + ProcSym->print(OS, getStreamer().getContext().getAsmInfo()); + OS << ' ' << ParamsSize << '\n'; + return false; +} + +bool X86WinCOFFAsmTargetStreamer::emitFPOEndPrologue(SMLoc L) { + OS << "\t.cv_fpo_endprologue\n"; + return false; +} + +bool X86WinCOFFAsmTargetStreamer::emitFPOEndProc(SMLoc L) { + OS << "\t.cv_fpo_endproc\n"; + return false; +} + +bool X86WinCOFFAsmTargetStreamer::emitFPOData(const MCSymbol *ProcSym, + SMLoc L) { + OS << "\t.cv_fpo_data\t"; + ProcSym->print(OS, getStreamer().getContext().getAsmInfo()); + OS << '\n'; + return false; +} + +bool X86WinCOFFAsmTargetStreamer::emitFPOPushReg(unsigned Reg, SMLoc L) { + OS << "\t.cv_fpo_pushreg\t"; + InstPrinter.printRegName(OS, Reg); + OS << '\n'; + return false; +} + +bool X86WinCOFFAsmTargetStreamer::emitFPOStackAlloc(unsigned StackAlloc, + SMLoc L) { + OS << "\t.cv_fpo_stackalloc\t" << StackAlloc << '\n'; + return false; +} + +bool X86WinCOFFAsmTargetStreamer::emitFPOSetFrame(unsigned Reg, SMLoc L) { + OS << "\t.cv_fpo_setframe\t"; + InstPrinter.printRegName(OS, Reg); + OS << '\n'; + return false; +} + +bool X86WinCOFFTargetStreamer::checkInFPOPrologue(SMLoc L) { + if (!haveOpenFPOData() || CurFPOData->PrologueEnd) { + getContext().reportError( + L,
"directive must appear between .cv_fpo_proc and .cv_fpo_endprologue"); + return true; + } + return false; +} + +MCSymbol *X86WinCOFFTargetStreamer::emitFPOLabel() { + MCSymbol *Label = getContext().createTempSymbol("cfi", true); + getStreamer().EmitLabel(Label); + return Label; +} + +bool X86WinCOFFTargetStreamer::emitFPOProc(const MCSymbol *ProcSym, + unsigned ParamsSize, SMLoc L) { + if (haveOpenFPOData()) { + getContext().reportError( + L, "opening new .cv_fpo_proc before closing previous frame"); + return true; + } + CurFPOData = llvm::make_unique(); + CurFPOData->Function = ProcSym; + CurFPOData->Begin = emitFPOLabel(); + CurFPOData->ParamsSize = ParamsSize; + return false; +} + +bool X86WinCOFFTargetStreamer::emitFPOEndProc(SMLoc L) { + if (!haveOpenFPOData()) { + getContext().reportError(L, ".cv_fpo_endproc must appear after .cv_proc"); + return true; + } + if (!CurFPOData->PrologueEnd) { + // Complain if there were prologue setup instructions but no end prologue. + if (!CurFPOData->Instructions.empty()) { + getContext().reportError(L, "missing .cv_fpo_endprologue"); + CurFPOData->Instructions.clear(); + } + + // Claim there is a zero-length prologue to make the label math work out + // later. + CurFPOData->PrologueEnd = CurFPOData->Begin; + } + + CurFPOData->End = emitFPOLabel(); + const MCSymbol *Fn = CurFPOData->Function; + AllFPOData.insert({Fn, std::move(CurFPOData)}); + return false; +} + +bool X86WinCOFFTargetStreamer::emitFPOSetFrame(unsigned Reg, SMLoc L) { + if (checkInFPOPrologue(L)) + return true; + FPOInstruction Inst; + Inst.Label = emitFPOLabel(); + Inst.Op = FPOInstruction::SetFrame; + Inst.RegOrOffset = Reg; + CurFPOData->Instructions.push_back(Inst); + return false; +} + +bool X86WinCOFFTargetStreamer::emitFPOPushReg(unsigned Reg, SMLoc L) { + if (checkInFPOPrologue(L)) + return true; + FPOInstruction Inst; + Inst.Label = emitFPOLabel(); + Inst.Op = FPOInstruction::PushReg; + Inst.RegOrOffset = Reg; + CurFPOData->Instructions.push_back(Inst); + return false; +} + +bool X86WinCOFFTargetStreamer::emitFPOStackAlloc(unsigned StackAlloc, SMLoc L) { + if (checkInFPOPrologue(L)) + return true; + FPOInstruction Inst; + Inst.Label = emitFPOLabel(); + Inst.Op = FPOInstruction::StackAlloc; + Inst.RegOrOffset = StackAlloc; + CurFPOData->Instructions.push_back(Inst); + return false; +} + +bool X86WinCOFFTargetStreamer::emitFPOEndPrologue(SMLoc L) { + if (checkInFPOPrologue(L)) + return true; + CurFPOData->PrologueEnd = emitFPOLabel(); + return false; +} + +namespace { +struct RegSaveOffset { + RegSaveOffset(unsigned Reg, unsigned Offset) : Reg(Reg), Offset(Offset) {} + + unsigned Reg = 0; + unsigned Offset = 0; +}; + +struct FPOStateMachine { + explicit FPOStateMachine(const FPOData *FPO) : FPO(FPO) {} + + const FPOData *FPO = nullptr; + unsigned FrameReg = 0; + unsigned FrameRegOff = 0; + unsigned CurOffset = 0; + unsigned LocalSize = 0; + unsigned SavedRegSize = 0; + unsigned Flags = 0; // FIXME: Set HasSEH / HasEH. + + SmallString<128> FrameFunc; + + SmallVector RegSaveOffsets; + + void emitFrameDataRecord(MCStreamer &OS, MCSymbol *Label); +}; +} // end namespace + +static Printable printFPOReg(const MCRegisterInfo *MRI, unsigned LLVMReg) { + return Printable([MRI, LLVMReg](raw_ostream &OS) { + switch (LLVMReg) { + // MSVC only seems to emit symbolic register names for EIP, EBP, and ESP, + // but the format seems to support more than that, so we emit them. 
+ case X86::EAX: OS << "$eax"; break; + case X86::EBX: OS << "$ebx"; break; + case X86::ECX: OS << "$ecx"; break; + case X86::EDX: OS << "$edx"; break; + case X86::EDI: OS << "$edi"; break; + case X86::ESI: OS << "$esi"; break; + case X86::ESP: OS << "$esp"; break; + case X86::EBP: OS << "$ebp"; break; + case X86::EIP: OS << "$eip"; break; + // Otherwise, get the codeview register number and print $N. + default: + OS << '$' << MRI->getCodeViewRegNum(LLVMReg); + break; + } + }); +} + +void FPOStateMachine::emitFrameDataRecord(MCStreamer &OS, MCSymbol *Label) { + unsigned CurFlags = Flags; + if (Label == FPO->Begin) + CurFlags |= FrameData::IsFunctionStart; + + // Compute the new FrameFunc string. + FrameFunc.clear(); + raw_svector_ostream FuncOS(FrameFunc); + const MCRegisterInfo *MRI = OS.getContext().getRegisterInfo(); + if (FrameReg) { + // CFA is FrameReg + FrameRegOff. + FuncOS << "$T0 " << printFPOReg(MRI, FrameReg) << " " << FrameRegOff + << " + = "; + } else { + // The address of return address is ESP + CurOffset, but we use .raSearch to + // match MSVC. This seems to ask the debugger to subtract some combination + // of LocalSize and SavedRegSize from ESP and grovel around in that memory + // to find the address of a plausible return address. + FuncOS << "$T0 .raSearch = "; + } + + // Caller's $eip should be dereferenced CFA, and $esp should be CFA plus 4. + FuncOS << "$eip $T0 ^ = $esp $T0 4 + = "; + + // Each saved register is stored at an unchanging negative CFA offset. + for (RegSaveOffset RO : RegSaveOffsets) + FuncOS << printFPOReg(MRI, RO.Reg) << " $T0 " << RO.Offset << " - ^ = "; + + // Add it to the CV string table. + CodeViewContext &CVCtx = OS.getContext().getCVContext(); + unsigned FrameFuncStrTabOff = CVCtx.addToStringTable(FuncOS.str()).second; + + // MSVC has only ever been observed to emit a MaxStackSize of zero. + unsigned MaxStackSize = 0; + + // The FrameData record format is: + // ulittle32_t RvaStart; + // ulittle32_t CodeSize; + // ulittle32_t LocalSize; + // ulittle32_t ParamsSize; + // ulittle32_t MaxStackSize; + // ulittle32_t FrameFunc; // String table offset + // ulittle16_t PrologSize; + // ulittle16_t SavedRegsSize; + // ulittle32_t Flags; + + OS.emitAbsoluteSymbolDiff(Label, FPO->Begin, 4); // RvaStart + OS.emitAbsoluteSymbolDiff(FPO->End, Label, 4); // CodeSize + OS.EmitIntValue(LocalSize, 4); + OS.EmitIntValue(FPO->ParamsSize, 4); + OS.EmitIntValue(MaxStackSize, 4); + OS.EmitIntValue(FrameFuncStrTabOff, 4); // FrameFunc + OS.emitAbsoluteSymbolDiff(FPO->PrologueEnd, Label, 2); + OS.EmitIntValue(SavedRegSize, 2); + OS.EmitIntValue(CurFlags, 4); +} + +/// Compute and emit the real CodeView FrameData subsection. +bool X86WinCOFFTargetStreamer::emitFPOData(const MCSymbol *ProcSym, SMLoc L) { + MCStreamer &OS = getStreamer(); + MCContext &Ctx = OS.getContext(); + + auto I = AllFPOData.find(ProcSym); + if (I == AllFPOData.end()) { + Ctx.reportError(L, Twine("no FPO data found for symbol ") + + ProcSym->getName()); + return true; + } + const FPOData *FPO = I->second.get(); + assert(FPO->Begin && FPO->End && FPO->PrologueEnd && "missing FPO label"); + + MCSymbol *FrameBegin = Ctx.createTempSymbol(), + *FrameEnd = Ctx.createTempSymbol(); + + OS.EmitIntValue(unsigned(DebugSubsectionKind::FrameData), 4); + OS.emitAbsoluteSymbolDiff(FrameEnd, FrameBegin, 4); + OS.EmitLabel(FrameBegin); + + // Start with the RVA of the function in question. 
+ OS.EmitValue(MCSymbolRefExpr::create(FPO->Function, + MCSymbolRefExpr::VK_COFF_IMGREL32, Ctx), + 4); + + // Emit a sequence of FrameData records. + FPOStateMachine FSM(FPO); + + FSM.emitFrameDataRecord(OS, FPO->Begin); + for (const FPOInstruction &Inst : FPO->Instructions) { + switch (Inst.Op) { + case FPOInstruction::PushReg: + FSM.CurOffset += 4; + FSM.SavedRegSize += 4; + FSM.RegSaveOffsets.push_back({Inst.RegOrOffset, FSM.CurOffset}); + break; + case FPOInstruction::SetFrame: + FSM.FrameReg = Inst.RegOrOffset; + FSM.FrameRegOff = FSM.CurOffset; + break; + case FPOInstruction::StackAlloc: + FSM.CurOffset += Inst.RegOrOffset; + FSM.LocalSize += Inst.RegOrOffset; + // No need to emit FrameData for stack allocations with a frame pointer. + if (FSM.FrameReg) + continue; + break; + } + FSM.emitFrameDataRecord(OS, Inst.Label); + } + + OS.EmitValueToAlignment(4, 0); + OS.EmitLabel(FrameEnd); + return false; +} + +MCTargetStreamer *llvm::createX86AsmTargetStreamer(MCStreamer &S, + formatted_raw_ostream &OS, + MCInstPrinter *InstPrinter, + bool IsVerboseAsm) { + // FIXME: This makes it so we textually assemble COFF directives on ELF. + // That's kind of nonsensical. + return new X86WinCOFFAsmTargetStreamer(S, OS, *InstPrinter); +} + +MCTargetStreamer * +llvm::createX86ObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + // No need to register a target streamer. + if (!STI.getTargetTriple().isOSBinFormatCOFF()) + return nullptr; + // Registers itself to the MCStreamer. + return new X86WinCOFFTargetStreamer(S); +} diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 024a26efa99ce..5631648d2dc89 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -92,6 +92,10 @@ FunctionPass *createX86CmovConverterPass(); /// the upper portions of registers, and to save code size. FunctionPass *createX86FixupBWInsts(); +/// Return a Machine IR pass that reassigns instruction chains from one domain +/// to another, when profitable. +FunctionPass *createX86DomainReassignmentPass(); + void initializeFixupBWInstPassPass(PassRegistry &); /// This pass replaces EVEX encoded AVX-512 instructions by VEX diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 10d8031963ab1..840b3b40d4e25 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -95,8 +95,6 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", "64-bit with cmpxchg16b", [Feature64Bit]>; -def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", - "Bit testing of memory is slow">; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", @@ -336,7 +334,7 @@ def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, def : Proc<"pentium3", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>; def : Proc<"pentium3m", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureSSE1, FeatureFXSR, FeatureSlowBTMem]>; + FeatureSSE1, FeatureFXSR]>; // Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default @@ -350,7 +348,7 @@ def : Proc<"pentium3m", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, def : ProcessorModel<"pentium-m", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>; + FeatureSSE2, FeatureFXSR]>; def : ProcessorModel<"pentium4", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, @@ -358,7 +356,7 @@ def : ProcessorModel<"pentium4", GenericPostRAModel, def : ProcessorModel<"pentium4m", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>; + FeatureSSE2, FeatureFXSR]>; // Intel Quark. def : Proc<"lakemont", []>; @@ -366,20 +364,19 @@ def : Proc<"lakemont", []>; // Intel Core Duo. def : ProcessorModel<"yonah", SandyBridgeModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, - FeatureFXSR, FeatureSlowBTMem]>; + FeatureFXSR]>; // NetBurst. def : ProcessorModel<"prescott", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, - FeatureFXSR, FeatureSlowBTMem]>; + FeatureFXSR]>; def : ProcessorModel<"nocona", GenericPostRAModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, - FeatureCMPXCHG16B, - FeatureSlowBTMem + FeatureCMPXCHG16B ]>; // Intel Core 2 Solo/Duo. @@ -390,7 +387,6 @@ def : ProcessorModel<"core2", SandyBridgeModel, [ FeatureSSSE3, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureLAHFSAHF, FeatureMacroFusion ]>; @@ -401,7 +397,6 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureSSE41, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureLAHFSAHF, FeatureMacroFusion ]>; @@ -416,7 +411,6 @@ class BonnellProc : ProcessorModel : ProcessorModel; @@ -466,7 +459,6 @@ class GoldmontProc : ProcessorModel : ProcessorModel : ProcessorModel : ProcModel; def : SandyBridgeProc<"sandybridge">; @@ -561,7 +550,6 @@ def IVBFeatures : ProcessorFeatures : ProcModel; def : IvyBridgeProc<"ivybridge">; @@ -579,18 +567,19 @@ def HSWFeatures : ProcessorFeatures : ProcModel; + ProcIntelHSW +]>; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. def BDWFeatures : ProcessorFeatures; -class BroadwellProc : ProcModel; +class BroadwellProc : ProcModel; def : BroadwellProc<"broadwell">; def SKLFeatures : ProcessorFeatures : ProcModel; + ProcIntelSKL +]>; def : SkylakeClientProc<"skylake">; -// FIXME: define KNL model -class KnightsLandingProc : ProcModel : ProcModel; + +// FIXME: define KNL model +class KnightsLandingProc : ProcModel; def : KnightsLandingProc<"knl">; +class KnightsMillProc : ProcModel; +def : KnightsMillProc<"knm">; // TODO Add AVX5124FMAPS/AVX5124VNNIW features + def SKXFeatures : ProcessorFeatures; -// FIXME: define SKX model -class SkylakeServerProc : ProcModel : ProcModel; + ProcIntelSKX +]>; def : SkylakeServerProc<"skylake-avx512">; def : SkylakeServerProc<"skx">; // Legacy alias. @@ -656,8 +656,8 @@ def CNLFeatures : ProcessorFeatures : ProcModel; + ProcIntelCNL +]>; def : CannonlakeProc<"cannonlake">; // AMD CPUs. 
@@ -666,46 +666,43 @@ def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"athlon", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"athlon-tbird", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"athlon-4", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1, - Feature3DNowA, FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"athlon-4", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1, + Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>; def : Proc<"athlon-xp", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1, - Feature3DNowA, FeatureFXSR, FeatureSlowBTMem, - FeatureSlowSHLD]>; + Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>; def : Proc<"athlon-mp", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1, - Feature3DNowA, FeatureFXSR, FeatureSlowBTMem, - FeatureSlowSHLD]>; + Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>; def : Proc<"k8", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, FeatureFXSR, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"opteron", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, FeatureFXSR, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"athlon64", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, FeatureFXSR, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"athlon-fx", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, FeatureFXSR, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"k8-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"opteron-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"athlon64-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"amdfam10", [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD, + FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF]>; def : Proc<"barcelona", [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD, + FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF]>; // Bobcat @@ -911,7 +908,6 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [ FeatureFXSR, Feature64Bit, FeatureSlow3OpsLEA, - FeatureSlowBTMem, FeatureSlowIncDec, FeatureMacroFusion ]>; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index dc15aeadaa619..1c938d9c84235 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -15,6 +15,7 @@ #include "X86AsmPrinter.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86TargetStreamer.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" #include "llvm/BinaryFormat/COFF.h" @@ -40,6 +41,10 @@ #include "llvm/Support/TargetRegistry.h" using namespace llvm; +X86AsmPrinter::X86AsmPrinter(TargetMachine &TM, + std::unique_ptr 
<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)), SM(*this), FM(*this) {} + //===----------------------------------------------------------------------===// // Primitive Helper Functions. //===----------------------------------------------------------------------===// @@ -51,9 +56,12 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { SMShadowTracker.startFunction(MF); CodeEmitter.reset(TM.getTarget().createMCCodeEmitter( - *MF.getSubtarget().getInstrInfo(), *MF.getSubtarget().getRegisterInfo(), + *Subtarget->getInstrInfo(), *Subtarget->getRegisterInfo(), MF.getContext())); + + EmitFPOData = + Subtarget->isTargetWin32() && MF.getMMI().getModule()->getCodeViewFlag(); + SetupMachineFunction(MF); if (Subtarget->isTargetCOFF()) { @@ -72,10 +80,30 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the XRay table for this function. emitXRayTable(); + EmitFPOData = false; + // We didn't modify anything. return false; } +void X86AsmPrinter::EmitFunctionBodyStart() { + if (EmitFPOData) { + X86TargetStreamer *XTS = + static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()); + unsigned ParamsSize = + MF->getInfo<X86MachineFunctionInfo>()->getArgumentStackSize(); + XTS->emitFPOProc(CurrentFnSym, ParamsSize); + } +} + +void X86AsmPrinter::EmitFunctionBodyEnd() { + if (EmitFPOData) { + X86TargetStreamer *XTS = + static_cast<X86TargetStreamer *>(OutStreamer->getTargetStreamer()); + XTS->emitFPOEndProc(); + } +} + /// printSymbolOperand - Print a raw symbol reference operand. This handles /// jump tables, constant pools, global address and external symbols, all of /// which print to a label with various suffixes for relocation types etc. diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index d7c3b74d3efb2..08d7734517932 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -14,6 +14,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/FaultMaps.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/Target/TargetMachine.h" // Implemented in X86MCInstLower.cpp @@ -30,6 +31,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { StackMaps SM; FaultMaps FM; std::unique_ptr<MCCodeEmitter> CodeEmitter; + bool EmitFPOData = false; // This utility class tracks the length of a stackmap instruction's 'shadow'. // It is used by the X86AsmPrinter to ensure that the stackmap shadow @@ -99,10 +101,11 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { // function. void EmitXRayTable(); + // Choose between emitting .seh_ directives and .cv_fpo_ directives.
+ void EmitSEHInstruction(const MachineInstr *MI); + public: - explicit X86AsmPrinter(TargetMachine &TM, - std::unique_ptr<MCStreamer> Streamer) - : AsmPrinter(TM, std::move(Streamer)), SM(*this), FM(*this) {} + X86AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer); StringRef getPassName() const override { return "X86 Assembly Printer"; @@ -117,6 +120,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void EmitInstruction(const MachineInstr *MI) override; void EmitBasicBlockEnd(const MachineBasicBlock &MBB) override { + AsmPrinter::EmitBasicBlockEnd(MBB); SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo()); } @@ -133,10 +137,13 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { bool doInitialization(Module &M) override { SMShadowTracker.reset(0); SM.reset(); + FM.reset(); return AsmPrinter::doInitialization(M); } bool runOnMachineFunction(MachineFunction &F) override; + void EmitFunctionBodyStart() override; + void EmitFunctionBodyEnd() override; }; } // end namespace llvm diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index 765af67de160a..34e384ba31145 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -56,18 +56,27 @@ static cl::opt<bool> cl::desc("Avoid optimizing x86 call frames for size"), cl::init(false), cl::Hidden); +namespace llvm { +void initializeX86CallFrameOptimizationPass(PassRegistry &); +} + namespace { class X86CallFrameOptimization : public MachineFunctionPass { public: - X86CallFrameOptimization() : MachineFunctionPass(ID) {} + X86CallFrameOptimization() : MachineFunctionPass(ID) { + initializeX86CallFrameOptimizationPass( + *PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &MF) override; + static char ID; + private: // Information we know about a particular call site struct CallContext { - CallContext() : FrameSetup(nullptr), MovVector(4, nullptr) {} + CallContext() : FrameSetup(nullptr), ArgStoreVector(4, nullptr) {} // Iterator referring to the frame setup instruction MachineBasicBlock::iterator FrameSetup; @@ -81,8 +90,8 @@ class X86CallFrameOptimization : public MachineFunctionPass { // The total displacement of all passed parameters int64_t ExpectedDist = 0; - // The sequence of movs used to pass the parameters - SmallVector<MachineInstr *, 8> MovVector; + // The sequence of storing instructions used to pass the parameters + SmallVector<MachineInstr *, 8> ArgStoreVector; // True if this call site has no stack parameters bool NoStackParams = false; @@ -120,12 +129,12 @@ class X86CallFrameOptimization : public MachineFunctionPass { MachineRegisterInfo *MRI; unsigned SlotSize; unsigned Log2SlotSize; - static char ID; }; -char X86CallFrameOptimization::ID = 0; - } // end anonymous namespace +char X86CallFrameOptimization::ID = 0; +INITIALIZE_PASS(X86CallFrameOptimization, DEBUG_TYPE, + "X86 Call Frame Optimization", false, false) // This checks whether the transformation is legal.
// Also returns false in cases where it's potentially legal, but @@ -271,11 +280,27 @@ X86CallFrameOptimization::classifyInstruction( if (MI == MBB.end()) return Exit; - // The instructions we actually care about are movs onto the stack - int Opcode = MI->getOpcode(); - if (Opcode == X86::MOV32mi || Opcode == X86::MOV32mr || - Opcode == X86::MOV64mi32 || Opcode == X86::MOV64mr) - return Convert; + // The instructions we actually care about are movs onto the stack or special + // cases of constant-stores to stack + switch (MI->getOpcode()) { + case X86::AND16mi8: + case X86::AND32mi8: + case X86::AND64mi8: { + MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands); + return ImmOp.getImm() == 0 ? Convert : Exit; + } + case X86::OR16mi8: + case X86::OR32mi8: + case X86::OR64mi8: { + MachineOperand ImmOp = MI->getOperand(X86::AddrNumOperands); + return ImmOp.getImm() == -1 ? Convert : Exit; + } + case X86::MOV32mi: + case X86::MOV32mr: + case X86::MOV64mi32: + case X86::MOV64mr: + return Convert; + } // Not all calling conventions have only stack MOVs between the stack // adjust and the call. @@ -354,32 +379,40 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, ++I; unsigned StackPtr = RegInfo.getStackRegister(); + auto StackPtrCopyInst = MBB.end(); // SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual - // register here. If it's there, use that virtual register as stack pointer - // instead. - if (I->isCopy() && I->getOperand(0).isReg() && I->getOperand(1).isReg() && - I->getOperand(1).getReg() == StackPtr) { - Context.SPCopy = &*I++; - StackPtr = Context.SPCopy->getOperand(0).getReg(); - } + // register. If it's there, use that virtual register as stack pointer + // instead. Also, we need to locate this instruction so that we can later + // safely ignore it while doing the conservative processing of the call chain. + // The COPY can be located anywhere between the call-frame setup + // instruction and its first use. We use the call instruction as a boundary + // because it is usually cheaper to check if an instruction is a call than + // checking if an instruction uses a register. + for (auto J = I; !J->isCall(); ++J) + if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() && + J->getOperand(1).getReg() == StackPtr) { + StackPtrCopyInst = J; + Context.SPCopy = &*J++; + StackPtr = Context.SPCopy->getOperand(0).getReg(); + break; + } // Scan the call setup sequence for the pattern we're looking for. // We only handle a simple case - a sequence of store instructions that // push a sequence of stack-slot-aligned values onto the stack, with // no gaps between them. if (MaxAdjust > 4) - Context.MovVector.resize(MaxAdjust, nullptr); + Context.ArgStoreVector.resize(MaxAdjust, nullptr); - InstClassification Classification; DenseSet<unsigned> UsedRegs; - while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) != - Exit) { - if (Classification == Skip) { - ++I; + for (InstClassification Classification = Skip; Classification != Exit; ++I) { + // If this is the COPY of the stack pointer, it's ok to ignore. + if (I == StackPtrCopyInst) + continue; + Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs); + if (Classification != Convert) continue; - } - // We know the instruction has a supported store opcode.
// We only want movs of the form: // mov imm/reg, k(%StackPtr) @@ -407,13 +440,13 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, return; StackDisp >>= Log2SlotSize; - assert((size_t)StackDisp < Context.MovVector.size() && + assert((size_t)StackDisp < Context.ArgStoreVector.size() && "Function call has more parameters than the stack is adjusted for."); // If the same stack slot is being filled twice, something's fishy. - if (Context.MovVector[StackDisp] != nullptr) + if (Context.ArgStoreVector[StackDisp] != nullptr) return; - Context.MovVector[StackDisp] = &*I; + Context.ArgStoreVector[StackDisp] = &*I; for (const MachineOperand &MO : I->uses()) { if (!MO.isReg()) @@ -422,10 +455,10 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, if (RegInfo.isPhysicalRegister(Reg)) UsedRegs.insert(Reg); } - - ++I; } + --I; + // We now expect the end of the sequence. If we stopped early, // or reached the end of the block without finding a call, bail. if (I == MBB.end() || !I->isCall()) @@ -436,14 +469,14 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, return; // Now, go through the vector, and see that we don't have any gaps, - // but only a series of MOVs. - auto MMI = Context.MovVector.begin(), MME = Context.MovVector.end(); + // but only a series of storing instructions. + auto MMI = Context.ArgStoreVector.begin(), MME = Context.ArgStoreVector.end(); for (; MMI != MME; ++MMI, Context.ExpectedDist += SlotSize) if (*MMI == nullptr) break; // If the call had no parameters, do nothing - if (MMI == Context.MovVector.begin()) + if (MMI == Context.ArgStoreVector.begin()) return; // We are either at the last parameter, or a gap. @@ -466,17 +499,23 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, DebugLoc DL = FrameSetup->getDebugLoc(); bool Is64Bit = STI->is64Bit(); - // Now, iterate through the vector in reverse order, and replace the movs - // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to + // Now, iterate through the vector in reverse order, and replace the store to + // stack with pushes. MOVmi/MOVmr doesn't have any defs, so no need to // replace uses. for (int Idx = (Context.ExpectedDist >> Log2SlotSize) - 1; Idx >= 0; --Idx) { - MachineBasicBlock::iterator MOV = *Context.MovVector[Idx]; - MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); + MachineBasicBlock::iterator Store = *Context.ArgStoreVector[Idx]; + MachineOperand PushOp = Store->getOperand(X86::AddrNumOperands); MachineBasicBlock::iterator Push = nullptr; unsigned PushOpcode; - switch (MOV->getOpcode()) { + switch (Store->getOpcode()) { default: llvm_unreachable("Unexpected Opcode!"); + case X86::AND16mi8: + case X86::AND32mi8: + case X86::AND64mi8: + case X86::OR16mi8: + case X86::OR32mi8: + case X86::OR64mi8: case X86::MOV32mi: case X86::MOV64mi32: PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32; @@ -497,7 +536,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, // If storing a 32-bit vreg on 64-bit targets, extend to a 64-bit vreg // in preparation for the PUSH64. The upper 32 bits can be undef. 
- if (Is64Bit && MOV->getOpcode() == X86::MOV32mr) { + if (Is64Bit && Store->getOpcode() == X86::MOV32mr) { unsigned UndefReg = MRI->createVirtualRegister(&X86::GR64RegClass); Reg = MRI->createVirtualRegister(&X86::GR64RegClass); BuildMI(MBB, Context.Call, DL, TII->get(X86::IMPLICIT_DEF), UndefReg); @@ -541,7 +580,7 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, MBB, std::next(Push), DL, MCCFIInstruction::createAdjustCfaOffset(nullptr, SlotSize)); - MBB.erase(MOV); + MBB.erase(Store); } // The stack-pointer copy is no longer used in the call sequences. diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp index 9d1d24f3349c3..7beb9c6e357ba 100644 --- a/lib/Target/X86/X86CallLowering.cpp +++ b/lib/Target/X86/X86CallLowering.cpp @@ -1,4 +1,4 @@ -//===-- llvm/lib/Target/X86/X86CallLowering.cpp - Call lowering -----------===// +//===- llvm/lib/Target/X86/X86CallLowering.cpp - Call lowering ------------===// // // The LLVM Compiler Infrastructure // @@ -6,25 +6,45 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// +// /// \file /// This file implements the lowering of LLVM calls to machine code calls for /// GlobalISel. -/// +// //===----------------------------------------------------------------------===// #include "X86CallLowering.h" #include "X86CallingConv.h" #include "X86ISelLowering.h" #include "X86InstrInfo.h" -#include "X86TargetMachine.h" - +#include "X86RegisterInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> +#include <cstdint> using namespace llvm; @@ -38,7 +58,6 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, const DataLayout &DL, MachineRegisterInfo &MRI, SplitArgTy PerformArgSplit) const { - const X86TargetLowering &TLI = *getTLI<X86TargetLowering>(); LLVMContext &Context = OrigArg.Ty->getContext(); @@ -79,16 +98,16 @@ bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, } namespace { + struct OutgoingValueHandler : public CallLowering::ValueHandler { OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstrBuilder &MIB, CCAssignFn *AssignFn) - : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), StackSize(0), + : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), DL(MIRBuilder.getMF().getDataLayout()), - STI(MIRBuilder.getMF().getSubtarget<X86Subtarget>()), NumXMMRegs(0) {} + STI(MIRBuilder.getMF().getSubtarget<X86Subtarget>()) {} unsigned getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { - LLT p0 = LLT::pointer(0, DL.getPointerSizeInBits(0)); LLT SType =
LLT::scalar(DL.getPointerSizeInBits(0)); unsigned SPReg = MRI.createGenericVirtualRegister(p0); @@ -113,7 +132,6 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) override { - unsigned ExtReg = extendRegister(ValVReg, VA); auto MMO = MIRBuilder.getMF().getMachineMemOperand( MPO, MachineMemOperand::MOStore, VA.getLocVT().getStoreSize(), @@ -124,7 +142,6 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, const CallLowering::ArgInfo &Info, CCState &State) override { - bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); StackSize = State.getNextStackOffset(); @@ -142,16 +159,16 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { protected: MachineInstrBuilder &MIB; - uint64_t StackSize; + uint64_t StackSize = 0; const DataLayout &DL; const X86Subtarget &STI; - unsigned NumXMMRegs; + unsigned NumXMMRegs = 0; }; -} // End anonymous namespace. + +} // end anonymous namespace bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg) const { - assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg"); auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0); @@ -182,6 +199,7 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, } namespace { + struct IncomingValueHandler : public CallLowering::ValueHandler { IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, CCAssignFn *AssignFn) @@ -190,7 +208,6 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { unsigned getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { - auto &MFI = MIRBuilder.getMF().getFrameInfo(); int FI = MFI.CreateFixedObject(Size, Offset, true); MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI); @@ -203,13 +220,34 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size, MachinePointerInfo &MPO, CCValAssign &VA) override { - auto MMO = MIRBuilder.getMF().getMachineMemOperand( MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size, 0); MIRBuilder.buildLoad(ValVReg, Addr, *MMO); } + void assignValueToReg(unsigned ValVReg, unsigned PhysReg, + CCValAssign &VA) override { + markPhysRegUsed(PhysReg); + switch (VA.getLocInfo()) { + default: + MIRBuilder.buildCopy(ValVReg, PhysReg); + break; + case CCValAssign::LocInfo::SExt: + case CCValAssign::LocInfo::ZExt: + case CCValAssign::LocInfo::AExt: { + auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg); + MIRBuilder.buildTrunc(ValVReg, Copy); + break; + } + } + } + + /// How the physical register gets marked varies between formal + /// parameters (it's a basic-block live-in), and a call instruction + /// (it's an implicit-def of the BL). 
+ virtual void markPhysRegUsed(unsigned PhysReg) = 0; + protected: const DataLayout &DL; }; @@ -219,10 +257,8 @@ struct FormalArgHandler : public IncomingValueHandler { CCAssignFn *AssignFn) : IncomingValueHandler(MIRBuilder, MRI, AssignFn) {} - void assignValueToReg(unsigned ValVReg, unsigned PhysReg, - CCValAssign &VA) override { + void markPhysRegUsed(unsigned PhysReg) override { MIRBuilder.getMBB().addLiveIn(PhysReg); - MIRBuilder.buildCopy(ValVReg, PhysReg); } }; @@ -231,17 +267,15 @@ struct CallReturnHandler : public IncomingValueHandler { CCAssignFn *AssignFn, MachineInstrBuilder &MIB) : IncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} - void assignValueToReg(unsigned ValVReg, unsigned PhysReg, - CCValAssign &VA) override { + void markPhysRegUsed(unsigned PhysReg) override { MIB.addDef(PhysReg, RegState::Implicit); - MIRBuilder.buildCopy(ValVReg, PhysReg); } protected: MachineInstrBuilder &MIB; }; -} // namespace +} // end anonymous namespace bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, @@ -299,7 +333,6 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const MachineOperand &Callee, const ArgInfo &OrigRet, ArrayRef<ArgInfo> OrigArgs) const { - MachineFunction &MF = MIRBuilder.getMF(); const Function &F = *MF.getFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h index f8f83717586c3..6c9dc1565dad7 100644 --- a/lib/Target/X86/X86CallLowering.h +++ b/lib/Target/X86/X86CallLowering.h @@ -1,4 +1,4 @@ -//===-- llvm/lib/Target/X86/X86CallLowering.h - Call lowering -----===// +//===- llvm/lib/Target/X86/X86CallLowering.h - Call lowering ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,24 +6,24 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// +// /// \file /// This file describes how to lower LLVM calls to machine code calls. -/// +// //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_X86_X86CALLLOWERING -#define LLVM_LIB_TARGET_X86_X86CALLLOWERING +#ifndef LLVM_LIB_TARGET_X86_X86CALLLOWERING_H +#define LLVM_LIB_TARGET_X86_X86CALLLOWERING_H #include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include <functional> namespace llvm { -class Function; -class MachineIRBuilder; +class DataLayout; +class MachineRegisterInfo; class X86TargetLowering; -class Value; class X86CallLowering : public CallLowering { public: @@ -41,12 +41,14 @@ class X86CallLowering : public CallLowering { private: /// A function of this type is used to perform value split action. - typedef std::function<void(ArrayRef<unsigned>)> SplitArgTy; + using SplitArgTy = std::function<void(ArrayRef<unsigned>)>; bool splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl<ArgInfo> &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, SplitArgTy SplitArg) const; }; -} // namespace llvm -#endif + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_X86_X86CALLLOWERING_H diff --git a/lib/Target/X86/X86CmovConversion.cpp b/lib/Target/X86/X86CmovConversion.cpp index e31a7949f0be3..b2cd622b1e8c8 100644 --- a/lib/Target/X86/X86CmovConversion.cpp +++ b/lib/Target/X86/X86CmovConversion.cpp @@ -1,4 +1,4 @@ -//====-- X86CmovConversion.cpp - Convert Cmov to Branch -------------------===// +//====- X86CmovConversion.cpp - Convert Cmov to Branch --------------------===// // // The LLVM Compiler Infrastructure // @@ -6,6 +6,7 @@ // License. See LICENSE.TXT for details.
// //===----------------------------------------------------------------------===// +// /// \file /// This file implements a pass that converts X86 cmov instructions into /// branches when profitable. This pass is conservative. It transforms if and @@ -31,36 +32,61 @@ /// 25% branch misprediction. /// /// Note: This pass is assumed to run on SSA machine code. +// //===----------------------------------------------------------------------===// // // External interfaces: // FunctionPass *llvm::createX86CmovConverterPass(); // bool X86CmovConverterPass::runOnMachineFunction(MachineFunction &MF); // +//===----------------------------------------------------------------------===// #include "X86.h" #include "X86InstrInfo.h" -#include "X86Subtarget.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/IR/InstIterator.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include +#include +#include + using namespace llvm; -#define DEBUG_TYPE "x86-cmov-converter" +#define DEBUG_TYPE "x86-cmov-conversion" STATISTIC(NumOfSkippedCmovGroups, "Number of unsupported CMOV-groups"); STATISTIC(NumOfCmovGroupCandidate, "Number of CMOV-group candidates"); STATISTIC(NumOfLoopCandidate, "Number of CMOV-conversion profitable loops"); STATISTIC(NumOfOptimizedCmovGroups, "Number of optimized CMOV-groups"); -namespace { +namespace llvm { + +void initializeX86CmovConverterPassPass(PassRegistry &); + +} // end namespace llvm + // This internal switch can be used to turn off the cmov/branch optimization. static cl::opt EnableCmovConverter("x86-cmov-converter", @@ -77,28 +103,31 @@ static cl::opt ForceMemOperand( cl::desc("Convert cmovs to branches whenever they have memory operands."), cl::init(true), cl::Hidden); +namespace { + /// Converts X86 cmov instructions into branches when profitable. class X86CmovConverterPass : public MachineFunctionPass { public: - X86CmovConverterPass() : MachineFunctionPass(ID) {} - ~X86CmovConverterPass() {} + X86CmovConverterPass() : MachineFunctionPass(ID) { + initializeX86CmovConverterPassPass(*PassRegistry::getPassRegistry()); + } StringRef getPassName() const override { return "X86 cmov Conversion"; } bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override; -private: /// Pass identification, replacement for typeid. static char ID; +private: MachineRegisterInfo *MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; TargetSchedModel TSchedModel; /// List of consecutive CMOV instructions. 
- typedef SmallVector<MachineInstr *, 2> CmovGroup; - typedef SmallVector<CmovGroup, 2> CmovGroups; + using CmovGroup = SmallVector<MachineInstr *, 2>; + using CmovGroups = SmallVector<CmovGroup, 2>; /// Collect all CMOV-group-candidates in \p CurrLoop and update \p /// CmovInstGroups accordingly. @@ -125,6 +154,8 @@ class X86CmovConverterPass : public MachineFunctionPass { void convertCmovInstsToBranches(SmallVectorImpl<MachineInstr *> &Group) const; }; +} // end anonymous namespace + char X86CmovConverterPass::ID = 0; void X86CmovConverterPass::getAnalysisUsage(AnalysisUsage &AU) const { @@ -263,6 +294,9 @@ bool X86CmovConverterPass::collectCmovCandidates( bool SkipGroup = false; for (auto &I : *MBB) { + // Skip debug instructions. + if (I.isDebugValue()) + continue; X86::CondCode CC = X86::getCondFromCMovOpc(I.getOpcode()); // Check if we found an X86::CMOVrr instruction. if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) { @@ -400,6 +434,9 @@ bool X86CmovConverterPass::checkForProfitableCmovCandidates( // Clear physical registers Def map. RegDefMaps[PhyRegType].clear(); for (MachineInstr &MI : *MBB) { + // Skip debug instructions. + if (MI.isDebugValue()) + continue; unsigned MIDepth = 0; unsigned MIDepthOpt = 0; bool IsCMOV = CmovInstructions.count(&MI); @@ -558,11 +595,36 @@ static bool checkEFLAGSLive(MachineInstr *MI) { return false; } +/// Given \p First CMOV instruction and \p Last CMOV instruction representing a +/// group of CMOV instructions, which may contain debug instructions in between, +/// move all debug instructions to after the last CMOV instruction, making the +/// CMOV group consecutive. +static void packCmovGroup(MachineInstr *First, MachineInstr *Last) { + assert(X86::getCondFromCMovOpc(Last->getOpcode()) != X86::COND_INVALID && + "Last instruction in a CMOV group must be a CMOV instruction"); + + SmallVector<MachineInstr *, 2> DBGInstructions; + for (auto I = First->getIterator(), E = Last->getIterator(); I != E; I++) { + if (I->isDebugValue()) + DBGInstructions.push_back(&*I); + } + + // Splice the debug instructions after the cmov group. + MachineBasicBlock *MBB = First->getParent(); + for (auto *MI : DBGInstructions) + MBB->insertAfter(Last, MI->removeFromParent()); +} + void X86CmovConverterPass::convertCmovInstsToBranches( SmallVectorImpl<MachineInstr *> &Group) const { assert(!Group.empty() && "No CMOV instructions to convert"); ++NumOfOptimizedCmovGroups; + // If the CMOV group is not packed, e.g., there are debug instructions between + // the first CMOV and the last CMOV, then pack the group and make the CMOV + // instructions consecutive by moving the debug instructions to after the + // last CMOV. + packCmovGroup(Group.front(), Group.back()); + // To convert a CMOVcc instruction, we actually have to insert the diamond // control-flow pattern. The incoming instruction knows the destination vreg // to set, the condition code register to branch on, the true/false values to @@ -660,7 +722,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches( MI.getOperand(X86::getCondFromCMovOpc(MI.getOpcode()) == CC ? 1 : 2) .getReg(); // Walk back through any intermediate cmovs referenced. - for (;;) { + while (true) { auto FRIt = FalseBBRegRewriteTable.find(FalseReg); if (FRIt == FalseBBRegRewriteTable.end()) break; @@ -795,7 +857,11 @@ void X86CmovConverterPass::convertCmovInstsToBranches( MBB->erase(MIItBegin, MIItEnd); } -} // End anonymous namespace.
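As an aside for readers new to this trade-off, here is a minimal hand-written C++ sketch (illustrative only, not taken from the patch; the function names are invented) of the two shapes the pass chooses between. The pass rewrites the first form into the second at the MIR level when its scheduling model, which per the file comment charges roughly 25% misprediction for an unbiased condition, says the diamond is cheaper:

    // Branchless select: typically lowered to CMOVcc. The result depends on
    // both value operands, so the longer of the two dependency chains is paid
    // even when the condition is perfectly predictable.
    int selectWithCmov(bool Cond, int TrueVal, int FalseVal) {
      return Cond ? TrueVal : FalseVal;
    }

    // Diamond form: a conditional branch plus a join (a PHI in the sink
    // block at the MIR level). Only the taken side feeds the result, at the
    // price of possible branch mispredictions.
    int selectWithBranch(bool Cond, int TrueVal, int FalseVal) {
      if (Cond)
        return TrueVal;
      return FalseVal;
    }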
+INITIALIZE_PASS_BEGIN(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion", + false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(X86CmovConverterPass, DEBUG_TYPE, "X86 cmov Conversion", + false, false) FunctionPass *llvm::createX86CmovConverterPass() { return new X86CmovConverterPass(); diff --git a/lib/Target/X86/X86DomainReassignment.cpp b/lib/Target/X86/X86DomainReassignment.cpp new file mode 100644 index 0000000000000..4d015424317e1 --- /dev/null +++ b/lib/Target/X86/X86DomainReassignment.cpp @@ -0,0 +1,752 @@ +//===--- X86DomainReassignment.cpp - Selectively switch register classes---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass attempts to find instruction chains (closures) in one domain, +// and convert them to equivalent instructions in a different domain, +// if profitable. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +namespace llvm { +void initializeX86DomainReassignmentPass(PassRegistry &); +} + +#define DEBUG_TYPE "x86-domain-reassignment" + +STATISTIC(NumClosuresConverted, "Number of closures converted by the pass"); + +static cl::opt DisableX86DomainReassignment( + "disable-x86-domain-reassignment", cl::Hidden, + cl::desc("X86: Disable Virtual Register Reassignment."), cl::init(false)); + +namespace { +enum RegDomain { NoDomain = -1, GPRDomain, MaskDomain, OtherDomain }; + +static bool isGPR(const TargetRegisterClass *RC) { + return X86::GR64RegClass.hasSubClassEq(RC) || + X86::GR32RegClass.hasSubClassEq(RC) || + X86::GR16RegClass.hasSubClassEq(RC) || + X86::GR8RegClass.hasSubClassEq(RC); +} + +static bool isMask(const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) { + return X86::VK16RegClass.hasSubClassEq(RC); +} + +static RegDomain getDomain(const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) { + if (isGPR(RC)) + return GPRDomain; + if (isMask(RC, TRI)) + return MaskDomain; + return OtherDomain; +} + +/// Return a register class equivalent to \p SrcRC, in \p Domain. +static const TargetRegisterClass *getDstRC(const TargetRegisterClass *SrcRC, + RegDomain Domain) { + assert(Domain == MaskDomain && "add domain"); + if (SrcRC == &X86::GR8RegClass) + return &X86::VK8RegClass; + if (SrcRC == &X86::GR16RegClass) + return &X86::VK16RegClass; + if (SrcRC == &X86::GR32RegClass) + return &X86::VK32RegClass; + if (SrcRC == &X86::GR64RegClass) + return &X86::VK64RegClass; + llvm_unreachable("add register class"); + return nullptr; +} + +/// Abstract Instruction Converter class. +class InstrConverterBase { +protected: + unsigned SrcOpcode; + +public: + InstrConverterBase(unsigned SrcOpcode) : SrcOpcode(SrcOpcode) {} + + virtual ~InstrConverterBase() {} + + /// \returns true if \p MI is legal to convert. 
+ virtual bool isLegal(const MachineInstr *MI, + const TargetInstrInfo *TII) const { + assert(MI->getOpcode() == SrcOpcode && + "Wrong instruction passed to converter"); + return true; + } + + /// Applies conversion to \p MI. + /// + /// \returns true if \p MI is no longer needed, and can be deleted. + virtual bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, + MachineRegisterInfo *MRI) const = 0; + + /// \returns the cost increment incurred by converting \p MI. + virtual double getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const = 0; +}; + +/// An Instruction Converter which ignores the given instruction. +/// For example, PHI instructions can be safely ignored since only the registers +/// need to change. +class InstrIgnore : public InstrConverterBase { +public: + InstrIgnore(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {} + + bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, + MachineRegisterInfo *MRI) const override { + assert(isLegal(MI, TII) && "Cannot convert instruction"); + return false; + } + + double getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const override { + return 0; + } +}; + +/// An Instruction Converter which replaces an instruction with another. +class InstrReplacer : public InstrConverterBase { +public: + /// Opcode of the destination instruction. + unsigned DstOpcode; + + InstrReplacer(unsigned SrcOpcode, unsigned DstOpcode) + : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {} + + bool isLegal(const MachineInstr *MI, + const TargetInstrInfo *TII) const override { + if (!InstrConverterBase::isLegal(MI, TII)) + return false; + // It's illegal to replace an instruction that implicitly defines a register + // with an instruction that doesn't, unless that register is dead. + for (auto &MO : MI->implicit_operands()) + if (MO.isReg() && MO.isDef() && !MO.isDead() && + !TII->get(DstOpcode).hasImplicitDefOfPhysReg(MO.getReg())) + return false; + return true; + } + + bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, + MachineRegisterInfo *MRI) const override { + assert(isLegal(MI, TII) && "Cannot convert instruction"); + MachineInstrBuilder Bld = + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(DstOpcode)); + // Transfer explicit operands from original instruction. Implicit operands + // are handled by BuildMI. + for (auto &Op : MI->explicit_operands()) + Bld.add(Op); + return true; + } + + double getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const override { + // Assuming instructions have the same cost. + return 0; + } +};
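To make the intended call protocol of these converter classes concrete, here is a small hypothetical driver (not part of the patch; the helper name is invented, but the Converters map keying, MaskDomain, and the isLegal/convertInstr contract are the ones defined in this file):

    // Hypothetical illustration: convert one instruction to the mask domain
    // if a legal converter is registered for its opcode.
    static bool tryConvertToMask(MachineInstr *MI,
                                 const InstrConverterBaseMap &Converters,
                                 const TargetInstrInfo *TII,
                                 MachineRegisterInfo *MRI) {
      InstrConverterBase *IC = Converters.lookup({MaskDomain, MI->getOpcode()});
      if (!IC || !IC->isLegal(MI, TII))
        return false; // No legal converter: the instruction must stay in GPRs.
      if (IC->convertInstr(MI, TII, MRI))
        MI->eraseFromParent(); // convertInstr says MI is no longer needed.
      return true;
    }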
+/// An Instruction Converter which replaces an instruction with another, and +/// adds a COPY from the new instruction's destination to the old one's. +class InstrReplacerDstCOPY : public InstrConverterBase { +public: + unsigned DstOpcode; + + InstrReplacerDstCOPY(unsigned SrcOpcode, unsigned DstOpcode) + : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {} + + bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, + MachineRegisterInfo *MRI) const override { + assert(isLegal(MI, TII) && "Cannot convert instruction"); + MachineBasicBlock *MBB = MI->getParent(); + auto &DL = MI->getDebugLoc(); + + unsigned Reg = MRI->createVirtualRegister( + TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(), + *MBB->getParent())); + MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg); + for (unsigned Idx = 1, End = MI->getNumOperands(); Idx < End; ++Idx) + Bld.add(MI->getOperand(Idx)); + + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY)) + .add(MI->getOperand(0)) + .addReg(Reg); + + return true; + } + + double getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const override { + // Assuming instructions have the same cost, and that COPY is in the same + // domain so it will be eliminated. + return 0; + } +}; + +/// An Instruction Converter for replacing COPY instructions. +class InstrCOPYReplacer : public InstrReplacer { +public: + RegDomain DstDomain; + + InstrCOPYReplacer(unsigned SrcOpcode, RegDomain DstDomain, unsigned DstOpcode) + : InstrReplacer(SrcOpcode, DstOpcode), DstDomain(DstDomain) {} + + double getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const override { + assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY"); + + for (auto &MO : MI->operands()) { + // Physical registers will not be converted. Assume that converting the + // COPY to the destination domain will eventually result in an actual + // instruction. + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + return 1; + + RegDomain OpDomain = getDomain(MRI->getRegClass(MO.getReg()), + MRI->getTargetRegisterInfo()); + // Converting a cross-domain COPY to a same-domain COPY should eliminate + // an instruction. + if (OpDomain == DstDomain) + return -1; + } + return 0; + } +}; + +/// An Instruction Converter which replaces an instruction with a COPY. +class InstrReplaceWithCopy : public InstrConverterBase { +public: + // Source instruction operand Index, to be used as the COPY source. + unsigned SrcOpIdx; + + InstrReplaceWithCopy(unsigned SrcOpcode, unsigned SrcOpIdx) + : InstrConverterBase(SrcOpcode), SrcOpIdx(SrcOpIdx) {} + + bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, + MachineRegisterInfo *MRI) const override { + assert(isLegal(MI, TII) && "Cannot convert instruction"); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .add({MI->getOperand(0), MI->getOperand(SrcOpIdx)}); + return true; + } + + double getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const override { + return 0; + } +};
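The sign convention of getExtraCost is easiest to see with a small worked example (hand-computed from the rules above; the pass sums these per closure and only converts when the total is negative):

    // Illustrative closure cost, per the converters above:
    //   AND16rr -> KANDWrr               :  0  (InstrReplacer: same cost)
    //   COPY whose other operand is
    //   already in the mask domain       : -1  (InstrCOPYReplacer: the
    //                                           cross-domain copy vanishes)
    //   COPY from a physical GPR         : +1  (stays a real move)
    //
    //   Total = 0 + (-1) + (+1) = 0  =>  not profitable, closure is kept.
    //   Without the physreg COPY the total is -1 and conversion proceeds.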
+/// An Instruction Converter which completely deletes an instruction. +/// For example, IMPLICIT_DEF instructions can be deleted when converting from +/// GPR to mask. +class InstrDeleter : public InstrConverterBase { +public: + InstrDeleter(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {} + + bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII, + MachineRegisterInfo *MRI) const override { + assert(isLegal(MI, TII) && "Cannot convert instruction"); + return true; + } + + double getExtraCost(const MachineInstr *MI, + MachineRegisterInfo *MRI) const override { + return 0; + } +}; + +// Key type to be used by the Instruction Converters map. +// A converter is identified by <destination domain, source opcode>. +typedef std::pair<int, unsigned> InstrConverterBaseKeyTy; + +typedef DenseMap<InstrConverterBaseKeyTy, InstrConverterBase *> + InstrConverterBaseMap; + +/// A closure is a set of virtual registers representing all of the edges in +/// the closure, as well as all of the instructions connected by those edges. +/// +/// A closure may encompass virtual registers in the same register bank that +/// have different widths. For example, it may contain 32-bit GPRs as well as +/// 64-bit GPRs. +/// +/// A closure that computes an address (i.e. defines a virtual register that is +/// used in a memory operand) excludes the instructions that contain memory +/// operands using the address. Such an instruction will be included in a +/// different closure that manipulates the loaded or stored value. +class Closure { +private: + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + + /// Virtual registers in the closure. + DenseSet<unsigned> Edges; + + /// Instructions in the closure. + SmallVector<MachineInstr *, 8> Instrs; + + /// A map of available Instruction Converters. + const InstrConverterBaseMap &Converters; + + /// The register domain of this closure. + RegDomain Domain; + + /// Domains which this closure can legally be reassigned to. + SmallVector<RegDomain, 2> LegalDstDomains; + + SmallVector<RegDomain, 2> getLegalDstDomains() const { + return LegalDstDomains; + } + + /// Enqueue \p Reg to be considered for addition to the closure. + void visitRegister(unsigned Reg, SmallVectorImpl<unsigned> &Worklist); + + /// Add \p MI to this closure. + void encloseInstr(MachineInstr *MI); + + /// Calculate the total cost of reassigning the closure to \p Domain. + double calculateCost(RegDomain Domain) const; + + /// All edges that are included in some closure. + DenseSet<unsigned> &EnclosedEdges; + + /// All instructions that are included in some closure. + DenseMap<MachineInstr *, Closure *> &EnclosedInstrs; + +public: + Closure(const TargetInstrInfo *TII, MachineRegisterInfo *MRI, + const InstrConverterBaseMap &Converters, + const SmallVector<RegDomain, 2> &LegalDstDomains, + DenseSet<unsigned> &EnclosedEdges, + DenseMap<MachineInstr *, Closure *> &EnclosedInstrs) + : TII(TII), MRI(MRI), Converters(Converters), Domain(NoDomain), + LegalDstDomains(LegalDstDomains), EnclosedEdges(EnclosedEdges), + EnclosedInstrs(EnclosedInstrs) {} + + /// Starting from \p Reg, expand the closure as much as possible. + void buildClosure(unsigned Reg); + + /// \returns true if it is profitable to reassign the closure to \p Domain. + bool isReassignmentProfitable(RegDomain Domain) const; + + /// Reassign the closure to \p Domain. + void Reassign(RegDomain Domain) const; + + /// Mark this closure as illegal for reassignment to all domains. + void setAllIllegal() { LegalDstDomains.clear(); } + + /// \returns true if this closure has domains which are legal to reassign to. + bool hasLegalDstDomain() const { return !LegalDstDomains.empty(); }
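A hand-written example (not from the patch) of the kind of chain the closure machinery is meant to capture, using opcodes that the converter table initialized later in this file actually covers:

    // GPR domain (before)                  Mask domain (after reassignment)
    //   %0 = MOV16rm <mask-slot>     -->     %0 = KMOVWkm <mask-slot>
    //   %1 = AND16rr %0, %2          -->     %1 = KANDWrr %0, %2
    //        MOV16mr <mask-slot>, %1 -->          KMOVWmk <mask-slot>, %1
    //
    // Every def/use edge stays inside the closure and every opcode has a
    // MaskDomain converter, so the whole chain can move to k-registers and
    // the GPR<->mask copies around it can disappear.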
+ /// \returns true if it is legal to reassign this closure to domain \p RD. + bool isLegal(RegDomain RD) const { return is_contained(LegalDstDomains, RD); } + + bool empty() const { return Edges.empty(); } +}; + +class X86DomainReassignment : public MachineFunctionPass { +public: + static char ID; + + X86DomainReassignment() : MachineFunctionPass(ID) { + initializeX86DomainReassignmentPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { + return "X86 Domain Reassignment Pass"; + } + +private: + const X86Subtarget *STI; + MachineRegisterInfo *MRI; + const X86InstrInfo *TII; + + /// A map of available Instruction Converters. + InstrConverterBaseMap Converters; + + /// Initialize Converters map. + void initConverters(); +}; + +char X86DomainReassignment::ID = 0; + +} // End anonymous namespace. + +void Closure::visitRegister(unsigned Reg, SmallVectorImpl<unsigned> &Worklist) { + if (EnclosedEdges.count(Reg)) + return; + + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return; + + if (!MRI->hasOneDef(Reg)) + return; + + RegDomain RD = getDomain(MRI->getRegClass(Reg), MRI->getTargetRegisterInfo()); + // First edge in closure sets the domain. + if (Domain == NoDomain) + Domain = RD; + + if (Domain != RD) + return; + + Worklist.push_back(Reg); +} + +void Closure::encloseInstr(MachineInstr *MI) { + auto I = EnclosedInstrs.find(MI); + if (I != EnclosedInstrs.end()) { + if (I->second != this) + // Instruction already belongs to another closure, avoid conflicts between + // closures and mark this closure as illegal. + setAllIllegal(); + return; + } + + EnclosedInstrs[MI] = this; + Instrs.push_back(MI); + + // Mark closure as illegal for reassignment to domains, if there is no + // converter for the instruction or if the converter cannot convert the + // instruction. + erase_if(LegalDstDomains, [&](RegDomain D) { + InstrConverterBase *IC = Converters.lookup({D, MI->getOpcode()}); + return !IC || !IC->isLegal(MI, TII); + }); +} + +double Closure::calculateCost(RegDomain DstDomain) const { + assert(isLegal(DstDomain) && "Cannot calculate cost for illegal closure"); + + double Cost = 0.0; + for (auto MI : Instrs) + Cost += + Converters.lookup({DstDomain, MI->getOpcode()})->getExtraCost(MI, MRI); + return Cost; +} + +bool Closure::isReassignmentProfitable(RegDomain Domain) const { + return calculateCost(Domain) < 0.0; +} + +void Closure::Reassign(RegDomain Domain) const { + assert(isLegal(Domain) && "Cannot convert illegal closure"); + + // Iterate all instructions in the closure, convert each one using the + // appropriate converter. + SmallVector<MachineInstr *, 8> ToErase; + for (auto MI : Instrs) + if (Converters.lookup({Domain, MI->getOpcode()}) + ->convertInstr(MI, TII, MRI)) + ToErase.push_back(MI); + + // Iterate all registers in the closure, replace them with registers in the + // destination domain. + for (unsigned Reg : Edges) { + MRI->setRegClass(Reg, getDstRC(MRI->getRegClass(Reg), Domain)); + for (auto &MO : MRI->use_operands(Reg)) { + if (MO.isReg()) + // Remove all subregister references as they are not valid in the + // destination domain. + MO.setSubReg(0); + } + } + + for (auto MI : ToErase) + MI->eraseFromParent(); +}
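The address-calculation restriction enforced during closure construction can be illustrated with a short hand-written MIR fragment (invented for illustration):

    //   %idx = AND32rr %a, %b
    //   %val = MOV32rm %base, 4, %idx, 0   ; %idx feeds the address
    //
    // Mask (k) registers cannot participate in addressing modes, so when
    // buildClosure sees %idx used this way it calls setAllIllegal() and the
    // closure stays in the GPR domain; usedAsAddr() below detects this case.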
+/// \returns true when \p Reg is used as part of an address calculation in +/// \p MI. +static bool usedAsAddr(const MachineInstr &MI, unsigned Reg, + const TargetInstrInfo *TII) { + if (!MI.mayLoadOrStore()) + return false; + + const MCInstrDesc &Desc = TII->get(MI.getOpcode()); + int MemOpStart = X86II::getMemoryOperandNo(Desc.TSFlags); + if (MemOpStart == -1) + return false; + + MemOpStart += X86II::getOperandBias(Desc); + for (unsigned MemOpIdx = MemOpStart, + MemOpEnd = MemOpStart + X86::AddrNumOperands; + MemOpIdx < MemOpEnd; ++MemOpIdx) { + auto &Op = MI.getOperand(MemOpIdx); + if (Op.isReg() && Op.getReg() == Reg) + return true; + } + return false; +} + +void Closure::buildClosure(unsigned Reg) { + SmallVector<unsigned, 4> Worklist; + visitRegister(Reg, Worklist); + while (!Worklist.empty()) { + unsigned CurReg = Worklist.pop_back_val(); + + // Register already in this closure. + if (!Edges.insert(CurReg).second) + continue; + + MachineInstr *DefMI = MRI->getVRegDef(CurReg); + encloseInstr(DefMI); + + // Add registers used by the defining MI to the worklist. + // Do not add registers which are used in address calculation, they will be + // added to a different closure. + int OpEnd = DefMI->getNumOperands(); + const MCInstrDesc &Desc = DefMI->getDesc(); + int MemOp = X86II::getMemoryOperandNo(Desc.TSFlags); + if (MemOp != -1) + MemOp += X86II::getOperandBias(Desc); + for (int OpIdx = 0; OpIdx < OpEnd; ++OpIdx) { + if (OpIdx == MemOp) { + // Skip address calculation. + OpIdx += (X86::AddrNumOperands - 1); + continue; + } + auto &Op = DefMI->getOperand(OpIdx); + if (!Op.isReg() || !Op.isUse()) + continue; + visitRegister(Op.getReg(), Worklist); + } + + // Expand closure through register uses. + for (auto &UseMI : MRI->use_nodbg_instructions(CurReg)) { + // We would like to avoid converting closures which calculate addresses, + // as these should remain in GPRs.
+ if (usedAsAddr(UseMI, CurReg, TII)) { + setAllIllegal(); + continue; + } + encloseInstr(&UseMI); + + for (auto &DefOp : UseMI.defs()) { + if (!DefOp.isReg()) + continue; + + unsigned DefReg = DefOp.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DefReg)) { + setAllIllegal(); + continue; + } + visitRegister(DefReg, Worklist); + } + } + } +} + +void X86DomainReassignment::initConverters() { + Converters[{MaskDomain, TargetOpcode::PHI}] = + new InstrIgnore(TargetOpcode::PHI); + + Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] = + new InstrDeleter(TargetOpcode::IMPLICIT_DEF); + + Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] = + new InstrReplaceWithCopy(TargetOpcode::INSERT_SUBREG, 2); + + Converters[{MaskDomain, TargetOpcode::COPY}] = + new InstrCOPYReplacer(TargetOpcode::COPY, MaskDomain, TargetOpcode::COPY); + + auto createReplacerDstCOPY = [&](unsigned From, unsigned To) { + Converters[{MaskDomain, From}] = new InstrReplacerDstCOPY(From, To); + }; + + createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm); + createReplacerDstCOPY(X86::MOVZX64rm16, X86::KMOVWkm); + + createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk); + createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk); + + if (STI->hasDQI()) { + createReplacerDstCOPY(X86::MOVZX16rm8, X86::KMOVBkm); + createReplacerDstCOPY(X86::MOVZX32rm8, X86::KMOVBkm); + createReplacerDstCOPY(X86::MOVZX64rm8, X86::KMOVBkm); + + createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk); + createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk); + createReplacerDstCOPY(X86::MOVZX64rr8, X86::KMOVBkk); + } + + auto createReplacer = [&](unsigned From, unsigned To) { + Converters[{MaskDomain, From}] = new InstrReplacer(From, To); + }; + + createReplacer(X86::MOV16rm, X86::KMOVWkm); + createReplacer(X86::MOV16mr, X86::KMOVWmk); + createReplacer(X86::MOV16rr, X86::KMOVWkk); + createReplacer(X86::SHR16ri, X86::KSHIFTRWri); + createReplacer(X86::SHL16ri, X86::KSHIFTLWri); + createReplacer(X86::NOT16r, X86::KNOTWrr); + createReplacer(X86::OR16rr, X86::KORWrr); + createReplacer(X86::AND16rr, X86::KANDWrr); + createReplacer(X86::XOR16rr, X86::KXORWrr); + + if (STI->hasBWI()) { + createReplacer(X86::MOV32rm, X86::KMOVDkm); + createReplacer(X86::MOV64rm, X86::KMOVQkm); + + createReplacer(X86::MOV32mr, X86::KMOVDmk); + createReplacer(X86::MOV64mr, X86::KMOVQmk); + + createReplacer(X86::MOV32rr, X86::KMOVDkk); + createReplacer(X86::MOV64rr, X86::KMOVQkk); + + createReplacer(X86::SHR32ri, X86::KSHIFTRDri); + createReplacer(X86::SHR64ri, X86::KSHIFTRQri); + + createReplacer(X86::SHL32ri, X86::KSHIFTLDri); + createReplacer(X86::SHL64ri, X86::KSHIFTLQri); + + createReplacer(X86::ADD32rr, X86::KADDDrr); + createReplacer(X86::ADD64rr, X86::KADDQrr); + + createReplacer(X86::NOT32r, X86::KNOTDrr); + createReplacer(X86::NOT64r, X86::KNOTQrr); + + createReplacer(X86::OR32rr, X86::KORDrr); + createReplacer(X86::OR64rr, X86::KORQrr); + + createReplacer(X86::AND32rr, X86::KANDDrr); + createReplacer(X86::AND64rr, X86::KANDQrr); + + createReplacer(X86::ANDN32rr, X86::KANDNDrr); + createReplacer(X86::ANDN64rr, X86::KANDNQrr); + + createReplacer(X86::XOR32rr, X86::KXORDrr); + createReplacer(X86::XOR64rr, X86::KXORQrr); + + createReplacer(X86::TEST32rr, X86::KTESTDrr); + createReplacer(X86::TEST64rr, X86::KTESTQrr); + } + + if (STI->hasDQI()) { + createReplacer(X86::ADD8rr, X86::KADDBrr); + createReplacer(X86::ADD16rr, X86::KADDWrr); + + createReplacer(X86::AND8rr, X86::KANDBrr); + + createReplacer(X86::MOV8rm, X86::KMOVBkm); + createReplacer(X86::MOV8mr, 
X86::KMOVBmk); + createReplacer(X86::MOV8rr, X86::KMOVBkk); + + createReplacer(X86::NOT8r, X86::KNOTBrr); + + createReplacer(X86::OR8rr, X86::KORBrr); + + createReplacer(X86::SHR8ri, X86::KSHIFTRBri); + createReplacer(X86::SHL8ri, X86::KSHIFTLBri); + + createReplacer(X86::TEST8rr, X86::KTESTBrr); + createReplacer(X86::TEST16rr, X86::KTESTWrr); + + createReplacer(X86::XOR8rr, X86::KXORBrr); + } +} + +bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(*MF.getFunction())) + return false; + if (DisableX86DomainReassignment) + return false; + + DEBUG(dbgs() << "***** Machine Function before Domain Reassignment *****\n"); + DEBUG(MF.print(dbgs())); + + STI = &MF.getSubtarget(); + // GPR->K is the only transformation currently supported, bail out early if no + // AVX512. + if (!STI->hasAVX512()) + return false; + + MRI = &MF.getRegInfo(); + assert(MRI->isSSA() && "Expected MIR to be in SSA form"); + + TII = STI->getInstrInfo(); + initConverters(); + bool Changed = false; + + DenseSet EnclosedEdges; + DenseMap EnclosedInstrs; + + std::vector Closures; + + // Go over all virtual registers and calculate a closure. + for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(Idx); + + // GPR only current source domain supported. + if (!isGPR(MRI->getRegClass(Reg))) + continue; + + // Register already in closure. + if (EnclosedEdges.count(Reg)) + continue; + + // Calculate closure starting with Reg. + Closure C(TII, MRI, Converters, {MaskDomain}, EnclosedEdges, + EnclosedInstrs); + C.buildClosure(Reg); + + // Collect all closures that can potentially be converted. + if (!C.empty() && C.isLegal(MaskDomain)) + Closures.push_back(std::move(C)); + } + + for (Closure &C : Closures) + if (C.isReassignmentProfitable(MaskDomain)) { + C.Reassign(MaskDomain); + ++NumClosuresConverted; + Changed = true; + } + + for (auto I : Converters) + delete I.second; + + DEBUG(dbgs() << "***** Machine Function after Domain Reassignment *****\n"); + DEBUG(MF.print(dbgs())); + + return Changed; +} + +INITIALIZE_PASS(X86DomainReassignment, "x86-domain-reassignment", + "X86 Domain Reassignment Pass", false, false) + +/// Returns an instance of the Domain Reassignment pass. +FunctionPass *llvm::createX86DomainReassignmentPass() { + return new X86DomainReassignment(); +} diff --git a/lib/Target/X86/X86EvexToVex.cpp b/lib/Target/X86/X86EvexToVex.cpp index ca59d287dab37..cc45c1f3e1ded 100644 --- a/lib/Target/X86/X86EvexToVex.cpp +++ b/lib/Target/X86/X86EvexToVex.cpp @@ -1,4 +1,4 @@ -//===----------------------- X86EvexToVex.cpp ----------------------------===// +//===- X86EvexToVex.cpp ---------------------------------------------------===// // Compress EVEX instructions to VEX encoding when possible to reduce code size // // The LLVM Compiler Infrastructure @@ -6,18 +6,19 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//===---------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// /// \file /// This file defines the pass that goes over all AVX-512 instructions which /// are encoded using the EVEX prefix and if possible replaces them by their /// corresponding VEX encoding which is usually shorter by 2 bytes. 
/// EVEX instructions may be encoded via the VEX prefix when the AVX-512 /// instruction has a corresponding AVX/AVX2 opcode and when it does not -/// use the xmm or the mask registers or xmm/ymm registers wuith indexes +/// use the xmm or the mask registers or xmm/ymm registers with indexes /// higher than 15. /// The pass applies code reduction on the generated code for AVX-512 instrs. -/// -//===---------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// #include "InstPrinter/X86InstComments.h" #include "MCTargetDesc/X86BaseInfo.h" @@ -54,7 +55,7 @@ namespace { class EvexToVexInstPass : public MachineFunctionPass { /// X86EvexToVexCompressTable - Evex to Vex encoding opcode map. - typedef DenseMap EvexToVexTableType; + using EvexToVexTableType = DenseMap; EvexToVexTableType EvexToVex128Table; EvexToVexTableType EvexToVex256Table; @@ -101,10 +102,10 @@ class EvexToVexInstPass : public MachineFunctionPass { const X86InstrInfo *TII; }; -char EvexToVexInstPass::ID = 0; - } // end anonymous namespace +char EvexToVexInstPass::ID = 0; + bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); @@ -176,7 +177,6 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const { if (It != EvexToVex256Table.end()) NewOpc = It->second; } - // Check for EVEX_V128 or Scalar instructions. else if (IsEVEX_V128) { // Search for opcode in the EvexToVex128 table. diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 85aa944c46532..49ff90644e45e 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -210,8 +210,8 @@ getX86SSEConditionCode(CmpInst::Predicate Predicate) { case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH; case CmpInst::FCMP_UGT: CC = 6; break; case CmpInst::FCMP_ORD: CC = 7; break; - case CmpInst::FCMP_UEQ: - case CmpInst::FCMP_ONE: CC = 8; break; + case CmpInst::FCMP_UEQ: CC = 8; break; + case CmpInst::FCMP_ONE: CC = 12; break; } return std::make_pair(CC, NeedSwap); @@ -2178,7 +2178,7 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { unsigned CC; bool NeedSwap; std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate); - if (CC > 7) + if (CC > 7 && !Subtarget->hasAVX()) return false; if (NeedSwap) @@ -3970,7 +3970,7 @@ unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode, Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); - Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 3); + Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 382c71ead5cbb..988f2967401b0 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -924,6 +924,7 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, Notes: - .seh directives are emitted only for Windows 64 ABI + - .cv_fpo directives are emitted on win32 when emitting CodeView - .cfi directives are emitted for all other ABIs - for 32-bit code, substitute %e?? registers for %r?? 
*/ @@ -949,7 +950,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, bool HasFP = hasFP(MF); bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv()); bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); - bool NeedsWinCFI = IsWin64Prologue && Fn->needsUnwindTableEntry(); + bool NeedsWin64CFI = IsWin64Prologue && Fn->needsUnwindTableEntry(); + // FIXME: Emit FPO data for EH funclets. + bool NeedsWinFPO = + !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag(); + bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; bool NeedsDwarfCFI = !IsWin64Prologue && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); unsigned FramePtr = TRI->getFrameRegister(MF); @@ -958,7 +963,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, ? getX86SubSuperRegister(FramePtr, 64) : FramePtr; unsigned BasePtr = TRI->getBaseRegister(); bool HasWinCFI = false; - + // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc DL; @@ -1120,6 +1125,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister( nullptr, DwarfFramePtr)); } + + if (NeedsWinFPO) { + // .cv_fpo_setframe $FramePtr + HasWinCFI = true; + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) + .addImm(FramePtr) + .addImm(0) + .setMIFlag(MachineInstr::FrameSetup); + } } } else { assert(!IsFunclet && "funclets without FPs not yet implemented"); @@ -1155,8 +1169,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, if (NeedsWinCFI) { HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag( - MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) + .addImm(Reg) + .setMIFlag(MachineInstr::FrameSetup); } } @@ -1295,6 +1310,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // If this is not a funclet, emit the CFI describing our frame pointer. if (NeedsWinCFI && !IsFunclet) { + assert(!NeedsWinFPO && "this setframe incompatible with FPO data"); HasWinCFI = true; BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) .addImm(FramePtr) @@ -1333,6 +1349,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, Offset += SEHFrameOffset; HasWinCFI = true; + assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data"); BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) .addImm(Reg) .addImm(Offset) @@ -1522,10 +1539,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo(); - MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); - Optional RetOpcode; - if (MBBI != MBB.end()) - RetOpcode = MBBI->getOpcode(); + MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator(); + MachineBasicBlock::iterator MBBI = Terminator; DebugLoc DL; if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); @@ -1536,38 +1551,21 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, Is64BitILP32 ? getX86SubSuperRegister(FramePtr, 64) : FramePtr; bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); - bool NeedsWinCFI = + bool NeedsWin64CFI = IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry(); bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI); - MachineBasicBlock *TargetMBB = nullptr; // Get the number of bytes to allocate from the FrameInfo. 
uint64_t StackSize = MFI.getStackSize(); uint64_t MaxAlign = calculateMaxStackAlign(MF); unsigned CSSize = X86FI->getCalleeSavedFrameSize(); + bool HasFP = hasFP(MF); uint64_t NumBytes = 0; - if (RetOpcode && *RetOpcode == X86::CATCHRET) { - // SEH shouldn't use catchret. - assert(!isAsynchronousEHPersonality( - classifyEHPersonality(MF.getFunction()->getPersonalityFn())) && - "SEH should not use CATCHRET"); - - NumBytes = getWinEHFuncletFrameSize(MF); - assert(hasFP(MF) && "EH funclets without FP not yet implemented"); - TargetMBB = MBBI->getOperand(0).getMBB(); - - // Pop EBP. - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), - MachineFramePtr) - .setMIFlag(MachineInstr::FrameDestroy); - } else if (RetOpcode && *RetOpcode == X86::CLEANUPRET) { + if (IsFunclet) { + assert(HasFP && "EH funclets without FP not yet implemented"); NumBytes = getWinEHFuncletFrameSize(MF); - assert(hasFP(MF) && "EH funclets without FP not yet implemented"); - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), - MachineFramePtr) - .setMIFlag(MachineInstr::FrameDestroy); - } else if (hasFP(MF)) { + } else if (HasFP) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; NumBytes = FrameSize - CSSize; @@ -1576,16 +1574,18 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // realigned. if (TRI->needsStackRealignment(MF) && !IsWin64Prologue) NumBytes = alignTo(FrameSize, MaxAlign); - - // Pop EBP. - BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr) - .setMIFlag(MachineInstr::FrameDestroy); } else { NumBytes = StackSize - CSSize; } uint64_t SEHStackAllocAmt = NumBytes; + if (HasFP) { + // Pop EBP. + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), + MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); + } + MachineBasicBlock::iterator FirstCSPop = MBBI; // Skip the callee-saved pop instructions. while (MBBI != MBB.begin()) { @@ -1603,26 +1603,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } MBBI = FirstCSPop; - if (TargetMBB) { - // Fill EAX/RAX with the address of the target block. - unsigned ReturnReg = STI.is64Bit() ? X86::RAX : X86::EAX; - if (STI.is64Bit()) { - // LEA64r TargetMBB(%rip), %rax - BuildMI(MBB, FirstCSPop, DL, TII.get(X86::LEA64r), ReturnReg) - .addReg(X86::RIP) - .addImm(0) - .addReg(0) - .addMBB(TargetMBB) - .addReg(0); - } else { - // MOV32ri $TargetMBB, %eax - BuildMI(MBB, FirstCSPop, DL, TII.get(X86::MOV32ri), ReturnReg) - .addMBB(TargetMBB); - } - // Record that we've taken the address of TargetMBB and no longer just - // reference it in a terminator. - TargetMBB->setHasAddressTaken(); - } + if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET) + emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator); if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); @@ -1674,19 +1656,17 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // into the epilogue. To cope with that, we insert an epilogue marker here, // then replace it with a 'nop' if it ends up immediately after a CALL in the // final emitted code. - if (NeedsWinCFI && MF.hasWinCFI()) + if (NeedsWin64CFI && MF.hasWinCFI()) BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue)); - if (!RetOpcode || !isTailCallOpcode(*RetOpcode)) { + if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) { // Add the return addr area delta back since we are not tail calling. 
int Offset = -1 * X86FI->getTCReturnAddrDelta(); assert(Offset >= 0 && "TCDelta should never be positive"); if (Offset) { - MBBI = MBB.getFirstTerminator(); - // Check for possible merge with preceding ADD instruction. - Offset += mergeSPUpdates(MBB, MBBI, true); - emitSPUpdate(MBB, MBBI, Offset, /*InEpilogue=*/true); + Offset += mergeSPUpdates(MBB, Terminator, true); + emitSPUpdate(MBB, Terminator, Offset, /*InEpilogue=*/true); } } } @@ -1997,6 +1977,36 @@ bool X86FrameLowering::spillCalleeSavedRegisters( return true; } +void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineInstr *CatchRet) const { + // SEH shouldn't use catchret. + assert(!isAsynchronousEHPersonality(classifyEHPersonality( + MBB.getParent()->getFunction()->getPersonalityFn())) && + "SEH should not use CATCHRET"); + DebugLoc DL = CatchRet->getDebugLoc(); + MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB(); + + // Fill EAX/RAX with the address of the target block. + if (STI.is64Bit()) { + // LEA64r CatchRetTarget(%rip), %rax + BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX) + .addReg(X86::RIP) + .addImm(0) + .addReg(0) + .addMBB(CatchRetTarget) + .addReg(0); + } else { + // MOV32ri $CatchRetTarget, %eax + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addMBB(CatchRetTarget); + } + + // Record that we've taken the address of CatchRetTarget and no longer just + // reference it in a terminator. + CatchRetTarget->setHasAddressTaken(); +} + bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, std::vector &CSI, diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 773baff327d65..38ac96e16d4e0 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -157,15 +157,6 @@ class X86FrameLowering : public TargetFrameLowering { void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl &ObjectsToAllocate) const override; - /// convertArgMovsToPushes - This method tries to convert a call sequence - /// that uses sub and mov instructions to put the argument onto the stack - /// into a series of pushes. - /// Returns true if the transformation succeeded, false if not. - bool convertArgMovsToPushes(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - uint64_t Amount) const; - /// Wraps up getting a CFI index and building a MachineInstr for it. void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const MCCFIInstruction &CFIInst) const; @@ -214,6 +205,11 @@ class X86FrameLowering : public TargetFrameLowering { unsigned getPSPSlotOffsetFromSP(const MachineFunction &MF) const; unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const; + + /// Materialize the catchret target MBB in RAX. 
+ void emitCatchRetReturnValue(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineInstr *CatchRet) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 250d250af353f..3aa3244a70685 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -371,8 +371,7 @@ namespace { assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width"); uint64_t Index = N->getConstantOperandVal(1); MVT VecVT = N->getOperand(0).getSimpleValueType(); - unsigned NumElemsPerChunk = VecWidth / VecVT.getScalarSizeInBits(); - return getI8Imm(Index / NumElemsPerChunk, DL); + return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); } SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth, @@ -380,8 +379,7 @@ namespace { assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width"); uint64_t Index = N->getConstantOperandVal(2); MVT VecVT = N->getSimpleValueType(0); - unsigned NumElemsPerChunk = VecWidth / VecVT.getScalarSizeInBits(); - return getI8Imm(Index / NumElemsPerChunk, DL); + return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); } /// Return an SDNode that returns the value of the global base register. @@ -2594,7 +2592,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { unsigned LoReg; switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); - case MVT::i8: LoReg = X86::AL; Opc = X86::MUL8r; break; + // MVT::i8 is handled by X86ISD::UMUL8. case MVT::i16: LoReg = X86::AX; Opc = X86::MUL16r; break; case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break; case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break; @@ -3025,7 +3023,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) { } // For example, "testl %eax, $32776" to "testw %ax, $32776". - if (isUInt<16>(Mask) && N0.getValueType() != MVT::i16 && + // NOTE: We only want to form TESTW instructions if optimizing for + // min size. Otherwise we only save one byte and possibly get a length + // changing prefix penalty in the decoders. + if (OptForMinSize && isUInt<16>(Mask) && N0.getValueType() != MVT::i16 && (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) { SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i16); SDValue Reg = N0.getOperand(0); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e78f70cc52cb9..c0215d2632004 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -188,6 +188,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); setCondCodeAction(ISD::SETUNE, MVT::f80, Expand); + // Integer absolute. + if (Subtarget.hasCMov()) { + setOperationAction(ISD::ABS , MVT::i16 , Custom); + setOperationAction(ISD::ABS , MVT::i32 , Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::ABS , MVT::i64 , Custom); + } + // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this // operation. setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); @@ -425,12 +433,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand); setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); - // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support - // SjLj exception handling but a light-weight setjmp/longjmp replacement to - // support continuation, user-level threading, and etc.. 
As a result, no - // other SjLj exception interfaces are implemented and please don't build - // your own exception handling based on them. - // LLVM/Clang supports zero-cost DWARF exception handling. + // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since + // LLVM/Clang supports zero-cost DWARF and SEH exception handling. setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); @@ -1144,10 +1148,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, for (MVT VT : MVT::fp_vector_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal); - for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) { + for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal); setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal); - setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); @@ -1222,8 +1225,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MSTORE, VT, Custom); } } - setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); if (Subtarget.hasDQI()) { for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) { @@ -1251,20 +1252,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom); - - // FIXME. This commands are available on SSE/AVX2, add relevant patterns. 
- setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i16, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i16, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i8, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i16, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i32, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal); } + setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); @@ -1439,6 +1429,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v64i8, Custom); setOperationAction(ISD::MULHS, MVT::v32i16, Legal); setOperationAction(ISD::MULHU, MVT::v32i16, Legal); + setOperationAction(ISD::MULHS, MVT::v64i8, Custom); + setOperationAction(ISD::MULHU, MVT::v64i8, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); @@ -1519,13 +1511,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64); } - for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) { + for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); - if (Subtarget.hasVLX()) { - // FIXME. This commands are available on SSE/AVX2, add relevant patterns. - setLoadExtAction(ExtType, MVT::v16i16, MVT::v16i8, Legal); - setLoadExtAction(ExtType, MVT::v8i16, MVT::v8i8, Legal); - } } } @@ -1598,6 +1585,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); + setLibcallName(RTLIB::MUL_I128, nullptr); } // Combine sin / cos into one node or libcall if possible. @@ -3256,9 +3244,9 @@ SDValue X86TargetLowering::LowerFormalArguments( if (CallConv == CallingConv::X86_RegCall || Fn->hasFnAttribute("no_caller_saved_registers")) { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end())) - MF.getRegInfo().disableCalleeSavedRegister(Pair.first); + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (std::pair<unsigned, unsigned> Pair : MRI.liveins()) + MRI.disableCalleeSavedRegister(Pair.first); } return Chain; @@ -5349,6 +5337,20 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, return false; }; + // Handle UNDEFs. + if (Op.isUndef()) { + APInt UndefSrcElts = APInt::getAllOnesValue(NumElts); + SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0)); + return CastBitData(UndefSrcElts, SrcEltBits); + } + + // Extract scalar constant bits. + if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) { + APInt UndefSrcElts = APInt::getNullValue(1); + SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue()); + return CastBitData(UndefSrcElts, SrcEltBits); + } + // Extract constant bits from build vector.
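Before the build-vector walk that follows, the two cases just added are worth a hand-worked example (values invented for illustration):

    // For Op = undef with NumElts = 4: UndefSrcElts = 0b1111 (every source
    // element undef) and SrcEltBits holds four zero APInts; CastBitData then
    // re-slices both to the requested EltSizeInBits granularity.
    //
    // For Op = ConstantSDNode 0x2A with EltSizeInBits = 8: UndefSrcElts is a
    // single clear bit (the one element is defined) and SrcEltBits = { 0x2A },
    // so the caller sees one well-defined 8-bit element.

The build-vector path below then extracts the same information element by element.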
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); @@ -5443,6 +5445,24 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode, return true; } +/// Create a shuffle mask that matches the PACKSS/PACKUS truncation. +/// Note: This ignores saturation, so inputs must be checked first. +static void createPackShuffleMask(MVT VT, SmallVectorImpl &Mask, + bool Unary) { + assert(Mask.empty() && "Expected an empty shuffle mask vector"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits(); + unsigned Offset = Unary ? 0 : NumElts; + + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2) + Mask.push_back(Elt + (Lane * NumEltsPerLane)); + for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2) + Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset); + } +} + /// Calculates the shuffle mask corresponding to the target-specific opcode. /// If the mask could be calculated, returns it in \p Mask, returns the shuffle /// operands in \p Ops, and returns true. @@ -5463,21 +5483,28 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, bool IsFakeUnary = false; switch(N->getOpcode()) { case X86ISD::BLENDI: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); DecodeBLENDMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::SHUFP: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); DecodeSHUFPMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::INSERTPS: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); DecodeINSERTPSMask(cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::EXTRQI: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); if (isa(N->getOperand(1)) && isa(N->getOperand(2))) { int BitLen = N->getConstantOperandVal(1); @@ -5487,6 +5514,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, } break; case X86ISD::INSERTQI: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); if (isa(N->getOperand(2)) && isa(N->getOperand(3))) { int BitLen = N->getConstantOperandVal(2); @@ -5496,23 +5525,33 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, } break; case X86ISD::UNPCKH: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); DecodeUNPCKHMask(VT, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::UNPCKL: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); DecodeUNPCKLMask(VT, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == 
N->getOperand(1); break; case X86ISD::MOVHLPS: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); DecodeMOVHLPSMask(NumElems, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVLHPS: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); DecodeMOVLHPSMask(NumElems, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::PALIGNR: assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); DecodePALIGNRMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); @@ -5521,33 +5560,39 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, break; case X86ISD::VSHLDQ: assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodePSLLDQMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::VSRLDQ: assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands() - 1); DecodePSRLDQMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFD: case X86ISD::VPERMILPI: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); DecodePSHUFMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFHW: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); DecodePSHUFHWMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::PSHUFLW: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); DecodePSHUFLWMask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::VZEXT_MOVL: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); DecodeZeroMoveLowMask(VT, Mask); IsUnary = true; break; @@ -5571,6 +5616,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, return false; } case X86ISD::VPERMILPV: { + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); IsUnary = true; SDValue MaskNode = N->getOperand(1); unsigned MaskEltSize = VT.getScalarSizeInBits(); @@ -5586,6 +5632,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, return false; } case X86ISD::PSHUFB: { + assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); IsUnary = true; SDValue MaskNode = N->getOperand(1); SmallVector RawMask; @@ -5600,28 +5649,36 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, return false; } case X86ISD::VPERMI: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERMMask(VT, 
cast(ImmN)->getZExtValue(), Mask); IsUnary = true; break; case X86ISD::MOVSS: case X86ISD::MOVSD: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask); break; case X86ISD::VPERM2X128: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); ImmN = N->getOperand(N->getNumOperands()-1); DecodeVPERM2X128Mask(VT, cast(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; case X86ISD::MOVSLDUP: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); DecodeMOVSLDUPMask(VT, Mask); IsUnary = true; break; case X86ISD::MOVSHDUP: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); DecodeMOVSHDUPMask(VT, Mask); IsUnary = true; break; case X86ISD::MOVDDUP: + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); DecodeMOVDDUPMask(VT, Mask); IsUnary = true; break; @@ -5630,6 +5687,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, // Not yet implemented return false; case X86ISD::VPERMIL2: { + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); unsigned MaskEltSize = VT.getScalarSizeInBits(); SDValue MaskNode = N->getOperand(2); @@ -5649,6 +5708,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, return false; } case X86ISD::VPPERM: { + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); SDValue MaskNode = N->getOperand(2); SmallVector RawMask; @@ -5663,6 +5724,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, return false; } case X86ISD::VPERMV: { + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); IsUnary = true; // Unlike most shuffle nodes, VPERMV's mask operand is operand 0. Ops.push_back(N->getOperand(1)); @@ -5680,6 +5742,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, return false; } case X86ISD::VPERMV3: { + assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(2).getValueType() == VT && "Unexpected value type"); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2); // Unlike most shuffle nodes, VPERMV3's mask operand is the middle one. Ops.push_back(N->getOperand(0)); @@ -5693,6 +5757,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, return false; } case X86ISD::VPERMIV3: { + assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); + assert(N->getOperand(2).getValueType() == VT && "Unexpected value type"); IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2); // Unlike most shuffle nodes, VPERMIV3's mask operand is the first one. 
Ops.push_back(N->getOperand(1)); @@ -5865,19 +5931,13 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, SDValue N0 = N.getOperand(0); SDValue SrcExtract; - if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - N0.getOperand(0).getValueType() == VT) { + if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getValueType() == VT) || + (N0.getOpcode() == X86ISD::PEXTRW && + N0.getOperand(0).getValueType() == MVT::v8i16) || + (N0.getOpcode() == X86ISD::PEXTRB && + N0.getOperand(0).getValueType() == MVT::v16i8)) { SrcExtract = N0; - } else if (N0.getOpcode() == ISD::AssertZext && - N0.getOperand(0).getOpcode() == X86ISD::PEXTRW && - cast(N0.getOperand(1))->getVT() == MVT::i16) { - SrcExtract = N0.getOperand(0); - assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16); - } else if (N0.getOpcode() == ISD::AssertZext && - N0.getOperand(0).getOpcode() == X86ISD::PEXTRB && - cast(N0.getOperand(1))->getVT() == MVT::i8) { - SrcExtract = N0.getOperand(0); - assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8); } if (!SrcExtract || !isa(SrcExtract.getOperand(1))) @@ -5913,16 +5973,15 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, return true; } - // Attempt to recognise a PINSR*(ASSERTZEXT(PEXTR*)) shuffle pattern. + // Attempt to recognise a PINSR*(PEXTR*) shuffle pattern. // TODO: Expand this to support INSERT_VECTOR_ELT/etc. unsigned ExOp = (X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW); - if (InScl.getOpcode() != ISD::AssertZext || - InScl.getOperand(0).getOpcode() != ExOp) + if (InScl.getOpcode() != ExOp) return false; - SDValue ExVec = InScl.getOperand(0).getOperand(0); - uint64_t ExIdx = InScl.getOperand(0).getConstantOperandVal(1); + SDValue ExVec = InScl.getOperand(0); + uint64_t ExIdx = InScl.getConstantOperandVal(1); assert(ExIdx < NumElts && "Illegal extraction index"); Ops.push_back(InVec); Ops.push_back(ExVec); @@ -5930,17 +5989,34 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, Mask.push_back(i == InIdx ? NumElts + ExIdx : i); return true; } - case X86ISD::PACKSS: { + case X86ISD::PACKSS: + case X86ISD::PACKUS: { + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) && + N1.getValueType().getVectorNumElements() == (NumElts / 2) && + "Unexpected input value type"); + // If we know input saturation won't happen we can treat this // as a truncation shuffle. 
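// A scalar model of the two safety checks that follow, for the i32 -> i16
// case: PACKSS behaves as a plain truncation once the value carries more sign
// bits than the packed width, and PACKUS once the upper half is known zero.
// packss32to16 / packus32to16 are illustrative stand-ins, not LLVM API.
#include <cassert>
#include <cstdint>

static int16_t packss32to16(int32_t X) { // one PACKSSDW lane
  if (X > INT16_MAX) return INT16_MAX;
  if (X < INT16_MIN) return INT16_MIN;
  return (int16_t)X;
}

static uint16_t packus32to16(int32_t X) { // one PACKUSDW lane
  if (X < 0) return 0;
  if (X > 0xFFFF) return 0xFFFF;
  return (uint16_t)X;
}

int main() {
  // At least 17 sign bits (the ComputeNumSignBits > NumBitsPerElt test):
  // saturation cannot fire, so PACKSS is a plain truncation.
  const int32_t SignedVals[] = {-32768, -1, 0, 42, 32767};
  for (int32_t X : SignedVals)
    assert(packss32to16(X) == (int16_t)X);
  // Upper 16 bits known zero (the MaskedValueIsZero test): PACKUS is a
  // plain truncation as well.
  const int32_t UnsignedVals[] = {0, 1, 65535};
  for (int32_t X : UnsignedVals)
    assert(packus32to16(X) == (uint16_t)X);
  return 0;
}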
- if (DAG.ComputeNumSignBits(N.getOperand(0)) <= NumBitsPerElt || - DAG.ComputeNumSignBits(N.getOperand(1)) <= NumBitsPerElt) - return false; + if (Opcode == X86ISD::PACKSS) { + if ((!N0.isUndef() && DAG.ComputeNumSignBits(N0) <= NumBitsPerElt) || + (!N1.isUndef() && DAG.ComputeNumSignBits(N1) <= NumBitsPerElt)) + return false; + } else { + APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt); + if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask)) || + (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask))) + return false; + } - Ops.push_back(N.getOperand(0)); - Ops.push_back(N.getOperand(1)); - for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(i * 2); + bool IsUnary = (N0 == N1); + + Ops.push_back(N0); + if (!IsUnary) + Ops.push_back(N1); + + createPackShuffleMask(VT, Mask, IsUnary); return true; } case X86ISD::VSHLI: @@ -5999,6 +6075,14 @@ static void resolveTargetShuffleInputsAndMask(SmallVectorImpl &Inputs, for (int i = 0, e = Inputs.size(); i < e; ++i) { int lo = UsedInputs.size() * MaskWidth; int hi = lo + MaskWidth; + + // Strip UNDEF input usage. + if (Inputs[i].isUndef()) + for (int &M : Mask) + if ((lo <= M) && (M < hi)) + M = SM_SentinelUndef; + + // Check for unused inputs. if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) { UsedInputs.push_back(Inputs[i]); continue; @@ -6096,6 +6180,49 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, return SDValue(); } +// Use PINSRB/PINSRW/PINSRD to create a build vector. +static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros, + unsigned NumNonZero, unsigned NumZero, + SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + MVT VT = Op.getSimpleValueType(); + unsigned NumElts = VT.getVectorNumElements(); + assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) || + ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) && + "Illegal vector insertion"); + + SDLoc dl(Op); + SDValue V; + bool First = true; + + for (unsigned i = 0; i < NumElts; ++i) { + bool IsNonZero = (NonZeros & (1 << i)) != 0; + if (!IsNonZero) + continue; + + // If the build vector contains zeros or our first insertion is not the + // first index then insert into zero vector to break any register + // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL. + if (First) { + First = false; + if (NumZero || 0 != i) + V = getZeroVector(VT, Subtarget, DAG, dl); + else { + assert(0 == i && "Expected insertion into zero-index"); + V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); + V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); + V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V); + V = DAG.getBitcast(VT, V); + continue; + } + } + V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V, Op.getOperand(i), + DAG.getIntPtrConstant(i, dl)); + } + + return V; +} + /// Custom lower build_vector of v16i8. static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, @@ -6104,39 +6231,15 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, if (NumNonZero > 8 && !Subtarget.hasSSE41()) return SDValue(); + // SSE4.1 - use PINSRB to insert each byte directly. + if (Subtarget.hasSSE41()) + return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG, + Subtarget); + SDLoc dl(Op); SDValue V; bool First = true; - // SSE4.1 - use PINSRB to insert each byte directly. 
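// A plain-array model of the LowerBuildVectorAsInsert strategy above, for the
// v8i16 case; NonZeros and NumZero mean the same as the parameters above,
// everything else is an illustrative stand-in.
#include <cstdint>
#include <cstdio>

static void buildVectorAsInsert(uint16_t Out[8], const uint16_t Elts[8],
                                unsigned NonZeros, unsigned NumZero) {
  bool First = true;
  for (unsigned i = 0; i != 8; ++i) {
    if (!(NonZeros & (1u << i)))
      continue; // zero/undef lane, nothing to insert
    if (First) {
      First = false;
      if (NumZero || i != 0) {
        // Start from a zero register to break the false dependency on the
        // destination's previous contents.
        for (unsigned j = 0; j != 8; ++j)
          Out[j] = 0;
      } else {
        // First insertion is lane 0 and there are no zeros: model the
        // SCALAR_TO_VECTOR + VZEXT_MOVL path (lane 0 set, rest cleared).
        for (unsigned j = 0; j != 8; ++j)
          Out[j] = 0;
        Out[0] = Elts[0];
        continue;
      }
    }
    Out[i] = Elts[i]; // one PINSRW into lane i
  }
}

int main() {
  const uint16_t Elts[8] = {7, 0, 0, 9, 0, 0, 0, 1};
  uint16_t Out[8];
  buildVectorAsInsert(Out, Elts, /*NonZeros=*/0x89, /*NumZero=*/5);
  for (unsigned i = 0; i != 8; ++i)
    printf("%u ", Out[i]); // 7 0 0 9 0 0 0 1
  printf("\n");
  return 0;
}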
- if (Subtarget.hasSSE41()) { - for (unsigned i = 0; i < 16; ++i) { - bool IsNonZero = (NonZeros & (1 << i)) != 0; - if (IsNonZero) { - // If the build vector contains zeros or our first insertion is not the - // first index then insert into zero vector to break any register - // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL. - if (First) { - First = false; - if (NumZero || 0 != i) - V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl); - else { - assert(0 == i && "Expected insertion into zero-index"); - V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); - V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); - V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V); - V = DAG.getBitcast(MVT::v16i8, V); - continue; - } - } - V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V, - Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); - } - } - - return V; - } - // Pre-SSE4.1 - merge byte pairs and insert with PINSRW. for (unsigned i = 0; i < 16; ++i) { bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; @@ -6192,34 +6295,9 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, if (NumNonZero > 4 && !Subtarget.hasSSE41()) return SDValue(); - SDLoc dl(Op); - SDValue V; - bool First = true; - for (unsigned i = 0; i < 8; ++i) { - bool IsNonZero = (NonZeros & (1 << i)) != 0; - if (IsNonZero) { - // If the build vector contains zeros or our first insertion is not the - // first index then insert into zero vector to break any register - // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL. - if (First) { - First = false; - if (NumZero || 0 != i) - V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); - else { - assert(0 == i && "Expected insertion into zero-index"); - V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); - V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); - V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V); - V = DAG.getBitcast(MVT::v8i16, V); - continue; - } - } - V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, - Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); - } - } - - return V; + // Use PINSRW to insert each element directly. + return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG, + Subtarget); } /// Custom lower build_vector of v4i32 or v4f32.
@@ -6489,14 +6567,20 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, } } - auto CreateLoad = [&DAG, &DL](EVT VT, LoadSDNode *LDBase) { + SmallVector Loads; + for (int i = FirstLoadedElt; i <= LastLoadedElt; ++i) + if (LoadMask[i]) + Loads.push_back(cast(peekThroughBitcasts(Elts[i]))); + + auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) { auto MMOFlags = LDBase->getMemOperand()->getFlags(); assert(!(MMOFlags & MachineMemOperand::MOVolatile) && "Cannot merge volatile loads."); SDValue NewLd = DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags); - DAG.makeEquivalentMemoryOrdering(LDBase, NewLd); + for (auto *LD : Loads) + DAG.makeEquivalentMemoryOrdering(LD, NewLd); return NewLd; }; @@ -6561,7 +6645,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef Elts, LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, false/*WriteMem*/); - DAG.makeEquivalentMemoryOrdering(LDBase, ResNode); + for (auto *LD : Loads) + DAG.makeEquivalentMemoryOrdering(LD, ResNode); return DAG.getBitcast(VT, ResNode); } } @@ -7966,10 +8051,10 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, // If this node promotes - by concatenating zeroes - the type of the result // of a node with instruction that zeroes all upper (irrelevant) bits of the // output register, mark it as legal and catch the pattern in instruction - // selection to avoid emitting extra insturctions (for zeroing upper bits). + // selection to avoid emitting extra instructions (for zeroing upper bits). if (SDValue Promoted = isTypePromotionOfi1ZeroUpBits(Op)) { - SDValue ZeroC = DAG.getConstant(0, dl, MVT::i64); - SDValue AllZeros = DAG.getSplatBuildVector(ResVT, dl, ZeroC); + SDValue ZeroC = DAG.getIntPtrConstant(0, dl); + SDValue AllZeros = getZeroVector(ResVT, Subtarget, DAG, dl); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, AllZeros, Promoted, ZeroC); } @@ -8621,6 +8706,51 @@ static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT, return SDValue(); } +// X86 has dedicated pack instructions that can handle specific truncation +// operations: PACKSS and PACKUS. +static SDValue lowerVectorShuffleWithPACK(const SDLoc &DL, MVT VT, + ArrayRef Mask, SDValue V1, + SDValue V2, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned BitSize = VT.getScalarSizeInBits(); + MVT PackSVT = MVT::getIntegerVT(BitSize * 2); + MVT PackVT = MVT::getVectorVT(PackSVT, NumElts / 2); + + auto LowerWithPACK = [&](SDValue N1, SDValue N2) { + SDValue VV1 = DAG.getBitcast(PackVT, N1); + SDValue VV2 = DAG.getBitcast(PackVT, N2); + if ((N1.isUndef() || DAG.ComputeNumSignBits(VV1) > BitSize) && + (N2.isUndef() || DAG.ComputeNumSignBits(VV2) > BitSize)) + return DAG.getNode(X86ISD::PACKSS, DL, VT, VV1, VV2); + + if (Subtarget.hasSSE41() || PackSVT == MVT::i16) { + APInt ZeroMask = APInt::getHighBitsSet(BitSize * 2, BitSize); + if ((N1.isUndef() || DAG.MaskedValueIsZero(VV1, ZeroMask)) && + (N2.isUndef() || DAG.MaskedValueIsZero(VV2, ZeroMask))) + return DAG.getNode(X86ISD::PACKUS, DL, VT, VV1, VV2); + } + + return SDValue(); + }; + + // Try binary shuffle. + SmallVector BinaryMask; + createPackShuffleMask(VT, BinaryMask, false); + if (isShuffleEquivalent(V1, V2, Mask, BinaryMask)) + if (SDValue Pack = LowerWithPACK(V1, V2)) + return Pack; + + // Try unary shuffle. 
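// A standalone sketch of the masks lowerVectorShuffleWithPACK is matching
// here, mirroring createPackShuffleMask above with plain ints; packShuffleMask
// and its parameters are illustrative stand-ins, not LLVM API.
#include <cstdio>
#include <vector>

// Per 128-bit lane, the pack result takes the even-index (low-half) elements
// of the first source, then those of the second source; a unary pack reads
// the first source twice (Offset == 0).
static std::vector<int> packShuffleMask(unsigned NumElts, unsigned NumLanes,
                                        bool Unary) {
  std::vector<int> Mask;
  unsigned NumEltsPerLane = NumElts / NumLanes;
  unsigned Offset = Unary ? 0 : NumElts;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
      Mask.push_back(Elt + Lane * NumEltsPerLane);
    for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
      Mask.push_back(Elt + Lane * NumEltsPerLane + Offset);
  }
  return Mask;
}

int main() {
  // Binary v16i8 pack (e.g. PACKUSWB xmm, xmm): one 128-bit lane.
  // Prints: 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30
  for (int M : packShuffleMask(16, /*NumLanes=*/1, /*Unary=*/false))
    printf("%d ", M);
  printf("\n");
  return 0;
}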
+ SmallVector UnaryMask; + createPackShuffleMask(VT, UnaryMask, true); + if (isShuffleEquivalent(V1, V2, Mask, UnaryMask)) + if (SDValue Pack = LowerWithPACK(V1, V1)) + return Pack; + + return SDValue(); +} + /// \brief Try to emit a bitmask instruction for a shuffle. /// /// This handles cases where we can model a blend exactly as a bitmask due to @@ -9790,10 +9920,7 @@ static SDValue lowerVectorShuffleAsElementInsertion( V1Mask[V2Index] = -1; if (!isNoopShuffleMask(V1Mask)) return SDValue(); - // This is essentially a special case blend operation, but if we have - // general purpose blend operations, they are always faster. Bail and let - // the rest of the lowering handle these as blends. - if (Subtarget.hasSSE41()) + if (!VT.is128BitVector()) return SDValue(); // Otherwise, use MOVSD or MOVSS. @@ -9904,7 +10031,9 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, // With MOVDDUP (v2f64) we can broadcast from a register or a load, otherwise // we can only broadcast from a register with AVX2. unsigned NumElts = Mask.size(); - unsigned Opcode = VT == MVT::v2f64 ? X86ISD::MOVDDUP : X86ISD::VBROADCAST; + unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.hasAVX2()) + ? X86ISD::MOVDDUP + : X86ISD::VBROADCAST; bool BroadcastFromReg = (Opcode == X86ISD::MOVDDUP) || Subtarget.hasAVX2(); // Check that the mask is a broadcast. @@ -9990,7 +10119,9 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, // 32-bit targets need to load i64 as a f64 and then bitcast the result. if (!Subtarget.is64Bit() && VT.getScalarType() == MVT::i64) { BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements()); - Opcode = (BroadcastVT.is128BitVector() ? X86ISD::MOVDDUP : Opcode); + Opcode = (BroadcastVT.is128BitVector() && !Subtarget.hasAVX2()) + ? X86ISD::MOVDDUP + : Opcode; } // If we are broadcasting a load that is only used by the shuffle @@ -10416,26 +10547,6 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, assert(Mask[0] < 2 && "We sort V1 to be the first input."); assert(Mask[1] >= 2 && "We sort V2 to be the second input."); - // If we have a blend of two same-type PACKUS operations and the blend aligns - // with the low and high halves, we can just merge the PACKUS operations. - // This is particularly important as it lets us merge shuffles that this - // routine itself creates. - auto GetPackNode = [](SDValue V) { - V = peekThroughBitcasts(V); - return V.getOpcode() == X86ISD::PACKUS ? V : SDValue(); - }; - if (SDValue V1Pack = GetPackNode(V1)) - if (SDValue V2Pack = GetPackNode(V2)) { - EVT PackVT = V1Pack.getValueType(); - if (PackVT == V2Pack.getValueType()) - return DAG.getBitcast(MVT::v2i64, - DAG.getNode(X86ISD::PACKUS, DL, PackVT, - Mask[0] == 0 ? V1Pack.getOperand(0) - : V1Pack.getOperand(1), - Mask[1] == 2 ? V2Pack.getOperand(0) - : V2Pack.getOperand(1))); - } - // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) @@ -11360,6 +11471,11 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) return V; + // Use dedicated pack instructions for masks that match their pattern. + if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, + DAG, Subtarget)) + return V; + // Try to use byte rotation instructions. 
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, Mask, Subtarget, DAG)) @@ -11410,6 +11526,11 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) return V; + // Use dedicated pack instructions for masks that match their pattern. + if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, + Subtarget)) + return V; + // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) @@ -11530,6 +11651,11 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return Rotate; + // Use dedicated pack instructions for masks that match their pattern. + if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG, + Subtarget)) + return V; + // Try to use a zext lowering. if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) @@ -13024,6 +13150,11 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG)) return V; + // Use dedicated pack instructions for masks that match their pattern. + if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG, + Subtarget)) + return V; + // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; @@ -13110,6 +13241,11 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG)) return V; + // Use dedicated pack instructions for masks that match their pattern. + if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG, + Subtarget)) + return V; + // Try to use shift instructions. if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; @@ -13972,16 +14108,16 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { ISD::isBuildVectorOfConstantSDNodes(Op.getOperand(2).getNode())) return SDValue(); - // If this VSELECT has a vector if i1 as a mask, it will be directly matched - // with patterns on the mask registers on AVX-512. - if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1) - return Op; - // Try to lower this to a blend-style vector shuffle. This can handle all // constant condition cases. if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG)) return BlendOp; + // If this VSELECT has a vector of i1 as a mask, it will be directly matched + // with patterns on the mask registers on AVX-512. + if (Op->getOperand(0).getValueType().getScalarSizeInBits() == 1) + return Op; + // Variable blends are only legal from SSE4.1 onward.
if (!Subtarget.hasSSE41()) return SDValue(); @@ -14043,9 +14179,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { if (VT.getSizeInBits() == 8) { SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1)); - SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract, - DAG.getValueType(VT)); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract); } if (VT == MVT::f32) { @@ -14204,9 +14338,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // Transform it so it match pextrw which produces a 32-bit result. SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1)); - SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract, - DAG.getValueType(VT)); - return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract); } if (Subtarget.hasSSE41()) @@ -14500,31 +14632,6 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget, OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt)); } -// Lower a node with an EXTRACT_SUBVECTOR opcode. This may result in -// a simple subregister reference or explicit instructions to grab -// upper bits of a vector. -static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { - SDLoc dl(Op); - SDValue In = Op.getOperand(0); - SDValue Idx = Op.getOperand(1); - MVT ResVT = Op.getSimpleValueType(); - - // When v1i1 is legal a scalarization of a vselect with a vXi1 Cond - // would result with: v1i1 = extract_subvector(vXi1, idx). - // Lower these into extract_vector_elt which is already selectable. - assert(ResVT == MVT::v1i1); - assert(Subtarget.hasAVX512() && - "Boolean EXTRACT_SUBVECTOR requires AVX512"); - - MVT EltVT = ResVT.getVectorElementType(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT LegalVT = - (TLI.getTypeToTransformTo(*DAG.getContext(), EltVT)).getSimpleVT(); - SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalVT, In, Idx); - return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ResVT, Res); -} - // Lower a node with an INSERT_SUBVECTOR opcode. This may result in a // simple superregister reference or explicit instructions to insert // the upper bits of a vector. @@ -15836,7 +15943,7 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget, /// Helper to recursively truncate vector elements in half with PACKSS. /// It makes use of the fact that vector comparison results will be all-zeros -/// or all-ones to use (vXi8 PACKSS(vYi16, vYi16)) instead of matching types. +/// or all-ones to prevent the PACKSS from saturating the results. /// AVX2 (Int256) sub-targets require extra shuffling as the PACKSS operates /// within each 128-bit lane. static SDValue truncateVectorCompareWithPACKSS(EVT DstVT, SDValue In, @@ -15855,38 +15962,49 @@ static SDValue truncateVectorCompareWithPACKSS(EVT DstVT, SDValue In, // We only support vector truncation to 128bits or greater from a // 256bits or greater source. 
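// A scalar model of why the recursion below is lossless for comparison
// results: every lane is 0 or all-ones, values PACKSS can never saturate, so
// each halving stage is a pure truncation. packssLane is an illustrative
// stand-in, not LLVM API.
#include <cstdint>
#include <cstdio>

// One PACKSS stage, modelled per lane: saturate to the narrower signed range.
static int32_t packssLane(int32_t V, int32_t NarrowMin, int32_t NarrowMax) {
  return V < NarrowMin ? NarrowMin : (V > NarrowMax ? NarrowMax : V);
}

int main() {
  int32_t Cmp[4] = {-1, 0, -1, -1}; // vXi32 SETCC result lanes
  for (int i = 0; i != 4; ++i) {
    int32_t To16 = packssLane(Cmp[i], INT16_MIN, INT16_MAX); // stage 1
    int32_t To8 = packssLane(To16, INT8_MIN, INT8_MAX);      // stage 2
    printf("%d ", To8); // prints -1 0 -1 -1 : both stages lossless
  }
  printf("\n");
  return 0;
}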
- if ((DstVT.getSizeInBits() % 128) != 0) - return SDValue(); - if ((SrcVT.getSizeInBits() % 256) != 0) + unsigned DstSizeInBits = DstVT.getSizeInBits(); + unsigned SrcSizeInBits = SrcVT.getSizeInBits(); + if ((DstSizeInBits % 128) != 0 || (SrcSizeInBits % 256) != 0) return SDValue(); + LLVMContext &Ctx = *DAG.getContext(); unsigned NumElems = SrcVT.getVectorNumElements(); assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation"); - assert(SrcVT.getSizeInBits() > DstVT.getSizeInBits() && "Illegal truncation"); + assert(SrcSizeInBits > DstSizeInBits && "Illegal truncation"); - EVT PackedSVT = - EVT::getIntegerVT(*DAG.getContext(), SrcVT.getScalarSizeInBits() / 2); + EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2); // Extract lower/upper subvectors. unsigned NumSubElts = NumElems / 2; - unsigned SrcSizeInBits = SrcVT.getSizeInBits(); SDValue Lo = extractSubVector(In, 0 * NumSubElts, DAG, DL, SrcSizeInBits / 2); SDValue Hi = extractSubVector(In, 1 * NumSubElts, DAG, DL, SrcSizeInBits / 2); + // Pack to the largest type possible: + // vXi64/vXi32 -> PACKSSDW and vXi16 -> PACKSSWB. + EVT InVT = MVT::i16, OutVT = MVT::i8; + if (DstVT.getScalarSizeInBits() > 8) { + InVT = MVT::i32; + OutVT = MVT::i16; + } + + unsigned SubSizeInBits = SrcSizeInBits / 2; + InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits()); + OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits()); + // 256bit -> 128bit truncate - PACKSS lower/upper 128-bit subvectors. if (SrcVT.is256BitVector()) { - Lo = DAG.getBitcast(MVT::v8i16, Lo); - Hi = DAG.getBitcast(MVT::v8i16, Hi); - SDValue Res = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, Lo, Hi); + Lo = DAG.getBitcast(InVT, Lo); + Hi = DAG.getBitcast(InVT, Hi); + SDValue Res = DAG.getNode(X86ISD::PACKSS, DL, OutVT, Lo, Hi); return DAG.getBitcast(DstVT, Res); } // AVX2: 512bit -> 256bit truncate - PACKSS lower/upper 256-bit subvectors. // AVX2: 512bit -> 128bit truncate - PACKSS(PACKSS, PACKSS). if (SrcVT.is512BitVector() && Subtarget.hasInt256()) { - Lo = DAG.getBitcast(MVT::v16i16, Lo); - Hi = DAG.getBitcast(MVT::v16i16, Hi); - SDValue Res = DAG.getNode(X86ISD::PACKSS, DL, MVT::v32i8, Lo, Hi); + Lo = DAG.getBitcast(InVT, Lo); + Hi = DAG.getBitcast(InVT, Hi); + SDValue Res = DAG.getNode(X86ISD::PACKSS, DL, OutVT, Lo, Hi); // 256-bit PACKSS(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)), // so we need to shuffle to get ((LO0,HI0),(LO1,HI1)). @@ -15897,18 +16015,18 @@ static SDValue truncateVectorCompareWithPACKSS(EVT DstVT, SDValue In, return DAG.getBitcast(DstVT, Res); // If 512bit -> 128bit truncate another stage. - EVT PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems); + EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems); Res = DAG.getBitcast(PackedVT, Res); return truncateVectorCompareWithPACKSS(DstVT, Res, DL, DAG, Subtarget); } // Recursively pack lower/upper subvectors, concat result and pack again. 
- assert(SrcVT.getSizeInBits() >= 512 && "Expected 512-bit vector or greater"); - EVT PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems / 2); + assert(SrcSizeInBits >= 512 && "Expected 512-bit vector or greater"); + EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumSubElts); Lo = truncateVectorCompareWithPACKSS(PackedVT, Lo, DL, DAG, Subtarget); Hi = truncateVectorCompareWithPACKSS(PackedVT, Hi, DL, DAG, Subtarget); - PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems); + PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi); return truncateVectorCompareWithPACKSS(DstVT, Res, DL, DAG, Subtarget); } @@ -15957,14 +16075,6 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { SDValue In = Op.getOperand(0); MVT InVT = In.getSimpleValueType(); - if (VT == MVT::i1) { - assert((InVT.isInteger() && (InVT.getSizeInBits() <= 64)) && - "Invalid scalar TRUNCATE operation"); - if (InVT.getSizeInBits() >= 32) - return SDValue(); - In = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, In); - return DAG.getNode(ISD::TRUNCATE, DL, VT, In); - } assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && "Invalid TRUNCATE operation"); @@ -16500,8 +16610,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, UI->getOpcode() != ISD::STORE) goto default_case; - if (ConstantSDNode *C = - dyn_cast(ArithOp.getOperand(1))) { + if (auto *C = dyn_cast(ArithOp.getOperand(1))) { // An add of one will be selected as an INC. if (C->isOne() && (!Subtarget.slowIncDec() || @@ -16718,8 +16827,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, } // Use SUB instead of CMP to enable CSE between SUB and CMP. SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32); - SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, - Op0, Op1); + SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1); return SDValue(Sub.getNode(), 1); } return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); @@ -16926,8 +17034,8 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, /// Turns an ISD::CondCode into a value suitable for SSE floating-point mask /// CMPs. -static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0, - SDValue &Op1) { +static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0, + SDValue &Op1) { unsigned SSECC; bool Swap = false; @@ -16960,8 +17068,8 @@ static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0, case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGT: SSECC = 6; break; case ISD::SETO: SSECC = 7; break; - case ISD::SETUEQ: - case ISD::SETONE: SSECC = 8; break; + case ISD::SETUEQ: SSECC = 8; break; + case ISD::SETONE: SSECC = 12; break; } if (Swap) std::swap(Op0, Op1); @@ -17141,11 +17249,9 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, // In the two cases not handled by SSE compare predicates (SETUEQ/SETONE), // emit two comparisons and a logic op to tie them together. - // TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is - // available. SDValue Cmp; unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1); - if (SSECC == 8) { + if (SSECC >= 8 && !Subtarget.hasAVX()) { // LLVM predicate is SETUEQ or SETONE. 
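// For reference, the Intel _CMP_* immediate encodings involved: the legacy
// SSE CMPPS immediate is 3 bits wide (0-7), while the single-instruction
// predicates for SETUEQ (8) and SETONE (12) exist only in the 5-bit AVX
// VCMPPS space, which is why the code below still emits two compares plus a
// logic op when SSECC >= 8 without AVX.
enum X86FPCmpImm : unsigned {
  CMP_EQ_OQ   = 0x00, // equal, ordered, quiet
  CMP_LT_OS   = 0x01, // less-than, ordered, signaling
  CMP_LE_OS   = 0x02, // less-or-equal, ordered, signaling
  CMP_UNORD_Q = 0x03, // unordered, quiet
  CMP_NEQ_UQ  = 0x04, // not-equal, unordered, quiet
  CMP_NLT_US  = 0x05, // not-less-than, unordered, signaling
  CMP_NLE_US  = 0x06, // not-less-or-equal, unordered, signaling
  CMP_ORD_Q   = 0x07, // ordered, quiet -- last SSE-encodable value
  CMP_EQ_UQ   = 0x08, // SETUEQ in a single AVX compare
  CMP_NEQ_OQ  = 0x0C, // SETONE in a single AVX compare
};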
unsigned CC0, CC1; unsigned CombineOpc; @@ -17583,17 +17689,17 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { (Subtarget.hasSSE1() && VT == MVT::f32)) && VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) { SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1); - int SSECC = translateX86FSETCC( + unsigned SSECC = translateX86FSETCC( cast(Cond.getOperand(2))->get(), CondOp0, CondOp1); - if (SSECC != 8) { - if (Subtarget.hasAVX512()) { - SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, - CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); - assert(!VT.isVector() && "Not a scalar type?"); - return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); - } + if (Subtarget.hasAVX512()) { + SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, + CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); + assert(!VT.isVector() && "Not a scalar type?"); + return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); + } + if (SSECC < 8 || Subtarget.hasAVX()) { SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); @@ -17882,17 +17988,16 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (T1.getValueType() == T2.getValueType() && // Blacklist CopyFromReg to avoid partial register stalls. T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){ - SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue); - SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond); + SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1, + CC, Cond); return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); } } // X86ISD::CMOV means set the result (which is operand 1) to the RHS if // condition is true. - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = { Op2, Op1, CC, Cond }; - return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops); + return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops); } static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, @@ -17926,8 +18031,13 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, return SDValue(); MVT ExtVT = VT; - if (!VT.is512BitVector() && !Subtarget.hasVLX()) + if (!VT.is512BitVector() && !Subtarget.hasVLX()) { ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts); + } else if (VTElt == MVT::i16 || VTElt == MVT::i8) { + // If we don't have BWI support we need to extend 8/16-bit to 32-bit. + // Otherwise we end up with vselects we can't handle. + ExtVT = MVT::getVectorVT(MVT::i32, NumElts); + } SDValue V; if (Subtarget.hasDQI()) { @@ -18342,6 +18452,12 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget, if (Ext == ISD::SEXTLOAD && RegSz >= 256) loadRegZize = 128; + // If we don't have BWI we won't be able to create the shuffle needed for + // v8i8->v8i64. + if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && + MemVT == MVT::v8i8) + loadRegZize = 128; + // Represent our vector as a sequence of elements which are the // largest scalar that we can load. 
EVT LoadUnitVecVT = EVT::getVectorVT( @@ -18408,6 +18524,13 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget &Subtarget, return Shuff; } + if (Ext == ISD::EXTLOAD && !Subtarget.hasBWI() && RegVT == MVT::v8i64 && + MemVT == MVT::v8i8) { + SDValue Sext = getExtendInVec(X86ISD::VZEXT, dl, RegVT, SlicedVec, DAG); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); + return Sext; + } + // Redistribute the loaded elements into the different locations. SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) @@ -19270,8 +19393,8 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset); } -static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { // Helper to detect if the operand is CUR_DIRECTION rounding mode. auto isRoundModeCurDirection = [](SDValue Rnd) { if (!isa(Rnd)) @@ -20092,7 +20215,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget auto &Context = MF.getMMI().getContext(); MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") + Twine(MF.getFunctionNumber())); - return DAG.getNode(X86ISD::Wrapper, dl, VT, DAG.getMCSymbol(S, PtrVT)); + return DAG.getNode(getGlobalWrapperKind(), dl, VT, + DAG.getMCSymbol(S, PtrVT)); } case Intrinsic::x86_seh_lsda: { @@ -20494,18 +20618,16 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, case RDSEED: case RDRAND: { // Emit the node with the right value type. - SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other); + SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32, MVT::Other); SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0)); // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. // Otherwise return the value from Rand, which is always 0, casted to i32. SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), DAG.getConstant(1, dl, Op->getValueType(1)), - DAG.getConstant(X86::COND_B, dl, MVT::i32), + DAG.getConstant(X86::COND_B, dl, MVT::i8), SDValue(Result.getNode(), 1) }; - SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, - DAG.getVTList(Op->getValueType(1), MVT::Glue), - Ops); + SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops); // Return { result, isValid, chain }. return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, @@ -21410,6 +21532,19 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) { + // Since X86 does not have CMOV for 8-bit integer, we don't convert + // 8-bit integer abs to NEG and CMOV. 
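// A scalar model of the NEG + CMOV sequence built above (illustrative C, not
// the emitted DAG): the X86ISD::SUB computes 0 - X and its EFLAGS feed a
// COND_GE CMOV, i.e. the compare of 0 against X decides which value survives.
#include <cassert>

static int absViaNegCmov(int X) {
  int Neg = 0 - X;           // X86ISD::SUB: result plus EFLAGS
  return (0 >= X) ? Neg : X; // X86ISD::CMOV keyed on COND_GE
}

int main() {
  assert(absViaNegCmov(5) == 5);
  assert(absViaNegCmov(-7) == 7);
  assert(absViaNegCmov(0) == 0);
  // As with the real instruction sequence, INT_MIN would wrap back to
  // INT_MIN (two's complement; formally undefined if evaluated in C).
  return 0;
}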
+ SDLoc DL(Op); + SDValue N0 = Op.getOperand(0); + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), + DAG.getConstant(0, DL, VT), N0); + SDValue Ops[] = {N0, Neg, DAG.getConstant(X86::COND_GE, DL, MVT::i8), + SDValue(Neg.getNode(), 1)}; + return DAG.getNode(X86ISD::CMOV, DL, VT, Ops); + } + assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); @@ -21605,7 +21740,8 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, return Lower256IntArith(Op, DAG); // Only i8 vectors should need custom lowering after this. - assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256())) && + assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) || + (VT == MVT::v64i8 && Subtarget.hasBWI())) && "Unsupported vector type"); // Lower v16i8/v32i8 as extension to v8i16/v16i16 vector pairs, multiply, @@ -21617,7 +21753,12 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, // and then ashr/lshr the upper bits down to the lower bits before multiply. unsigned Opcode = Op.getOpcode(); unsigned ExShift = (ISD::MULHU == Opcode ? ISD::SRL : ISD::SRA); - unsigned ExSSE41 = (ISD::MULHU == Opcode ? X86ISD::VZEXT : X86ISD::VSEXT); + unsigned ExAVX = (ISD::MULHU == Opcode ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); + + // For 512-bit vectors, split into 256-bit vectors to allow the + // sign-extension to occur. + if (VT == MVT::v64i8) + return Lower512IntArith(Op, DAG); // AVX2 implementations - extend xmm subvectors to ymm. if (Subtarget.hasInt256()) { @@ -21626,14 +21767,22 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, SDValue Hi = DAG.getIntPtrConstant(NumElems / 2, dl); if (VT == MVT::v32i8) { + if (Subtarget.hasBWI()) { + SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v32i16, A); + SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v32i16, B); + SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v32i16, ExA, ExB); + Mul = DAG.getNode(ISD::SRL, dl, MVT::v32i16, Mul, + DAG.getConstant(8, dl, MVT::v32i16)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul); + } SDValue ALo = extract128BitVector(A, 0, DAG, dl); SDValue BLo = extract128BitVector(B, 0, DAG, dl); SDValue AHi = extract128BitVector(A, NumElems / 2, DAG, dl); SDValue BHi = extract128BitVector(B, NumElems / 2, DAG, dl); - ALo = DAG.getNode(ExSSE41, dl, MVT::v16i16, ALo); - BLo = DAG.getNode(ExSSE41, dl, MVT::v16i16, BLo); - AHi = DAG.getNode(ExSSE41, dl, MVT::v16i16, AHi); - BHi = DAG.getNode(ExSSE41, dl, MVT::v16i16, BHi); + ALo = DAG.getNode(ExAVX, dl, MVT::v16i16, ALo); + BLo = DAG.getNode(ExAVX, dl, MVT::v16i16, BLo); + AHi = DAG.getNode(ExAVX, dl, MVT::v16i16, AHi); + BHi = DAG.getNode(ExAVX, dl, MVT::v16i16, BHi); Lo = DAG.getNode(ISD::SRL, dl, MVT::v16i16, DAG.getNode(ISD::MUL, dl, MVT::v16i16, ALo, BLo), DAG.getConstant(8, dl, MVT::v16i16)); @@ -21651,19 +21800,23 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, DAG.getVectorShuffle(MVT::v16i16, dl, Lo, Hi, HiMask)); } - SDValue ExA = getExtendInVec(ExSSE41, dl, MVT::v16i16, A, DAG); - SDValue ExB = getExtendInVec(ExSSE41, dl, MVT::v16i16, B, DAG); + SDValue ExA = DAG.getNode(ExAVX, dl, MVT::v16i16, A); + SDValue ExB = DAG.getNode(ExAVX, dl, MVT::v16i16, B); SDValue Mul = DAG.getNode(ISD::MUL, dl, MVT::v16i16, ExA, ExB); - SDValue MulH = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul, - DAG.getConstant(8, dl, MVT::v16i16)); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, MulH, Lo); - Hi = 
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, MulH, Hi); + Mul = DAG.getNode(ISD::SRL, dl, MVT::v16i16, Mul, + DAG.getConstant(8, dl, MVT::v16i16)); + // If we have BWI we can use truncate instruction. + if (Subtarget.hasBWI()) + return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Lo); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i16, Mul, Hi); return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); } assert(VT == MVT::v16i8 && "Pre-AVX2 support only supports v16i8 multiplication"); MVT ExVT = MVT::v8i16; + unsigned ExSSE41 = (ISD::MULHU == Opcode ? X86ISD::VZEXT : X86ISD::VSEXT); // Extract the lo parts and zero/sign extend to i16. SDValue ALo, BLo; @@ -22029,9 +22182,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, } } - // Special case in 32-bit mode, where i64 is expanded into high and low parts. + // Check cases (mainly 32-bit) where i64 is expanded into high and low parts. // TODO: Replace constant extraction with getTargetConstantBitsFromNode. - if (!Subtarget.is64Bit() && !Subtarget.hasXOP() && + if (!Subtarget.hasXOP() && (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) || (Subtarget.hasAVX512() && VT == MVT::v8i64))) { @@ -22158,9 +22311,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, } } - // Special case in 32-bit mode, where i64 is expanded into high and low parts. - if (!Subtarget.is64Bit() && VT == MVT::v2i64 && - Amt.getOpcode() == ISD::BITCAST && + // Check cases (mainly 32-bit) where i64 is expanded into high and low parts. + if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST && Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { Amt = Amt.getOperand(0); unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() / @@ -22295,7 +22447,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, // the vector shift into four scalar shifts plus four pairs of vector // insert/extract. if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32)) { - unsigned TargetOpcode = X86ISD::MOVSS; + bool UseMOVSD = false; bool CanBeSimplified; // The splat value for the first packed shift (the 'X' from the example). SDValue Amt1 = Amt->getOperand(0); @@ -22312,7 +22464,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, // Otherwise, check if we can still simplify this node using a MOVSD. CanBeSimplified = Amt1 == Amt->getOperand(1) && Amt->getOperand(2) == Amt->getOperand(3); - TargetOpcode = X86ISD::MOVSD; + UseMOVSD = true; Amt2 = Amt->getOperand(2); } } else { @@ -22323,7 +22475,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, CanBeSimplified = Amt2 == Amt->getOperand(i); if (!CanBeSimplified) { - TargetOpcode = X86ISD::MOVSD; + UseMOVSD = true; CanBeSimplified = true; Amt2 = Amt->getOperand(4); for (unsigned i=0; i != 4 && CanBeSimplified; ++i) @@ -22336,19 +22488,18 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, if (CanBeSimplified && isa(Amt1) && isa(Amt2)) { // Replace this node with two shifts followed by a MOVSS/MOVSD/PBLEND. 
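// A plain-array model of the v4i32 case handled here: with amounts <A,B,B,B>
// two splat shifts plus a MOVSS-style blend ({0,5,6,7}) replace four scalar
// shifts; <A,A,B,B> uses the MOVSD-style {0,1,6,7} blend instead. The values
// below are illustrative only.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t R[4] = {0x10, 0x20, 0x30, 0x40};
  unsigned AmtA = 1, AmtB = 4; // shift amounts <A, B, B, B>
  uint32_t ShiftA[4], ShiftB[4], Out[4];
  for (int i = 0; i != 4; ++i) {
    ShiftA[i] = R[i] << AmtA; // first splat shift
    ShiftB[i] = R[i] << AmtB; // second splat shift
  }
  for (int i = 0; i != 4; ++i) // MOVSS-style blend: lane 0 from ShiftA
    Out[i] = (i == 0) ? ShiftA[i] : ShiftB[i];
  for (int i = 0; i != 4; ++i)
    printf("0x%X ", (unsigned)Out[i]); // 0x20 0x200 0x300 0x400
  printf("\n");
  return 0;
}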
- MVT CastVT = MVT::v4i32; SDValue Splat1 = DAG.getConstant(cast(Amt1)->getAPIntValue(), dl, VT); SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1); SDValue Splat2 = DAG.getConstant(cast(Amt2)->getAPIntValue(), dl, VT); SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2); - SDValue BitCast1 = DAG.getBitcast(CastVT, Shift1); - SDValue BitCast2 = DAG.getBitcast(CastVT, Shift2); - if (TargetOpcode == X86ISD::MOVSD) - return DAG.getBitcast(VT, DAG.getVectorShuffle(CastVT, dl, BitCast1, + SDValue BitCast1 = DAG.getBitcast(MVT::v4i32, Shift1); + SDValue BitCast2 = DAG.getBitcast(MVT::v4i32, Shift2); + if (UseMOVSD) + return DAG.getBitcast(VT, DAG.getVectorShuffle(MVT::v4i32, dl, BitCast1, BitCast2, {0, 1, 6, 7})); - return DAG.getBitcast(VT, DAG.getVectorShuffle(CastVT, dl, BitCast1, + return DAG.getBitcast(VT, DAG.getVectorShuffle(MVT::v4i32, dl, BitCast1, BitCast2, {0, 5, 6, 7})); } } @@ -23955,7 +24106,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VSELECT: return LowerVSELECT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG); case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); @@ -23991,7 +24141,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG); - case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, Subtarget, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); @@ -24251,7 +24401,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } } case ISD::INTRINSIC_WO_CHAIN: { - if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG)) + if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG)) Results.push_back(V); return; } @@ -26533,8 +26683,8 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, SetupEntryBlockForSjLj(MI, BB, DispatchBB, FI); // Create the jump table and associated information - MachineJumpTableInfo *JTI = - MF->getOrCreateJumpTableInfo(getJumpTableEncoding()); + unsigned JTE = getJumpTableEncoding(); + MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE); unsigned MJTI = JTI->createJumpTableIndex(LPadList); const X86RegisterInfo &RI = TII->getRegisterInfo(); @@ -26557,7 +26707,8 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addRegMask(RI.getNoPreservedMask()); } - unsigned IReg = MRI->createVirtualRegister(&X86::GR32RegClass); + // IReg is used as an index in a memory operand and therefore can't be SP + unsigned IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass); addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI, Subtarget.is64Bit() ? 
8 : 4); BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri)) @@ -26565,13 +26716,67 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .addImm(LPadList.size()); BuildMI(DispatchBB, DL, TII->get(X86::JAE_1)).addMBB(TrapBB); - BuildMI(DispContBB, DL, - TII->get(Subtarget.is64Bit() ? X86::JMP64m : X86::JMP32m)) - .addReg(0) - .addImm(Subtarget.is64Bit() ? 8 : 4) - .addReg(IReg) - .addJumpTableIndex(MJTI) - .addReg(0); + if (Subtarget.is64Bit()) { + unsigned BReg = MRI->createVirtualRegister(&X86::GR64RegClass); + unsigned IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); + + // leaq .LJTI0_0(%rip), BReg + BuildMI(DispContBB, DL, TII->get(X86::LEA64r), BReg) + .addReg(X86::RIP) + .addImm(1) + .addReg(0) + .addJumpTableIndex(MJTI) + .addReg(0); + // movzx IReg64, IReg + BuildMI(DispContBB, DL, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64) + .addImm(0) + .addReg(IReg) + .addImm(X86::sub_32bit); + + switch (JTE) { + case MachineJumpTableInfo::EK_BlockAddress: + // jmpq *(BReg,IReg64,8) + BuildMI(DispContBB, DL, TII->get(X86::JMP64m)) + .addReg(BReg) + .addImm(8) + .addReg(IReg64) + .addImm(0) + .addReg(0); + break; + case MachineJumpTableInfo::EK_LabelDifference32: { + unsigned OReg = MRI->createVirtualRegister(&X86::GR32RegClass); + unsigned OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass); + unsigned TReg = MRI->createVirtualRegister(&X86::GR64RegClass); + + // movl (BReg,IReg64,4), OReg + BuildMI(DispContBB, DL, TII->get(X86::MOV32rm), OReg) + .addReg(BReg) + .addImm(4) + .addReg(IReg64) + .addImm(0) + .addReg(0); + // movsx OReg64, OReg + BuildMI(DispContBB, DL, TII->get(X86::MOVSX64rr32), OReg64).addReg(OReg); + // addq BReg, OReg64, TReg + BuildMI(DispContBB, DL, TII->get(X86::ADD64rr), TReg) + .addReg(OReg64) + .addReg(BReg); + // jmpq *TReg + BuildMI(DispContBB, DL, TII->get(X86::JMP64r)).addReg(TReg); + break; + } + default: + llvm_unreachable("Unexpected jump table encoding"); + } + } else { + // jmpl *.LJTI0_0(,IReg,4) + BuildMI(DispContBB, DL, TII->get(X86::JMP32m)) + .addReg(0) + .addImm(4) + .addReg(IReg) + .addJumpTableIndex(MJTI) + .addReg(0); + } // Add the jump table entries as successors to the MBB. SmallPtrSet SeenMBBs; @@ -26950,6 +27155,17 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.Zero.setBitsFrom(NumLoBits); break; } + case X86ISD::PEXTRB: + case X86ISD::PEXTRW: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(), + Op.getConstantOperandVal(1)); + DAG.computeKnownBits(Src, Known, DemandedElt, Depth + 1); + Known = Known.zextOrTrunc(BitWidth); + Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits()); + break; + } case X86ISD::VSHLI: case X86ISD::VSRLI: { if (auto *ShiftImm = dyn_cast(Op.getOperand(1))) { @@ -26990,6 +27206,19 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.Zero.setBitsFrom(InBitWidth); break; } + case X86ISD::CMOV: { + DAG.computeKnownBits(Op.getOperand(1), Known, Depth+1); + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + KnownBits Known2; + DAG.computeKnownBits(Op.getOperand(0), Known2, Depth+1); + + // Only known if known in both the LHS and RHS. 
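// A minimal model of the CMOV rule above: since either operand may be
// selected at run time, a bit is known in the result only when both operands
// agree on it. KB is an illustrative stand-in for llvm::KnownBits.
#include <cassert>
#include <cstdint>

struct KB { uint64_t Zero, One; }; // disjoint known-zero / known-one sets

static KB cmovKnownBits(KB A, KB B) {
  return {A.Zero & B.Zero, A.One & B.One};
}

int main() {
  KB A = {~0x4ull, 0x4ull}; // operand known to be exactly 0b100
  KB B = {~0x6ull, 0x6ull}; // operand known to be exactly 0b110
  KB K = cmovKnownBits(A, B);
  assert(K.One == 0x4);        // bit 2 is one in both operands
  assert((K.Zero & 0x2) == 0); // bit 1 differs, so it is unknown
  assert((K.Zero & 0x1) != 0); // bit 0 is zero in both operands
  return 0;
}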
+ Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + break; + } } } @@ -27010,6 +27239,16 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode( return Tmp; } + case X86ISD::VTRUNC: { + SDValue Src = Op.getOperand(0); + unsigned NumSrcBits = Src.getScalarValueSizeInBits(); + assert(VTBits < NumSrcBits && "Illegal truncation input type"); + unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); + if (Tmp > (NumSrcBits - VTBits)) + return Tmp - (NumSrcBits - VTBits); + return 1; + } + case X86ISD::PACKSS: { // PACKSS is just a truncation if the sign bits extend to the packed size. // TODO: Add DemandedElts support. @@ -27023,20 +27262,24 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode( } case X86ISD::VSHLI: { + // TODO: Add DemandedElts support. SDValue Src = Op.getOperand(0); - unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); APInt ShiftVal = cast(Op.getOperand(1))->getAPIntValue(); if (ShiftVal.uge(VTBits)) return VTBits; // Shifted all bits out --> zero. + unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); if (ShiftVal.uge(Tmp)) return 1; // Shifted all sign bits out --> unknown. return Tmp - ShiftVal.getZExtValue(); } case X86ISD::VSRAI: { + // TODO: Add DemandedElts support. SDValue Src = Op.getOperand(0); - unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); APInt ShiftVal = cast(Op.getOperand(1))->getAPIntValue(); + if (ShiftVal.uge(VTBits - 1)) + return VTBits; // Sign splat. + unsigned Tmp = DAG.ComputeNumSignBits(Src, Depth + 1); ShiftVal += Tmp; return ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); } @@ -27048,6 +27291,13 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode( case X86ISD::VPCOMU: // Vector compares return zero/all-bits result values. return VTBits; + + case X86ISD::CMOV: { + unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp0 == 1) return 1; // Early out. + unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1); + return std::min(Tmp0, Tmp1); + } } // Fallback case. @@ -27130,7 +27380,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, // instructions are no slower than UNPCKLPD but has the option to // fold the input operand into even an unaligned memory load. 
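// A scalar model of the VTRUNC rule earlier in this hunk: truncating a value
// from NumSrcBits down to VTBits discards (NumSrcBits - VTBits) of its
// leading sign-bit copies, and the count never drops below 1. Illustrative
// helper, not LLVM API.
#include <cassert>

static unsigned truncNumSignBits(unsigned SrcSignBits, unsigned NumSrcBits,
                                 unsigned VTBits) {
  unsigned Dropped = NumSrcBits - VTBits;
  return SrcSignBits > Dropped ? SrcSignBits - Dropped : 1;
}

int main() {
  // i32 -> i8: a source with 30 sign bits keeps 30 - 24 = 6 of them.
  assert(truncNumSignBits(30, 32, 8) == 6);
  // With only 10 source sign bits, nothing useful survives the truncate.
  assert(truncNumSignBits(10, 32, 8) == 1);
  return 0;
}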
if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) { - if (isTargetShuffleEquivalent(Mask, {0, 0})) { + if (!Subtarget.hasAVX2() && isTargetShuffleEquivalent(Mask, {0, 0})) { Shuffle = X86ISD::MOVDDUP; SrcVT = DstVT = MVT::v2f64; return true; @@ -27331,7 +27581,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, SDValue &V1, SDValue &V2, SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, - unsigned &Shuffle, MVT &ShuffleVT, + unsigned &Shuffle, MVT &SrcVT, MVT &DstVT, bool IsUnary) { unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); @@ -27339,26 +27589,26 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) { V2 = V1; Shuffle = X86ISD::MOVLHPS; - ShuffleVT = MVT::v4f32; + SrcVT = DstVT = MVT::v4f32; return true; } if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) { V2 = V1; Shuffle = X86ISD::MOVHLPS; - ShuffleVT = MVT::v4f32; + SrcVT = DstVT = MVT::v4f32; return true; } if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) { std::swap(V1, V2); Shuffle = X86ISD::MOVSD; - ShuffleVT = MaskVT; + SrcVT = DstVT = MaskVT; return true; } if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) && (AllowFloatDomain || !Subtarget.hasSSE41())) { Shuffle = X86ISD::MOVSS; - ShuffleVT = MaskVT; + SrcVT = DstVT = MaskVT; return true; } } @@ -27371,9 +27621,9 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask, (MaskVT.is512BitVector() && Subtarget.hasAVX512())) { if (matchVectorShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL, DAG, Subtarget)) { - ShuffleVT = MaskVT; - if (ShuffleVT.is256BitVector() && !Subtarget.hasAVX2()) - ShuffleVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64); + SrcVT = DstVT = MaskVT; + if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) + SrcVT = DstVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64); return true; } } @@ -27706,15 +27956,15 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, } if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, - V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT, - UnaryShuffle)) { + V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, + ShuffleVT, UnaryShuffle)) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements())) return SDValue(); // AVX512 Writemask clash. - V1 = DAG.getBitcast(ShuffleVT, V1); + V1 = DAG.getBitcast(ShuffleSrcVT, V1); DCI.AddToWorklist(V1.getNode()); - V2 = DAG.getBitcast(ShuffleVT, V2); + V2 = DAG.getBitcast(ShuffleSrcVT, V2); DCI.AddToWorklist(V2.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2); DCI.AddToWorklist(Res.getNode()); @@ -28142,18 +28392,15 @@ static SDValue combineX86ShufflesConstants(const SmallVectorImpl &Ops, /// would simplify under the threshold for PSHUFB formation because of /// combine-ordering. To fix this, we should do the redundant instruction /// combining in this recursive walk. 
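Aside: the MOVLHPS/MOVHLPS/MOVSD/MOVSS matching above is driven by isTargetShuffleEquivalent, which treats undefined lanes in the candidate mask as wildcards. A simplified model of that check (my own sketch; the real helper also understands zero sentinels):

  #include <cstddef>
  #include <vector>

  // A candidate mask matches a reference pattern if every defined
  // (non-negative) lane agrees; negative lanes act as wildcards.
  bool masksEquivalent(const std::vector<int> &Mask,
                       const std::vector<int> &Ref) {
    if (Mask.size() != Ref.size())
      return false;
    for (size_t I = 0; I < Mask.size(); ++I)
      if (Mask[I] >= 0 && Mask[I] != Ref[I])
        return false;
    return true;
  }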
-static bool combineX86ShufflesRecursively(ArrayRef SrcOps, - int SrcOpIndex, SDValue Root, - ArrayRef RootMask, - ArrayRef SrcNodes, - int Depth, bool HasVariableMask, - SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { +static SDValue combineX86ShufflesRecursively( + ArrayRef SrcOps, int SrcOpIndex, SDValue Root, + ArrayRef RootMask, ArrayRef SrcNodes, int Depth, + bool HasVariableMask, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { // Bound the depth of our recursive combine because this is ultimately // quadratic in nature. if (Depth > 8) - return false; + return SDValue(); // Directly rip through bitcasts to find the underlying operand. SDValue Op = SrcOps[SrcOpIndex]; @@ -28161,7 +28408,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, MVT VT = Op.getSimpleValueType(); if (!VT.isVector()) - return false; // Bail if we hit a non-vector. + return SDValue(); // Bail if we hit a non-vector. assert(Root.getSimpleValueType().isVector() && "Shuffles operate on vector types!"); @@ -28172,7 +28419,7 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, SmallVector OpMask; SmallVector OpInputs; if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, DAG)) - return false; + return SDValue(); assert(OpInputs.size() <= 2 && "Too many shuffle inputs"); SDValue Input0 = (OpInputs.size() > 0 ? OpInputs[0] : SDValue()); @@ -28281,18 +28528,15 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, } // Handle the all undef/zero cases early. - if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) { - DCI.CombineTo(Root.getNode(), DAG.getUNDEF(Root.getValueType())); - return true; - } - if (all_of(Mask, [](int Idx) { return Idx < 0; })) { - // TODO - should we handle the mixed zero/undef case as well? Just returning - // a zero mask will lose information on undef elements possibly reducing - // future combine possibilities. - DCI.CombineTo(Root.getNode(), getZeroVector(Root.getSimpleValueType(), - Subtarget, DAG, SDLoc(Root))); - return true; - } + if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) + return DAG.getUNDEF(Root.getValueType()); + + // TODO - should we handle the mixed zero/undef case as well? Just returning + // a zero mask will lose information on undef elements possibly reducing + // future combine possibilities. + if (all_of(Mask, [](int Idx) { return Idx < 0; })) + return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, + SDLoc(Root)); // Remove unused shuffle source ops. resolveTargetShuffleInputsAndMask(Ops, Mask); @@ -28311,21 +28555,19 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, for (int i = 0, e = Ops.size(); i < e; ++i) if (Ops[i].getNode()->hasOneUse() || SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode())) - if (combineX86ShufflesRecursively(Ops, i, Root, Mask, CombinedNodes, - Depth + 1, HasVariableMask, DAG, DCI, - Subtarget)) - return true; + if (SDValue Res = combineX86ShufflesRecursively( + Ops, i, Root, Mask, CombinedNodes, Depth + 1, HasVariableMask, + DAG, DCI, Subtarget)) + return Res; // Attempt to constant fold all of the constant source ops. if (SDValue Cst = combineX86ShufflesConstants( - Ops, Mask, Root, HasVariableMask, DAG, DCI, Subtarget)) { - DCI.CombineTo(Root.getNode(), Cst); - return true; - } + Ops, Mask, Root, HasVariableMask, DAG, DCI, Subtarget)) + return Cst; // We can only combine unary and binary shuffle mask cases. 
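Aside: the early-outs refactored above are phrased in terms of the shuffle mask sentinels, SM_SentinelUndef (-1) for an undefined lane and SM_SentinelZero (-2) for a known-zero lane. A minimal model of the two checks under that encoding:

  #include <algorithm>
  #include <vector>

  constexpr int SentinelUndef = -1; // lane value is undefined
  constexpr int SentinelZero = -2;  // lane is known to be zero

  // All-undef masks fold to UNDEF.
  bool allUndef(const std::vector<int> &Mask) {
    return std::all_of(Mask.begin(), Mask.end(),
                       [](int Idx) { return Idx == SentinelUndef; });
  }
  // Masks made only of sentinels (undef or zero) fold to a zero vector.
  bool allZeroOrUndef(const std::vector<int> &Mask) {
    return std::all_of(Mask.begin(), Mask.end(),
                       [](int Idx) { return Idx < 0; });
  }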
if (Ops.size() > 2) - return false; + return SDValue(); // Minor canonicalization of the accumulated shuffle mask to make it easier // to match below. All this does is detect masks with sequential pairs of @@ -28345,12 +28587,8 @@ static bool combineX86ShufflesRecursively(ArrayRef SrcOps, } // Finally, try to combine into a single shuffle instruction. - if (SDValue Res = combineX86ShuffleChain( - Ops, Root, Mask, Depth, HasVariableMask, DAG, DCI, Subtarget)) { - DCI.CombineTo(Root.getNode(), Res, /*AddTo*/ true); - return true; - } - return false; + return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG, + DCI, Subtarget); } /// \brief Get the PSHUF-style mask from PSHUF node. @@ -28604,8 +28842,37 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, SDLoc DL(N); MVT VT = N.getSimpleValueType(); SmallVector Mask; - unsigned Opcode = N.getOpcode(); + + // Combine binary shuffle of 2 similar 'Horizontal' instructions into a + // single instruction. + if (VT.getScalarSizeInBits() == 64 && + (Opcode == X86ISD::MOVSD || Opcode == X86ISD::UNPCKH || + Opcode == X86ISD::UNPCKL)) { + auto BC0 = peekThroughBitcasts(N.getOperand(0)); + auto BC1 = peekThroughBitcasts(N.getOperand(1)); + EVT VT0 = BC0.getValueType(); + EVT VT1 = BC1.getValueType(); + unsigned Opcode0 = BC0.getOpcode(); + unsigned Opcode1 = BC1.getOpcode(); + if (Opcode0 == Opcode1 && VT0 == VT1 && + (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD || + Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB || + Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS)) { + SDValue Lo, Hi; + if (Opcode == X86ISD::MOVSD) { + Lo = BC1.getOperand(0); + Hi = BC0.getOperand(1); + } else { + Lo = BC0.getOperand(Opcode == X86ISD::UNPCKH ? 1 : 0); + Hi = BC1.getOperand(Opcode == X86ISD::UNPCKH ? 1 : 0); + } + SDValue Horiz = DAG.getNode(Opcode0, DL, VT0, Lo, Hi); + DCI.AddToWorklist(Horiz.getNode()); + return DAG.getBitcast(VT, Horiz); + } + } + switch (Opcode) { case X86ISD::PSHUFD: case X86ISD::PSHUFLW: @@ -28614,17 +28881,6 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, assert(Mask.size() == 4); break; case X86ISD::UNPCKL: { - auto Op0 = N.getOperand(0); - auto Op1 = N.getOperand(1); - unsigned Opcode0 = Op0.getOpcode(); - unsigned Opcode1 = Op1.getOpcode(); - - // Combine X86ISD::UNPCKL with 2 X86ISD::FHADD inputs into a single - // X86ISD::FHADD. This is generated by UINT_TO_FP v2f64 scalarization. - // TODO: Add other horizontal operations as required. - if (VT == MVT::v2f64 && Opcode0 == Opcode1 && Opcode0 == X86ISD::FHADD) - return DAG.getNode(Opcode0, DL, VT, Op0.getOperand(0), Op1.getOperand(0)); - // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in // which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE // moves upper half elements into the lower half part. For example: @@ -28642,7 +28898,9 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, if (!VT.is128BitVector()) return SDValue(); - if (Op0.isUndef() && Opcode1 == ISD::VECTOR_SHUFFLE) { + auto Op0 = N.getOperand(0); + auto Op1 = N.getOperand(1); + if (Op0.isUndef() && Op1.getOpcode() == ISD::VECTOR_SHUFFLE) { ArrayRef Mask = cast(Op1.getNode())->getMask(); unsigned NumElts = VT.getVectorNumElements(); @@ -29157,10 +29415,12 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, // specific PSHUF instruction sequences into their minimal form so that we // can evaluate how many specialized shuffle instructions are involved in // a particular chain. 
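Aside: the new horizontal-op combine above merges a 64-bit-element shuffle of two matching HADD/HSUB/PACK nodes into a single node. For the v2f64 UNPCKL case the identity is easy to verify in scalar form (illustrative sketch):

  #include <array>

  using V2 = std::array<double, 2>;

  // haddpd: lane 0 sums the first source, lane 1 sums the second.
  V2 hadd(V2 X, V2 Y) { return {X[0] + X[1], Y[0] + Y[1]}; }
  // unpcklpd: take lane 0 of each source.
  V2 unpcklpd(V2 X, V2 Y) { return {X[0], Y[0]}; }

  // For all A, B, C, D: unpcklpd(hadd(A, B), hadd(C, D)) == hadd(A, C),
  // which matches the Lo/Hi operand selection in the combine above.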
- if (combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, DCI, - Subtarget)) - return SDValue(); // This routine will use CombineTo to replace N. + if (SDValue Res = combineX86ShufflesRecursively( + {Op}, 0, Op, {0}, {}, /*Depth*/ 1, + /*HasVarMask*/ false, DAG, DCI, Subtarget)) { + DCI.CombineTo(N, Res); + return SDValue(); + } } return SDValue(); @@ -29290,7 +29550,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, // v8i16 and v16i16. // For these two cases, we can shuffle the upper element bytes to a // consecutive sequence at the start of the vector and treat the results as - // v16i8 or v32i8, and for v61i8 this is the preferable solution. However, + // v16i8 or v32i8, and for v16i8 this is the preferable solution. However, // for v16i16 this is not the case, because the shuffle is expensive, so we // avoid sign-extending to this type entirely. // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as: @@ -29309,9 +29569,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, FPCastVT = MVT::v4f32; // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)) // sign-extend to a 256-bit operation to avoid truncation. - if (N0->getOpcode() == ISD::SETCC && - N0->getOperand(0)->getValueType(0).is256BitVector() && - Subtarget.hasAVX()) { + if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() && + N0->getOperand(0)->getValueType(0).is256BitVector()) { SExtVT = MVT::v4i64; FPCastVT = MVT::v4f64; } @@ -29323,9 +29582,9 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over // 256-bit because the shuffle is cheaper than sign extending the result of // the compare. - if (N0->getOpcode() == ISD::SETCC && - N0->getOperand(0)->getValueType(0).is256BitVector() && - Subtarget.hasAVX()) { + if (N0->getOpcode() == ISD::SETCC && Subtarget.hasAVX() && + (N0->getOperand(0)->getValueType(0).is256BitVector() || + N0->getOperand(0)->getValueType(0).is512BitVector())) { SExtVT = MVT::v8i32; FPCastVT = MVT::v8f32; } @@ -29360,13 +29619,12 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, } if (SExtVT == MVT::v8i16) { - V = DAG.getBitcast(MVT::v16i8, V); - V = DAG.getVectorShuffle( - MVT::v16i8, DL, V, DAG.getUNDEF(MVT::v16i8), - {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1}); + assert(16 == DAG.ComputeNumSignBits(V) && "Expected all/none bit vector"); + V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V, + DAG.getUNDEF(MVT::v8i16)); } else assert(SExtVT.getScalarType() != MVT::i16 && - "Vectors of i16 must be shuffled"); + "Vectors of i16 must be packed"); if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE) V = DAG.getBitcast(FPCastVT, V); V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V); @@ -29759,7 +30017,7 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG, unsigned TypeSizeInBits = Type.getSizeInBits(); // Return the lowest TypeSizeInBits bits. MVT ResVT = MVT::getVectorVT(Type, SadVT.getSizeInBits() / TypeSizeInBits); - SAD = DAG.getNode(ISD::BITCAST, DL, ResVT, SAD); + SAD = DAG.getBitcast(ResVT, SAD); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Type, SAD, Extract->getOperand(1)); } @@ -29844,9 +30102,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, unsigned OpCode = (SrcVT == MVT::v8i16 ? 
X86ISD::PEXTRW : X86ISD::PEXTRB); SDValue ExtOp = DAG.getNode(OpCode, dl, MVT::i32, SrcOp, DAG.getIntPtrConstant(SrcIdx, dl)); - SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, ExtOp, - DAG.getValueType(SrcSVT)); - return DAG.getZExtOrTrunc(Assert, dl, VT); + return DAG.getZExtOrTrunc(ExtOp, dl, VT); } return SDValue(); @@ -30228,7 +30484,7 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG, case X86ISD::VALIGN: { if (EltVT != MVT::i32 && EltVT != MVT::i64) return false; - uint64_t Imm = cast(Op.getOperand(2))->getZExtValue(); + uint64_t Imm = Op.getConstantOperandVal(2); MVT OpEltVT = Op.getSimpleValueType().getVectorElementType(); unsigned ShiftAmt = Imm * OpEltVT.getSizeInBits(); unsigned EltSize = EltVT.getSizeInBits(); @@ -30248,29 +30504,6 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG, return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2)); } - case ISD::INSERT_SUBVECTOR: { - unsigned EltSize = EltVT.getSizeInBits(); - if (EltSize != 32 && EltSize != 64) - return false; - MVT OpEltVT = Op.getSimpleValueType().getVectorElementType(); - // Only change element size, not type. - if (EltVT.isInteger() != OpEltVT.isInteger()) - return false; - uint64_t Imm = cast(Op.getOperand(2))->getZExtValue(); - Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize; - SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0)); - DCI.AddToWorklist(Op0.getNode()); - // Op1 needs to be bitcasted to a smaller vector with the same element type. - SDValue Op1 = Op.getOperand(1); - MVT Op1VT = MVT::getVectorVT(EltVT, - Op1.getSimpleValueType().getSizeInBits() / EltSize); - Op1 = DAG.getBitcast(Op1VT, Op1); - DCI.AddToWorklist(Op1.getNode()); - DCI.CombineTo(OrigOp.getNode(), - DAG.getNode(Opcode, DL, VT, Op0, Op1, - DAG.getIntPtrConstant(Imm, DL))); - return true; - } case X86ISD::SUBV_BROADCAST: { unsigned EltSize = EltVT.getSizeInBits(); if (EltSize != 32 && EltSize != 64) @@ -30990,10 +31223,6 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDLoc DL(N); - // If the flag operand isn't dead, don't touch this CMOV. - if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty()) - return SDValue(); - SDValue FalseOp = N->getOperand(0); SDValue TrueOp = N->getOperand(1); X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); @@ -31016,7 +31245,7 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) { SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8), Flags}; - return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops); + return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); } } @@ -31045,8 +31274,6 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, unsigned ShAmt = TrueC->getAPIntValue().logBase2(); Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond, DAG.getConstant(ShAmt, DL, MVT::i8)); - if (N->getNumValues() == 2) // Dead flag value? - return DCI.CombineTo(N, Cond, SDValue()); return Cond; } @@ -31060,9 +31287,6 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, FalseC->getValueType(0), Cond); Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); - - if (N->getNumValues() == 2) // Dead flag value? 
- return DCI.CombineTo(N, Cond, SDValue()); return Cond; } @@ -31103,8 +31327,6 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, if (FalseC->getAPIntValue() != 0) Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, SDValue(FalseC, 0)); - if (N->getNumValues() == 2) // Dead flag value? - return DCI.CombineTo(N, Cond, SDValue()); return Cond; } } @@ -31144,7 +31366,7 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, CmpAgainst == dyn_cast(TrueOp)) { SDValue Ops[] = { FalseOp, Cond.getOperand(0), DAG.getConstant(CC, DL, MVT::i8), Cond }; - return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops); + return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); } } } @@ -31179,10 +31401,9 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, SDValue LOps[] = {FalseOp, TrueOp, DAG.getConstant(CC0, DL, MVT::i8), Flags}; - SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), LOps); + SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps); SDValue Ops[] = {LCMOV, TrueOp, DAG.getConstant(CC1, DL, MVT::i8), Flags}; - SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(CMOV.getNode(), 1)); + SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); return CMOV; } } @@ -31310,15 +31531,19 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getOperand(0).getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + if ((NumElts % 2) != 0) + return SDValue(); + unsigned RegSize = 128; MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16); - EVT ReducedVT = - EVT::getVectorVT(*DAG.getContext(), MVT::i16, VT.getVectorNumElements()); + EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); + // Shrink the operands of mul. SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0); SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1); - if (VT.getVectorNumElements() >= OpsVT.getVectorNumElements()) { + if (NumElts >= OpsVT.getVectorNumElements()) { // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the // lower part is needed. SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1); @@ -31326,7 +31551,7 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, return DAG.getNode((Mode == MULU8) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND, DL, VT, MulLo); } else { - MVT ResVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); + MVT ResVT = MVT::getVectorVT(MVT::i32, NumElts / 2); // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16, // the higher part is also needed. SDValue MulHi = DAG.getNode(Mode == MULS16 ? ISD::MULHS : ISD::MULHU, DL, @@ -31335,22 +31560,22 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, // Repack the lower part and higher part result of mul into a wider // result. // Generate shuffle functioning as punpcklwd. - SmallVector ShuffleMask(VT.getVectorNumElements()); - for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++) { + SmallVector ShuffleMask(NumElts); + for (unsigned i = 0, e = NumElts / 2; i < e; i++) { ShuffleMask[2 * i] = i; - ShuffleMask[2 * i + 1] = i + VT.getVectorNumElements(); + ShuffleMask[2 * i + 1] = i + NumElts; } SDValue ResLo = DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); - ResLo = DAG.getNode(ISD::BITCAST, DL, ResVT, ResLo); + ResLo = DAG.getBitcast(ResVT, ResLo); // Generate shuffle functioning as punpckhwd. 
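Aside: the punpcklwd/punpckhwd shuffles built here interleave the pmullw and pmulhw/pmulhuw halves so each pair of 16-bit lanes reassembles a full 32-bit product. One unsigned lane in scalar form (illustration only):

  #include <cstdint>

  // The interleaved (lo, hi) halves of a 16x16 multiply equal the full
  // 32-bit product.
  uint32_t widenedMul(uint16_t A, uint16_t B) {
    uint16_t Lo = static_cast<uint16_t>(uint32_t(A) * B);         // pmullw
    uint16_t Hi = static_cast<uint16_t>((uint32_t(A) * B) >> 16); // pmulhuw
    return uint32_t(Lo) | (uint32_t(Hi) << 16);                   // punpcklwd
  }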
- for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++) { - ShuffleMask[2 * i] = i + VT.getVectorNumElements() / 2; - ShuffleMask[2 * i + 1] = i + VT.getVectorNumElements() * 3 / 2; + for (unsigned i = 0, e = NumElts / 2; i < e; i++) { + ShuffleMask[2 * i] = i + NumElts / 2; + ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2; } SDValue ResHi = DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); - ResHi = DAG.getNode(ISD::BITCAST, DL, ResVT, ResHi); + ResHi = DAG.getBitcast(ResVT, ResHi); return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi); } } else { @@ -31397,7 +31622,7 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, // result. Make sure the type of mul result is VT. MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32); SDValue Res = getUnpackl(DAG, DL, OpsVT, MulLo, MulHi); - Res = DAG.getNode(ISD::BITCAST, DL, ResVT, Res); + Res = DAG.getBitcast(ResVT, Res); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, DAG.getIntPtrConstant(0, DL)); } @@ -31786,6 +32011,90 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG, return SDValue(); } +static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + unsigned Opcode = N->getOpcode(); + assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) && + "Unexpected shift opcode"); + + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + unsigned DstBitsPerElt = VT.getScalarSizeInBits(); + unsigned SrcBitsPerElt = 2 * DstBitsPerElt; + assert(N0.getScalarValueSizeInBits() == SrcBitsPerElt && + N1.getScalarValueSizeInBits() == SrcBitsPerElt && + "Unexpected PACKSS/PACKUS input type"); + + // Constant Folding. + APInt UndefElts0, UndefElts1; + SmallVector EltBits0, EltBits1; + if ((N0->isUndef() || N->isOnlyUserOf(N0.getNode())) && + (N1->isUndef() || N->isOnlyUserOf(N1.getNode())) && + getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) && + getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) { + unsigned NumLanes = VT.getSizeInBits() / 128; + unsigned NumDstElts = VT.getVectorNumElements(); + unsigned NumSrcElts = NumDstElts / 2; + unsigned NumDstEltsPerLane = NumDstElts / NumLanes; + unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes; + bool IsSigned = (X86ISD::PACKSS == Opcode); + + APInt Undefs(NumDstElts, 0); + SmallVector Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt)); + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) { + unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane; + auto &UndefElts = (Elt >= NumSrcEltsPerLane ? UndefElts1 : UndefElts0); + auto &EltBits = (Elt >= NumSrcEltsPerLane ? EltBits1 : EltBits0); + + if (UndefElts[SrcIdx]) { + Undefs.setBit(Lane * NumDstEltsPerLane + Elt); + continue; + } + + APInt &Val = EltBits[SrcIdx]; + if (IsSigned) { + // PACKSS: Truncate signed value with signed saturation. + // Source values less than dst minint are saturated to minint. + // Source values greater than dst maxint are saturated to maxint. + if (Val.isSignedIntN(DstBitsPerElt)) + Val = Val.trunc(DstBitsPerElt); + else if (Val.isNegative()) + Val = APInt::getSignedMinValue(DstBitsPerElt); + else + Val = APInt::getSignedMaxValue(DstBitsPerElt); + } else { + // PACKUS: Truncate signed value with unsigned saturation. + // Source values less than zero are saturated to zero. + // Source values greater than dst maxuint are saturated to maxuint. 
+ if (Val.isIntN(DstBitsPerElt)) + Val = Val.trunc(DstBitsPerElt); + else if (Val.isNegative()) + Val = APInt::getNullValue(DstBitsPerElt); + else + Val = APInt::getAllOnesValue(DstBitsPerElt); + } + Bits[Lane * NumDstEltsPerLane + Elt] = Val; + } + } + + return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N)); + } + + // Attempt to combine as shuffle. + SDValue Op(N, 0); + if (SDValue Res = combineX86ShufflesRecursively( + {Op}, 0, Op, {0}, {}, /*Depth*/ 1, + /*HasVarMask*/ false, DAG, DCI, Subtarget)) { + DCI.CombineTo(N, Res); + return SDValue(); + } + + return SDValue(); +} + static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -31826,13 +32135,24 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, N0.getOpcode() == X86ISD::VSRAI) return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, N0.getOperand(0), N1); + // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1 + if (Opcode == X86ISD::VSRAI && N0.getOpcode() == X86ISD::VSHLI && + N1 == N0.getOperand(1)) { + SDValue N00 = N0.getOperand(0); + unsigned NumSignBits = DAG.ComputeNumSignBits(N00); + if (ShiftVal.ult(NumSignBits)) + return N00; + } + // We can decode 'whole byte' logical bit shifts as shuffles. if (LogicalShift && (ShiftVal.getZExtValue() % 8) == 0) { SDValue Op(N, 0); - if (combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, DCI, - Subtarget)) - return SDValue(); // This routine will use CombineTo to replace N. + if (SDValue Res = combineX86ShufflesRecursively( + {Op}, 0, Op, {0}, {}, /*Depth*/ 1, + /*HasVarMask*/ false, DAG, DCI, Subtarget)) { + DCI.CombineTo(N, Res); + return SDValue(); + } } // Constant Folding. @@ -31868,8 +32188,13 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG, // Attempt to combine PINSRB/PINSRW patterns to a shuffle. SDValue Op(N, 0); - combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, DCI, Subtarget); + if (SDValue Res = combineX86ShufflesRecursively( + {Op}, 0, Op, {0}, {}, /*Depth*/ 1, + /*HasVarMask*/ false, DAG, DCI, Subtarget)) { + DCI.CombineTo(N, Res); + return SDValue(); + } + return SDValue(); } @@ -32169,10 +32494,51 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, // Attempt to recursively combine a bitmask AND with shuffles. if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { SDValue Op(N, 0); - if (combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, DCI, - Subtarget)) - return SDValue(); // This routine will use CombineTo to replace N. + if (SDValue Res = combineX86ShufflesRecursively( + {Op}, 0, Op, {0}, {}, /*Depth*/ 1, + /*HasVarMask*/ false, DAG, DCI, Subtarget)) { + DCI.CombineTo(N, Res); + return SDValue(); + } + } + + // Attempt to combine a scalar bitmask AND with an extracted shuffle. + if ((VT.getScalarSizeInBits() % 8) == 0 && + N->getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa(N->getOperand(0).getOperand(1))) { + SDValue BitMask = N->getOperand(1); + SDValue SrcVec = N->getOperand(0).getOperand(0); + EVT SrcVecVT = SrcVec.getValueType(); + + // Check that the constant bitmask masks whole bytes. 
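Aside: the constant folding in combineVectorPack above applies the packing semantics lane by lane. A scalar model for 16-to-8-bit lanes (not LLVM's API): PACKSS clamps to the signed range, PACKUS clamps negatives to zero and large values to the unsigned maximum.

  #include <cstdint>

  // PACKSS lane: truncate with signed saturation.
  int8_t packssLane(int16_t V) {
    if (V < INT8_MIN) return INT8_MIN;
    if (V > INT8_MAX) return INT8_MAX;
    return static_cast<int8_t>(V);
  }
  // PACKUS lane: truncate with unsigned saturation of a signed input.
  uint8_t packusLane(int16_t V) {
    if (V < 0) return 0;
    if (V > UINT8_MAX) return UINT8_MAX;
    return static_cast<uint8_t>(V);
  }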
+ APInt UndefElts; + SmallVector EltBits; + if (VT == SrcVecVT.getScalarType() && + N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) && + getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) && + llvm::all_of(EltBits, [](APInt M) { + return M.isNullValue() || M.isAllOnesValue(); + })) { + unsigned NumElts = SrcVecVT.getVectorNumElements(); + unsigned Scale = SrcVecVT.getScalarSizeInBits() / 8; + unsigned Idx = N->getOperand(0).getConstantOperandVal(1); + + // Create a root shuffle mask from the byte mask and the extracted index. + SmallVector ShuffleMask(NumElts * Scale, SM_SentinelUndef); + for (unsigned i = 0; i != Scale; ++i) { + if (UndefElts[i]) + continue; + int VecIdx = Scale * Idx + i; + ShuffleMask[VecIdx] = + EltBits[i].isNullValue() ? SM_SentinelZero : VecIdx; + } + + if (SDValue Shuffle = combineX86ShufflesRecursively( + {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2, + /*HasVarMask*/ false, DAG, DCI, Subtarget)) + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle, + N->getOperand(0).getOperand(1)); + } } return SDValue(); @@ -32517,38 +32883,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// Generate NEG and CMOV for integer abs. -static SDValue combineIntegerAbs(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - - // Since X86 does not have CMOV for 8-bit integer, we don't convert - // 8-bit integer abs to NEG and CMOV. - if (VT.isInteger() && VT.getSizeInBits() == 8) - return SDValue(); - - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDLoc DL(N); - - // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1) - // and change it to SUB and CMOV. - if (VT.isInteger() && N->getOpcode() == ISD::XOR && - N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 && - N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0)) { - auto *Y1C = dyn_cast(N1.getOperand(1)); - if (Y1C && Y1C->getAPIntValue() == VT.getSizeInBits() - 1) { - // Generate SUB & CMOV. 
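Aside on the combineIntegerAbs removal above: the XOR(ADD(X, Y), Y) pattern it matched, with Y = SRA(X, bits-1), is the classic branchless absolute value. A scalar sketch (valid for every input except the minimum, where the add overflows):

  #include <cstdint>

  // Y is 0 for non-negative X and -1 for negative X, so (X + Y) ^ Y
  // yields X or -X respectively.
  int32_t absViaSra(int32_t X) {
    int32_t Y = X >> 31; // arithmetic shift: sign splat
    return (X + Y) ^ Y;  // undefined for INT32_MIN (signed overflow)
  }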
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), - DAG.getConstant(0, DL, VT), N0.getOperand(0)); - SDValue Ops[] = {N0.getOperand(0), Neg, - DAG.getConstant(X86::COND_GE, DL, MVT::i8), - SDValue(Neg.getNode(), 1)}; - return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops); - } - } - return SDValue(); -} - /// Try to turn tests against the signbit in the form of: /// XOR(TRUNCATE(SRL(X, size(X)-1)), 1) /// into: @@ -34098,6 +34432,23 @@ static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, return SDValue(); } + +/// Fold a xor(setcc cond, val), 1 --> setcc (inverted(cond), val) +static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) { + if (N->getOpcode() != ISD::XOR) + return SDValue(); + + SDValue LHS = N->getOperand(0); + auto *RHSC = dyn_cast(N->getOperand(1)); + if (!RHSC || RHSC->getZExtValue() != 1 || LHS->getOpcode() != X86ISD::SETCC) + return SDValue(); + + X86::CondCode NewCC = X86::GetOppositeBranchCondition( + X86::CondCode(LHS->getConstantOperandVal(0))); + SDLoc DL(N); + return getSETCC(NewCC, LHS->getOperand(1), DL, DAG); +} + static SDValue combineXor(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -34107,13 +34458,12 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalizeOps()) return SDValue(); + if (SDValue SetCC = foldXor1SetCC(N, DAG)) + return SetCC; + if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG)) return RV; - if (Subtarget.hasCMov()) - if (SDValue RV = combineIntegerAbs(N, DAG)) - return RV; - if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) return FPLogic; @@ -34316,10 +34666,12 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, // Attempt to recursively combine a bitmask ANDNP with shuffles. if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { SDValue Op(N, 0); - if (combineX86ShufflesRecursively({Op}, 0, Op, {0}, {}, /*Depth*/ 1, - /*HasVarMask*/ false, DAG, DCI, - Subtarget)) - return SDValue(); // This routine will use CombineTo to replace N. + if (SDValue Res = combineX86ShufflesRecursively( + {Op}, 0, Op, {0}, {}, /*Depth*/ 1, + /*HasVarMask*/ false, DAG, DCI, Subtarget)) { + DCI.CombineTo(N, Res); + return SDValue(); + } } return SDValue(); @@ -34908,6 +35260,11 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, if (!OpVT.isScalarInteger() || OpSize < 128 || isNullConstant(Y)) return SDValue(); + // Bail out if we know that this is not really just an oversized integer. + if (peekThroughBitcasts(X).getValueType() == MVT::f128 || + peekThroughBitcasts(Y).getValueType() == MVT::f128) + return SDValue(); + // TODO: Use PXOR + PTEST for SSE4.1 or later? // TODO: Add support for AVX-512. EVT VT = SetCC->getValueType(0); @@ -35587,6 +35944,89 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, return combineAddOrSubToADCOrSBB(N, DAG); } +static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + EVT VT = N->getValueType(0); + + // PSUBUS is supported, starting from SSE2, but special preprocessing + // for v8i32 requires umin, which appears in SSE41. 
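Aside: combineSubToSubus, whose body continues below, rests on two identities relating unsigned max/min to the saturating subtraction PSUBUS computes per lane. A scalar check in uint8 (my own illustration):

  #include <algorithm>
  #include <cstdint>

  // PSUBUS lane: unsigned subtract, clamped at zero.
  uint8_t usubsat(uint8_t A, uint8_t B) { return A > B ? A - B : 0; }

  // Both rewritten patterns equal the saturating subtract for all A, B:
  //   umax(A, B) - B == usubsat(A, B)
  //   A - umin(A, B) == usubsat(A, B)
  bool identitiesHold(uint8_t A, uint8_t B) {
    return uint8_t(std::max(A, B) - B) == usubsat(A, B) &&
           uint8_t(A - std::min(A, B)) == usubsat(A, B);
  }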
+  if (!(Subtarget.hasSSE2() && (VT == MVT::v16i8 || VT == MVT::v8i16)) &&
+      !(Subtarget.hasSSE41() && (VT == MVT::v8i32)) &&
+      !(Subtarget.hasAVX2() && (VT == MVT::v32i8 || VT == MVT::v16i16)) &&
+      !(Subtarget.hasAVX512() && Subtarget.hasBWI() &&
+        (VT == MVT::v64i8 || VT == MVT::v32i16 || VT == MVT::v16i32 ||
+         VT == MVT::v8i64)))
+    return SDValue();
+
+  SDValue SubusLHS, SubusRHS;
+  // Try to find umax(a,b) - b or a - umin(a,b) patterns;
+  // they may be converted to subus(a,b).
+  // TODO: Need to add IR canonicalization for this code.
+  if (Op0.getOpcode() == ISD::UMAX) {
+    SubusRHS = Op1;
+    SDValue MaxLHS = Op0.getOperand(0);
+    SDValue MaxRHS = Op0.getOperand(1);
+    if (MaxLHS == Op1)
+      SubusLHS = MaxRHS;
+    else if (MaxRHS == Op1)
+      SubusLHS = MaxLHS;
+    else
+      return SDValue();
+  } else if (Op1.getOpcode() == ISD::UMIN) {
+    SubusLHS = Op0;
+    SDValue MinLHS = Op1.getOperand(0);
+    SDValue MinRHS = Op1.getOperand(1);
+    if (MinLHS == Op0)
+      SubusRHS = MinRHS;
+    else if (MinRHS == Op0)
+      SubusRHS = MinLHS;
+    else
+      return SDValue();
+  } else
+    return SDValue();
+
+  // PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with
+  // special preprocessing in some cases.
+  if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64)
+    return DAG.getNode(X86ISD::SUBUS, SDLoc(N), VT, SubusLHS, SubusRHS);
+
+  // The special preprocessing can only be applied if the value was zero
+  // extended from 16 bits, so we require the upper 16 bits to be zero for
+  // 32-bit values, or the upper 48 bits for 64-bit values.
+  KnownBits Known;
+  DAG.computeKnownBits(SubusLHS, Known);
+  unsigned NumZeros = Known.countMinLeadingZeros();
+  if ((VT == MVT::v8i64 && NumZeros < 48) || NumZeros < 16)
+    return SDValue();
+
+  EVT ExtType = SubusLHS.getValueType();
+  EVT ShrinkedType;
+  if (VT == MVT::v8i32 || VT == MVT::v8i64)
+    ShrinkedType = MVT::v8i16;
+  else
+    ShrinkedType = NumZeros >= 24 ? MVT::v16i8 : MVT::v16i16;
+
+  // If SubusLHS is zero extended, truncate SubusRHS to its size:
+  // SubusRHS = umin(0xFFF.., SubusRHS).
+  SDValue SaturationConst =
+      DAG.getConstant(APInt::getLowBitsSet(ExtType.getScalarSizeInBits(),
+                                           ShrinkedType.getScalarSizeInBits()),
+                      SDLoc(SubusLHS), ExtType);
+  SDValue UMin = DAG.getNode(ISD::UMIN, SDLoc(SubusLHS), ExtType, SubusRHS,
+                             SaturationConst);
+  SDValue NewSubusLHS =
+      DAG.getZExtOrTrunc(SubusLHS, SDLoc(SubusLHS), ShrinkedType);
+  SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType);
+  SDValue Psubus = DAG.getNode(X86ISD::SUBUS, SDLoc(N), ShrinkedType,
+                               NewSubusLHS, NewSubusRHS);
+  // Zero extend the result; it may be used somewhere as 32 bit. If not,
+  // the zext and a following trunc will fold away.
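Aside: a scalar model of the narrowing sequence above (clamp the RHS with umin, truncate both sides, subtract with saturation), under the precondition established by computeKnownBits that the LHS fits in 16 bits; the final return below then widens the result back:

  #include <algorithm>
  #include <cstdint>

  // Precondition: Lhs < 0x10000 (the known-leading-zeros check above).
  uint16_t narrowedSubus(uint32_t Lhs, uint32_t Rhs) {
    uint32_t Clamped = std::min<uint32_t>(Rhs, 0xFFFF); // umin(RHS, const)
    uint16_t A = static_cast<uint16_t>(Lhs);            // trunc
    uint16_t B = static_cast<uint16_t>(Clamped);        // trunc
    return A > B ? A - B : 0;                           // psubus lane
  }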
+ return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType); +} + static SDValue combineSub(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { SDValue Op0 = N->getOperand(0); @@ -35620,6 +36060,10 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineIncDecVector(N, DAG)) return V; + // Try to create PSUBUS if SUB's argument is max/min + if (SDValue V = combineSubToSubus(N, DAG, Subtarget)) + return V; + return combineAddOrSubToADCOrSBB(N, DAG); } @@ -35787,9 +36231,8 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, SDLoc dl(N); SDValue Vec = N->getOperand(0); SDValue SubVec = N->getOperand(1); - SDValue Idx = N->getOperand(2); - unsigned IdxVal = cast(Idx)->getZExtValue(); + unsigned IdxVal = N->getConstantOperandVal(2); MVT SubVecVT = SubVec.getSimpleValueType(); if (ISD::isBuildVectorAllZeros(Vec.getNode())) { @@ -35801,11 +36244,25 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, // just insert into the larger zero vector directly. if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR && ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) { - unsigned Idx2Val = cast(Idx)->getZExtValue(); + unsigned Idx2Val = SubVec.getConstantOperandVal(2); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec.getOperand(1), DAG.getIntPtrConstant(IdxVal + Idx2Val, dl)); } + + // If we're inserting a bitcast into zeros, rewrite the insert and move the + // bitcast to the other side. This helps with detecting zero extending + // during isel. + // TODO: Is this useful for other indices than 0? + if (SubVec.getOpcode() == ISD::BITCAST && IdxVal == 0) { + MVT CastVT = SubVec.getOperand(0).getSimpleValueType(); + unsigned NumElems = OpVT.getSizeInBits() / CastVT.getScalarSizeInBits(); + MVT NewVT = MVT::getVectorVT(CastVT.getVectorElementType(), NumElems); + SDValue Insert = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, + DAG.getBitcast(NewVT, Vec), + SubVec.getOperand(0), N->getOperand(2)); + return DAG.getBitcast(OpVT, Insert); + } } // If this is an insert of an extract, combine to a shuffle. 
Don't do this @@ -35813,7 +36270,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && SubVec.getOperand(0).getSimpleValueType() == OpVT && (IdxVal != 0 || !Vec.isUndef())) { - int ExtIdxVal = cast(SubVec.getOperand(1))->getZExtValue(); + int ExtIdxVal = SubVec.getConstantOperandVal(1); if (ExtIdxVal != 0) { int VecNumElts = OpVT.getVectorNumElements(); int SubVecNumElts = SubVecVT.getVectorNumElements(); @@ -35895,7 +36352,8 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT), SubVec2, Vec.getOperand(2)); DCI.AddToWorklist(Vec.getNode()); - return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec, Idx); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec, SubVec, + N->getOperand(2)); } } @@ -35988,6 +36446,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SETCC: return combineSetCC(N, DAG, Subtarget); case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget); case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget); + case X86ISD::PACKSS: + case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget); case X86ISD::VSHLI: case X86ISD::VSRAI: case X86ISD::VSRLI: @@ -36019,6 +36479,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::MOVDDUP: case X86ISD::MOVSS: case X86ISD::MOVSD: + case X86ISD::VBROADCAST: case X86ISD::VPPERM: case X86ISD::VPERMI: case X86ISD::VPERMV: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 8b023b7a260a6..272dc615009e7 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -725,19 +725,6 @@ namespace llvm { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - // Return true if it is profitable to combine a BUILD_VECTOR to a TRUNCATE - // for given operand and result types. - // Example of such a combine: - // v4i32 build_vector((extract_elt V, 0), - // (extract_elt V, 2), - // (extract_elt V, 4), - // (extract_elt V, 6)) - // --> - // v4i32 truncate (bitcast V to v4i64) - bool isDesirableToCombineBuildVectorToTruncate() const override { - return true; - } - // Return true if it is profitable to combine a BUILD_VECTOR with a // stride-pattern to a shuffle and a truncate. 
// Example of such a combine: @@ -1189,6 +1176,7 @@ namespace llvm { SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 59064b3ccafc3..4002b1f1969d7 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -615,6 +615,139 @@ defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info, defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info, vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; + +multiclass vinsert_for_mask_cast p> { +let Predicates = p in { + def : Pat<(Cast.VT + (vselect Cast.KRCWM:$mask, + (bitconvert + (vinsert_insert:$ins (To.VT To.RC:$src1), + (From.VT From.RC:$src2), + (iPTR imm))), + Cast.RC:$src0)), + (!cast(InstrStr#"rrk") + Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, + (INSERT_get_vinsert_imm To.RC:$ins))>; + def : Pat<(Cast.VT + (vselect Cast.KRCWM:$mask, + (bitconvert + (vinsert_insert:$ins (To.VT To.RC:$src1), + (From.VT + (bitconvert + (From.LdFrag addr:$src2))), + (iPTR imm))), + Cast.RC:$src0)), + (!cast(InstrStr#"rmk") + Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, + (INSERT_get_vinsert_imm To.RC:$ins))>; + + def : Pat<(Cast.VT + (vselect Cast.KRCWM:$mask, + (bitconvert + (vinsert_insert:$ins (To.VT To.RC:$src1), + (From.VT From.RC:$src2), + (iPTR imm))), + Cast.ImmAllZerosV)), + (!cast(InstrStr#"rrkz") + Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2, + (INSERT_get_vinsert_imm To.RC:$ins))>; + def : Pat<(Cast.VT + (vselect Cast.KRCWM:$mask, + (bitconvert + (vinsert_insert:$ins (To.VT To.RC:$src1), + (From.VT + (bitconvert + (From.LdFrag addr:$src2))), + (iPTR imm))), + Cast.ImmAllZerosV)), + (!cast(InstrStr#"rmkz") + Cast.KRCWM:$mask, To.RC:$src1, addr:$src2, + (INSERT_get_vinsert_imm To.RC:$ins))>; +} +} + +defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, + v8f32x_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasVLX]>; +defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info, + v4f64x_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; + +defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, + v8i32x_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasVLX]>; +defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, + v8i32x_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasVLX]>; +defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, + v8i32x_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasVLX]>; +defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info, + v4i64x_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; +defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info, + v4i64x_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; +defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info, + v4i64x_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>; + +defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info, + v16f32_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasAVX512]>; 
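Aside: the vinsert_for_mask_cast patterns fold a bitcast between the masked vselect and the subvector insert so the masked VINSERT forms can be selected directly. The per-lane merge semantics the patterns preserve are simple; a scalar sketch (illustrative, not the TableGen semantics themselves):

  #include <array>
  #include <cstddef>

  // Merge masking: lane I comes from the inserted value when mask bit I is
  // set, otherwise from the pass-through operand Src0.
  template <typename T, size_t N>
  std::array<T, N> mergeMask(unsigned Mask, const std::array<T, N> &Value,
                             const std::array<T, N> &Src0) {
    std::array<T, N> R{};
    for (size_t I = 0; I != N; ++I)
      R[I] = ((Mask >> I) & 1) ? Value[I] : Src0[I];
    return R;
  }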
+defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info, + v8f64_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasDQI]>; + +defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info, + v16i32_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasAVX512]>; +defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info, + v16i32_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasAVX512]>; +defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info, + v16i32_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasAVX512]>; +defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info, + v8i64_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasDQI]>; +defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info, + v8i64_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasDQI]>; +defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info, + v8i64_info, vinsert128_insert, + INSERT_get_vinsert128_imm, [HasDQI]>; + +defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info, + v16f32_info, vinsert256_insert, + INSERT_get_vinsert256_imm, [HasDQI]>; +defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info, + v8f64_info, vinsert256_insert, + INSERT_get_vinsert256_imm, [HasAVX512]>; + +defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info, + v16i32_info, vinsert256_insert, + INSERT_get_vinsert256_imm, [HasDQI]>; +defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info, + v16i32_info, vinsert256_insert, + INSERT_get_vinsert256_imm, [HasDQI]>; +defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info, + v16i32_info, vinsert256_insert, + INSERT_get_vinsert256_imm, [HasDQI]>; +defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info, + v8i64_info, vinsert256_insert, + INSERT_get_vinsert256_imm, [HasAVX512]>; +defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info, + v8i64_info, vinsert256_insert, + INSERT_get_vinsert256_imm, [HasAVX512]>; +defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info, + v8i64_info, vinsert256_insert, + INSERT_get_vinsert256_imm, [HasAVX512]>; + // vinsertps - insert f32 to XMM let ExeDomain = SSEPackedSingle in { def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), @@ -942,13 +1075,13 @@ def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>, - EVEX; + EVEX, VEX_WIG; def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs), (ins f32mem:$dst, VR128X:$src1, u8imm:$src2), "vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2), - addr:$dst)]>, EVEX, EVEX_CD8<32, CD8VT1>; + addr:$dst)]>, EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>; //===---------------------------------------------------------------------===// // AVX-512 BROADCAST @@ -977,17 +1110,29 @@ multiclass avx512_broadcast_scalar opc, string OpcodeStr, multiclass avx512_broadcast_rm_split opc, string OpcodeStr, X86VectorVTInfo MaskInfo, X86VectorVTInfo DestInfo, - X86VectorVTInfo SrcInfo> { - let ExeDomain = DestInfo.ExeDomain in { - defm r : AVX512_maskable { + let ExeDomain = DestInfo.ExeDomain, hasSideEffects = 0 in { + defm r : AVX512_maskable_split, T8PD, EVEX; - defm m : AVX512_maskable opc, string OpcodeStr, def : Pat<(MaskInfo.VT (bitconvert - 
(DestInfo.VT (X86VBroadcast + (DestInfo.VT (UnmaskedOp (SrcInfo.VT (scalar_to_vector (SrcInfo.ScalarLdFrag addr:$src))))))), (!cast(NAME#MaskInfo.ZSuffix#m) addr:$src)>; @@ -1077,7 +1222,7 @@ multiclass avx512_int_broadcast_reg opc, X86VectorVTInfo _, (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX; } -multiclass avx512_int_broadcastbw_reg opc, string Name, +multiclass avx512_int_broadcastbw_reg opc, string Name, X86VectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, SubRegIndex Subreg> { let hasSideEffects = 0, ExeDomain = _.ExeDomain in @@ -1105,7 +1250,7 @@ multiclass avx512_int_broadcastbw_reg_vl opc, string Name, AVX512VLVectorVTInfo _, SDPatternOperator OpNode, RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_int_broadcastbw_reg, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_int_broadcastbw_reg; } -let Predicates = [HasVLX, HasBWI] in { +let Predicates = [HasVLX] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), (VPBROADCASTQZ128m addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))), (VPBROADCASTQZ256m addr:$src)>; +} +let Predicates = [HasVLX, HasBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. // This means we'll encounter truncated i32 loads; match that here. def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), @@ -1351,11 +1498,11 @@ multiclass avx512_common_broadcast_32x2 opc, string OpcodeStr, AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> { let Predicates = [HasDQI] in defm Z : avx512_broadcast_rm_split, + _Src.info512, _Src.info128, null_frag>, EVEX_V512; let Predicates = [HasDQI, HasVLX] in defm Z256 : avx512_broadcast_rm_split, + _Src.info256, _Src.info128, null_frag>, EVEX_V256; } @@ -1365,7 +1512,7 @@ multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, let Predicates = [HasDQI, HasVLX] in defm Z128 : avx512_broadcast_rm_split, + _Src.info128, _Src.info128, null_frag>, EVEX_V128; } @@ -1835,11 +1982,11 @@ multiclass avx512_icmp_packed_rmb_vl opc, string OpcodeStr, defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm, avx512vl_i8_info, HasBWI, 1>, - EVEX_CD8<8, CD8VF>; + EVEX_CD8<8, CD8VF>, VEX_WIG; defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm, avx512vl_i16_info, HasBWI, 1>, - EVEX_CD8<16, CD8VF>; + EVEX_CD8<16, CD8VF>, VEX_WIG; defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm, avx512vl_i32_info, HasAVX512, 1>, @@ -1851,11 +1998,11 @@ defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm, defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, avx512vl_i8_info, HasBWI>, - EVEX_CD8<8, CD8VF>; + EVEX_CD8<8, CD8VF>, VEX_WIG; defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, avx512vl_i16_info, HasBWI>, - EVEX_CD8<16, CD8VF>; + EVEX_CD8<16, CD8VF>, VEX_WIG; defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, avx512vl_i32_info, HasAVX512>, @@ -1865,6 +2012,24 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; +// Transforms to swizzle an immediate to help matching memory operand in first +// operand. 
+def CommutePCMPCC : SDNodeXFormgetZExtValue() & 0x7; + switch (Imm) { + default: llvm_unreachable("Unreachable!"); + case 0x01: Imm = 0x06; break; // LT -> NLE + case 0x02: Imm = 0x05; break; // LE -> NLT + case 0x05: Imm = 0x02; break; // NLT -> LE + case 0x06: Imm = 0x01; break; // NLE -> LT + case 0x00: // EQ + case 0x03: // FALSE + case 0x04: // NE + case 0x07: // TRUE + break; + } + return getI8Imm(Imm, SDLoc(N)); +}]>; multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, X86VectorVTInfo _> { @@ -1936,6 +2101,17 @@ multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, "$dst {${mask}}, $src1, $src2, $cc}"), [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K; } + + def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)), + (_.VT _.RC:$src1), imm:$cc), + (!cast(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, + (CommutePCMPCC imm:$cc))>; + + def : Pat<(and _.KRCWM:$mask, (OpNode (bitconvert (_.LdFrag addr:$src2)), + (_.VT _.RC:$src1), imm:$cc)), + (!cast(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask, + _.RC:$src1, addr:$src2, + (CommutePCMPCC imm:$cc))>; } multiclass avx512_icmp_cc_rmb opc, string Suffix, SDNode OpNode, @@ -1980,6 +2156,18 @@ multiclass avx512_icmp_cc_rmb opc, string Suffix, SDNode OpNode, "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K, EVEX_B; } + + def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + (_.VT _.RC:$src1), imm:$cc), + (!cast(NAME#_.ZSuffix#"rmib") _.RC:$src1, addr:$src2, + (CommutePCMPCC imm:$cc))>; + + def : Pat<(and _.KRCWM:$mask, (OpNode (X86VBroadcast + (_.ScalarLdFrag addr:$src2)), + (_.VT _.RC:$src1), imm:$cc)), + (!cast(NAME#_.ZSuffix#"rmibk") _.KRCWM:$mask, + _.RC:$src1, addr:$src2, + (CommutePCMPCC imm:$cc))>; } multiclass avx512_icmp_cc_vl opc, string Suffix, SDNode OpNode, @@ -2077,7 +2265,33 @@ multiclass avx512_vcmp_common { "$cc, ${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B; } - } + } + + // Patterns for selecting with loads in other operand. + def : Pat<(X86cmpm (_.LdFrag addr:$src2), (_.VT _.RC:$src1), + CommutableCMPCC:$cc), + (!cast(NAME#_.ZSuffix#"rmi") _.RC:$src1, addr:$src2, + imm:$cc)>; + + def : Pat<(and _.KRCWM:$mask, (X86cmpm (_.LdFrag addr:$src2), + (_.VT _.RC:$src1), + CommutableCMPCC:$cc)), + (!cast(NAME#_.ZSuffix#"rmik") _.KRCWM:$mask, + _.RC:$src1, addr:$src2, + imm:$cc)>; + + def : Pat<(X86cmpm (X86VBroadcast (_.ScalarLdFrag addr:$src2)), + (_.VT _.RC:$src1), CommutableCMPCC:$cc), + (!cast(NAME#_.ZSuffix#"rmbi") _.RC:$src1, addr:$src2, + imm:$cc)>; + + def : Pat<(and _.KRCWM:$mask, (X86cmpm (X86VBroadcast + (_.ScalarLdFrag addr:$src2)), + (_.VT _.RC:$src1), + CommutableCMPCC:$cc)), + (!cast(NAME#_.ZSuffix#"rmbik") _.KRCWM:$mask, + _.RC:$src1, addr:$src2, + imm:$cc)>; } multiclass avx512_vcmp_sae { @@ -2119,6 +2333,17 @@ defm VCMPPS : avx512_vcmp, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; +// Patterns to select fp compares with load as first operand. 
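Aside: both the integer patterns above (via CommutePCMPCC) and the FP patterns that follow commute a compare so its memory operand can sit in the first position. Swapping the operands maps each predicate to its converse, while EQ, NE, FALSE and TRUE are symmetric; a scalar mirror of the immediate swizzle:

  #include <cstdint>

  // 1 (LT) <-> 6 (NLE), 2 (LE) <-> 5 (NLT); 0 (EQ), 3 (FALSE), 4 (NE) and
  // 7 (TRUE) are unchanged by commutation.
  uint8_t commutePredicate(uint8_t Imm) {
    switch (Imm & 0x7) {
    case 0x01: return 0x06; // LT -> NLE
    case 0x02: return 0x05; // LE -> NLT
    case 0x05: return 0x02; // NLT -> LE
    case 0x06: return 0x01; // NLE -> LT
    default:   return Imm & 0x7; // EQ, FALSE, NE, TRUE
    }
  }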
+let Predicates = [HasAVX512] in { + def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, + CommutableCMPCC:$cc)), + (VCMPSDZrm FR64X:$src1, addr:$src2, imm:$cc)>; + + def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, + CommutableCMPCC:$cc)), + (VCMPSSZrm FR32X:$src1, addr:$src2, imm:$cc)>; +} + // ---------------------------------------------------------------- // FPClass //handle fpclass instruction mask = op(reg_scalar,imm) @@ -3286,28 +3511,25 @@ def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), multiclass avx512_move_scalar { def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), - (ins _.RC:$src1, _.FRC:$src2), + (ins _.RC:$src1, _.RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, - (scalar_to_vector _.FRC:$src2))))], + [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))], _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V; def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), - (ins _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2), + (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|", "$dst {${mask}} {z}, $src1, $src2}"), [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, - (_.VT (OpNode _.RC:$src1, - (scalar_to_vector _.FRC:$src2))), + (_.VT (OpNode _.RC:$src1, _.RC:$src2)), _.ImmAllZerosV)))], _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ; let Constraints = "$src0 = $dst" in def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), - (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2), + (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, $src2}"), [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask, - (_.VT (OpNode _.RC:$src1, - (scalar_to_vector _.FRC:$src2))), + (_.VT (OpNode _.RC:$src1, _.RC:$src2)), (_.VT _.RC:$src0))))], _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K; let canFoldAsLoad = 1, isReMaterializable = 1 in @@ -3354,21 +3576,21 @@ def : Pat<(_.VT (OpNode _.RC:$src0, (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))), (_.EltVT _.FRC:$src1), (_.EltVT _.FRC:$src2))))))), - (COPY_TO_REGCLASS (!cast(InstrStr#rrk) - (COPY_TO_REGCLASS _.FRC:$src2, _.RC), - (COPY_TO_REGCLASS GR32:$mask, VK1WM), - (_.VT _.RC:$src0), _.FRC:$src1), - _.RC)>; + (!cast(InstrStr#rrk) + (COPY_TO_REGCLASS _.FRC:$src2, _.RC), + (COPY_TO_REGCLASS GR32:$mask, VK1WM), + (_.VT _.RC:$src0), + (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>; def : Pat<(_.VT (OpNode _.RC:$src0, (_.VT (scalar_to_vector (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))), (_.EltVT _.FRC:$src1), (_.EltVT ZeroFP))))))), - (COPY_TO_REGCLASS (!cast(InstrStr#rrkz) - (COPY_TO_REGCLASS GR32:$mask, VK1WM), - (_.VT _.RC:$src0), _.FRC:$src1), - _.RC)>; + (!cast(InstrStr#rrkz) + (COPY_TO_REGCLASS GR32:$mask, VK1WM), + (_.VT _.RC:$src0), + (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>; } multiclass avx512_store_scalar_lowering; + (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src1, VR128X)), + FR32X)>; def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), - VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>; + VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>; def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))), (f64 FR64X:$src1), (f64 FR64X:$src2))), @@ -3492,11 +3716,13 @@ def : Pat<(f64 (X86selects (scalar_to_vector (and 
GR8:$mask, (i8 1))), (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM), - (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; + (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), + FR64X)>; def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), - VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; + VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>; def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM), @@ -3504,7 +3730,7 @@ def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), let hasSideEffects = 0 in { def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), - (ins VR128X:$src1, FR32X:$src2), + (ins VR128X:$src1, VR128X:$src2), "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], NoItinerary>, XS, EVEX_4V, VEX_LIG, FoldGenData<"VMOVSSZrr">; @@ -3512,21 +3738,21 @@ let hasSideEffects = 0 in { let Constraints = "$src0 = $dst" in def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, - VR128X:$src1, FR32X:$src2), + VR128X:$src1, VR128X:$src2), "vmovss.s\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", [], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG, FoldGenData<"VMOVSSZrrk">; def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), - (ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2), + (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2), "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", [], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, FoldGenData<"VMOVSSZrrkz">; def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), - (ins VR128X:$src1, FR64X:$src2), + (ins VR128X:$src1, VR128X:$src2), "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W, FoldGenData<"VMOVSDZrr">; @@ -3534,7 +3760,7 @@ let Constraints = "$src0 = $dst" in let Constraints = "$src0 = $dst" in def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, - VR128X:$src1, FR64X:$src2), + VR128X:$src1, VR128X:$src2), "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# "$dst {${mask}}, $src1, $src2}", [], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG, @@ -3542,7 +3768,7 @@ let Constraints = "$src0 = $dst" in def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), (ins f64x_info.KRCWM:$mask, VR128X:$src1, - FR64X:$src2), + VR128X:$src2), "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# "$dst {${mask}} {z}, $src1, $src2}", [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, @@ -3552,11 +3778,12 @@ let Constraints = "$src0 = $dst" in let Predicates = [HasAVX512] in { let AddedComplexity = 15 in { def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))), - (VMOVSSZrr (v4f32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; + (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))), - (VMOVSSZrr (v4i32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; + (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>; def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))), - (VMOVSDZrr (v2f64 (AVX512_128_SET0)), FR64X:$src)>; + (VMOVSDZrr (v2f64 
(AVX512_128_SET0)), + (COPY_TO_REGCLASS FR64X:$src, VR128))>; } // Move low f32 and clear high bits. @@ -3662,22 +3889,23 @@ let Predicates = [HasAVX512] in { // Shuffle with VMOVSS def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)), - (VMOVSSZrr (v4i32 VR128X:$src1), - (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>; - def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)), - (VMOVSSZrr (v4f32 VR128X:$src1), - (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>; + (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>; + + def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))), + (VMOVSSZrr VR128X:$src1, + (COPY_TO_REGCLASS FR32X:$src2, VR128X))>; // Shuffle with VMOVSD def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)), - (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; - def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)), - (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + (VMOVSDZrr VR128X:$src1, VR128X:$src2)>; + + def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))), + (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>; def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)), - (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + (VMOVSDZrr VR128X:$src1, VR128X:$src2)>; def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)), - (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>; + (VMOVSDZrr VR128X:$src1, VR128X:$src2)>; } let AddedComplexity = 15 in @@ -3805,12 +4033,6 @@ let Predicates = [HasAVX512], AddedComplexity = 400 in { (VMOVNTDQAZrm addr:$src)>; def : Pat<(v8i64 (alignednontemporalload addr:$src)), (VMOVNTDQAZrm addr:$src)>; - def : Pat<(v16i32 (bitconvert (v8i64 (alignednontemporalload addr:$src)))), - (VMOVNTDQAZrm addr:$src)>; - def : Pat<(v32i16 (bitconvert (v8i64 (alignednontemporalload addr:$src)))), - (VMOVNTDQAZrm addr:$src)>; - def : Pat<(v64i8 (bitconvert (v8i64 (alignednontemporalload addr:$src)))), - (VMOVNTDQAZrm addr:$src)>; } let Predicates = [HasVLX], AddedComplexity = 400 in { @@ -3827,12 +4049,6 @@ let Predicates = [HasVLX], AddedComplexity = 400 in { (VMOVNTDQAZ256rm addr:$src)>; def : Pat<(v4i64 (alignednontemporalload addr:$src)), (VMOVNTDQAZ256rm addr:$src)>; - def : Pat<(v8i32 (bitconvert (v4i64 (alignednontemporalload addr:$src)))), - (VMOVNTDQAZ256rm addr:$src)>; - def : Pat<(v16i16 (bitconvert (v4i64 (alignednontemporalload addr:$src)))), - (VMOVNTDQAZ256rm addr:$src)>; - def : Pat<(v32i8 (bitconvert (v4i64 (alignednontemporalload addr:$src)))), - (VMOVNTDQAZ256rm addr:$src)>; def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst), (VMOVNTDQZ128mr addr:$dst, VR128X:$src)>; @@ -3847,12 +4063,6 @@ let Predicates = [HasVLX], AddedComplexity = 400 in { (VMOVNTDQAZ128rm addr:$src)>; def : Pat<(v2i64 (alignednontemporalload addr:$src)), (VMOVNTDQAZ128rm addr:$src)>; - def : Pat<(v4i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))), - (VMOVNTDQAZ128rm addr:$src)>; - def : Pat<(v8i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))), - (VMOVNTDQAZ128rm addr:$src)>; - def : Pat<(v16i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))), - (VMOVNTDQAZ128rm addr:$src)>; } //===----------------------------------------------------------------------===// @@ -3941,14 +4151,16 @@ multiclass avx512_binop_rm_vl_w opc, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { defm NAME : avx512_binop_rm_vl, EVEX_CD8<16, CD8VF>; + itins, prd, IsCommutable>, EVEX_CD8<16, 
CD8VF>, + VEX_WIG; } multiclass avx512_binop_rm_vl_b opc, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { defm NAME : avx512_binop_rm_vl, EVEX_CD8<8, CD8VF>; + itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>, + VEX_WIG; } multiclass avx512_binop_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, @@ -4124,12 +4336,12 @@ multiclass avx512_packs_all_i16_i8 opc, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in defm NAME#Z : avx512_packs_rm, EVEX_V512; + v64i8_info>, EVEX_V512, VEX_WIG; let Predicates = [HasBWI, HasVLX] in { defm NAME#Z256 : avx512_packs_rm, EVEX_V256; + v32i8x_info>, EVEX_V256, VEX_WIG; defm NAME#Z128 : avx512_packs_rm, EVEX_V128; + v16i8x_info>, EVEX_V128, VEX_WIG; } } @@ -4153,9 +4365,9 @@ defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512B defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase; defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, - avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD; + avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG; defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, - avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase; + avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG; defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; @@ -4944,12 +5156,12 @@ multiclass avx512_shift_rmi_w opcw, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in defm WZ: avx512_shift_rmi, EVEX_V512; + v32i16_info>, EVEX_V512, VEX_WIG; let Predicates = [HasVLX, HasBWI] in { defm WZ256: avx512_shift_rmi, EVEX_V256; + v16i16x_info>, EVEX_V256, VEX_WIG; defm WZ128: avx512_shift_rmi, EVEX_V128; + v8i16x_info>, EVEX_V128, VEX_WIG; } } @@ -5424,7 +5636,7 @@ multiclass avx512_pshufb_sizes opc, string OpcodeStr, SDNode OpNode> { } } -defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>; +defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>, VEX_WIG; //===----------------------------------------------------------------------===// // Move Low to High and High to Low packed FP Instructions @@ -6307,9 +6519,11 @@ multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, } } defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", - X86froundRnd, f64x_info, f32x_info>; + X86froundRnd, f64x_info, f32x_info>, + NotMemoryFoldable; defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", - X86fpextRnd,f32x_info, f64x_info >; + X86fpextRnd,f32x_info, f64x_info >, + NotMemoryFoldable; def : Pat<(f64 (fpextend FR32X:$src)), (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>, @@ -6439,10 +6653,14 @@ def : Pat<(v8f64 (extloadv8f32 addr:$src)), (VCVTPS2PDZrm addr:$src)>; let Predicates = [HasVLX] in { - let AddedComplexity = 15 in - def : Pat<(X86vzmovl (v2f64 (bitconvert - (v4f32 (X86vfpround (v2f64 VR128X:$src)))))), - (VCVTPD2PSZ128rr VR128X:$src)>; + let AddedComplexity = 15 in { + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (v2f64 VR128X:$src)))))), + (VCVTPD2PSZ128rr VR128X:$src)>; + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (loadv2f64 addr:$src)))))), + (VCVTPD2PSZ128rm addr:$src)>; + } def : Pat<(v2f64 (extloadv2f32 addr:$src)), (VCVTPS2PDZ128rm addr:$src)>; def : Pat<(v4f64 (extloadv4f32 addr:$src)), @@ -6815,16 +7033,32 @@ let Predicates = [HasAVX512, HasVLX] in { def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))), (VCVTPD2DQZ128rr 
VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert - (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))))), + def : Pat<(X86vzmovl (v2i64 (bitconvert + (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))), + (VCVTPD2DQZ128rm addr:$src)>; + def : Pat<(X86vzmovl (v2i64 (bitconvert + (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))), (VCVTPD2UDQZ128rr VR128X:$src)>; def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))), (VCVTTPD2DQZ128rr VR128X:$src)>; - def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert - (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))))), + def : Pat<(X86vzmovl (v2i64 (bitconvert + (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))), + (VCVTTPD2DQZ128rm addr:$src)>; + def : Pat<(X86vzmovl (v2i64 (bitconvert + (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))), (VCVTTPD2UDQZ128rr VR128X:$src)>; } + + def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), + (VCVTDQ2PDZ128rm addr:$src)>; + def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), + (VCVTDQ2PDZ128rm addr:$src)>; + + def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), + (VCVTUDQ2PDZ128rm addr:$src)>; + def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), + (VCVTUDQ2PDZ128rm addr:$src)>; } let Predicates = [HasAVX512] in { @@ -7101,13 +7335,13 @@ multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, } defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s, f32x_info>, - EVEX_CD8<32, CD8VT1>, T8PD; + EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>, - VEX_W, EVEX_CD8<64, CD8VT1>, T8PD; + VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>, - EVEX_CD8<32, CD8VT1>, T8PD; + EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>, - VEX_W, EVEX_CD8<64, CD8VT1>, T8PD; + VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, @@ -7367,9 +7601,11 @@ multiclass avx512_sqrt_scalar opc, string OpcodeStr,X86VectorVTInfo _, multiclass avx512_sqrt_scalar_all opc, string OpcodeStr> { defm SSZ : avx512_sqrt_scalar, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS; + X86fsqrtRnds>, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, + NotMemoryFoldable; defm SDZ : avx512_sqrt_scalar, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W; + X86fsqrtRnds>, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W, + NotMemoryFoldable; } defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>, @@ -7649,16 +7885,16 @@ multiclass avx512_extend_BW opc, string OpcodeStr, let Predicates = [HasVLX, HasBWI] in { defm Z128: avx512_extend_common, - EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128; + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG; defm Z256: avx512_extend_common, - EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256; + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasBWI] in { defm Z : avx512_extend_common, - EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512; + EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG; } } @@ -7668,16 +7904,16 @@ multiclass avx512_extend_BD opc, string OpcodeStr, let Predicates = [HasVLX, HasAVX512] in { defm Z128: avx512_extend_common, - EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128; + EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; defm Z256: avx512_extend_common, - EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256; + 
EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { defm Z : avx512_extend_common, - EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512; + EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; } } @@ -7687,16 +7923,16 @@ multiclass avx512_extend_BQ opc, string OpcodeStr, let Predicates = [HasVLX, HasAVX512] in { defm Z128: avx512_extend_common, - EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128; + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG; defm Z256: avx512_extend_common, - EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256; + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { defm Z : avx512_extend_common, - EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512; + EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG; } } @@ -7706,16 +7942,16 @@ multiclass avx512_extend_WD opc, string OpcodeStr, let Predicates = [HasVLX, HasAVX512] in { defm Z128: avx512_extend_common, - EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128; + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG; defm Z256: avx512_extend_common, - EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256; + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { defm Z : avx512_extend_common, - EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512; + EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG; } } @@ -7725,16 +7961,16 @@ multiclass avx512_extend_WQ opc, string OpcodeStr, let Predicates = [HasVLX, HasAVX512] in { defm Z128: avx512_extend_common, - EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128; + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; defm Z256: avx512_extend_common, - EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256; + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { defm Z : avx512_extend_common, - EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512; + EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; } } @@ -7772,46 +8008,6 @@ defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s">; defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s">; defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s">; -// EXTLOAD patterns, implemented using vpmovz -multiclass avx512_ext_lowering { - def : Pat<(To.VT (LdFrag addr:$src)), - (!cast("VPMOVZX"#InstrStr#"rm") addr:$src)>; - def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src), To.RC:$src0)), - (!cast("VPMOVZX"#InstrStr#"rmk") To.RC:$src0, - To.KRC:$mask, addr:$src)>; - def : Pat<(To.VT (vselect To.KRCWM:$mask, (LdFrag addr:$src), - To.ImmAllZerosV)), - (!cast("VPMOVZX"#InstrStr#"rmkz") To.KRC:$mask, - addr:$src)>; -} - -let Predicates = [HasVLX, HasBWI] in { - defm : avx512_ext_lowering<"BWZ128", v8i16x_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"BWZ256", v16i16x_info, v16i8x_info, extloadvi8>; -} -let Predicates = [HasBWI] in { - defm : avx512_ext_lowering<"BWZ", v32i16_info, v32i8x_info, extloadvi8>; -} -let Predicates = [HasVLX, HasAVX512] in { - defm : avx512_ext_lowering<"BDZ128", v4i32x_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"BDZ256", v8i32x_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"BQZ128", v2i64x_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"BQZ256", v4i64x_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"WDZ128", v4i32x_info, v8i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"WDZ256", v8i32x_info, v8i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"WQZ128", v2i64x_info, v8i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"WQZ256", v4i64x_info, v8i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"DQZ128", v2i64x_info, v4i32x_info, 
extloadvi32>; - defm : avx512_ext_lowering<"DQZ256", v4i64x_info, v4i32x_info, extloadvi32>; -} -let Predicates = [HasAVX512] in { - defm : avx512_ext_lowering<"BDZ", v16i32_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"BQZ", v8i64_info, v16i8x_info, extloadvi8>; - defm : avx512_ext_lowering<"WDZ", v16i32_info, v16i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"WQZ", v8i64_info, v8i16x_info, extloadvi16>; - defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>; -} multiclass AVX512_pmovx_patterns { @@ -8755,8 +8951,8 @@ multiclass avx512_unary_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, multiclass avx512_unary_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, SDNode OpNode, Predicate prd> { - defm W : avx512_unary_rm_vl; - defm B : avx512_unary_rm_vl; + defm W : avx512_unary_rm_vl, VEX_WIG; + defm B : avx512_unary_rm_vl, VEX_WIG; } multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, @@ -8868,7 +9064,7 @@ defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>; //===----------------------------------------------------------------------===// multiclass avx512_movddup_128 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, multiclass avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { - defm Z : avx512_unary_rm, EVEX_V512; + defm Z : avx512_unary_rm, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_unary_rm, + defm Z256 : avx512_unary_rm, EVEX_V256; - defm Z128 : avx512_movddup_128, - EVEX_V128; + defm Z128 : avx512_movddup_128, + EVEX_V128; } } @@ -8902,19 +9098,12 @@ multiclass avx512_movddup opc, string OpcodeStr, SDNode OpNode>{ defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>; let Predicates = [HasVLX] in { -def : Pat<(X86Movddup (loadv2f64 addr:$src)), - (VMOVDDUPZ128rm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), (VMOVDDUPZ128rm addr:$src)>; def : Pat<(v2f64 (X86VBroadcast f64:$src)), (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>; - -def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)), - (v2f64 VR128X:$src0)), - (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; -def : Pat<(vselect (v2i1 VK2WM:$mask), (X86Movddup (loadv2f64 addr:$src)), - (bitconvert (v4i32 immAllZerosV))), - (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; +def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))), + (VMOVDDUPZ128rm addr:$src)>; def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)), (v2f64 VR128X:$src0)), @@ -8930,6 +9119,13 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src) def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))), (bitconvert (v4i32 immAllZerosV))), (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; + +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))), + (v2f64 VR128X:$src0)), + (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; +def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))), + (bitconvert (v4i32 immAllZerosV))), + (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>; } //===----------------------------------------------------------------------===// @@ -8967,9 +9163,8 @@ multiclass avx512_extract_elt_bw_m opc, string OpcodeStr, SDNode OpNode, def mr : AVX512Ii8, + [(store (_.EltVT (trunc (OpNode (_.VT _.RC:$src1), imm:$src2))), + 
addr:$dst)]>, EVEX, EVEX_CD8<_.EltSize, CD8VT1>; } @@ -9024,8 +9219,8 @@ multiclass avx512_extract_elt_dq; -defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>; +defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG; +defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG; defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>; defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W; @@ -9068,9 +9263,9 @@ multiclass avx512_insert_elt_dq opc, string OpcodeStr, } defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info, - extloadi8>, TAPD; + extloadi8>, TAPD, VEX_WIG; defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, - extloadi16>, PD; + extloadi16>, PD, VEX_WIG; defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; //===----------------------------------------------------------------------===// @@ -9116,9 +9311,9 @@ multiclass avx512_shift_packed_all opc, SDNode OpNode, Format MRMr, } } defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", - HasBWI>, AVX512PDIi8Base, EVEX_4V; + HasBWI>, AVX512PDIi8Base, EVEX_4V, VEX_WIG; defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", - HasBWI>, AVX512PDIi8Base, EVEX_4V; + HasBWI>, AVX512PDIi8Base, EVEX_4V, VEX_WIG; multiclass avx512_psadbw_packed opc, SDNode OpNode, @@ -9153,7 +9348,7 @@ multiclass avx512_psadbw_packed_all opc, SDNode OpNode, } defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", - HasBWI>, EVEX_4V; + HasBWI>, EVEX_4V, VEX_WIG; // Transforms to swizzle an immediate to enable better matching when // memory operand isn't in the right place. 
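// A minimal C++ sketch of the immediate swizzle the VPTERNLOG transforms
// below perform (the function and parameter names here are illustrative,
// not the actual in-tree helpers). Bit k of the 8-bit immediate holds the
// result for inputs a=bit2(k), b=bit1(k), c=bit0(k), so reordering the three
// sources simply permutes those index bits.
#include <cstdint>

// Pa/Pb/Pc name which old source feeds the new first/second/third operand.
uint8_t permuteTernlogImm(uint8_t Imm, int Pa, int Pb, int Pc) {
  uint8_t NewImm = 0;
  for (int K = 0; K < 8; ++K) {
    if (!(Imm & (1 << K)))
      continue;
    int Src[3] = {(K >> 2) & 1, (K >> 1) & 1, K & 1}; // old a, b, c
    int NewK = (Src[Pa] << 2) | (Src[Pb] << 1) | Src[Pc];
    NewImm |= 1 << NewK; // same truth table under the relabeled inputs
  }
  return NewImm;
}
// e.g. a "213" reordering (new order src2, src1, src3) would be
// permuteTernlogImm(Imm, 1, 0, 2).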
@@ -9359,26 +9554,26 @@ multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), _.RC:$src2, _.RC:$src1, (i8 imm:$src4)), _.RC:$src1)), - (!cast(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, + (!cast(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), (i8 imm:$src4)), _.RC:$src1)), - (!cast(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, + (!cast(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode _.RC:$src2, (X86VBroadcast (_.ScalarLdFrag addr:$src3)), _.RC:$src1, (i8 imm:$src4)), _.RC:$src1)), - (!cast(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, + (!cast(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>; def : Pat<(_.VT (vselect _.KRCWM:$mask, (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)), _.RC:$src1, _.RC:$src2, (i8 imm:$src4)), _.RC:$src1)), - (!cast(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask, + (!cast(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; } @@ -9555,23 +9750,11 @@ multiclass AVX512_scalar_math_f32_patterns { (!cast("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, (COPY_TO_REGCLASS FR32X:$src, VR128X))>; - // extracted scalar math op with insert via blend - def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector - (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))), - FR32X:$src))), (i8 1))), - (!cast("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, - (COPY_TO_REGCLASS FR32X:$src, VR128X))>; - // vector math op with insert via movss def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))), (!cast("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>; - // vector math op with insert via blend - def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst), - (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)), (i8 1))), - (!cast("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>; - // extracted masked scalar math op with insert via movss def : Pat<(X86Movss (v4f32 VR128X:$src1), (scalar_to_vector @@ -9599,23 +9782,11 @@ multiclass AVX512_scalar_math_f64_patterns { (!cast("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, (COPY_TO_REGCLASS FR64X:$src, VR128X))>; - // extracted scalar math op with insert via blend - def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector - (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))), - FR64X:$src))), (i8 1))), - (!cast("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, - (COPY_TO_REGCLASS FR64X:$src, VR128X))>; - // vector math op with insert via movsd def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))), (!cast("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>; - // vector math op with insert via blend - def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst), - (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)), (i8 1))), - (!cast("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>; - // extracted masked scalar math op with insert via movss def : Pat<(X86Movsd (v2f64 VR128X:$src1), (scalar_to_vector diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 26771e0dfcd87..95f9e84af819e 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -652,9 +652,8 @@ class ITy opcode, 
Format f, X86TypeInfo typeinfo, dag outs, dag ins, // BinOpRR - Instructions like "add reg, reg, reg". class BinOpRR opcode, string mnemonic, X86TypeInfo typeinfo, - dag outlist, list pattern, InstrItinClass itin, - Format f = MRMDestReg> - : ITy pattern, InstrItinClass itin> + : ITy, Sched<[WriteALU]>; @@ -662,11 +661,11 @@ class BinOpRR opcode, string mnemonic, X86TypeInfo typeinfo, // BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has // just a EFLAGS as a result. class BinOpRR_F opcode, string mnemonic, X86TypeInfo typeinfo, - SDPatternOperator opnode, Format f = MRMDestReg> + SDPatternOperator opnode> : BinOpRR; + IIC_BIN_NONMEM>; // BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has // both a regclass and EFLAGS as a result. @@ -727,7 +726,7 @@ class BinOpRM opcode, string mnemonic, X86TypeInfo typeinfo, // BinOpRM_F - Instructions like "cmp reg, [mem]". class BinOpRM_F opcode, string mnemonic, X86TypeInfo typeinfo, - SDPatternOperator opnode> + SDNode opnode> : BinOpRM; @@ -837,7 +836,7 @@ class BinOpMR_RMW_FF opcode, string mnemonic, X86TypeInfo typeinfo, // BinOpMR_F - Instructions like "cmp [mem], reg". class BinOpMR_F opcode, string mnemonic, X86TypeInfo typeinfo, - SDNode opnode> + SDPatternOperator opnode> : BinOpMR; @@ -1224,10 +1223,10 @@ let isCompare = 1 in { def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat>; } // isCommutable - def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>; - def TEST16rm : BinOpRM_F<0x84, "test", Xi16, X86testpat>; - def TEST32rm : BinOpRM_F<0x84, "test", Xi32, X86testpat>; - def TEST64rm : BinOpRM_F<0x84, "test", Xi64, X86testpat>; + def TEST8mr : BinOpMR_F<0x84, "test", Xi8 , X86testpat>; + def TEST16mr : BinOpMR_F<0x84, "test", Xi16, X86testpat>; + def TEST32mr : BinOpMR_F<0x84, "test", Xi32, X86testpat>; + def TEST64mr : BinOpMR_F<0x84, "test", Xi64, X86testpat>; def TEST8ri : BinOpRI_F<0xF6, "test", Xi8 , X86testpat, MRM0r>; def TEST16ri : BinOpRI_F<0xF6, "test", Xi16, X86testpat, MRM0r>; diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index ec560a6e3ce83..6f7a8d3817ce9 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1488,21 +1488,16 @@ def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), Requires<[Not64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG - (MOVZX32rr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)), - sub_16bit)>, - Requires<[Not64BitMode]>; + (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)), + sub_16bit)>; def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), - (MOVZX32rr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>, - Requires<[Not64BitMode]>; + (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>; def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), - (MOVZX32rr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>, - Requires<[Not64BitMode]>; + (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>; def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), - (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>, - Requires<[Not64BitMode]>; + (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>; def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), - (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>, - Requires<[Not64BitMode]>; + (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>; // h-register tricks. 
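// An illustrative C++ rendering of what the zero-extension patterns above
// compute (plain scalar semantics, assumed for the sketch): bits 15:8 of a
// GPR, which x86 can read directly out of an h-register (AH/BH/CH/DH).
#include <cstdint>

uint32_t highByteZext(uint32_t X) {
  return (X >> 8) & 0xff; // selects to a MOVZX of the sub_8bit_hi register
}

// The _NOREX variant matters because h-registers cannot be encoded in an
// instruction that carries a REX prefix; using MOVZX32_NOREXrr8
// unconditionally is what allows the separate Not64BitMode/In64BitMode
// copies of these patterns to be merged.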
// For now, be conservative on x86-64 and use an h-register extract only if the @@ -1518,27 +1513,6 @@ def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)), (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR64:$src, sub_8bit_hi)), sub_32bit)>; -def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), - (MOVZX32_NOREXrr8 (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(srl GR16:$src, (i8 8)), - (EXTRACT_SUBREG - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)), - sub_16bit)>, - Requires<[In64BitMode]>; -def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>, - Requires<[In64BitMode]>; -def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))), - (MOVZX32_NOREXrr8 - (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>, - Requires<[In64BitMode]>; def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))), (SUBREG_TO_REG (i64 0), diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 8f9226cf228d6..4b8c24a1c047e 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -243,9 +243,9 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in let Uses = [ESP] in { def TCRETURNdi : PseudoI<(outs), - (ins i32imm_pcrel:$dst, i32imm:$offset), []>; + (ins i32imm_pcrel:$dst, i32imm:$offset), []>, NotMemoryFoldable; def TCRETURNri : PseudoI<(outs), - (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>; + (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable; let mayLoad = 1 in def TCRETURNmi : PseudoI<(outs), (ins i32mem_TC:$dst, i32imm:$offset), []>; @@ -315,10 +315,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, (ins i64i32imm_pcrel:$dst, i32imm:$offset), []>; def TCRETURNri64 : PseudoI<(outs), - (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>; + (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>, NotMemoryFoldable; let mayLoad = 1 in def TCRETURNmi64 : PseudoI<(outs), - (ins i64mem_TC:$dst, i32imm:$offset), []>; + (ins i64mem_TC:$dst, i32imm:$offset), []>, NotMemoryFoldable; def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst), "jmp\t$dst", [], IIC_JMP_REL>; diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 78608c430289a..f096f51d6bef4 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -57,20 +57,20 @@ def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore, // FPStack pattern fragments //===----------------------------------------------------------------------===// -def fpimm0 : PatLeaf<(fpimm), [{ - return N->isExactlyValue(+0.0); +def fpimm0 : FPImmLeaf; -def fpimmneg0 : PatLeaf<(fpimm), [{ - return N->isExactlyValue(-0.0); +def fpimmneg0 : FPImmLeaf; -def fpimm1 : PatLeaf<(fpimm), [{ - return N->isExactlyValue(+1.0); +def fpimm1 : FPImmLeaf; -def fpimmneg1 : PatLeaf<(fpimm), [{ - return N->isExactlyValue(-1.0); +def fpimmneg1 : FPImmLeaf; // Some 'special' instructions diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 57f0c1944c9a1..2653e8c0a39ce 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -231,6 +231,9 @@ class FoldGenData { string FoldGenRegForm = _RegisterForm; } +// Mark the instruction as "illegal to memory fold/unfold" +class NotMemoryFoldable { bit 
isMemoryFoldable = 0; } + class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, InstrItinClass itin, @@ -314,6 +317,8 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, // instruction to replace the current one in case it got picked during generation. string FoldGenRegForm = ?; + bit isMemoryFoldable = 1; // Is it allowed to memory fold/unfold this instruction? + // TSFlags layout should be kept in sync with X86BaseInfo.h. let TSFlags{6-0} = FormBits; let TSFlags{8-7} = OpSizeBits; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 6dcfa97b85157..e3611a83a1bc4 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -122,12 +122,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Subtarget(STI), RI(STI.getTargetTriple()) { static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = { + { X86::ADC16ri, X86::ADC16mi, 0 }, + { X86::ADC16ri8, X86::ADC16mi8, 0 }, + { X86::ADC16rr, X86::ADC16mr, 0 }, { X86::ADC32ri, X86::ADC32mi, 0 }, { X86::ADC32ri8, X86::ADC32mi8, 0 }, { X86::ADC32rr, X86::ADC32mr, 0 }, { X86::ADC64ri32, X86::ADC64mi32, 0 }, { X86::ADC64ri8, X86::ADC64mi8, 0 }, { X86::ADC64rr, X86::ADC64mr, 0 }, + { X86::ADC8ri, X86::ADC8mi, 0 }, + { X86::ADC8ri8, X86::ADC8mi8, 0 }, + { X86::ADC8rr, X86::ADC8mr, 0 }, { X86::ADD16ri, X86::ADD16mi, 0 }, { X86::ADD16ri8, X86::ADD16mi8, 0 }, { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE }, @@ -147,6 +153,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::ADD64rr, X86::ADD64mr, 0 }, { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE }, { X86::ADD8ri, X86::ADD8mi, 0 }, + { X86::ADD8ri8, X86::ADD8mi8, 0 }, { X86::ADD8rr, X86::ADD8mr, 0 }, { X86::AND16ri, X86::AND16mi, 0 }, { X86::AND16ri8, X86::AND16mi8, 0 }, @@ -158,7 +165,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::AND64ri8, X86::AND64mi8, 0 }, { X86::AND64rr, X86::AND64mr, 0 }, { X86::AND8ri, X86::AND8mi, 0 }, + { X86::AND8ri8, X86::AND8mi8, 0 }, { X86::AND8rr, X86::AND8mr, 0 }, + { X86::BTC16ri8, X86::BTC16mi8, 0 }, + { X86::BTC32ri8, X86::BTC32mi8, 0 }, + { X86::BTC64ri8, X86::BTC64mi8, 0 }, + { X86::BTR16ri8, X86::BTR16mi8, 0 }, + { X86::BTR32ri8, X86::BTR32mi8, 0 }, + { X86::BTR64ri8, X86::BTR64mi8, 0 }, + { X86::BTS16ri8, X86::BTS16mi8, 0 }, + { X86::BTS32ri8, X86::BTS32mi8, 0 }, + { X86::BTS64ri8, X86::BTS64mi8, 0 }, { X86::DEC16r, X86::DEC16m, 0 }, { X86::DEC32r, X86::DEC32m, 0 }, { X86::DEC64r, X86::DEC64m, 0 }, @@ -185,7 +202,32 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::OR64ri8, X86::OR64mi8, 0 }, { X86::OR64rr, X86::OR64mr, 0 }, { X86::OR8ri, X86::OR8mi, 0 }, + { X86::OR8ri8, X86::OR8mi8, 0 }, { X86::OR8rr, X86::OR8mr, 0 }, + { X86::RCL16r1, X86::RCL16m1, 0 }, + { X86::RCL16rCL, X86::RCL16mCL, 0 }, + { X86::RCL16ri, X86::RCL16mi, 0 }, + { X86::RCL32r1, X86::RCL32m1, 0 }, + { X86::RCL32rCL, X86::RCL32mCL, 0 }, + { X86::RCL32ri, X86::RCL32mi, 0 }, + { X86::RCL64r1, X86::RCL64m1, 0 }, + { X86::RCL64rCL, X86::RCL64mCL, 0 }, + { X86::RCL64ri, X86::RCL64mi, 0 }, + { X86::RCL8r1, X86::RCL8m1, 0 }, + { X86::RCL8rCL, X86::RCL8mCL, 0 }, + { X86::RCL8ri, X86::RCL8mi, 0 }, + { X86::RCR16r1, X86::RCR16m1, 0 }, + { X86::RCR16rCL, X86::RCR16mCL, 0 }, + { X86::RCR16ri, X86::RCR16mi, 0 }, + { X86::RCR32r1, X86::RCR32m1, 0 }, + { X86::RCR32rCL, X86::RCR32mCL, 0 }, + { X86::RCR32ri, X86::RCR32mi, 0 }, + { X86::RCR64r1, X86::RCR64m1, 0 }, + { X86::RCR64rCL, X86::RCR64mCL, 0 }, + { X86::RCR64ri, X86::RCR64mi, 0 }, + { X86::RCR8r1, X86::RCR8m1, 0 }, + { X86::RCR8rCL, 
X86::RCR8mCL, 0 }, + { X86::RCR8ri, X86::RCR8mi, 0 }, { X86::ROL16r1, X86::ROL16m1, 0 }, { X86::ROL16rCL, X86::ROL16mCL, 0 }, { X86::ROL16ri, X86::ROL16mi, 0 }, @@ -222,12 +264,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::SAR8r1, X86::SAR8m1, 0 }, { X86::SAR8rCL, X86::SAR8mCL, 0 }, { X86::SAR8ri, X86::SAR8mi, 0 }, + { X86::SBB16ri, X86::SBB16mi, 0 }, + { X86::SBB16ri8, X86::SBB16mi8, 0 }, + { X86::SBB16rr, X86::SBB16mr, 0 }, { X86::SBB32ri, X86::SBB32mi, 0 }, { X86::SBB32ri8, X86::SBB32mi8, 0 }, { X86::SBB32rr, X86::SBB32mr, 0 }, { X86::SBB64ri32, X86::SBB64mi32, 0 }, { X86::SBB64ri8, X86::SBB64mi8, 0 }, { X86::SBB64rr, X86::SBB64mr, 0 }, + { X86::SBB8ri, X86::SBB8mi, 0 }, + { X86::SBB8ri8, X86::SBB8mi8, 0 }, + { X86::SBB8rr, X86::SBB8mr, 0 }, { X86::SHL16r1, X86::SHL16m1, 0 }, { X86::SHL16rCL, X86::SHL16mCL, 0 }, { X86::SHL16ri, X86::SHL16mi, 0 }, @@ -274,6 +322,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::SUB64ri8, X86::SUB64mi8, 0 }, { X86::SUB64rr, X86::SUB64mr, 0 }, { X86::SUB8ri, X86::SUB8mi, 0 }, + { X86::SUB8ri8, X86::SUB8mi8, 0 }, { X86::SUB8rr, X86::SUB8mr, 0 }, { X86::XOR16ri, X86::XOR16mi, 0 }, { X86::XOR16ri8, X86::XOR16mi8, 0 }, @@ -285,6 +334,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::XOR64ri8, X86::XOR64mi8, 0 }, { X86::XOR64rr, X86::XOR64mr, 0 }, { X86::XOR8ri, X86::XOR8mi, 0 }, + { X86::XOR8ri8, X86::XOR8mi8, 0 }, { X86::XOR8rr, X86::XOR8mr, 0 } }; @@ -375,9 +425,13 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD }, { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD }, + { X86::TEST16rr, X86::TEST16mr, TB_FOLDED_LOAD }, { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD }, + { X86::TEST32rr, X86::TEST32mr, TB_FOLDED_LOAD }, { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, + { X86::TEST64rr, X86::TEST64mr, TB_FOLDED_LOAD }, { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD }, + { X86::TEST8rr, X86::TEST8mr, TB_FOLDED_LOAD }, // AVX 128-bit versions of foldable instructions { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE }, @@ -608,10 +662,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::SQRTSDr_Int, X86::SQRTSDm_Int, TB_NO_REVERSE }, { X86::SQRTSSr, X86::SQRTSSm, 0 }, { X86::SQRTSSr_Int, X86::SQRTSSm_Int, TB_NO_REVERSE }, - { X86::TEST16rr, X86::TEST16rm, 0 }, - { X86::TEST32rr, X86::TEST32rm, 0 }, - { X86::TEST64rr, X86::TEST64rm, 0 }, - { X86::TEST8rr, X86::TEST8rm, 0 }, // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, { X86::UCOMISSrr, X86::UCOMISSrm, 0 }, @@ -714,12 +764,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 }, // AVX 256-bit foldable instructions - { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, TB_NO_REVERSE }, + { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, 0 }, { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 }, { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 }, { X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 }, { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 }, - { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, TB_NO_REVERSE }, + { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, 0 }, { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 }, { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 }, { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 }, @@ -879,6 +929,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // AVX-512 foldable instructions { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE }, { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE }, + { X86::VCVTDQ2PDZrr, 
X86::VCVTDQ2PDZrm, 0 }, + { X86::VCVTPD2PSZrr, X86::VCVTPD2PSZrm, 0 }, + { X86::VCVTUDQ2PDZrr, X86::VCVTUDQ2PDZrm, 0 }, { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 }, { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 }, @@ -938,6 +991,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // AVX-512 foldable instructions (256-bit versions) { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE }, { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE }, + { X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0 }, + { X86::VCVTPD2PSZ256rr, X86::VCVTPD2PSZ256rm, 0 }, + { X86::VCVTUDQ2PDZ256rr, X86::VCVTUDQ2PDZ256rm, 0 }, { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 }, { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 }, { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 }, @@ -989,6 +1045,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // AVX-512 foldable instructions (128-bit versions) { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE }, + { X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rm, TB_NO_REVERSE }, + { X86::VCVTPD2PSZ128rr, X86::VCVTPD2PSZ128rm, 0 }, + { X86::VCVTUDQ2PDZ128rr, X86::VCVTUDQ2PDZ128rm, TB_NO_REVERSE }, { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 }, { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 }, { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 }, @@ -5189,18 +5248,8 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break; } - // MOVSD/MOVSS's 2nd operand is a FR64/FR32 reg class - we need to copy - // this over to a VR128 class like the 1st operand to use a BLENDPD/BLENDPS. - auto &MRI = MI.getParent()->getParent()->getRegInfo(); - auto VR128RC = MRI.getRegClass(MI.getOperand(1).getReg()); - unsigned VR128 = MRI.createVirtualRegister(VR128RC); - BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY), - VR128) - .addReg(MI.getOperand(2).getReg()); - auto &WorkingMI = cloneIfNew(MI); WorkingMI.setDesc(get(Opc)); - WorkingMI.getOperand(2).setReg(VR128); WorkingMI.addOperand(MachineOperand::CreateImm(Mask)); return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false, OpIdx1, OpIdx2); @@ -9402,6 +9451,8 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::UNPCKLPSrr, X86::UNPCKLPSrr, X86::PUNPCKLDQrr }, { X86::UNPCKHPSrm, X86::UNPCKHPSrm, X86::PUNPCKHDQrm }, { X86::UNPCKHPSrr, X86::UNPCKHPSrr, X86::PUNPCKHDQrr }, + { X86::EXTRACTPSmr, X86::EXTRACTPSmr, X86::PEXTRDmr }, + { X86::EXTRACTPSrr, X86::EXTRACTPSrr, X86::PEXTRDrr }, // AVX 128-bit support { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr }, { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm }, @@ -9430,6 +9481,8 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::VUNPCKLPSrr, X86::VUNPCKLPSrr, X86::VPUNPCKLDQrr }, { X86::VUNPCKHPSrm, X86::VUNPCKHPSrm, X86::VPUNPCKHDQrm }, { X86::VUNPCKHPSrr, X86::VUNPCKHPSrr, X86::VPUNPCKHDQrr }, + { X86::VEXTRACTPSmr, X86::VEXTRACTPSmr, X86::VPEXTRDmr }, + { X86::VEXTRACTPSrr, X86::VEXTRACTPSrr, X86::VPEXTRDrr }, // AVX 256-bit support { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr }, { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm }, @@ -9528,6 +9581,8 @@ static const uint16_t ReplaceableInstrs[][3] = { { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrr, X86::VPUNPCKLDQZrr }, { X86::VUNPCKHPSZrm, X86::VUNPCKHPSZrm, X86::VPUNPCKHDQZrm }, { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrr, X86::VPUNPCKHDQZrr }, + { X86::VEXTRACTPSZmr, X86::VEXTRACTPSZmr, 
X86::VPEXTRDZmr }, + { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZrr, X86::VPEXTRDZrr }, }; static const uint16_t ReplaceableInstrsAVX2[][3] = { @@ -10738,7 +10793,7 @@ llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } /// /// * Call construction overhead: 1 (call instruction) /// * Frame construction overhead: 1 (return instruction) -/// +/// /// \p MachineOutlinerTailCall implies that the function is being tail called. /// A jump is emitted instead of a call, and the return is already present in /// the outlined sequence. That is, @@ -10768,13 +10823,27 @@ X86InstrInfo::getOutlininingCandidateInfo( MachineOutlinerTailCall, // Type of call. MachineOutlinerTailCall // Type of frame. ); - + return MachineOutlinerInfo(1, 1, MachineOutlinerDefault, MachineOutlinerDefault); } -bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const { - return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); +bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const { + const Function *F = MF.getFunction(); + + // Does the function use a red zone? If it does, then we can't risk messing + // with the stack. + if (!F->hasFnAttribute(Attribute::NoRedZone)) + return false; + + // If we *don't* want to outline from things that could potentially be deduped + // then return false. + if (!OutlineFromLinkOnceODRs && F->hasLinkOnceODRLinkage()) + return false; + + // This function is viable for outlining, so return true. + return true; } X86GenInstrInfo::MachineOutlinerInstrType diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 8bbf7dc6d2335..e665ec1f14dce 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -564,7 +564,8 @@ class X86InstrInfo final : public X86GenInstrInfo { std::pair> &RepeatedSequenceLocs) const override; - bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; + bool isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const override; llvm::X86GenInstrInfo::MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const override; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 594b07ad320d0..17b74d006eaba 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -904,7 +904,6 @@ let RecomputePerFunction = 1 in { "MF->getFunction()->optForSize()">; } -def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; @@ -1658,40 +1657,36 @@ let SchedRW = [WriteALU] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))], IIC_BT_RR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB; + [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB, + NotMemoryFoldable; } // SchedRW // Unlike with the register+register form, the memory+register form of the // bt 
instruction does not ignore the high bits of the index. From ISel's // perspective, this is pretty bizarre. Make these instructions disassembly -// only for now. +// only for now. These instructions are also slow on modern CPUs so that's +// another reason to avoid generating them. let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in { def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - // [(X86bt (loadi16 addr:$src1), GR16:$src2), - // (implicit EFLAGS)] [], IIC_BT_MR - >, OpSize16, TB, Requires<[FastBTMem]>; + >, OpSize16, TB, NotMemoryFoldable; def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - // [(X86bt (loadi32 addr:$src1), GR32:$src2), - // (implicit EFLAGS)] [], IIC_BT_MR - >, OpSize32, TB, Requires<[FastBTMem]>; + >, OpSize32, TB, NotMemoryFoldable; def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - // [(X86bt (loadi64 addr:$src1), GR64:$src2), - // (implicit EFLAGS)] [], IIC_BT_MR - >, TB; + >, TB, NotMemoryFoldable; } let SchedRW = [WriteALU] in { @@ -1709,9 +1704,8 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), IIC_BT_RI>, TB; } // SchedRW -// Note that these instructions don't need FastBTMem because that -// only applies when the other operand is in a register. When it's -// an immediate, bt is still fast. +// Note that these instructions aren't slow because that only applies when the +// other operand is in a register. When it's an immediate, bt is still fast. let SchedRW = [WriteALU] in { def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", @@ -1731,23 +1725,25 @@ let hasSideEffects = 0 in { let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BTC32rr : I<0xBB, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTC64rr : RI<0xBB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; + "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB, + NotMemoryFoldable; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; + "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB, + NotMemoryFoldable; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { @@ -1775,23 +1771,24 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize16, TB; + OpSize16, TB, 
NotMemoryFoldable; def BTR32rr : I<0xB3, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTR64rr : RI<0xB3, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; + "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB, NotMemoryFoldable; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; + "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB, + NotMemoryFoldable; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { @@ -1819,23 +1816,25 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BTS32rr : I<0xAB, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTS64rr : RI<0xAB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; + "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB, + NotMemoryFoldable; } // SchedRW let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize16, TB; + OpSize16, TB, NotMemoryFoldable; def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, - OpSize32, TB; + OpSize32, TB, NotMemoryFoldable; def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), - "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; + "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB, + NotMemoryFoldable; } let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { @@ -2181,13 +2180,13 @@ let Predicates = [HasMOVBE] in { let Predicates = [HasRDRAND], Defs = [EFLAGS] in { def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins), "rdrand{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86rdrand))]>, OpSize16, TB; + [(set GR16:$dst, EFLAGS, (X86rdrand))]>, OpSize16, PS; def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins), "rdrand{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86rdrand))]>, OpSize32, TB; + [(set GR32:$dst, EFLAGS, (X86rdrand))]>, OpSize32, PS; def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins), "rdrand{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86rdrand))]>, TB; + [(set GR64:$dst, EFLAGS, (X86rdrand))]>, PS; } //===----------------------------------------------------------------------===// @@ -2196,13 +2195,13 @@ let Predicates = [HasRDRAND], Defs = [EFLAGS] in { let Predicates = [HasRDSEED], Defs = [EFLAGS] in { def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins), 
"rdseed{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize16, TB; + [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize16, PS; def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins), "rdseed{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86rdseed))]>, OpSize32, TB; + [(set GR32:$dst, EFLAGS, (X86rdseed))]>, OpSize32, PS; def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdseed{q}\t$dst", - [(set GR64:$dst, EFLAGS, (X86rdseed))]>, TB; + [(set GR64:$dst, EFLAGS, (X86rdseed))]>, PS; } //===----------------------------------------------------------------------===// @@ -2692,9 +2691,9 @@ let Predicates = [HasCLFLUSHOPT] in def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflushopt\t$src", [(int_x86_clflushopt addr:$src)]>, PD; -// TODO: Add an instrincis for this. let Predicates = [HasCLWB] in -def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src", []>, PD; +def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src", + [(int_x86_clwb addr:$src)]>, PD; //===----------------------------------------------------------------------===// @@ -3236,14 +3235,14 @@ defm : ShiftRotateByOneAlias<"ror", "ROR">; FIXME */ // test: We accept "testX , " and "testX , " as synonyms. -def : InstAlias<"test{b}\t{$val, $mem|$mem, $val}", - (TEST8rm GR8 :$val, i8mem :$mem), 0>; -def : InstAlias<"test{w}\t{$val, $mem|$mem, $val}", - (TEST16rm GR16:$val, i16mem:$mem), 0>; -def : InstAlias<"test{l}\t{$val, $mem|$mem, $val}", - (TEST32rm GR32:$val, i32mem:$mem), 0>; -def : InstAlias<"test{q}\t{$val, $mem|$mem, $val}", - (TEST64rm GR64:$val, i64mem:$mem), 0>; +def : InstAlias<"test{b}\t{$mem, $val|$val, $mem}", + (TEST8mr i8mem :$mem, GR8 :$val), 0>; +def : InstAlias<"test{w}\t{$mem, $val|$val, $mem}", + (TEST16mr i16mem:$mem, GR16:$val), 0>; +def : InstAlias<"test{l}\t{$mem, $val|$val, $mem}", + (TEST32mr i32mem:$mem, GR32:$val), 0>; +def : InstAlias<"test{q}\t{$mem, $val|$val, $mem}", + (TEST64mr i64mem:$mem, GR64:$val), 0>; // xchg: We accept "xchgX , " and "xchgX , " as synonyms. def : InstAlias<"xchg{b}\t{$mem, $val|$val, $mem}", diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 77eb33d32b4ab..451303054f56a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -384,22 +384,21 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // don't use movss/movsd for copies. 
//===----------------------------------------------------------------------===// -multiclass sse12_move_rr { let isCommutable = 1 in def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, RC:$src2), + (ins VR128:$src1, VR128:$src2), !strconcat(base_opc, asm_opr), - [(set VR128:$dst, (vt (OpNode VR128:$src1, - (scalar_to_vector RC:$src2))))], + [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_S_RR, d>, Sched<[WriteFShuffle]>; // For the disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, RC:$src2), + (ins VR128:$src1, VR128:$src2), !strconcat(base_opc, asm_opr), [], IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>, FoldGenData; @@ -409,7 +408,7 @@ multiclass sse12_move { // AVX - defm V#NAME : sse12_move_rr, VEX_4V, VEX_LIG, VEX_WIG; @@ -420,7 +419,7 @@ multiclass sse12_move, VEX_WIG; // SSE1 & 2 let Constraints = "$src1 = $dst" in { - defm NAME : sse12_move_rr; } @@ -506,30 +505,30 @@ let Predicates = [UseAVX] in { // Shuffle with VMOVSS def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), - (VMOVSSrr (v4i32 VR128:$src1), - (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>; - def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), - (VMOVSSrr (v4f32 VR128:$src1), - (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>; + (VMOVSSrr VR128:$src1, VR128:$src2)>; + + def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), + (VMOVSSrr VR128:$src1, (COPY_TO_REGCLASS FR32:$src2, VR128))>; // Shuffle with VMOVSD def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; - def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; + (VMOVSDrr VR128:$src1, VR128:$src2)>; + + def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), + (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS FR64:$src2, VR128))>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem // is during lowering, where it's not possible to recognize the fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; + (VMOVSDrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; + (VMOVSDrr VR128:$src1, VR128:$src2)>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; + (VMOVSDrr VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; + (VMOVSDrr VR128:$src1, VR128:$src2)>; } let Predicates = [UseSSE1] in { @@ -537,9 +536,9 @@ let Predicates = [UseSSE1] in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a // MOVSS to the lower bits. 
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), - (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; + (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>; def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), - (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; + (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>; } let AddedComplexity = 20 in { @@ -561,9 +560,10 @@ let Predicates = [UseSSE1] in { // Shuffle with MOVSS def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; - def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), - (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; + (MOVSSrr VR128:$src1, VR128:$src2)>; + + def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))), + (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS FR32:$src2, VR128))>; } let Predicates = [UseSSE2] in { @@ -571,7 +571,7 @@ let Predicates = [UseSSE2] in { // Move scalar to XMM zero-extended, zeroing a VR128 then do a // MOVSD to the lower bits. def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), - (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; + (MOVSDrr (v2f64 (V_SET0)), (COPY_TO_REGCLASS FR64:$src, VR128))>; } let AddedComplexity = 20 in { @@ -590,22 +590,23 @@ let Predicates = [UseSSE2] in { // Shuffle with MOVSD def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; - def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; + (MOVSDrr VR128:$src1, VR128:$src2)>; + + def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))), + (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS FR64:$src2, VR128))>; // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem // is during lowering, where it's not possible to recognize the fold because // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. 
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; + (MOVSDrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; + (MOVSDrr VR128:$src1, VR128:$src2)>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; + (MOVSDrr VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), - (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; + (MOVSDrr VR128:$src1, VR128:$src2)>; } // Aliases to help the assembler pick two byte VEX encodings by swapping the @@ -1508,14 +1509,14 @@ def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG, - Sched<[WriteCvtF2F]>, VEX_WIG; + Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG, - Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG; + Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable; } def : Pat<(f32 (fpround FR64:$src)), @@ -1575,14 +1576,14 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG, - Sched<[WriteCvtF2F]>, VEX_WIG; + Sched<[WriteCvtF2F]>, VEX_WIG, NotMemoryFoldable; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>, - Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG; + Sched<[WriteCvtF2FLd, ReadAfterLd]>, VEX_WIG, NotMemoryFoldable; } def : Pat<(f64 (fpextend FR32:$src)), @@ -1888,9 +1889,15 @@ let Predicates = [HasAVX, NoVLX] in { def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))), (VCVTPD2DQrr VR128:$src)>; + def : Pat<(X86vzmovl (v2i64 (bitconvert + (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))), + (VCVTPD2DQrm addr:$src)>; def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))), (VCVTTPD2DQrr VR128:$src)>; + def : Pat<(X86vzmovl (v2i64 (bitconvert + (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))), + (VCVTTPD2DQrm addr:$src)>; } } // Predicates = [HasAVX] @@ -1910,9 +1917,15 @@ let Predicates = [UseSSE2] in { def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))), (CVTPD2DQrr VR128:$src)>; + def : Pat<(X86vzmovl (v2i64 (bitconvert + (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))), + (CVTPD2DQrm addr:$src)>; def : Pat<(X86vzmovl (v2i64 (bitconvert (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))), (CVTTPD2DQrr VR128:$src)>; + def : Pat<(X86vzmovl (v2i64 (bitconvert + (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))), + (CVTTPD2DQrm addr:$src)>; } } // Predicates = [UseSSE2] @@ -1954,7 +1967,7 @@ let hasSideEffects = 0, mayLoad = 1 in def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))))]>, + (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>, VEX, Sched<[WriteCvtI2FLd]>, 
VEX_WIG; def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", @@ -1977,7 +1990,7 @@ let hasSideEffects = 0, mayLoad = 1 in def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))))], + (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))], IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>; def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", @@ -1989,12 +2002,16 @@ def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), let Predicates = [HasAVX, NoVLX] in { def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (VCVTDQ2PDrm addr:$src)>; + def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), + (VCVTDQ2PDrm addr:$src)>; } // Predicates = [HasAVX, NoVLX] // SSE2 register conversion intrinsics let Predicates = [UseSSE2] in { def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), (CVTDQ2PDrm addr:$src)>; + def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), + (CVTDQ2PDrm addr:$src)>; } // Predicates = [UseSSE2] // Convert packed double to packed single @@ -2049,18 +2066,26 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), let Predicates = [HasAVX, NoVLX] in { // Match fpround and fpextend for 128/256-bit conversions - let AddedComplexity = 15 in - def : Pat<(X86vzmovl (v2f64 (bitconvert - (v4f32 (X86vfpround (v2f64 VR128:$src)))))), - (VCVTPD2PSrr VR128:$src)>; + let AddedComplexity = 15 in { + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (v2f64 VR128:$src)))))), + (VCVTPD2PSrr VR128:$src)>; + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (loadv2f64 addr:$src)))))), + (VCVTPD2PSrm addr:$src)>; + } } let Predicates = [UseSSE2] in { // Match fpround and fpextend for 128 conversions - let AddedComplexity = 15 in - def : Pat<(X86vzmovl (v2f64 (bitconvert - (v4f32 (X86vfpround (v2f64 VR128:$src)))))), - (CVTPD2PSrr VR128:$src)>; + let AddedComplexity = 15 in { + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (v2f64 VR128:$src)))))), + (CVTPD2PSrr VR128:$src)>; + def : Pat<(X86vzmovl (v2f64 (bitconvert + (v4f32 (X86vfpround (memopv2f64 addr:$src)))))), + (CVTPD2PSrm addr:$src)>; + } } //===----------------------------------------------------------------------===// @@ -2308,6 +2333,58 @@ let Constraints = "$src1 = $dst" in { SSEPackedDouble, memopv2f64, SSE_ALU_F64P>, PD; } +def CommutableCMPCC : PatLeaf<(imm), [{ + return (N->getZExtValue() == 0x00 || N->getZExtValue() == 0x03 || + N->getZExtValue() == 0x04 || N->getZExtValue() == 0x07); +}]>; + +// Patterns to select compares with loads in first operand. 
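The CommutableCMPCC leaf above accepts only the four SSE compare immediates whose predicates are symmetric in their operands (0x00 EQ, 0x03 UNORD, 0x04 NEQ, 0x07 ORD), which is what makes it legal for the patterns that follow this note to swap a load into the second operand. A self-contained check under the classic cmpps predicate encoding (illustrative, not LLVM code):

#include <cassert>
#include <cmath>

// Evaluates the four commutable SSE compare predicates for doubles.
static bool cmp(unsigned Imm, double A, double B) {
  bool Unord = std::isnan(A) || std::isnan(B);
  switch (Imm) {
  case 0x00: return !Unord && A == B; // EQ (ordered, quiet)
  case 0x03: return Unord;            // UNORD
  case 0x04: return Unord || A != B;  // NEQ (unordered, quiet)
  case 0x07: return !Unord;           // ORD
  }
  return false;
}

int main() {
  const unsigned Imms[] = {0x00, 0x03, 0x04, 0x07};
  const double Vals[] = {0.0, 1.0, -2.5, std::nan("")};
  for (unsigned Imm : Imms)
    for (double A : Vals)
      for (double B : Vals)
        assert(cmp(Imm, A, B) == cmp(Imm, B, A)); // symmetric in A and B
  return 0;
}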
+let Predicates = [HasAVX] in { + def : Pat<(v4f64 (X86cmpp (loadv4f64 addr:$src2), VR256:$src1, + CommutableCMPCC:$cc)), + (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; + + def : Pat<(v8f32 (X86cmpp (loadv8f32 addr:$src2), VR256:$src1, + CommutableCMPCC:$cc)), + (VCMPPSYrmi VR256:$src1, addr:$src2, imm:$cc)>; + + def : Pat<(v2f64 (X86cmpp (loadv2f64 addr:$src2), VR128:$src1, + CommutableCMPCC:$cc)), + (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; + + def : Pat<(v4f32 (X86cmpp (loadv4f32 addr:$src2), VR128:$src1, + CommutableCMPCC:$cc)), + (VCMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>; + + def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, + CommutableCMPCC:$cc)), + (VCMPSDrm FR64:$src1, addr:$src2, imm:$cc)>; + + def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, + CommutableCMPCC:$cc)), + (VCMPSSrm FR32:$src1, addr:$src2, imm:$cc)>; +} + +let Predicates = [UseSSE2] in { + def : Pat<(v2f64 (X86cmpp (memopv2f64 addr:$src2), VR128:$src1, + CommutableCMPCC:$cc)), + (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; + + def : Pat<(f64 (X86cmps (loadf64 addr:$src2), FR64:$src1, + CommutableCMPCC:$cc)), + (CMPSDrm FR64:$src1, addr:$src2, imm:$cc)>; +} + +let Predicates = [UseSSE1] in { + def : Pat<(v4f32 (X86cmpp (memopv4f32 addr:$src2), VR128:$src1, + CommutableCMPCC:$cc)), + (CMPPSrmi VR128:$src1, addr:$src2, imm:$cc)>; + + def : Pat<(f32 (X86cmps (loadf32 addr:$src2), FR32:$src1, + CommutableCMPCC:$cc)), + (CMPSSrm FR32:$src1, addr:$src2, imm:$cc)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Shuffle Instructions //===----------------------------------------------------------------------===// @@ -2858,22 +2935,6 @@ multiclass scalar_math_f32_patterns { (!cast(OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>; } - // With SSE 4.1, blendi is preferred to movsd, so match that too. - let Predicates = [UseSSE41] in { - // extracted scalar math op with insert via blend - def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector - (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))), - FR32:$src))), (i8 1))), - (!cast(OpcPrefix#SSrr_Int) v4f32:$dst, - (COPY_TO_REGCLASS FR32:$src, VR128))>; - - // vector math op with insert via blend - def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), - (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))), - (!cast(OpcPrefix#SSrr_Int)v4f32:$dst, v4f32:$src)>; - - } - // Repeat everything for AVX. let Predicates = [UseAVX] in { // extracted scalar math op with insert via movss @@ -2883,22 +2944,10 @@ multiclass scalar_math_f32_patterns { (!cast("V"#OpcPrefix#SSrr_Int) v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; - // extracted scalar math op with insert via blend - def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector - (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))), - FR32:$src))), (i8 1))), - (!cast("V"#OpcPrefix#SSrr_Int) v4f32:$dst, - (COPY_TO_REGCLASS FR32:$src, VR128))>; - // vector math op with insert via movss def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))), (!cast("V"#OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>; - - // vector math op with insert via blend - def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), - (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))), - (!cast("V"#OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>; } } @@ -2922,21 +2971,6 @@ multiclass scalar_math_f64_patterns { (!cast(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>; } - // With SSE 4.1, blendi is preferred to movsd, so match those too. 
- let Predicates = [UseSSE41] in { - // extracted scalar math op with insert via blend - def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector - (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))), - FR64:$src))), (i8 1))), - (!cast(OpcPrefix#SDrr_Int) v2f64:$dst, - (COPY_TO_REGCLASS FR64:$src, VR128))>; - - // vector math op with insert via blend - def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), - (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))), - (!cast(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>; - } - // Repeat everything for AVX. let Predicates = [UseAVX] in { // extracted scalar math op with insert via movsd @@ -2946,22 +2980,10 @@ multiclass scalar_math_f64_patterns { (!cast("V"#OpcPrefix#SDrr_Int) v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; - // extracted scalar math op with insert via blend - def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector - (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))), - FR64:$src))), (i8 1))), - (!cast("V"#OpcPrefix#SDrr_Int) v2f64:$dst, - (COPY_TO_REGCLASS FR64:$src, VR128))>; - // vector math op with insert via movsd def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)))), (!cast("V"#OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>; - - // vector math op with insert via blend - def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), - (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))), - (!cast("V"#OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>; } } @@ -3205,7 +3227,8 @@ multiclass sse1_fp_unop_s opc, string OpcodeStr, SDNode OpNode, defm V#NAME#SS : avx_fp_unop_s("int_x86_sse_"##OpcodeStr##_ss), OpNode, - SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG; + SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG, VEX_WIG, + NotMemoryFoldable; } multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, @@ -3217,7 +3240,7 @@ multiclass sse2_fp_unop_s opc, string OpcodeStr, SDNode OpNode, f64mem, !cast("int_x86_sse2_"##OpcodeStr##_sd), OpNode, SSEPackedDouble, itins, "SD">, - XD, VEX_4V, VEX_LIG, VEX_WIG; + XD, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable; } // Square root. @@ -3247,19 +3270,10 @@ multiclass scalar_unary_math_patterns(OpcPrefix#r_Int) VT:$dst, VT:$src)>; } - // With SSE 4.1, blendi is preferred to movs*, so match that too. - let Predicates = [UseSSE41] in { - def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))), - (!cast(OpcPrefix#r_Int) VT:$dst, VT:$src)>; - } - // Repeat for AVX versions of the instructions. 
let Predicates = [HasAVX] in { def : Pat<(VT (Move VT:$dst, (Intr VT:$src))), (!cast("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; - - def : Pat<(VT (X86Blendi VT:$dst, (Intr VT:$src), (i8 1))), - (!cast("V"#OpcPrefix#r_Int) VT:$dst, VT:$src)>; } } @@ -5515,8 +5529,8 @@ multiclass SS41I_extract8 opc, string OpcodeStr> { (ins i8mem:$dst, VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(store (i8 (trunc (assertzext (X86pextrb (v16i8 VR128:$src1), - imm:$src2)))), addr:$dst)]>; + [(store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), + addr:$dst)]>; } let Predicates = [HasAVX, NoBWI] in @@ -5540,8 +5554,8 @@ multiclass SS41I_extract16 opc, string OpcodeStr> { (ins i16mem:$dst, VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(store (i16 (trunc (assertzext (X86pextrw (v8i16 VR128:$src1), - imm:$src2)))), addr:$dst)]>; + [(store (i16 (trunc (X86pextrw (v8i16 VR128:$src1), imm:$src2))), + addr:$dst)]>; } let Predicates = [HasAVX, NoBWI] in @@ -5947,7 +5961,7 @@ let Predicates = [HasAVX] in { defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", int_x86_sse41_round_ss, int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG, VEX_WIG; - defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG; + defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG, VEX_WIG; } let Predicates = [UseAVX] in { @@ -6670,7 +6684,7 @@ let Predicates = [UseAVX] in { def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>; def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), - (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>; + (VMOVSDrr (v2f64 (V_SET0)), (COPY_TO_REGCLASS FR64:$src, VR128))>; // Move low f32 and clear high bits. def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), @@ -7391,6 +7405,15 @@ let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256, v4f64, v2f64, WriteFShuffle256>, VEX_L; +let Predicates = [HasAVX, NoVLX] in { + def : Pat<(v4f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (VBROADCASTSSrm addr:$src)>; + def : Pat<(v8f32 (X86VBroadcast (v4f32 (scalar_to_vector (loadf32 addr:$src))))), + (VBROADCASTSSYrm addr:$src)>; + def : Pat<(v4f64 (X86VBroadcast (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (VBROADCASTSDYrm addr:$src)>; +} + //===----------------------------------------------------------------------===// // VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both // halves of a 256-bit vector. @@ -7865,12 +7888,23 @@ defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, v2i64, v4i64, NoVLX>; -let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { +let Predicates = [HasAVX2, NoVLX] in { // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD. 
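All of the broadcast-from-load patterns in this area reduce to the same scalar semantics: read one element from memory and replicate it into every destination lane; the VPBROADCAST patterns continue right after this sketch. A plain C++ sketch of the v8f32 VBROADCASTSS case (broadcast_f32 is an invented name):

#include <array>
#include <cassert>

// Splats one float loaded from memory across all 8 lanes, mirroring the
// v8f32 behavior of VBROADCASTSSYrm.
static std::array<float, 8> broadcast_f32(const float *Mem) {
  std::array<float, 8> Out;
  Out.fill(*Mem);
  return Out;
}

int main() {
  float X = 3.5f;
  auto V = broadcast_f32(&X);
  for (float Lane : V)
    assert(Lane == 3.5f);
  return 0;
}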
def : Pat<(v2i64 (X86VBroadcast (v2i64 (X86vzload addr:$src)))), (VPBROADCASTQrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (v4i64 (X86vzload addr:$src)))), (VPBROADCASTQYrm addr:$src)>; + + def : Pat<(v4i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + (VPBROADCASTDrm addr:$src)>; + def : Pat<(v8i32 (X86VBroadcast (v4i32 (scalar_to_vector (loadi32 addr:$src))))), + (VPBROADCASTDYrm addr:$src)>; + def : Pat<(v2i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (VPBROADCASTQrm addr:$src)>; + def : Pat<(v4i64 (X86VBroadcast (v2i64 (scalar_to_vector (loadi64 addr:$src))))), + (VPBROADCASTQYrm addr:$src)>; +} +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. // This means we'll encounter truncated i32 loads; match that here. def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), @@ -7959,6 +7993,11 @@ let Predicates = [HasAVX, NoVLX] in { (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128))>; def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), (VMOVDDUPrm addr:$src)>; + + def : Pat<(v2f64 (X86VBroadcast v2f64:$src)), + (VMOVDDUPrr VR128:$src)>; + def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))), + (VMOVDDUPrm addr:$src)>; } let Predicates = [HasAVX1Only] in { diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index e9b6c6785bc9c..86b3f21018fd2 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -506,16 +506,16 @@ let Uses = [EDX, EAX] in { let Predicates = [HasXSAVE] in { def XSAVE : I<0xAE, MRM4m, (outs), (ins opaque512mem:$dst), "xsave\t$dst", - [(int_x86_xsave addr:$dst, EDX, EAX)]>, TB; + [(int_x86_xsave addr:$dst, EDX, EAX)]>, PS; def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaque512mem:$dst), "xsave64\t$dst", - [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; + [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, PS, Requires<[In64BitMode]>; def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), "xrstor\t$dst", - [(int_x86_xrstor addr:$dst, EDX, EAX)]>, TB; + [(int_x86_xrstor addr:$dst, EDX, EAX)]>, PS; def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), "xrstor64\t$dst", - [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; + [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, PS, Requires<[In64BitMode]>; } let Predicates = [HasXSAVEOPT] in { def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), @@ -635,3 +635,24 @@ let Defs = [EFLAGS] in { let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RBX, RCX] in { def GETSEC : I<0x37, RawFrm, (outs), (ins), "getsec", []>, TB; } + +//===----------------------------------------------------------------------===// +// RDPID Instruction +def RDPID32 : I<0xC7, MRM7r, (outs GR32:$src), (ins), + "rdpid\t$src", []>, XS, + Requires<[Not64BitMode]>; +def RDPID64 : I<0xC7, MRM7r, (outs GR64:$src), (ins), + "rdpid\t$src", []>, XS, + Requires<[In64BitMode]>; + +//===----------------------------------------------------------------------===// +// PTWRITE Instruction +def PTWRITEm: I<0xAE, MRM4m, (outs), (ins i32mem:$dst), + "ptwrite{l}\t$dst", []>, XS; +def PTWRITE64m : RI<0xAE, MRM4m, (outs), (ins i64mem:$dst), + "ptwrite{q}\t$dst", []>, XS, Requires<[In64BitMode]>; + +def PTWRITEr : I<0xAE, MRM4r, (outs), (ins GR32:$dst), + "ptwrite{l}\t$dst", []>, XS; +def PTWRITE64r : RI<0xAE, MRM4r, (outs), (ins GR64:$dst), + "ptwrite{q}\t$dst", []>, XS, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86InstrVMX.td 
b/lib/Target/X86/X86InstrVMX.td index 315a69e6a2a24..273ad24e84ba4 100644 --- a/lib/Target/X86/X86InstrVMX.td +++ b/lib/Target/X86/X86InstrVMX.td @@ -42,7 +42,7 @@ def VMRESUME : I<0x01, MRM_C3, (outs), (ins), "vmresume", []>, TB; def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs), "vmptrld\t$vmcs", []>, PS; def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs), - "vmptrst\t$vmcs", []>, TB; + "vmptrst\t$vmcs", []>, PS; def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>; def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td index f6b41c46f6d8d..7e2195cf93aa8 100644 --- a/lib/Target/X86/X86InstrVecCompiler.td +++ b/lib/Target/X86/X86InstrVecCompiler.td @@ -368,7 +368,8 @@ let Predicates = [HasAVX512, NoVLX] in { // where we explicitly insert zeros. class veczeroupper : PatLeaf<(vt RC:$src), [{ - return N->getOpcode() == X86ISD::VPMADDWD; + return N->getOpcode() == X86ISD::VPMADDWD || + N->getOpcode() == X86ISD::PSADBW; }]>; def zeroupperv2f64 : veczeroupper; diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp index 601840da5fec9..6bce2558c021a 100644 --- a/lib/Target/X86/X86InstructionSelector.cpp +++ b/lib/Target/X86/X86InstructionSelector.cpp @@ -1,4 +1,4 @@ -//===- X86InstructionSelector.cpp ----------------------------*- C++ -*-==// +//===- X86InstructionSelector.cpp -----------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -12,6 +12,9 @@ /// \todo This should be generated by TableGen. //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "X86-isel" + +#include "MCTargetDesc/X86BaseInfo.h" #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86RegisterBankInfo.h" @@ -19,21 +22,31 @@ #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Type.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" - -#define DEBUG_TYPE "X86-isel" - -#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include using namespace llvm; @@ -205,7 +218,6 @@ static const TargetRegisterClass *getRegClassFromGRPhysReg(unsigned Reg) { // Set X86 Opcode and constrain DestReg. 
bool X86InstructionSelector::selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const { - unsigned DstReg = I.getOperand(0).getReg(); const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); @@ -432,7 +444,6 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty, static void X86SelectAddress(const MachineInstr &I, const MachineRegisterInfo &MRI, X86AddressMode &AM) { - assert(I.getOperand(0).isReg() && "unsupported opperand."); assert(MRI.getType(I.getOperand(0).getReg()).isPointer() && "unsupported type."); @@ -454,13 +465,11 @@ static void X86SelectAddress(const MachineInstr &I, // Default behavior. AM.Base.Reg = I.getOperand(0).getReg(); - return; } bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - unsigned Opc = I.getOpcode(); assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) && @@ -537,7 +546,6 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) && "unexpected instruction"); @@ -548,7 +556,7 @@ bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) - return 0; + return false; X86AddressMode AM; AM.GV = GV; @@ -584,7 +592,6 @@ bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, bool X86InstructionSelector::selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_CONSTANT) && "unexpected instruction"); @@ -614,14 +621,13 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I, case 32: NewOpc = X86::MOV32ri; break; - case 64: { + case 64: // TODO: in case isUInt<32>(Val), X86::MOV32ri can be used if (isInt<32>(Val)) NewOpc = X86::MOV64ri32; else NewOpc = X86::MOV64ri; break; - } default: llvm_unreachable("Can't select G_CONSTANT, unsupported type."); } @@ -633,7 +639,6 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I, bool X86InstructionSelector::selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_TRUNC) && "unexpected instruction"); const unsigned DstReg = I.getOperand(0).getReg(); @@ -692,7 +697,6 @@ bool X86InstructionSelector::selectTrunc(MachineInstr &I, bool X86InstructionSelector::selectZext(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction"); const unsigned DstReg = I.getOperand(0).getReg(); @@ -740,7 +744,6 @@ bool X86InstructionSelector::selectZext(MachineInstr &I, bool X86InstructionSelector::selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction"); const unsigned DstReg = I.getOperand(0).getReg(); @@ -790,7 +793,6 @@ bool X86InstructionSelector::selectAnyext(MachineInstr &I, bool X86InstructionSelector::selectCmp(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction"); X86::CondCode CC; @@ -843,7 +845,6 @@ bool X86InstructionSelector::selectCmp(MachineInstr &I, bool X86InstructionSelector::selectUadde(MachineInstr &I, MachineRegisterInfo &MRI, 
MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_UADDE) && "unexpected instruction"); const unsigned DstReg = I.getOperand(0).getReg(); @@ -903,7 +904,6 @@ bool X86InstructionSelector::selectUadde(MachineInstr &I, bool X86InstructionSelector::selectExtract(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_EXTRACT) && "unexpected instruction"); @@ -962,7 +962,6 @@ bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg); unsigned SubIdx = X86::NoSubRegister; @@ -1001,7 +1000,6 @@ bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg); unsigned SubIdx = X86::NoSubRegister; @@ -1039,7 +1037,6 @@ bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg, bool X86InstructionSelector::selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction"); const unsigned DstReg = I.getOperand(0).getReg(); @@ -1098,7 +1095,6 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I, bool X86InstructionSelector::selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) && "unexpected instruction"); @@ -1108,7 +1104,6 @@ bool X86InstructionSelector::selectUnmergeValues(MachineInstr &I, unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { - MachineInstr &ExtrInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg()) @@ -1126,7 +1121,6 @@ bool X86InstructionSelector::selectUnmergeValues(MachineInstr &I, bool X86InstructionSelector::selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES) && "unexpected instruction"); @@ -1147,7 +1141,6 @@ bool X86InstructionSelector::selectMergeValues(MachineInstr &I, return false; for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) { - unsigned Tmp = MRI.createGenericVirtualRegister(DstTy); MRI.setRegBank(Tmp, RegBank); @@ -1177,7 +1170,6 @@ bool X86InstructionSelector::selectMergeValues(MachineInstr &I, bool X86InstructionSelector::selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction"); const unsigned CondReg = I.getOperand(0).getReg(); @@ -1199,7 +1191,6 @@ bool X86InstructionSelector::selectCondBranch(MachineInstr &I, bool X86InstructionSelector::materializeFP(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { - assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) && "unexpected instruction"); @@ -1265,7 +1256,6 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I, bool X86InstructionSelector::selectImplicitDefOrPHI( MachineInstr &I, MachineRegisterInfo &MRI) const { - assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF || I.getOpcode() == TargetOpcode::G_PHI) && "unexpected instruction"); diff --git a/lib/Target/X86/X86InterleavedAccess.cpp 
b/lib/Target/X86/X86InterleavedAccess.cpp index ff7244f62194a..6e87116f4d1a2 100644 --- a/lib/Target/X86/X86InterleavedAccess.cpp +++ b/lib/Target/X86/X86InterleavedAccess.cpp @@ -1,25 +1,44 @@ -//===--------- X86InterleavedAccess.cpp ----------------------------------===// +//===- X86InterleavedAccess.cpp -------------------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//===--------------------------------------------------------------------===// -/// +//===----------------------------------------------------------------------===// +// /// \file /// This file contains the X86 implementation of the interleaved accesses /// optimization generating X86-specific instructions/intrinsics for /// interleaved access groups. -/// -//===--------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// -#include "X86TargetMachine.h" +#include "X86ISelLowering.h" +#include "X86Subtarget.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include +#include +#include +#include using namespace llvm; namespace { + /// \brief This class holds necessary information to represent an interleaved /// access group and supports utilities to lower the group into /// X86-specific instructions/intrinsics. @@ -104,6 +123,7 @@ class X86InterleavedAccessGroup { /// instructions/intrinsics. 
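For context on the class whose declaration continues below: an interleaved access group pairs one wide memory operation with the shuffles that split it into (or assemble it from) strided components. In scalar form, the stride-3 load case is just deinterleaving, as in this illustrative sketch (the real pass emits shuffle sequences, not loops):

#include <cassert>
#include <cstdint>
#include <vector>

// Deinterleaves a stride-3 byte stream (r0 g0 b0 r1 g1 b1 ...) into three
// planes, the scalar equivalent of the shuffle sequences this file builds.
static void deinterleave3(const std::vector<uint8_t> &In,
                          std::vector<uint8_t> Out[3]) {
  for (size_t I = 0; I < In.size(); ++I)
    Out[I % 3].push_back(In[I]);
}

int main() {
  std::vector<uint8_t> RGB = {10, 20, 30, 11, 21, 31, 12, 22, 32};
  std::vector<uint8_t> Planes[3];
  deinterleave3(RGB, Planes);
  assert(Planes[0] == (std::vector<uint8_t>{10, 11, 12}));
  assert(Planes[1] == (std::vector<uint8_t>{20, 21, 22}));
  assert(Planes[2] == (std::vector<uint8_t>{30, 31, 32}));
  return 0;
}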
bool lowerIntoOptimizedSequence(); }; + } // end anonymous namespace bool X86InterleavedAccessGroup::isSupported() const { @@ -123,6 +143,8 @@ bool X86InterleavedAccessGroup::isSupported() const { if (isa(Inst)) { WideInstSize = DL.getTypeSizeInBits(Inst->getType()); + if (cast(Inst)->getPointerAddressSpace()) + return false; } else WideInstSize = DL.getTypeSizeInBits(Shuffles[0]->getType()); @@ -132,11 +154,12 @@ bool X86InterleavedAccessGroup::isSupported() const { return true; if (ShuffleElemSize == 8 && isa(Inst) && Factor == 4 && - (WideInstSize == 256 || WideInstSize == 512 || WideInstSize == 1024)) - return true; + (WideInstSize == 256 || WideInstSize == 512 || WideInstSize == 1024 || + WideInstSize == 2048)) + return true; if (ShuffleElemSize == 8 && Factor == 3 && - (WideInstSize == 384 || WideInstSize == 768)) + (WideInstSize == 384 || WideInstSize == 768 || WideInstSize == 1536)) return true; return false; @@ -145,14 +168,13 @@ bool X86InterleavedAccessGroup::isSupported() const { void X86InterleavedAccessGroup::decompose( Instruction *VecInst, unsigned NumSubVectors, VectorType *SubVecTy, SmallVectorImpl &DecomposedVectors) { - assert((isa(VecInst) || isa(VecInst)) && "Expected Load or Shuffle"); - Type *VecTy = VecInst->getType(); - (void)VecTy; - assert(VecTy->isVectorTy() && - DL.getTypeSizeInBits(VecTy) >= + Type *VecWidth = VecInst->getType(); + (void)VecWidth; + assert(VecWidth->isVectorTy() && + DL.getTypeSizeInBits(VecWidth) >= DL.getTypeSizeInBits(SubVecTy) * NumSubVectors && "Invalid Inst-size!!!"); @@ -178,11 +200,12 @@ void X86InterleavedAccessGroup::decompose( // In the case of stride 3 with a vector of 32 elements load the information // in the following way: // [0,1...,VF/2-1,VF/2+VF,VF/2+VF+1,...,2VF-1] - if (DL.getTypeSizeInBits(VecTy) == 768) { + unsigned VecLength = DL.getTypeSizeInBits(VecWidth); + if (VecLength == 768 || VecLength == 1536) { Type *VecTran = VectorType::get(Type::getInt8Ty(LI->getContext()), 16)->getPointerTo(); VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecTran); - NumLoads = NumSubVectors * 2; + NumLoads = NumSubVectors * (VecLength / 384); } else VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy); // Generate N loads of T type. @@ -195,26 +218,6 @@ void X86InterleavedAccessGroup::decompose( } } -// Create shuffle mask for concatenation of two half vectors. -// Low = false: mask generated for the shuffle -// shuffle(VEC1,VEC2,{NumElement/2, NumElement/2+1, NumElement/2+2..., -// NumElement-1, NumElement+NumElement/2, -// NumElement+NumElement/2+1..., 2*NumElement-1}) -// = concat(high_half(VEC1),high_half(VEC2)) -// Low = true: mask generated for the shuffle -// shuffle(VEC1,VEC2,{0,1,2,...,NumElement/2-1,NumElement, -// NumElement+1...,NumElement+NumElement/2-1}) -// = concat(low_half(VEC1),low_half(VEC2)) -static void createConcatShuffleMask(int NumElements, - SmallVectorImpl &Mask, bool Low) { - int NumHalfElements = NumElements / 2; - int Offset = Low ? 0 : NumHalfElements; - for (int i = 0; i < NumHalfElements; ++i) - Mask.push_back(i + Offset); - for (int i = 0; i < NumHalfElements; ++i) - Mask.push_back(i + Offset + NumElements); -} - // Changing the scale of the vector type by reducing the number of elements and // doubling the scalar size. 
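The NumLoads arithmetic added to decompose() above for the 768- and 1536-bit stride-3 cases is easy to sanity-check in isolation (a standalone sketch with invented names; the scaleVectorType helper described by the comment above follows right after it):

#include <cassert>

// Mirrors the NumLoads computation in decompose(): for 768- or 1536-bit wide
// groups, each of the NumSubVectors slots is split into VecLength / 384
// sixteen-byte loads.
static unsigned numLoads(unsigned NumSubVectors, unsigned VecLengthBits) {
  if (VecLengthBits == 768 || VecLengthBits == 1536)
    return NumSubVectors * (VecLengthBits / 384);
  return NumSubVectors;
}

int main() {
  assert(numLoads(3, 768) == 6);   // 3 sub-vectors, 2 chunks each
  assert(numLoads(3, 1536) == 12); // 3 sub-vectors, 4 chunks each
  assert(numLoads(3, 384) == 3);   // narrow case: one load per sub-vector
  return 0;
}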
static MVT scaleVectorType(MVT VT) { @@ -223,6 +226,91 @@ VT.getVectorNumElements() / 2); } +static uint32_t Concat[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 }; + +// genShuffleBland - Creates a shuffle mask from two vectors. This function +// only works on instructions whose lanes live inside 256-bit registers. From +// the mask 'Mask' it creates a new mask 'Out' by applying an offset to each +// element. The offset amount depends on the two integers 'LowOffset' and +// 'HighOffset', where 'LowOffset' applies to the first vector and +// 'HighOffset' applies to the second vector. +// |a0....a5,b0....b4,c0....c4|a16..a21,b16..b20,c16..c20| +// |c5...c10,a5....a9,b5....b9|c21..c26,a22..a26,b21..b25| +// |b10..b15,c11..c15,a10..a15|b26..b31,c27..c31,a27..a31| +// For the sequence to work as a mirror of the load, we must consider the +// element order as above. In this function we combine two kinds of shuffles: +// the first is a vpshuf-style shuffle and the second is a "blend"-style +// shuffle. By computing the shuffle on a sequence of 16 elements (one lane) +// and adding the correct offset, we create a vpshuf + blend sequence between +// two shuffles. +static void genShuffleBland(MVT VT, ArrayRef Mask, + SmallVectorImpl &Out, int LowOffset, + int HighOffset) { + assert(VT.getSizeInBits() >= 256 && + "This function doesn't accept widths smaller than 256"); + unsigned NumOfElm = VT.getVectorNumElements(); + for (unsigned i = 0; i < Mask.size(); i++) + Out.push_back(Mask[i] + LowOffset); + for (unsigned i = 0; i < Mask.size(); i++) + Out.push_back(Mask[i] + HighOffset + NumOfElm); +} + +// reorderSubVector returns the data to its original state; it is effectively +// the inverse of concatSubVector.
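Before the reorderSubVector description continues below: genShuffleBland is pure index arithmetic, so its effect can be demonstrated outside LLVM. A sketch with STL containers, assuming the same semantics as the function above:

#include <cassert>
#include <cstdint>
#include <vector>

// Rebuilds genShuffleBland with STL types: the output mask is the input mask
// shifted by LowOffset, followed by the input mask shifted by HighOffset plus
// the element count of the first vector (NumOfElm).
static std::vector<uint32_t> shuffleBland(const std::vector<uint32_t> &Mask,
                                          int LowOffset, int HighOffset,
                                          unsigned NumOfElm) {
  std::vector<uint32_t> Out;
  for (uint32_t M : Mask)
    Out.push_back(M + LowOffset);
  for (uint32_t M : Mask)
    Out.push_back(M + HighOffset + NumOfElm);
  return Out;
}

int main() {
  // A 4-element mask, pretending the source vectors have 32 elements each.
  std::vector<uint32_t> Mask = {0, 2, 1, 3};
  auto Out = shuffleBland(Mask, 16, 0, 32);
  assert((Out == std::vector<uint32_t>{16, 18, 17, 19, 32, 34, 33, 35}));
  return 0;
}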
+ +// For VecElems = 16 +// Invec[0] - |0| TransposedMatrix[0] - |0| +// Invec[1] - |1| => TransposedMatrix[1] - |1| +// Invec[2] - |2| TransposedMatrix[2] - |2| + +// For VecElems = 32 +// Invec[0] - |0|3| TransposedMatrix[0] - |0|1| +// Invec[1] - |1|4| => TransposedMatrix[1] - |2|3| +// Invec[2] - |2|5| TransposedMatrix[2] - |4|5| + +// For VecElems = 64 +// Invec[0] - |0|3|6|9 | TransposedMatrix[0] - |0|1|2 |3 | +// Invec[1] - |1|4|7|10| => TransposedMatrix[1] - |4|5|6 |7 | +// Invec[2] - |2|5|8|11| TransposedMatrix[2] - |8|9|10|11| + +static void reorderSubVector(MVT VT, SmallVectorImpl &TransposedMatrix, + ArrayRef Vec, ArrayRef VPShuf, + unsigned VecElems, unsigned Stride, + IRBuilder<> Builder) { + + if (VecElems == 16) { + for (unsigned i = 0; i < Stride; i++) + TransposedMatrix[i] = Builder.CreateShuffleVector( + Vec[i], UndefValue::get(Vec[i]->getType()), VPShuf); + return; + } + + SmallVector OptimizeShuf; + Value *Temp[8]; + + for (unsigned i = 0; i < (VecElems / 16) * Stride; i += 2) { + genShuffleBland(VT, VPShuf, OptimizeShuf, (i / Stride) * 16, + (i + 1) / Stride * 16); + Temp[i / 2] = Builder.CreateShuffleVector( + Vec[i % Stride], Vec[(i + 1) % Stride], OptimizeShuf); + OptimizeShuf.clear(); + } + + if (VecElems == 32) { + std::copy(Temp, Temp + Stride, TransposedMatrix.begin()); + return; + } + else + for (unsigned i = 0; i < Stride; i++) + TransposedMatrix[i] = + Builder.CreateShuffleVector(Temp[2 * i], Temp[2 * i + 1], Concat); +} + void X86InterleavedAccessGroup::interleave8bitStride4VF8( ArrayRef Matrix, SmallVectorImpl &TransposedMatrix) { @@ -265,92 +353,70 @@ void X86InterleavedAccessGroup::interleave8bitStride4VF8( void X86InterleavedAccessGroup::interleave8bitStride4( ArrayRef Matrix, SmallVectorImpl &TransposedMatrix, - unsigned numberOfElement) { - + unsigned NumOfElm) { // Example: Assuming we start from the following vectors: // Matrix[0]= c0 c1 c2 c3 c4 ... c31 // Matrix[1]= m0 m1 m2 m3 m4 ... m31 // Matrix[2]= y0 y1 y2 y3 y4 ... y31 // Matrix[3]= k0 k1 k2 k3 k4 ... k31 - MVT VT = MVT::getVectorVT(MVT::i8, numberOfElement); + MVT VT = MVT::getVectorVT(MVT::i8, NumOfElm); MVT HalfVT = scaleVectorType(VT); TransposedMatrix.resize(4); SmallVector MaskHigh; SmallVector MaskLow; - SmallVector MaskHighTemp1; - SmallVector MaskLowTemp1; - SmallVector MaskHighWord; - SmallVector MaskLowWord; - SmallVector ConcatLow; - SmallVector ConcatHigh; + SmallVector LowHighMask[2]; + SmallVector MaskHighTemp; + SmallVector MaskLowTemp; // MaskHighTemp and MaskLowTemp built in the vpunpckhbw and vpunpcklbw X86 // shuffle pattern. - createUnpackShuffleMask(VT, MaskHigh, false, false); createUnpackShuffleMask(VT, MaskLow, true, false); + createUnpackShuffleMask(VT, MaskHigh, false, false); // MaskHighTemp1 and MaskLowTemp1 built in the vpunpckhdw and vpunpckldw X86 // shuffle pattern. - createUnpackShuffleMask(HalfVT, MaskLowTemp1, true, false); - createUnpackShuffleMask(HalfVT, MaskHighTemp1, false, false); - scaleShuffleMask(2, MaskHighTemp1, MaskHighWord); - scaleShuffleMask(2, MaskLowTemp1, MaskLowWord); + createUnpackShuffleMask(HalfVT, MaskLowTemp, true, false); + createUnpackShuffleMask(HalfVT, MaskHighTemp, false, false); + scaleShuffleMask(2, MaskLowTemp, LowHighMask[0]); + scaleShuffleMask(2, MaskHighTemp, LowHighMask[1]); // IntrVec1Low = c0 m0 c1 m1 ... c7 m7 | c16 m16 c17 m17 ... c23 m23 // IntrVec1High = c8 m8 c9 m9 ... c15 m15 | c24 m24 c25 m25 ... c31 m31 // IntrVec2Low = y0 k0 y1 k1 ... y7 k7 | y16 k16 y17 k17 ... y23 k23 // IntrVec2High = y8 k8 y9 k9 ... 
y15 k15 | y24 k24 y25 k25 ... y31 k31 + Value *IntrVec[4]; - Value *IntrVec1Low = - Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskLow); - Value *IntrVec1High = - Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskHigh); - Value *IntrVec2Low = - Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskLow); - Value *IntrVec2High = - Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskHigh); + IntrVec[0] = Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskLow); + IntrVec[1] = Builder.CreateShuffleVector(Matrix[0], Matrix[1], MaskHigh); + IntrVec[2] = Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskLow); + IntrVec[3] = Builder.CreateShuffleVector(Matrix[2], Matrix[3], MaskHigh); // cmyk4 cmyk5 cmyk6 cmyk7 | cmyk20 cmyk21 cmyk22 cmyk23 // cmyk12 cmyk13 cmyk14 cmyk15 | cmyk28 cmyk29 cmyk30 cmyk31 // cmyk0 cmyk1 cmyk2 cmyk3 | cmyk16 cmyk17 cmyk18 cmyk19 // cmyk8 cmyk9 cmyk10 cmyk11 | cmyk24 cmyk25 cmyk26 cmyk27 - Value *High = - Builder.CreateShuffleVector(IntrVec1Low, IntrVec2Low, MaskHighWord); - Value *High1 = - Builder.CreateShuffleVector(IntrVec1High, IntrVec2High, MaskHighWord); - Value *Low = - Builder.CreateShuffleVector(IntrVec1Low, IntrVec2Low, MaskLowWord); - Value *Low1 = - Builder.CreateShuffleVector(IntrVec1High, IntrVec2High, MaskLowWord); - - if (VT == MVT::v16i8) { - TransposedMatrix[0] = Low; - TransposedMatrix[1] = High; - TransposedMatrix[2] = Low1; - TransposedMatrix[3] = High1; - return; - } + Value *VecOut[4]; + for (int i = 0; i < 4; i++) + VecOut[i] = Builder.CreateShuffleVector(IntrVec[i / 2], IntrVec[i / 2 + 2], + LowHighMask[i % 2]); // cmyk0 cmyk1 cmyk2 cmyk3 | cmyk4 cmyk5 cmyk6 cmyk7 // cmyk8 cmyk9 cmyk10 cmyk11 | cmyk12 cmyk13 cmyk14 cmyk15 // cmyk16 cmyk17 cmyk18 cmyk19 | cmyk20 cmyk21 cmyk22 cmyk23 // cmyk24 cmyk25 cmyk26 cmyk27 | cmyk28 cmyk29 cmyk30 cmyk31 - // ConcatHigh and ConcatLow built in the vperm2i128 and vinserti128 X86 - // shuffle pattern. - SmallVector ConcatHigh12, ConcatHigh13; - createConcatShuffleMask(numberOfElement, ConcatLow, true); - createConcatShuffleMask(numberOfElement, ConcatHigh, false); - - TransposedMatrix[0] = Builder.CreateShuffleVector(Low, High, ConcatLow); - TransposedMatrix[1] = Builder.CreateShuffleVector(Low1, High1, ConcatLow); - TransposedMatrix[2] = Builder.CreateShuffleVector(Low, High, ConcatHigh); - TransposedMatrix[3] = Builder.CreateShuffleVector(Low1, High1, ConcatHigh); + if (VT == MVT::v16i8) { + std::copy(VecOut, VecOut + 4, TransposedMatrix.begin()); + return; + } + + reorderSubVector(VT, TransposedMatrix, VecOut, makeArrayRef(Concat, 16), + NumOfElm, 4, Builder); } // createShuffleStride returns shuffle mask of size N. @@ -403,7 +469,6 @@ static void setGroupSize(MVT VT, SmallVectorImpl &SizeInfo) { static void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl &ShuffleMask, bool AlignDirection = true, bool Unary = false) { - unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1); unsigned NumLaneElts = NumElts / NumLanes; @@ -423,29 +488,71 @@ } }
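The unpack stage of the stride-4 interleave above (the IntrVec shuffles) is built from byte-unpack masks. For a single 128-bit lane, a vpunpcklbw-style shuffle alternates the low halves of its two sources, matching the "c0 m0 c1 m1 ..." comment; a simplified one-lane sketch (LLVM's createUnpackShuffleMask is lane-aware, this is not):

#include <array>
#include <cassert>
#include <cstdint>

// Applies a vpunpcklbw-style shuffle to two 16-byte vectors: the result
// alternates bytes from the low half of A and the low half of B.
static std::array<uint8_t, 16> unpackLow(const std::array<uint8_t, 16> &A,
                                         const std::array<uint8_t, 16> &B) {
  std::array<uint8_t, 16> R{};
  for (int I = 0; I < 8; ++I) {
    R[2 * I] = A[I];
    R[2 * I + 1] = B[I];
  }
  return R;
}

int main() {
  std::array<uint8_t, 16> C{}, M{};
  for (int I = 0; I < 16; ++I) { C[I] = I; M[I] = 100 + I; }
  auto R = unpackLow(C, M); // c0 m0 c1 m1 ... c7 m7, as in the comment above
  assert(R[0] == 0 && R[1] == 100 && R[2] == 1 && R[3] == 101);
  return 0;
}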
+// concatSubVector - This function rebuilds the data into the order the lane +// instructions expect. The deinterleave sequence assumes a particular shape +// of the matrix so that it can work with in-lane instructions like 'vpalign' +// or 'vpshuf'; this function ensures the data is built the right way for +// those lane instructions. Each lane inside the vector is 128 bits long. +// +// The 'InVec' argument contains the data in increasing order. In InVec[0] you +// can find the first 128 bits of data. The number of different lanes inside a +// vector depends on 'VecElems'. In general, the formula is +// VecElems * type / 128. The size of the array 'InVec' is equal to +// 'VecElems'. + +// For VecElems = 16 +// Invec[0] - |0| Vec[0] - |0| +// Invec[1] - |1| => Vec[1] - |1| +// Invec[2] - |2| Vec[2] - |2| + +// For VecElems = 32 +// Invec[0] - |0|1| Vec[0] - |0|3| +// Invec[1] - |2|3| => Vec[1] - |1|4| +// Invec[2] - |4|5| Vec[2] - |2|5| + +// For VecElems = 64 +// Invec[0] - |0|1|2 |3 | Vec[0] - |0|3|6|9 | +// Invec[1] - |4|5|6 |7 | => Vec[1] - |1|4|7|10| +// Invec[2] - |8|9|10|11| Vec[2] - |2|5|8|11| + +static void concatSubVector(Value **Vec, ArrayRef InVec, + unsigned VecElems, IRBuilder<> Builder) { + if (VecElems == 16) { + for (int i = 0; i < 3; i++) + Vec[i] = InVec[i]; + return; + } + + for (unsigned j = 0; j < VecElems / 32; j++) + for (int i = 0; i < 3; i++) + Vec[i + j * 3] = Builder.CreateShuffleVector( + InVec[j * 6 + i], InVec[j * 6 + i + 3], makeArrayRef(Concat, 32)); + + if (VecElems == 32) + return; + + for (int i = 0; i < 3; i++) + Vec[i] = Builder.CreateShuffleVector(Vec[i], Vec[i + 3], Concat); +} + void X86InterleavedAccessGroup::deinterleave8bitStride3( ArrayRef InVec, SmallVectorImpl &TransposedMatrix, unsigned VecElems) { - // Example: Assuming we start from the following vectors: // Matrix[0]= a0 b0 c0 a1 b1 c1 a2 b2 // Matrix[1]= c2 a3 b3 c3 a4 b4 c4 a5 // Matrix[2]= b5 c5 a6 b6 c6 a7 b7 c7 TransposedMatrix.resize(3); - SmallVector Concat; SmallVector VPShuf; SmallVector VPAlign[2]; SmallVector VPAlign2; SmallVector VPAlign3; SmallVector GroupSize; - Value *Vec[3], *TempVector[3]; + Value *Vec[6], *TempVector[3]; MVT VT = MVT::getVT(Shuffles[0]->getType()); - for (unsigned i = 0; i < VecElems && VecElems == 32; ++i) - Concat.push_back(i); - createShuffleStride(VT, 3, VPShuf); setGroupSize(VT, GroupSize); @@ -455,11 +562,7 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3( DecodePALIGNRMask(VT, GroupSize[2] + GroupSize[1], VPAlign2, true, true); DecodePALIGNRMask(VT, GroupSize[1], VPAlign3, true, true); - for (int i = 0; i < 3; i++) - Vec[i] = VecElems == 32 - ? Builder.CreateShuffleVector(InVec[i], InVec[i + 3], Concat) - : InVec[i]; - + concatSubVector(Vec, InVec, VecElems, Builder); // Vec[0]= a0 a1 a2 b0 b1 b2 c0 c1 // Vec[1]= c2 c3 c4 a3 a4 a5 b3 b4 // Vec[2]= b5 b6 b7 c5 c6 c7 a6 a7 @@ -494,8 +597,6 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3( Vec[0], UndefValue::get(Vec[1]->getType()), VPAlign2); TransposedMatrix[1] = VecElems == 8 ? Vec[2] : TempVec; TransposedMatrix[2] = VecElems == 8 ? TempVec : Vec[2]; - - return; } // group2Shuffle reorder the shuffle stride back into continuous order. @@ -520,38 +621,9 @@ static void group2Shuffle(MVT VT, SmallVectorImpl &Mask, } } -// genShuffleBland - Creates shuffle according to two vectors.This function is -// only works on instructions with lane inside 256 registers. According to -// the mask 'Mask' creates a new Mask 'Out' by the offset of the mask. The -// offset amount depends on the two integer, 'LowOffset' and 'HighOffset'. -// Where the 'LowOffset' refers to the first vector and the highOffset refers to -// the second vector. -// |a0....a5,b0....b4,c0....c4|a16..a21,b16..b20,c16..c20| -// |c5...c10,a5....a9,b5....b9|c21..c26,a22..a26,b21..b25| -// |b10..b15,c11..c15,a10..a15|b26..b31,c27..c31,a27..a31| -// For the sequence to work as a mirror to the load. -// We must consider the elements order as above.
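The lane count used by concatSubVector follows the formula from its comment above, VecElems * element-size / 128; for i8 elements that yields 1, 2, and 4 lanes for the 16/32/64 cases. A one-line check (illustrative):

#include <cassert>

// Lanes per vector, per the comment above: VecElems * bits-per-element / 128.
static unsigned numLanes(unsigned VecElems, unsigned ElemBits) {
  return VecElems * ElemBits / 128;
}

int main() {
  assert(numLanes(16, 8) == 1); // v16i8: one 128-bit lane
  assert(numLanes(32, 8) == 2); // v32i8: two lanes
  assert(numLanes(64, 8) == 4); // v64i8: four lanes
  return 0;
}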
-// In this function we are combining two types of shuffles. -// The first one is vpshufed and the second is a type of "blend" shuffle. -// By computing the shuffle on a sequence of 16 elements(one lane) and add the -// correct offset. We are creating a vpsuffed + blend sequence between two -// shuffles. -static void genShuffleBland(MVT VT, SmallVectorImpl &Mask, - SmallVectorImpl &Out, int LowOffset, - int HighOffset) { - assert(VT.getSizeInBits() == 256 && - "This function works on only width of 256"); - unsigned NumOfElm = VT.getVectorNumElements(); - for (unsigned i = 0; i < Mask.size(); i++) - Out.push_back(Mask[i] + LowOffset); - for (unsigned i = 0; i < Mask.size(); i++) - Out.push_back(Mask[i] + HighOffset + NumOfElm); -} - void X86InterleavedAccessGroup::interleave8bitStride3( ArrayRef InVec, SmallVectorImpl &TransposedMatrix, unsigned VecElems) { - // Example: Assuming we start from the following vectors: // Matrix[0]= a0 a1 a2 a3 a4 a5 a6 a7 // Matrix[1]= b0 b1 b2 b3 b4 b5 b6 b7 @@ -563,7 +635,7 @@ void X86InterleavedAccessGroup::interleave8bitStride3( SmallVector VPAlign[3]; SmallVector VPAlign2; SmallVector VPAlign3; - SmallVector OptimizeShuf[3]; + Value *Vec[3], *TempVector[3]; MVT VT = MVT::getVectorVT(MVT::i8, VecElems); @@ -605,25 +677,9 @@ void X86InterleavedAccessGroup::interleave8bitStride3( // TransposedMatrix[1] = c2 a3 b3 c3 a4 b4 c4 a5 // TransposedMatrix[2] = b5 c5 a6 b6 c6 a7 b7 c7 - group2Shuffle(VT, GroupSize, VPShuf); - - if (VT.getSizeInBits() <= 128) { - for (int i = 0; i < 3; i++) - TransposedMatrix[i] = Builder.CreateShuffleVector( - Vec[i], UndefValue::get(Vec[i]->getType()), VPShuf); - return; - } - unsigned NumOfElm = VT.getVectorNumElements(); - genShuffleBland(VT, VPShuf, OptimizeShuf[0], 0, 0); - genShuffleBland(VT, VPShuf, OptimizeShuf[1], 0, NumOfElm / 2); - genShuffleBland(VT, VPShuf, OptimizeShuf[2], NumOfElm / 2, NumOfElm / 2); - - for (int i = 0; i < 3; i++) - TransposedMatrix[i] = Builder.CreateShuffleVector( - Vec[(i * 2) % 3], Vec[(i * 2 + 1) % 3], OptimizeShuf[i]); - - return; + group2Shuffle(VT, GroupSize, VPShuf); + reorderSubVector(VT, TransposedMatrix, Vec, VPShuf, NumOfElm,3, Builder); } void X86InterleavedAccessGroup::transpose_4x4( @@ -683,6 +739,7 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() { case 8: case 16: case 32: + case 64: deinterleave8bitStride3(DecomposedVectors, TransposedVectors, NumSubVecElems); break; @@ -716,6 +773,7 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() { break; case 16: case 32: + case 64: if (Factor == 4) interleave8bitStride4(DecomposedVectors, TransposedVectors, NumSubVecElems); @@ -784,4 +842,3 @@ bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI, return Grp.isSupported() && Grp.lowerIntoOptimizedSequence(); } - diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 36d81128acf03..78e6e5f1b2e99 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -15,6 +15,7 @@ #include "InstPrinter/X86ATTInstPrinter.h" #include "InstPrinter/X86InstComments.h" #include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86TargetStreamer.h" #include "Utils/X86ShuffleDecode.h" #include "X86AsmPrinter.h" #include "X86RegisterInfo.h" @@ -1363,6 +1364,82 @@ static void printConstant(const Constant *COp, raw_ostream &CS) { } } +void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) { + assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); + assert(getSubtarget().isOSWindows() && 
"SEH_ instruction Windows only"); + const X86RegisterInfo *RI = + MF->getSubtarget().getRegisterInfo(); + + // Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86. + if (EmitFPOData) { + X86TargetStreamer *XTS = + static_cast(OutStreamer->getTargetStreamer()); + switch (MI->getOpcode()) { + case X86::SEH_PushReg: + XTS->emitFPOPushReg(MI->getOperand(0).getImm()); + break; + case X86::SEH_StackAlloc: + XTS->emitFPOStackAlloc(MI->getOperand(0).getImm()); + break; + case X86::SEH_SetFrame: + assert(MI->getOperand(1).getImm() == 0 && + ".cv_fpo_setframe takes no offset"); + XTS->emitFPOSetFrame(MI->getOperand(0).getImm()); + break; + case X86::SEH_EndPrologue: + XTS->emitFPOEndPrologue(); + break; + case X86::SEH_SaveReg: + case X86::SEH_SaveXMM: + case X86::SEH_PushFrame: + llvm_unreachable("SEH_ directive incompatible with FPO"); + break; + default: + llvm_unreachable("expected SEH_ instruction"); + } + return; + } + + // Otherwise, use the .seh_ directives for all other Windows platforms. + switch (MI->getOpcode()) { + case X86::SEH_PushReg: + OutStreamer->EmitWinCFIPushReg( + RI->getSEHRegNum(MI->getOperand(0).getImm())); + break; + + case X86::SEH_SaveReg: + OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + break; + + case X86::SEH_SaveXMM: + OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + break; + + case X86::SEH_StackAlloc: + OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm()); + break; + + case X86::SEH_SetFrame: + OutStreamer->EmitWinCFISetFrame( + RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + break; + + case X86::SEH_PushFrame: + OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm()); + break; + + case X86::SEH_EndPrologue: + OutStreamer->EmitWinCFIEndProlog(); + break; + + default: + llvm_unreachable("expected SEH_ instruction"); + } +} + void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(*MF, *this); const X86RegisterInfo *RI = MF->getSubtarget().getRegisterInfo(); @@ -1540,41 +1617,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { return; case X86::SEH_PushReg: - assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); - OutStreamer->EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm())); - return; - case X86::SEH_SaveReg: - assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); - OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()), - MI->getOperand(1).getImm()); - return; - case X86::SEH_SaveXMM: - assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); - OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()), - MI->getOperand(1).getImm()); - return; - case X86::SEH_StackAlloc: - assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); - OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm()); - return; - case X86::SEH_SetFrame: - assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); - OutStreamer->EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()), - MI->getOperand(1).getImm()); - return; - case X86::SEH_PushFrame: - assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); - OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm()); - return; - case X86::SEH_EndPrologue: - assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?"); - 
OutStreamer->EmitWinCFIEndProlog(); + EmitSEHInstruction(MI); return; case X86::SEH_Epilogue: { diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp index d3ef7aa8d6c63..0dd13077c37ed 100644 --- a/lib/Target/X86/X86MacroFusion.cpp +++ b/lib/Target/X86/X86MacroFusion.cpp @@ -82,10 +82,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, case X86::TEST32i32: case X86::TEST64i32: case X86::TEST64ri32: - case X86::TEST8rm: - case X86::TEST16rm: - case X86::TEST32rm: - case X86::TEST64rm: + case X86::TEST8mr: + case X86::TEST16mr: + case X86::TEST32mr: + case X86::TEST64mr: case X86::TEST8ri_NOREX: case X86::AND16i16: case X86::AND16ri: diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp index 896f625188919..ad2d3c9453ef5 100644 --- a/lib/Target/X86/X86OptimizeLEAs.cpp +++ b/lib/Target/X86/X86OptimizeLEAs.cpp @@ -1,4 +1,4 @@ -//===-- X86OptimizeLEAs.cpp - optimize usage of LEA instructions ----------===// +//===- X86OptimizeLEAs.cpp - optimize usage of LEA instructions -----------===// // // The LLVM Compiler Infrastructure // @@ -17,22 +17,36 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/X86BaseInfo.h" #include "X86.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +#include +#include using namespace llvm; @@ -60,6 +74,7 @@ static bool isSimilarDispOp(const MachineOperand &MO1, static inline bool isLEA(const MachineInstr &MI); namespace { + /// A key based on instruction's memory operands. class MemOpKey { public: @@ -92,12 +107,14 @@ class MemOpKey { // Address' displacement operand. const MachineOperand *Disp; }; + } // end anonymous namespace /// Provide DenseMapInfo for MemOpKey. namespace llvm { + template <> struct DenseMapInfo { - typedef DenseMapInfo PtrInfo; + using PtrInfo = DenseMapInfo; static inline MemOpKey getEmptyKey() { return MemOpKey(PtrInfo::getEmptyKey(), PtrInfo::getEmptyKey(), @@ -164,7 +181,8 @@ template <> struct DenseMapInfo { return LHS == RHS; } }; -} + +} // end namespace llvm /// \brief Returns a hash table key based on memory operands of \p MI. The /// number of the first memory operand of \p MI is specified through \p N. 
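The DenseMapInfo specialization above is LLVM's idiom for keying a hash map on a composite struct; with standard containers the same shape is a struct plus a custom hasher, as in this reduced sketch (field names invented, far simpler than the real MemOpKey):

#include <cassert>
#include <cstdint>
#include <functional>
#include <unordered_map>

// A composite key over simplified address components, standing in for
// MemOpKey's base/scale/index/segment/displacement operands.
struct AddrKey {
  int64_t Base, Scale, Index, Disp;
  bool operator==(const AddrKey &O) const {
    return Base == O.Base && Scale == O.Scale && Index == O.Index &&
           Disp == O.Disp;
  }
};

struct AddrKeyHash {
  size_t operator()(const AddrKey &K) const {
    size_t H = std::hash<int64_t>()(K.Base);
    auto Mix = [&H](int64_t V) {
      H ^= std::hash<int64_t>()(V) + 0x9e3779b9 + (H << 6) + (H >> 2);
    };
    Mix(K.Scale);
    Mix(K.Index);
    Mix(K.Disp);
    return H;
  }
};

int main() {
  std::unordered_map<AddrKey, int, AddrKeyHash> LEAs;
  LEAs[{/*Base=*/5, /*Scale=*/4, /*Index=*/7, /*Disp=*/16}] = 1;
  assert(LEAs.count({5, 4, 7, 16}) == 1); // same address components hit
  assert(LEAs.count({5, 8, 7, 16}) == 0); // different scale misses
  return 0;
}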
@@ -217,6 +235,7 @@ static inline bool isLEA(const MachineInstr &MI) { } namespace { + class OptimizeLEAPass : public MachineFunctionPass { public: OptimizeLEAPass() : MachineFunctionPass(ID) {} @@ -229,7 +248,7 @@ class OptimizeLEAPass : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; private: - typedef DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>> MemOpMap; + using MemOpMap = DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>>; /// \brief Returns a distance between two instructions inside one basic block. /// Negative result means, that instructions occur in reverse order. @@ -281,8 +300,10 @@ class OptimizeLEAPass : public MachineFunctionPass { static char ID; }; + +} // end anonymous namespace + char OptimizeLEAPass::ID = 0; -} FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); } diff --git a/lib/Target/X86/X86SchedBroadwell.td b/lib/Target/X86/X86SchedBroadwell.td new file mode 100755 index 0000000000000..c70af22d060cc --- /dev/null +++ b/lib/Target/X86/X86SchedBroadwell.td @@ -0,0 +1,4076 @@ +//=- X86SchedBroadwell.td - X86 Broadwell Scheduling ---------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Broadwell to support instruction +// scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// +def BroadwellModel : SchedMachineModel { + // All x86 instructions are modeled as a single micro-op, and HW can decode 4 + // instructions per cycle. + let IssueWidth = 4; + let MicroOpBufferSize = 192; // Based on the reorder buffer. + let LoadLatency = 5; + let MispredictPenalty = 16; + + // Based on the LSD (loop-stream detector) queue size and benchmarking data. + let LoopMicroOpBufferSize = 50; + + // This flag is set to allow the scheduler to assign a default model to + // unrecognized opcodes. + let CompleteModel = 0; +} + +let SchedModel = BroadwellModel in { + +// Broadwell can issue micro-ops to 8 different ports in one cycle. + +// Ports 0, 1, 5, and 6 handle all computation. +// Port 4 gets the data half of stores. Store data can be available later than +// the store address, but since we don't model the latency of stores, we can +// ignore that. +// Ports 2 and 3 are identical. They handle loads and the address half of +// stores. Port 7 can handle address calculations. +def BWPort0 : ProcResource<1>; +def BWPort1 : ProcResource<1>; +def BWPort2 : ProcResource<1>; +def BWPort3 : ProcResource<1>; +def BWPort4 : ProcResource<1>; +def BWPort5 : ProcResource<1>; +def BWPort6 : ProcResource<1>; +def BWPort7 : ProcResource<1>; + +// Many micro-ops are capable of issuing on multiple ports.
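+// (Illustration: a micro-op whose write resource names BWPort0156 below may +// issue on whichever of ports 0, 1, 5, or 6 is free that cycle; the groups +// only name unions of the physical ports defined above.)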
+def BWPort01 : ProcResGroup<[BWPort0, BWPort1]>; +def BWPort23 : ProcResGroup<[BWPort2, BWPort3]>; +def BWPort237 : ProcResGroup<[BWPort2, BWPort3, BWPort7]>; +def BWPort04 : ProcResGroup<[BWPort0, BWPort4]>; +def BWPort05 : ProcResGroup<[BWPort0, BWPort5]>; +def BWPort06 : ProcResGroup<[BWPort0, BWPort6]>; +def BWPort15 : ProcResGroup<[BWPort1, BWPort5]>; +def BWPort16 : ProcResGroup<[BWPort1, BWPort6]>; +def BWPort56 : ProcResGroup<[BWPort5, BWPort6]>; +def BWPort015 : ProcResGroup<[BWPort0, BWPort1, BWPort5]>; +def BWPort056 : ProcResGroup<[BWPort0, BWPort5, BWPort6]>; +def BWPort0156 : ProcResGroup<[BWPort0, BWPort1, BWPort5, BWPort6]>; + +// 60 Entry Unified Scheduler +def BWPortAny : ProcResGroup<[BWPort0, BWPort1, BWPort2, BWPort3, BWPort4, + BWPort5, BWPort6, BWPort7]> { + let BufferSize=60; +} + +// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5 +// cycles after the memory operand. +def : ReadAdvance<ReadAfterLd, 5>; + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when queued in the reservation station. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass BWWriteResPair<X86FoldableSchedWrite SchedRW, ProcResourceKind ExePort, int Lat> { + // Register variant is using a single cycle on ExePort. + def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; } + + // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the + // latency. + def : WriteRes<SchedRW.Folded, [BWPort23, ExePort]> { + let Latency = !add(Lat, 5); + } +} + +// A folded store needs a cycle on port 4 for the store data, but it does not +// need an extra port 2/3 cycle to recompute the address. +def : WriteRes<WriteRMW, [BWPort4]>; + +// Arithmetic. +defm : BWWriteResPair; // Simple integer ALU op. +defm : BWWriteResPair; // Integer multiplication. +def : WriteRes { let Latency = 3; } // Integer multiplication, high part. +def BWDivider : ProcResource<1>; // Integer division issued on port 0. +def : WriteRes { // Integer division. + let Latency = 25; + let ResourceCycles = [1, 10]; +} +def : WriteRes { + let Latency = 29; + let ResourceCycles = [1, 1, 10]; +} + +def : WriteRes; // LEA instructions can't fold loads. + +// Integer shifts and rotates. +defm : BWWriteResPair; + +// Loads, stores, and moves, not folded with other operations. +def : WriteRes { let Latency = 5; } +def : WriteRes; +def : WriteRes; + +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +def : WriteRes; + +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +defm : BWWriteResPair; + +// Floating point. This covers both scalar and vector operations. +defm : BWWriteResPair; // Floating point add/sub/compare. +defm : BWWriteResPair; // Floating point multiplication. +defm : BWWriteResPair; // 10-14 cycles. // Floating point division. +defm : BWWriteResPair; // Floating point square root. +defm : BWWriteResPair; // Floating point reciprocal estimate. +defm : BWWriteResPair; // Floating point reciprocal square root estimate. +// defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. +defm : BWWriteResPair; // Floating point vector shuffles. +defm : BWWriteResPair; // Floating point vector blends. +def : WriteRes { // Fp vector variable blends. + let Latency = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1]; +} + +// FMA Scheduling helper class.
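+// (Left commented out below, like the WriteFMA pair above; since +// CompleteModel = 0, FMA opcodes simply fall back to the default scheduling +// information until this is filled in.)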
+// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } + +// Vector integer operations. +defm : BWWriteResPair; // Vector integer ALU op, no logicals. +defm : BWWriteResPair; // Vector integer shifts. +defm : BWWriteResPair; // Vector integer multiply. +defm : BWWriteResPair; // Vector shuffles. +defm : BWWriteResPair; // Vector blends. + +def : WriteRes { // Vector variable blends. + let Latency = 2; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1]; +} + +def : WriteRes { // Vector MPSAD. + let Latency = 6; + let ResourceCycles = [1, 2]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [1, 1, 2]; +} + +// Vector bitwise operations. +// These are often used on both floating point and integer vectors. +defm : BWWriteResPair; // Vector and/or/xor. + +// Conversion between integer and float. +defm : BWWriteResPair; // Float -> Integer. +defm : BWWriteResPair; // Integer -> Float. +defm : BWWriteResPair; // Float -> Float size conversion. + +// String instructions. +// Packed Compare Implicit Length Strings, Return Mask +def : WriteRes { + let Latency = 10; + let ResourceCycles = [3]; +} +def : WriteRes { + let Latency = 10; + let ResourceCycles = [3, 1]; +} +// Packed Compare Explicit Length Strings, Return Mask +def : WriteRes { + let Latency = 10; + let ResourceCycles = [3, 2, 4]; +} +def : WriteRes { + let Latency = 10; + let ResourceCycles = [6, 2, 1]; +} +// Packed Compare Implicit Length Strings, Return Index +def : WriteRes { + let Latency = 11; + let ResourceCycles = [3]; +} +def : WriteRes { + let Latency = 11; + let ResourceCycles = [3, 1]; +} +// Packed Compare Explicit Length Strings, Return Index +def : WriteRes { + let Latency = 11; + let ResourceCycles = [6, 2]; +} +def : WriteRes { + let Latency = 11; + let ResourceCycles = [3, 2, 2, 1]; +} + +// AES instructions. +def : WriteRes { // Decryption, encryption. + let Latency = 7; + let ResourceCycles = [1]; +} +def : WriteRes { + let Latency = 7; + let ResourceCycles = [1, 1]; +} +def : WriteRes { // InvMixColumn. + let Latency = 14; + let ResourceCycles = [2]; +} +def : WriteRes { + let Latency = 14; + let ResourceCycles = [2, 1]; +} +def : WriteRes { // Key Generation. + let Latency = 10; + let ResourceCycles = [2, 8]; +} +def : WriteRes { + let Latency = 10; + let ResourceCycles = [2, 7, 1]; +} + +// Carry-less multiplication instructions. +def : WriteRes { + let Latency = 7; + let ResourceCycles = [2, 1]; +} +def : WriteRes { + let Latency = 7; + let ResourceCycles = [2, 1, 1]; +} + +// Catch-all for expensive system instructions. +def : WriteRes { let Latency = 100; } // def WriteSystem : SchedWrite; + +// AVX2. +defm : BWWriteResPair; // Fp 256-bit width vector shuffles. +defm : BWWriteResPair; // 256-bit width vector shuffles. +def : WriteRes { // Variable vector shifts. + let Latency = 2; + let ResourceCycles = [2, 1]; +} +def : WriteRes { + let Latency = 6; + let ResourceCycles = [2, 1, 1]; +} + +// Old microcoded instructions that nobody uses. +def : WriteRes { let Latency = 100; } // def WriteMicrocoded : SchedWrite; + +// Fence instructions. +def : WriteRes; + +// Nop, not very useful except that it provides a model for nops! +def : WriteRes; + +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// +// HADD, HSUB PS/PD +// x,x / v,v,v.
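+// (Notation: "x,x" is the two-operand SSE form and "v,v,v" the three-operand +// AVX form; likewise "x,m / v,v,m" below for the folded-load variants.)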
+def : WriteRes { + let Latency = 3; +} + +// x,m / v,v,m. +def : WriteRes { + let Latency = 7; + let ResourceCycles = [1, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes; + +// v <- v,m. +def : WriteRes { + let Latency = 5; + let ResourceCycles = [1, 1]; +} + +// Remaining instrs. + +def BWWriteResGroup1 : SchedWriteRes<[BWPort0]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup1], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_MOVD64grr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PMOVMSKBrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLDrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLQrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSLLWrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRADri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRADrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRAWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRAWrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLDrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLQrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "MMX_PSRLWrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MOVPDI2DIrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "MOVPQIto64rr")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSLLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSLLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSLLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSRADri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSRAWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSRLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSRLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "PSRLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VMOVPDI2DIrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VMOVPQIto64rr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLDYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLQYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLVQYrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLVQrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLWYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRADYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRADri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRAWYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRAWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLDYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLDri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLQYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLQri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLVQYrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLVQrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLWYri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VPSRLWri")>; +def: InstRW<[BWWriteResGroup1], (instregex "VTESTPDYrr")>; +def: 
InstRW<[BWWriteResGroup1], (instregex "VTESTPDrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VTESTPSYrr")>; +def: InstRW<[BWWriteResGroup1], (instregex "VTESTPSrr")>; + +def BWWriteResGroup2 : SchedWriteRes<[BWPort1]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup2], (instregex "COMP_FST0r")>; +def: InstRW<[BWWriteResGroup2], (instregex "COM_FST0r")>; +def: InstRW<[BWWriteResGroup2], (instregex "MMX_MASKMOVQ64")>; +def: InstRW<[BWWriteResGroup2], (instregex "MMX_MASKMOVQ64")>; +def: InstRW<[BWWriteResGroup2], (instregex "UCOM_FPr")>; +def: InstRW<[BWWriteResGroup2], (instregex "UCOM_Fr")>; +def: InstRW<[BWWriteResGroup2], (instregex "VMASKMOVDQU")>; + +def BWWriteResGroup3 : SchedWriteRes<[BWPort5]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup3], (instregex "ANDNPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "ANDNPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "ANDPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "ANDPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "INSERTPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64to64rr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVQ2DQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PALIGNR64irr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PSHUFBrr64")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PSHUFWri")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKHBWirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKHDQirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKHWDirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKLBWirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKLDQirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_PUNPCKLWDirr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOV64toPQIrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVAPDrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVAPSrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVDDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVDI2PDIrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVHLPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVLHPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVSDrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVSHDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVSLDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVSSrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVUPDrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "MOVUPSrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "ORPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "ORPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PACKSSDWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PACKSSWBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PACKUSDWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PACKUSWBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PALIGNRrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PBLENDWrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVSXBDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVSXBQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVSXBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVSXDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVSXWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex 
"PMOVSXWQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXBDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXBQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PMOVZXWQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSHUFBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSHUFDri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSHUFHWri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSHUFLWri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSLLDQri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PSRLDQri")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKHBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKHDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKHQDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKHWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKLBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKLDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKLQDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "PUNPCKLWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "SHUFPDrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "SHUFPSrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "UNPCKHPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "UNPCKHPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "UNPCKLPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "UNPCKLPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDNPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDNPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDNPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDNPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VANDPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VBROADCASTSSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VINSERTPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOV64toPQIrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPDYrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPDrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPSYrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVAPSrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVDDUPYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVDDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVDI2PDIrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVHLPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVLHPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSDrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSHDUPYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSHDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSLDUPYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSLDUPrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVSSrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPDYrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPDrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPSYrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VMOVUPSrr(_REV?)")>; +def: InstRW<[BWWriteResGroup3], (instregex "VORPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex 
"VORPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VORPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VORPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKSSDWYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKSSDWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKSSWBYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKSSWBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKUSDWYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKUSDWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKUSWBYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPACKUSWBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPALIGNRYrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPALIGNRrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPBLENDWYrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPBLENDWrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPBROADCASTDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPBROADCASTQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPDYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPDri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPSYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPSri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPERMILPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXBDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXBQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVSXWQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXBDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXBQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPMOVZXWQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFBYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFBrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFDYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFDri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFHWYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFHWri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFLWYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSHUFLWri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSLLDQYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSLLDQri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSRLDQYri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPSRLDQri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHBWYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHDQYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHQDQYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHQDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHWDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKHWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLBWYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex 
"VPUNPCKLBWrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLDQYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLQDQYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLQDQrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLWDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VPUNPCKLWDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VSHUFPDYrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VSHUFPDrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VSHUFPSYrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VSHUFPSrri")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKHPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKHPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKHPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKHPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKLPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKLPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKLPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VUNPCKLPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VXORPDYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VXORPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VXORPSYrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "VXORPSrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "XORPDrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "XORPSrr")>; + +def BWWriteResGroup4 : SchedWriteRes<[BWPort6]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup4], (instregex "JMP(16|32|64)r")>; + +def BWWriteResGroup5 : SchedWriteRes<[BWPort01]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup5], (instregex "FINCSTP")>; +def: InstRW<[BWWriteResGroup5], (instregex "FNOP")>; + +def BWWriteResGroup6 : SchedWriteRes<[BWPort06]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup6], (instregex "ADC(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADC(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADC8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADCX32rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADCX64rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADOX32rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "ADOX64rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTC(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTC(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTR(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTR(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTS(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "BTS(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CDQ")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVAE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVB(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVG(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVGE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVL(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVLE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], 
(instregex "CMOVNE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVNO(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVNP(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVNS(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVO(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVP(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CMOVS(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "CQO")>; +def: InstRW<[BWWriteResGroup6], (instregex "JAE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JAE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JA_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JA_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JBE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JBE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JB_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JB_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JGE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JGE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JG_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JG_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JLE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JLE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JL_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JL_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JMP_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JMP_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNE_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNE_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNO_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNO_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNP_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNP_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNS_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JNS_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JO_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JO_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JP_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JP_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "JS_1")>; +def: InstRW<[BWWriteResGroup6], (instregex "JS_4")>; +def: InstRW<[BWWriteResGroup6], (instregex "RORX32ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "RORX64ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SAR(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SAR(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SAR8r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SAR8ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SARX32rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SARX64rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SBB(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup6], (instregex "SBB(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup6], (instregex "SBB8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETAEr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETBr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETEr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETGEr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETGr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETLEr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETLr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETNEr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETNOr")>; +def: InstRW<[BWWriteResGroup6], 
(instregex "SETNPr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETNSr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETOr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETPr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SETSr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHL(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHL(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHL8r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHL8ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHLX32rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHLX64rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHR(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHR(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHR8r1")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHR8ri")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHRX32rr")>; +def: InstRW<[BWWriteResGroup6], (instregex "SHRX64rr")>; + +def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup7], (instregex "ANDN32rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "ANDN64rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSI32rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSI64rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSMSK32rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSMSK64rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSR32rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BLSR64rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BZHI32rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "BZHI64rr")>; +def: InstRW<[BWWriteResGroup7], (instregex "LEA(16|32|64)r")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PABSBrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PABSDrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PABSWrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDDirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDQirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDSBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDUSBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDUSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PADDWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PAVGBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PAVGWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PCMPEQBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PCMPEQDirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PCMPEQWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PCMPGTBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PCMPGTDirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PCMPGTWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PMAXSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PMAXUBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PMINSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PMINUBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSIGNBrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSIGNDrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSIGNWrr64")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBDirr")>; +def: InstRW<[BWWriteResGroup7], (instregex 
"MMX_PSUBQirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBSBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBUSBirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBUSWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "MMX_PSUBWirr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PABSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PABSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PABSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDQrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDUSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDUSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PADDWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PAVGBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PAVGWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPEQBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPEQDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPEQQrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPEQWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPGTBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPGTDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PCMPGTWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXUBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXUDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMAXUWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINUBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINUDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PMINUWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSIGNBrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSIGNDrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSIGNWrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBQrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBUSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBUSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "PSUBWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPABSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDQYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDQrr")>; +def: 
InstRW<[BWWriteResGroup7], (instregex "VPADDSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDUSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDUSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDUSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDUSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPADDWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPAVGBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPAVGBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPAVGWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPAVGWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQQYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQQrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPEQWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPCMPGTWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXSDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMAXUWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPMINUWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNBYrr256")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNBrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNDYrr256")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNDrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNWYrr256")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSIGNWrr128")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBBYrr")>; +def: 
InstRW<[BWWriteResGroup7], (instregex "VPSUBBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBDYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBDrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBQYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBQrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBUSBYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBUSBrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBUSWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBUSWrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBWYrr")>; +def: InstRW<[BWWriteResGroup7], (instregex "VPSUBWrr")>; + +def BWWriteResGroup8 : SchedWriteRes<[BWPort015]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup8], (instregex "BLENDPDrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "BLENDPSrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_MOVQ64rr(_REV?)")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_PANDNirr")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_PANDirr")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_PORirr")>; +def: InstRW<[BWWriteResGroup8], (instregex "MMX_PXORirr")>; +def: InstRW<[BWWriteResGroup8], (instregex "MOVDQArr(_REV?)")>; +def: InstRW<[BWWriteResGroup8], (instregex "MOVDQUrr(_REV?)")>; +def: InstRW<[BWWriteResGroup8], (instregex "MOVPQI2QIrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "PANDNrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "PANDrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "PORrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "PXORrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPDYrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPDrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPSYrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VBLENDPSrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQAYrr(_REV?)")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQArr(_REV?)")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQUYrr(_REV?)")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVDQUrr(_REV?)")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVPQI2QIrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VMOVZPQILo2PQIrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPANDNYrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPANDNrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPANDYrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPANDrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPBLENDDYrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPBLENDDrri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPORYrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPORrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPXORYrr")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPXORrr")>; + +def BWWriteResGroup9 : SchedWriteRes<[BWPort0156]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup9], (instregex "ADD(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex "ADD(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "ADD8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "ADD8ri")>; +def: 
InstRW<[BWWriteResGroup9], (instregex "ADD8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "AND8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "CBW")>; +def: InstRW<[BWWriteResGroup9], (instregex "CLC")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMC")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "CMP8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "CWDE")>; +def: InstRW<[BWWriteResGroup9], (instregex "DEC(16|32|64)r")>; +def: InstRW<[BWWriteResGroup9], (instregex "DEC8r")>; +def: InstRW<[BWWriteResGroup9], (instregex "INC(16|32|64)r")>; +def: InstRW<[BWWriteResGroup9], (instregex "INC8r")>; +def: InstRW<[BWWriteResGroup9], (instregex "LAHF")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOV(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOV8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOV8ri_alt")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOV8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOVSX(16|32|64)rr16")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOVSX(16|32|64)rr32")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOVSX(16|32|64)rr8")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOVZX(16|32|64)rr16")>; +def: InstRW<[BWWriteResGroup9], (instregex "MOVZX(16|32|64)rr8")>; +def: InstRW<[BWWriteResGroup9], (instregex "NEG(16|32|64)r")>; +def: InstRW<[BWWriteResGroup9], (instregex "NEG8r")>; +def: InstRW<[BWWriteResGroup9], (instregex "NOOP")>; +def: InstRW<[BWWriteResGroup9], (instregex "NOT(16|32|64)r")>; +def: InstRW<[BWWriteResGroup9], (instregex "NOT8r")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "OR8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "SAHF")>; +def: InstRW<[BWWriteResGroup9], (instregex "SGDT64m")>; +def: InstRW<[BWWriteResGroup9], (instregex "SIDT64m")>; +def: InstRW<[BWWriteResGroup9], (instregex "SLDT64m")>; +def: InstRW<[BWWriteResGroup9], (instregex "SMSW16m")>; +def: InstRW<[BWWriteResGroup9], (instregex "STC")>; +def: InstRW<[BWWriteResGroup9], (instregex "STRm")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "SUB8rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "SYSCALL")>; +def: InstRW<[BWWriteResGroup9], (instregex "TEST(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup9], (instregex "TEST8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "TEST8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "TEST8rr")>; +def: InstRW<[BWWriteResGroup9], (instregex "XCHG(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR(16|32|64)ri8")>; +def: InstRW<[BWWriteResGroup9], (instregex 
"XOR(16|32|64)rr(_REV?)")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR8i8")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR8ri")>; +def: InstRW<[BWWriteResGroup9], (instregex "XOR8rr(_REV?)")>; + +def BWWriteResGroup10 : SchedWriteRes<[BWPort4,BWPort237]> { + let Latency = 1; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm")>; +def: InstRW<[BWWriteResGroup10], (instregex "MMX_MOVD64from64rm")>; +def: InstRW<[BWWriteResGroup10], (instregex "MMX_MOVD64mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MMX_MOVNTQmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MMX_MOVQ64mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOV(16|32|64)mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOV8mi")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOV8mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVAPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVAPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVDQAmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVDQUmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVHPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVHPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVLPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVLPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVNTDQmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVNTI_64mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVNTImr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVNTPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVNTPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVPDI2DImr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVPQI2QImr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVPQIto64mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVSSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVUPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "MOVUPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "ST_FP32m")>; +def: InstRW<[BWWriteResGroup10], (instregex "ST_FP64m")>; +def: InstRW<[BWWriteResGroup10], (instregex "ST_FP80m")>; +def: InstRW<[BWWriteResGroup10], (instregex "VEXTRACTF128mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VEXTRACTI128mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVAPDYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVAPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVAPSYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVAPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVDQAYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVDQAmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVDQUYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVDQUmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVHPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVHPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVLPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVLPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVNTDQYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVNTDQmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVNTPDYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVNTPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVNTPSYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVNTPSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVPDI2DImr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVPQI2QImr")>; +def: InstRW<[BWWriteResGroup10], (instregex 
"VMOVPQIto64mr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVSDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVSSmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVUPDYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVUPDmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVUPSYmr")>; +def: InstRW<[BWWriteResGroup10], (instregex "VMOVUPSmr")>; + +def BWWriteResGroup11 : SchedWriteRes<[BWPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[BWWriteResGroup11], (instregex "BLENDVPDrr0")>; +def: InstRW<[BWWriteResGroup11], (instregex "BLENDVPSrr0")>; +def: InstRW<[BWWriteResGroup11], (instregex "MMX_PINSRWirri")>; +def: InstRW<[BWWriteResGroup11], (instregex "PBLENDVBrr0")>; +def: InstRW<[BWWriteResGroup11], (instregex "PINSRBrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "PINSRDrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "PINSRQrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "PINSRWrri")>; +def: InstRW<[BWWriteResGroup11], (instregex "VBLENDVPDYrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VBLENDVPDrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VBLENDVPSYrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VBLENDVPSrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VPBLENDVBYrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VPBLENDVBrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VPINSRBrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VPINSRDrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VPINSRQrr")>; +def: InstRW<[BWWriteResGroup11], (instregex "VPINSRWrri")>; + +def BWWriteResGroup12 : SchedWriteRes<[BWPort01]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[BWWriteResGroup12], (instregex "FDECSTP")>; + +def BWWriteResGroup13 : SchedWriteRes<[BWPort06]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[BWWriteResGroup13], (instregex "ROL(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROL(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROL8r1")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROL8ri")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROR(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROR(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROR8r1")>; +def: InstRW<[BWWriteResGroup13], (instregex "ROR8ri")>; + +def BWWriteResGroup14 : SchedWriteRes<[BWPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[BWWriteResGroup14], (instregex "LFENCE")>; +def: InstRW<[BWWriteResGroup14], (instregex "MFENCE")>; +def: InstRW<[BWWriteResGroup14], (instregex "WAIT")>; +def: InstRW<[BWWriteResGroup14], (instregex "XGETBV")>; + +def BWWriteResGroup15 : SchedWriteRes<[BWPort0,BWPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup15], (instregex "CVTPS2PDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "CVTSS2SDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "EXTRACTPSrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "MMX_PEXTRWirri")>; +def: InstRW<[BWWriteResGroup15], (instregex "PEXTRBrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PEXTRDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PEXTRQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PEXTRWri")>; +def: InstRW<[BWWriteResGroup15], (instregex "PEXTRWrr_REV")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSLLDrr")>; +def: InstRW<[BWWriteResGroup15], 
(instregex "PSLLQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSLLWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSRADrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSRAWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSRLDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSRLQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PSRLWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "PTESTrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PSYrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VCVTPH2PSrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VCVTPS2PDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VCVTSS2SDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VEXTRACTPSrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPEXTRBrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPEXTRDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPEXTRQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPEXTRWri")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPEXTRWrr_REV")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSLLDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSLLQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSLLWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSRADrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSRAWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSRLDrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSRLQrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPSRLWrr")>; +def: InstRW<[BWWriteResGroup15], (instregex "VPTESTrr")>; + +def BWWriteResGroup16 : SchedWriteRes<[BWPort6,BWPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup16], (instregex "CLFLUSH")>; + +def BWWriteResGroup17 : SchedWriteRes<[BWPort01,BWPort015]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup17], (instregex "MMX_MOVDQ2Qrr")>; + +def BWWriteResGroup18 : SchedWriteRes<[BWPort237,BWPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup18], (instregex "SFENCE")>; + +def BWWriteResGroup19 : SchedWriteRes<[BWPort06,BWPort15]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup19], (instregex "BEXTR32rr")>; +def: InstRW<[BWWriteResGroup19], (instregex "BEXTR64rr")>; +def: InstRW<[BWWriteResGroup19], (instregex "BSWAP(16|32|64)r")>; + +def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup20], (instregex "ADC8i8")>; +def: InstRW<[BWWriteResGroup20], (instregex "ADC8ri")>; +def: InstRW<[BWWriteResGroup20], (instregex "CMOVA(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup20], (instregex "CMOVBE(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup20], (instregex "CWD")>; +def: InstRW<[BWWriteResGroup20], (instregex "JRCXZ")>; +def: InstRW<[BWWriteResGroup20], (instregex "SBB8i8")>; +def: InstRW<[BWWriteResGroup20], (instregex "SBB8ri")>; +def: InstRW<[BWWriteResGroup20], (instregex "SETAr")>; +def: InstRW<[BWWriteResGroup20], (instregex "SETBEr")>; + +def BWWriteResGroup21 : SchedWriteRes<[BWPort4,BWPort5,BWPort237]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup21], (instregex "EXTRACTPSmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "PEXTRBmr")>; +def: InstRW<[BWWriteResGroup21], (instregex 
"PEXTRDmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "PEXTRQmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "PEXTRWmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "STMXCSR")>; +def: InstRW<[BWWriteResGroup21], (instregex "VEXTRACTPSmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "VPEXTRBmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "VPEXTRDmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "VPEXTRQmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "VPEXTRWmr")>; +def: InstRW<[BWWriteResGroup21], (instregex "VSTMXCSR")>; + +def BWWriteResGroup22 : SchedWriteRes<[BWPort4,BWPort6,BWPort237]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup22], (instregex "FNSTCW16m")>; + +def BWWriteResGroup23 : SchedWriteRes<[BWPort4,BWPort237,BWPort06]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup23], (instregex "SETAEm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETBm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETEm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETGEm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETGm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETLEm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETLm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETNEm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETNOm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETNPm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETNSm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETOm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETPm")>; +def: InstRW<[BWWriteResGroup23], (instregex "SETSm")>; + +def BWWriteResGroup24 : SchedWriteRes<[BWPort4,BWPort237,BWPort15]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup24], (instregex "MOVBE(16|32|64)mr")>; + +def BWWriteResGroup25 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)r")>; +def: InstRW<[BWWriteResGroup25], (instregex "PUSH(16|32|64)rmr")>; +def: InstRW<[BWWriteResGroup25], (instregex "PUSH64i8")>; +def: InstRW<[BWWriteResGroup25], (instregex "STOSB")>; +def: InstRW<[BWWriteResGroup25], (instregex "STOSL")>; +def: InstRW<[BWWriteResGroup25], (instregex "STOSQ")>; +def: InstRW<[BWWriteResGroup25], (instregex "STOSW")>; + +def BWWriteResGroup26 : SchedWriteRes<[BWPort0]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup26], (instregex "MOVMSKPDrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "MOVMSKPSrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "PMOVMSKBrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VMOVMSKPDYrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VMOVMSKPDrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VMOVMSKPSYrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VMOVMSKPSrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VPMOVMSKBYrr")>; +def: InstRW<[BWWriteResGroup26], (instregex "VPMOVMSKBrr")>; + +def BWWriteResGroup27 : SchedWriteRes<[BWPort1]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup27], (instregex "ADDPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADDPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADDSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADDSSrr")>; +def: 
InstRW<[BWWriteResGroup27], (instregex "ADDSUBPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADDSUBPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADD_FPrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADD_FST0r")>; +def: InstRW<[BWWriteResGroup27], (instregex "ADD_FrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "BSF(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "BSR(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "CMPPDrri")>; +def: InstRW<[BWWriteResGroup27], (instregex "CMPPSrri")>; +def: InstRW<[BWWriteResGroup27], (instregex "CMPSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "COMISDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "COMISSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "CVTDQ2PSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "CVTPS2DQrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "CVTTPS2DQrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "IMUL(32|64)rr(i8?)")>; +def: InstRW<[BWWriteResGroup27], (instregex "IMUL8r")>; +def: InstRW<[BWWriteResGroup27], (instregex "LZCNT(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MAXPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MAXPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MAXSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MAXSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MINPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MINPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MINSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MINSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MMX_CVTPI2PSirr")>; +def: InstRW<[BWWriteResGroup27], (instregex "MUL8r")>; +def: InstRW<[BWWriteResGroup27], (instregex "PDEP32rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "PDEP64rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "PEXT32rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "PEXT64rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "POPCNT(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "SHLD(16|32|64)rri8")>; +def: InstRW<[BWWriteResGroup27], (instregex "SHRD(16|32|64)rri8")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBR_FPrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBR_FST0r")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBR_FrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUBSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUB_FPrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUB_FST0r")>; +def: InstRW<[BWWriteResGroup27], (instregex "SUB_FrST0")>; +def: InstRW<[BWWriteResGroup27], (instregex "TZCNT(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup27], (instregex "UCOMISDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "UCOMISSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VADDPDYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VADDPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VADDPSYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VADDPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VADDSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VADDSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VADDSUBPDYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VADDSUBPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VADDSUBPSYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VADDSUBPSrr")>; +def: InstRW<[BWWriteResGroup27], 
(instregex "VCMPPDYrri")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCMPPDrri")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCMPPSYrri")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCMPPSrri")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCMPSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCMPSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCOMISDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCOMISSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCVTDQ2PSYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCVTDQ2PSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCVTPS2DQYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCVTPS2DQrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCVTTPS2DQYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VCVTTPS2DQrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAXPDYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAXPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAXPSYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAXPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAXSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMAXSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMINPDYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMINPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMINPSYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMINPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMINSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VMINSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VSUBPDYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VSUBPDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VSUBPSYrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VSUBPSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VSUBSDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VSUBSSrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VUCOMISDrr")>; +def: InstRW<[BWWriteResGroup27], (instregex "VUCOMISSrr")>; + +def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> { + let Latency = 3; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup27_16], (instregex "IMUL16rr(i8?)")>; + +def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup28], (instregex "VBROADCASTSDYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VBROADCASTSSYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VEXTRACTF128rr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VEXTRACTI128rr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VINSERTF128rr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VINSERTI128rr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTBYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTBrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTDYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTQYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTWYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTWrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPERM2F128rr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPERM2I128rr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPERMDYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPERMPDYri")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPERMPSYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPERMQYri")>; +def: InstRW<[BWWriteResGroup28], (instregex 
"VPMOVSXBDYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPMOVSXBQYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPMOVSXBWYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPMOVSXDQYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPMOVSXWDYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPMOVSXWQYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPMOVZXBDYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPMOVZXBQYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPMOVZXBWYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPMOVZXDQYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPMOVZXWDYrr")>; +def: InstRW<[BWWriteResGroup28], (instregex "VPMOVZXWQYrr")>; + +def BWWriteResGroup29 : SchedWriteRes<[BWPort01]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup29], (instregex "MULPDrr")>; +def: InstRW<[BWWriteResGroup29], (instregex "MULPSrr")>; +def: InstRW<[BWWriteResGroup29], (instregex "MULSDrr")>; +def: InstRW<[BWWriteResGroup29], (instregex "MULSSrr")>; +def: InstRW<[BWWriteResGroup29], (instregex "VMULPDYrr")>; +def: InstRW<[BWWriteResGroup29], (instregex "VMULPDrr")>; +def: InstRW<[BWWriteResGroup29], (instregex "VMULPSYrr")>; +def: InstRW<[BWWriteResGroup29], (instregex "VMULPSrr")>; +def: InstRW<[BWWriteResGroup29], (instregex "VMULSDrr")>; +def: InstRW<[BWWriteResGroup29], (instregex "VMULSSrr")>; + +def BWWriteResGroup30 : SchedWriteRes<[BWPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[BWWriteResGroup30], (instregex "XADD(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup30], (instregex "XADD8rr")>; +def: InstRW<[BWWriteResGroup30], (instregex "XCHG8rr")>; + +def BWWriteResGroup31 : SchedWriteRes<[BWPort0,BWPort5]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup31], (instregex "VPSLLVDYrr")>; +def: InstRW<[BWWriteResGroup31], (instregex "VPSLLVDrr")>; +def: InstRW<[BWWriteResGroup31], (instregex "VPSRAVDYrr")>; +def: InstRW<[BWWriteResGroup31], (instregex "VPSRAVDrr")>; +def: InstRW<[BWWriteResGroup31], (instregex "VPSRLVDYrr")>; +def: InstRW<[BWWriteResGroup31], (instregex "VPSRLVDrr")>; + +def BWWriteResGroup32 : SchedWriteRes<[BWPort5,BWPort15]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup32], (instregex "MMX_PHADDSWrr64")>; +def: InstRW<[BWWriteResGroup32], (instregex "MMX_PHADDWrr64")>; +def: InstRW<[BWWriteResGroup32], (instregex "MMX_PHADDrr64")>; +def: InstRW<[BWWriteResGroup32], (instregex "MMX_PHSUBDrr64")>; +def: InstRW<[BWWriteResGroup32], (instregex "MMX_PHSUBSWrr64")>; +def: InstRW<[BWWriteResGroup32], (instregex "MMX_PHSUBWrr64")>; +def: InstRW<[BWWriteResGroup32], (instregex "PHADDDrr")>; +def: InstRW<[BWWriteResGroup32], (instregex "PHADDSWrr128")>; +def: InstRW<[BWWriteResGroup32], (instregex "PHADDWrr")>; +def: InstRW<[BWWriteResGroup32], (instregex "PHSUBDrr")>; +def: InstRW<[BWWriteResGroup32], (instregex "PHSUBSWrr128")>; +def: InstRW<[BWWriteResGroup32], (instregex "PHSUBWrr")>; +def: InstRW<[BWWriteResGroup32], (instregex "VPHADDDYrr")>; +def: InstRW<[BWWriteResGroup32], (instregex "VPHADDDrr")>; +def: InstRW<[BWWriteResGroup32], (instregex "VPHADDSWrr128")>; +def: InstRW<[BWWriteResGroup32], (instregex "VPHADDSWrr256")>; +def: InstRW<[BWWriteResGroup32], (instregex "VPHADDWYrr")>; +def: InstRW<[BWWriteResGroup32], (instregex "VPHADDWrr")>; +def: InstRW<[BWWriteResGroup32], (instregex 
"VPHSUBDYrr")>; +def: InstRW<[BWWriteResGroup32], (instregex "VPHSUBDrr")>; +def: InstRW<[BWWriteResGroup32], (instregex "VPHSUBSWrr128")>; +def: InstRW<[BWWriteResGroup32], (instregex "VPHSUBSWrr256")>; +def: InstRW<[BWWriteResGroup32], (instregex "VPHSUBWYrr")>; +def: InstRW<[BWWriteResGroup32], (instregex "VPHSUBWrr")>; + +def BWWriteResGroup33 : SchedWriteRes<[BWPort5,BWPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup33], (instregex "MMX_PACKSSDWirr")>; +def: InstRW<[BWWriteResGroup33], (instregex "MMX_PACKSSWBirr")>; +def: InstRW<[BWWriteResGroup33], (instregex "MMX_PACKUSWBirr")>; + +def BWWriteResGroup34 : SchedWriteRes<[BWPort6,BWPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[BWWriteResGroup34], (instregex "CLD")>; + +def BWWriteResGroup35 : SchedWriteRes<[BWPort06,BWPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[BWWriteResGroup35], (instregex "RCL(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup35], (instregex "RCL(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup35], (instregex "RCL8r1")>; +def: InstRW<[BWWriteResGroup35], (instregex "RCL8ri")>; +def: InstRW<[BWWriteResGroup35], (instregex "RCR(16|32|64)r1")>; +def: InstRW<[BWWriteResGroup35], (instregex "RCR(16|32|64)ri")>; +def: InstRW<[BWWriteResGroup35], (instregex "RCR8r1")>; +def: InstRW<[BWWriteResGroup35], (instregex "RCR8ri")>; + +def BWWriteResGroup36 : SchedWriteRes<[BWPort06,BWPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup36], (instregex "ROL(16|32|64)rCL")>; +def: InstRW<[BWWriteResGroup36], (instregex "ROL8rCL")>; +def: InstRW<[BWWriteResGroup36], (instregex "ROR(16|32|64)rCL")>; +def: InstRW<[BWWriteResGroup36], (instregex "ROR8rCL")>; +def: InstRW<[BWWriteResGroup36], (instregex "SAR(16|32|64)rCL")>; +def: InstRW<[BWWriteResGroup36], (instregex "SAR8rCL")>; +def: InstRW<[BWWriteResGroup36], (instregex "SHL(16|32|64)rCL")>; +def: InstRW<[BWWriteResGroup36], (instregex "SHL8rCL")>; +def: InstRW<[BWWriteResGroup36], (instregex "SHR(16|32|64)rCL")>; +def: InstRW<[BWWriteResGroup36], (instregex "SHR8rCL")>; + +def BWWriteResGroup37 : SchedWriteRes<[BWPort4,BWPort6,BWPort237,BWPort0156]> { + let Latency = 3; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[BWWriteResGroup37], (instregex "CALL(16|32|64)r")>; + +def BWWriteResGroup38 : SchedWriteRes<[BWPort4,BWPort237,BWPort06,BWPort0156]> { + let Latency = 3; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[BWWriteResGroup38], (instregex "CALL64pcrel32")>; +def: InstRW<[BWWriteResGroup38], (instregex "SETAm")>; +def: InstRW<[BWWriteResGroup38], (instregex "SETBEm")>; + +def BWWriteResGroup39 : SchedWriteRes<[BWPort0,BWPort1]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup39], (instregex "CVTSD2SI64rr")>; +def: InstRW<[BWWriteResGroup39], (instregex "CVTSD2SIrr")>; +def: InstRW<[BWWriteResGroup39], (instregex "CVTSS2SI64rr")>; +def: InstRW<[BWWriteResGroup39], (instregex "CVTSS2SIrr")>; +def: InstRW<[BWWriteResGroup39], (instregex "CVTTSD2SI64rr")>; +def: InstRW<[BWWriteResGroup39], (instregex "CVTTSD2SIrr")>; +def: InstRW<[BWWriteResGroup39], (instregex "CVTTSS2SI64rr")>; +def: InstRW<[BWWriteResGroup39], (instregex "CVTTSS2SIrr")>; +def: InstRW<[BWWriteResGroup39], (instregex "VCVTSD2SI64rr")>; +def: 
InstRW<[BWWriteResGroup39], (instregex "VCVTSD2SIrr")>; +def: InstRW<[BWWriteResGroup39], (instregex "VCVTSS2SI64rr")>; +def: InstRW<[BWWriteResGroup39], (instregex "VCVTSS2SIrr")>; +def: InstRW<[BWWriteResGroup39], (instregex "VCVTTSD2SI64rr")>; +def: InstRW<[BWWriteResGroup39], (instregex "VCVTTSD2SIrr")>; +def: InstRW<[BWWriteResGroup39], (instregex "VCVTTSS2SI64rr")>; +def: InstRW<[BWWriteResGroup39], (instregex "VCVTTSS2SIrr")>; + +def BWWriteResGroup40 : SchedWriteRes<[BWPort0,BWPort5]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup40], (instregex "VCVTPS2PDYrr")>; +def: InstRW<[BWWriteResGroup40], (instregex "VPSLLDYrr")>; +def: InstRW<[BWWriteResGroup40], (instregex "VPSLLQYrr")>; +def: InstRW<[BWWriteResGroup40], (instregex "VPSLLWYrr")>; +def: InstRW<[BWWriteResGroup40], (instregex "VPSRADYrr")>; +def: InstRW<[BWWriteResGroup40], (instregex "VPSRAWYrr")>; +def: InstRW<[BWWriteResGroup40], (instregex "VPSRLDYrr")>; +def: InstRW<[BWWriteResGroup40], (instregex "VPSRLQYrr")>; +def: InstRW<[BWWriteResGroup40], (instregex "VPSRLWYrr")>; +def: InstRW<[BWWriteResGroup40], (instregex "VPTESTYrr")>; + +def BWWriteResGroup41 : SchedWriteRes<[BWPort0,BWPort0156]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup41], (instregex "FNSTSW16r")>; + +def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup42], (instregex "CVTDQ2PDrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "CVTPD2DQrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "CVTPD2PSrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "CVTSD2SSrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "CVTSI2SD64rr")>; +def: InstRW<[BWWriteResGroup42], (instregex "CVTSI2SDrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "CVTSI2SSrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "CVTTPD2DQrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "IMUL(32|64)r")>; +def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTPD2PIirr")>; +def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTPI2PDirr")>; +def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTPS2PIirr")>; +def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTTPD2PIirr")>; +def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTTPS2PIirr")>; +def: InstRW<[BWWriteResGroup42], (instregex "MUL(32|64)r")>; +def: InstRW<[BWWriteResGroup42], (instregex "MULX64rr")>; +def: InstRW<[BWWriteResGroup42], (instregex "VCVTDQ2PDrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "VCVTPD2DQrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "VCVTPD2PSrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "VCVTPS2PHrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "VCVTSD2SSrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "VCVTSI2SD64rr")>; +def: InstRW<[BWWriteResGroup42], (instregex "VCVTSI2SDrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "VCVTSI2SSrr")>; +def: InstRW<[BWWriteResGroup42], (instregex "VCVTTPD2DQrr")>; + +def BWWriteResGroup42_16 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { + let Latency = 4; + let NumMicroOps = 4; +} +def: InstRW<[BWWriteResGroup42_16], (instregex "IMUL16r")>; +def: InstRW<[BWWriteResGroup42_16], (instregex "MUL16r")>; + +def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup43], (instregex "FNSTSWm")>; + +def 
BWWriteResGroup44 : SchedWriteRes<[BWPort1,BWPort4,BWPort237]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup44], (instregex "ISTT_FP16m")>; +def: InstRW<[BWWriteResGroup44], (instregex "ISTT_FP32m")>; +def: InstRW<[BWWriteResGroup44], (instregex "ISTT_FP64m")>; +def: InstRW<[BWWriteResGroup44], (instregex "IST_F16m")>; +def: InstRW<[BWWriteResGroup44], (instregex "IST_F32m")>; +def: InstRW<[BWWriteResGroup44], (instregex "IST_FP16m")>; +def: InstRW<[BWWriteResGroup44], (instregex "IST_FP32m")>; +def: InstRW<[BWWriteResGroup44], (instregex "IST_FP64m")>; +def: InstRW<[BWWriteResGroup44], (instregex "VCVTPS2PHYmr")>; +def: InstRW<[BWWriteResGroup44], (instregex "VCVTPS2PHmr")>; + +def BWWriteResGroup45 : SchedWriteRes<[BWPort0156]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [4]; +} +def: InstRW<[BWWriteResGroup45], (instregex "FNCLEX")>; + +def BWWriteResGroup46 : SchedWriteRes<[BWPort015,BWPort0156]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[BWWriteResGroup46], (instregex "VZEROUPPER")>; + +def BWWriteResGroup47 : SchedWriteRes<[BWPort0]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup47], (instregex "MMX_PMADDUBSWrr64")>; +def: InstRW<[BWWriteResGroup47], (instregex "MMX_PMADDWDirr")>; +def: InstRW<[BWWriteResGroup47], (instregex "MMX_PMULHRSWrr64")>; +def: InstRW<[BWWriteResGroup47], (instregex "MMX_PMULHUWirr")>; +def: InstRW<[BWWriteResGroup47], (instregex "MMX_PMULHWirr")>; +def: InstRW<[BWWriteResGroup47], (instregex "MMX_PMULLWirr")>; +def: InstRW<[BWWriteResGroup47], (instregex "MMX_PMULUDQirr")>; +def: InstRW<[BWWriteResGroup47], (instregex "MMX_PSADBWirr")>; +def: InstRW<[BWWriteResGroup47], (instregex "MUL_FPrST0")>; +def: InstRW<[BWWriteResGroup47], (instregex "MUL_FST0r")>; +def: InstRW<[BWWriteResGroup47], (instregex "MUL_FrST0")>; +def: InstRW<[BWWriteResGroup47], (instregex "PCLMULQDQrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "PCMPGTQrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "PHMINPOSUWrr128")>; +def: InstRW<[BWWriteResGroup47], (instregex "PMADDUBSWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "PMADDWDrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "PMULDQrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "PMULHRSWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "PMULHUWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "PMULHWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "PMULLWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "PMULUDQrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "PSADBWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "RCPPSr")>; +def: InstRW<[BWWriteResGroup47], (instregex "RCPSSr")>; +def: InstRW<[BWWriteResGroup47], (instregex "RSQRTPSr")>; +def: InstRW<[BWWriteResGroup47], (instregex "RSQRTSSr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPCLMULQDQrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPCMPGTQYrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPCMPGTQrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPHMINPOSUWrr128")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMADDUBSWYrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMADDUBSWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMADDWDYrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMADDWDrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMULDQYrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMULDQrr")>; 
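+
+// NOTE: Every override in this file follows the shape seen in
+// BWWriteResGroup47 (whose InstRW list continues below): a SchedWriteRes
+// record names the execution ports its micro-ops may issue to, and anonymous
+// InstRW defs bind opcodes to it by regex. Read the fields roughly as:
+// Latency = cycles until the result is ready, NumMicroOps = uops decoded,
+// and ResourceCycles = cycles consumed on each listed port, in order. A
+// purely hypothetical group (commented out, not part of the model) might
+// read:
+//
+//   def BWWriteResGroupExample : SchedWriteRes<[BWPort0,BWPort23]> {
+//     let Latency = 5;            // result ready 5 cycles after issue
+//     let NumMicroOps = 2;        // decodes to 2 micro-ops
+//     let ResourceCycles = [1,1]; // 1 cycle on BWPort0, 1 on BWPort23
+//   }
+//   def: InstRW<[BWWriteResGroupExample], (instregex "SOMEOPrm")>;
+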
+def: InstRW<[BWWriteResGroup47], (instregex "VPMULHRSWYrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMULHRSWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMULHUWYrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMULHUWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMULHWYrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMULHWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMULLWYrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMULLWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMULUDQYrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPMULUDQrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPSADBWYrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VPSADBWrr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VRCPPSr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VRCPSSr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VRSQRTPSr")>; +def: InstRW<[BWWriteResGroup47], (instregex "VRSQRTSSr")>; + +def BWWriteResGroup48 : SchedWriteRes<[BWPort01]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD132SSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD213SSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADD231SSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB132PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB132PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB132PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB132PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB213PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB213PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB213PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB213PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB231PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB231PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB231PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMADDSUB231PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB132SSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB213PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB213PDr")>; +def: 
InstRW<[BWWriteResGroup48], (instregex "VFMSUB213PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB213PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB213SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB213SSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUB231SSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD132PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD132PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD132PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD132PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD213PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD213PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD213PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD213PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD231PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD231PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD231PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFMSUBADD231PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD132SSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD213SSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMADD231SSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB132SSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213PSYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB213SSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231PDYr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231PDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231PSYr")>; +def: 
InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231PSr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231SDr")>; +def: InstRW<[BWWriteResGroup48], (instregex "VFNMSUB231SSr")>; + +def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup49], (instregex "LDDQUrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MMX_MOVD64from64rm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MMX_MOVD64rm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MMX_MOVD64to64rm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MMX_MOVQ64rm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOV(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOV64toPQIrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOV8rm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVAPDrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVAPSrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVDDUPrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVDI2PDIrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVDQArm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVDQUrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVNTDQArm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVSHDUPrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVSLDUPrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVSSrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVSX(16|32|64)rm16")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVSX(16|32|64)rm32")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVSX(16|32|64)rm8")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVUPDrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVUPSrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVZX(16|32|64)rm16")>; +def: InstRW<[BWWriteResGroup49], (instregex "MOVZX(16|32|64)rm8")>; +def: InstRW<[BWWriteResGroup49], (instregex "PREFETCHNTA")>; +def: InstRW<[BWWriteResGroup49], (instregex "PREFETCHT0")>; +def: InstRW<[BWWriteResGroup49], (instregex "PREFETCHT1")>; +def: InstRW<[BWWriteResGroup49], (instregex "PREFETCHT2")>; +def: InstRW<[BWWriteResGroup49], (instregex "VBROADCASTSSrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VLDDQUrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOV64toPQIrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVAPDrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVAPSrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVDDUPrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVDI2PDIrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVDQArm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVDQUrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVNTDQArm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVQI2PQIrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVSDrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVSHDUPrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVSLDUPrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVSSrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVUPDrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VMOVUPSrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VPBROADCASTDrm")>; +def: InstRW<[BWWriteResGroup49], (instregex "VPBROADCASTQrm")>; + +def BWWriteResGroup50 : SchedWriteRes<[BWPort1,BWPort5]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[BWWriteResGroup50], (instregex "CVTSI2SS64rr")>; +def: InstRW<[BWWriteResGroup50], (instregex 
"HADDPDrr")>; +def: InstRW<[BWWriteResGroup50], (instregex "HADDPSrr")>; +def: InstRW<[BWWriteResGroup50], (instregex "HSUBPDrr")>; +def: InstRW<[BWWriteResGroup50], (instregex "HSUBPSrr")>; +def: InstRW<[BWWriteResGroup50], (instregex "VCVTSI2SS64rr")>; +def: InstRW<[BWWriteResGroup50], (instregex "VHADDPDYrr")>; +def: InstRW<[BWWriteResGroup50], (instregex "VHADDPDrr")>; +def: InstRW<[BWWriteResGroup50], (instregex "VHADDPSYrr")>; +def: InstRW<[BWWriteResGroup50], (instregex "VHADDPSrr")>; +def: InstRW<[BWWriteResGroup50], (instregex "VHSUBPDYrr")>; +def: InstRW<[BWWriteResGroup50], (instregex "VHSUBPDrr")>; +def: InstRW<[BWWriteResGroup50], (instregex "VHSUBPSYrr")>; +def: InstRW<[BWWriteResGroup50], (instregex "VHSUBPSrr")>; + +def BWWriteResGroup51 : SchedWriteRes<[BWPort1,BWPort6,BWPort06]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup51], (instregex "STR(16|32|64)r")>; + +def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup52], (instregex "MULX32rr")>; + +def BWWriteResGroup53 : SchedWriteRes<[BWPort0,BWPort4,BWPort237,BWPort15]> { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[BWWriteResGroup53], (instregex "VMASKMOVPDYmr")>; +def: InstRW<[BWWriteResGroup53], (instregex "VMASKMOVPDmr")>; +def: InstRW<[BWWriteResGroup53], (instregex "VMASKMOVPSYmr")>; +def: InstRW<[BWWriteResGroup53], (instregex "VMASKMOVPSmr")>; +def: InstRW<[BWWriteResGroup53], (instregex "VPMASKMOVDYmr")>; +def: InstRW<[BWWriteResGroup53], (instregex "VPMASKMOVDmr")>; +def: InstRW<[BWWriteResGroup53], (instregex "VPMASKMOVQYmr")>; +def: InstRW<[BWWriteResGroup53], (instregex "VPMASKMOVQmr")>; + +def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> { + let Latency = 5; + let NumMicroOps = 5; + let ResourceCycles = [1,4]; +} +def: InstRW<[BWWriteResGroup54], (instregex "PAUSE")>; + +def BWWriteResGroup55 : SchedWriteRes<[BWPort06,BWPort0156]> { + let Latency = 5; + let NumMicroOps = 5; + let ResourceCycles = [1,4]; +} +def: InstRW<[BWWriteResGroup55], (instregex "XSETBV")>; + +def BWWriteResGroup56 : SchedWriteRes<[BWPort06,BWPort0156]> { + let Latency = 5; + let NumMicroOps = 5; + let ResourceCycles = [2,3]; +} +def: InstRW<[BWWriteResGroup56], (instregex "CMPXCHG(16|32|64)rr")>; +def: InstRW<[BWWriteResGroup56], (instregex "CMPXCHG8rr")>; + +def BWWriteResGroup57 : SchedWriteRes<[BWPort4,BWPort237,BWPort0156]> { + let Latency = 5; + let NumMicroOps = 6; + let ResourceCycles = [1,1,4]; +} +def: InstRW<[BWWriteResGroup57], (instregex "PUSHF16")>; +def: InstRW<[BWWriteResGroup57], (instregex "PUSHF64")>; + +def BWWriteResGroup58 : SchedWriteRes<[BWPort23]> { + let Latency = 6; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup58], (instregex "LD_F32m")>; +def: InstRW<[BWWriteResGroup58], (instregex "LD_F64m")>; +def: InstRW<[BWWriteResGroup58], (instregex "LD_F80m")>; +def: InstRW<[BWWriteResGroup58], (instregex "VBROADCASTF128")>; +def: InstRW<[BWWriteResGroup58], (instregex "VBROADCASTI128")>; +def: InstRW<[BWWriteResGroup58], (instregex "VBROADCASTSDYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VBROADCASTSSYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VLDDQUYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VMOVAPDYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VMOVAPSYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex 
"VMOVDDUPYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VMOVDQAYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VMOVDQUYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VMOVNTDQAYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VMOVSHDUPYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VMOVSLDUPYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VMOVUPDYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VMOVUPSYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VPBROADCASTDYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "VPBROADCASTQYrm")>; +def: InstRW<[BWWriteResGroup58], (instregex "ROUNDPDr")>; +def: InstRW<[BWWriteResGroup58], (instregex "ROUNDPSr")>; +def: InstRW<[BWWriteResGroup58], (instregex "ROUNDSDr")>; +def: InstRW<[BWWriteResGroup58], (instregex "ROUNDSSr")>; +def: InstRW<[BWWriteResGroup58], (instregex "VROUNDPDr")>; +def: InstRW<[BWWriteResGroup58], (instregex "VROUNDPSr")>; +def: InstRW<[BWWriteResGroup58], (instregex "VROUNDSDr")>; +def: InstRW<[BWWriteResGroup58], (instregex "VROUNDSSr")>; +def: InstRW<[BWWriteResGroup58], (instregex "VROUNDYPDr")>; +def: InstRW<[BWWriteResGroup58], (instregex "VROUNDYPSr")>; + +def BWWriteResGroup59 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup59], (instregex "CVTPS2PDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "CVTSS2SDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSLLDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSLLQrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSLLWrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSRADrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSRAWrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSRLDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSRLQrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "MMX_PSRLWrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VCVTPH2PSYrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VCVTPH2PSrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VCVTPS2PDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VCVTSS2SDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VPSLLVQrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VPSRLVQrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VTESTPDrm")>; +def: InstRW<[BWWriteResGroup59], (instregex "VTESTPSrm")>; + +def BWWriteResGroup60 : SchedWriteRes<[BWPort1,BWPort5]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup60], (instregex "VCVTDQ2PDYrr")>; +def: InstRW<[BWWriteResGroup60], (instregex "VCVTPD2DQYrr")>; +def: InstRW<[BWWriteResGroup60], (instregex "VCVTPD2PSYrr")>; +def: InstRW<[BWWriteResGroup60], (instregex "VCVTPS2PHYrr")>; +def: InstRW<[BWWriteResGroup60], (instregex "VCVTTPD2DQYrr")>; + +def BWWriteResGroup61 : SchedWriteRes<[BWPort5,BWPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup61], (instregex "ANDNPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "ANDNPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "ANDPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "ANDPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "INSERTPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PALIGNR64irm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PINSRWirmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PSHUFBrm64")>; +def: InstRW<[BWWriteResGroup61], (instregex 
"MMX_PSHUFWmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKHBWirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKHDQirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKHWDirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKLBWirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKLDQirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MMX_PUNPCKLWDirm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MOVHPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MOVHPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MOVLPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "MOVLPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "ORPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "ORPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PACKSSDWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PACKSSWBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PACKUSDWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PACKUSWBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PALIGNRrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PBLENDWrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PINSRBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PINSRDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PINSRQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PINSRWrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVSXBDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVSXBQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVSXBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVSXDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVSXWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVSXWQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXBDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXBQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PMOVZXWQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PSHUFBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PSHUFDmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PSHUFHWmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PSHUFLWmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKHBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKHDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKHQDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKHWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKLBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKLDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKLQDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "PUNPCKLWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "SHUFPDrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "SHUFPSrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "UNPCKHPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "UNPCKHPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "UNPCKLPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "UNPCKLPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VANDNPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VANDNPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VANDPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VANDPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VINSERTPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex 
"VMOVHPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VMOVHPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VMOVLPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VMOVLPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VORPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VORPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPACKSSDWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPACKSSWBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPACKUSDWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPACKUSWBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPALIGNRrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPBLENDWrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPERMILPDmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPERMILPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPERMILPSmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPERMILPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPINSRBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPINSRDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPINSRQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPINSRWrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXBDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXBQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVSXWQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXBDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXBQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPMOVZXWQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPSHUFBrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPSHUFDmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPSHUFHWmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPSHUFLWmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKHBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKHDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKHQDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKHWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKLBWrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKLDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKLQDQrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VPUNPCKLWDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VSHUFPDrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VSHUFPSrmi")>; +def: InstRW<[BWWriteResGroup61], (instregex "VUNPCKHPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VUNPCKHPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VUNPCKLPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VUNPCKLPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VXORPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "VXORPSrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "XORPDrm")>; +def: InstRW<[BWWriteResGroup61], (instregex "XORPSrm")>; + +def BWWriteResGroup62 : SchedWriteRes<[BWPort6,BWPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup62], (instregex "FARJMP64")>; +def: InstRW<[BWWriteResGroup62], (instregex "JMP(16|32|64)m")>; + +def BWWriteResGroup63 
: SchedWriteRes<[BWPort23,BWPort06]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup63], (instregex "ADC(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADC8rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADCX32rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADCX64rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADOX32rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "ADOX64rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "BT(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVAE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVB(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVG(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVGE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVL(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVLE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVNE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVNO(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVNP(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVNS(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVO(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVP(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "CMOVS(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "RORX32mi")>; +def: InstRW<[BWWriteResGroup63], (instregex "RORX64mi")>; +def: InstRW<[BWWriteResGroup63], (instregex "SARX32rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SARX64rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SBB(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SBB8rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SHLX32rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SHLX64rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SHRX32rm")>; +def: InstRW<[BWWriteResGroup63], (instregex "SHRX64rm")>; + +def BWWriteResGroup64 : SchedWriteRes<[BWPort23,BWPort15]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup64], (instregex "ANDN32rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "ANDN64rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSI32rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSI64rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSMSK32rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSMSK64rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSR32rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BLSR64rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BZHI32rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "BZHI64rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PABSBrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PABSDrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PABSWrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDDirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDQirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDSBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDUSBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDUSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PADDWirm")>; +def: 
InstRW<[BWWriteResGroup64], (instregex "MMX_PAVGBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PAVGWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPEQBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPEQDirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPEQWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPGTBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPGTDirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PCMPGTWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PMAXSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PMAXUBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PMINSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PMINUBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSIGNBrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSIGNDrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSIGNWrm64")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBDirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBQirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBSBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBUSBirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBUSWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MMX_PSUBWirm")>; +def: InstRW<[BWWriteResGroup64], (instregex "MOVBE(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PABSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PABSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PABSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDQrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDUSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDUSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PADDWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PAVGBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PAVGWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPEQBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPEQDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPEQQrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPEQWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPGTBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPGTDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PCMPGTWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMAXSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMAXSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMAXSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMAXUBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMAXUDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMAXUWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINUBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINUDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PMINUWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSIGNBrm128")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSIGNDrm128")>; 
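+
+// NOTE: instregex binds opcodes by regular expression, which is why sized
+// variants are folded with alternation elsewhere in this file (e.g.
+// "ROL(16|32|64)r1"). The generator emits one row per opcode here, but the
+// rows could in principle be folded the same way; a single hypothetical
+// line such as
+//   def: InstRW<[BWWriteResGroup64], (instregex "V?PSIGN(B|D|W)rm128")>;
+// would subsume the six (V)PSIGN[BDW]rm128 rows surrounding this comment.
+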
+def: InstRW<[BWWriteResGroup64], (instregex "PSIGNWrm128")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBQrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBUSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBUSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "PSUBWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPABSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPABSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPABSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDQrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDUSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDUSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPADDWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPAVGBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPAVGWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPEQBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPEQDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPEQQrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPEQWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPGTBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPGTDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPCMPGTWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXUBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXUDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMAXUWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINSDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINUBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINUDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPMINUWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSIGNBrm128")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSIGNDrm128")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSIGNWrm128")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBDrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBQrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBUSBrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBUSWrm")>; +def: InstRW<[BWWriteResGroup64], (instregex "VPSUBWrm")>; + +def BWWriteResGroup65 : SchedWriteRes<[BWPort23,BWPort015]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup65], (instregex "BLENDPDrmi")>; +def: InstRW<[BWWriteResGroup65], (instregex "BLENDPSrmi")>; +def: InstRW<[BWWriteResGroup65], (instregex "MMX_PANDNirm")>; +def: InstRW<[BWWriteResGroup65], (instregex "MMX_PANDirm")>; +def: InstRW<[BWWriteResGroup65], (instregex 
"MMX_PORirm")>; +def: InstRW<[BWWriteResGroup65], (instregex "MMX_PXORirm")>; +def: InstRW<[BWWriteResGroup65], (instregex "PANDNrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "PANDrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "PORrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "PXORrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VBLENDPDrmi")>; +def: InstRW<[BWWriteResGroup65], (instregex "VBLENDPSrmi")>; +def: InstRW<[BWWriteResGroup65], (instregex "VINSERTF128rm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VINSERTI128rm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VPANDNrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VPANDrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VPBLENDDrmi")>; +def: InstRW<[BWWriteResGroup65], (instregex "VPORrm")>; +def: InstRW<[BWWriteResGroup65], (instregex "VPXORrm")>; + +def BWWriteResGroup66 : SchedWriteRes<[BWPort23,BWPort0156]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup66], (instregex "ADD(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "ADD8rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "AND(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "AND8rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP(16|32|64)mr")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP8mi")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP8mr")>; +def: InstRW<[BWWriteResGroup66], (instregex "CMP8rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "OR(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "OR8rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)r")>; +def: InstRW<[BWWriteResGroup66], (instregex "POP(16|32|64)rmr")>; +def: InstRW<[BWWriteResGroup66], (instregex "SUB(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "SUB8rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "TEST(16|32|64)mr")>; +def: InstRW<[BWWriteResGroup66], (instregex "TEST8mi")>; +def: InstRW<[BWWriteResGroup66], (instregex "TEST8mr")>; +def: InstRW<[BWWriteResGroup66], (instregex "XOR(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup66], (instregex "XOR8rm")>; + +def BWWriteResGroup67 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[BWWriteResGroup67], (instregex "SHLD(16|32|64)rrCL")>; +def: InstRW<[BWWriteResGroup67], (instregex "SHRD(16|32|64)rrCL")>; + +def BWWriteResGroup68 : SchedWriteRes<[BWPort1,BWPort6,BWPort06,BWPort0156]> { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[BWWriteResGroup68], (instregex "SLDT(16|32|64)r")>; + +def BWWriteResGroup69 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[BWWriteResGroup69], (instregex "BTC(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup69], (instregex "BTR(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup69], (instregex "BTS(16|32|64)mi8")>; +def: InstRW<[BWWriteResGroup69], (instregex "SAR(16|32|64)m1")>; +def: InstRW<[BWWriteResGroup69], (instregex "SAR(16|32|64)mi")>; +def: InstRW<[BWWriteResGroup69], (instregex "SAR8m1")>; +def: InstRW<[BWWriteResGroup69], (instregex "SAR8mi")>; +def: InstRW<[BWWriteResGroup69], (instregex "SHL(16|32|64)m1")>; +def: InstRW<[BWWriteResGroup69], (instregex "SHL(16|32|64)mi")>; +def: 
+def BWWriteResGroup69 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[BWWriteResGroup69], (instregex "BT(C|R|S)(16|32|64)mi8")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SHL(8|16|32|64)m(1|i)")>;
+def: InstRW<[BWWriteResGroup69], (instregex "SHR(8|16|32|64)m(1|i)")>;
+
+def BWWriteResGroup70 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
+  let Latency = 6;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[BWWriteResGroup70], (instregex "ADD(16|32|64)(mi8|mr)")>;
+def: InstRW<[BWWriteResGroup70], (instregex "ADD8(mi|mr)")>;
+def: InstRW<[BWWriteResGroup70], (instregex "AND(16|32|64)(mi8|mr)")>;
+def: InstRW<[BWWriteResGroup70], (instregex "AND8(mi|mr)")>;
+def: InstRW<[BWWriteResGroup70], (instregex "(DEC|INC|NEG|NOT)(8|16|32|64)m")>;
+def: InstRW<[BWWriteResGroup70], (instregex "OR(16|32|64)(mi8|mr)")>;
+def: InstRW<[BWWriteResGroup70], (instregex "OR8(mi|mr)")>;
+def: InstRW<[BWWriteResGroup70], (instregex "POP(16|32|64)rmm")>;
+def: InstRW<[BWWriteResGroup70], (instregex "PUSH(16|32|64)rmm")>;
+def: InstRW<[BWWriteResGroup70], (instregex "SUB(16|32|64)(mi8|mr)")>;
+def: InstRW<[BWWriteResGroup70], (instregex "SUB8(mi|mr)")>;
+def: InstRW<[BWWriteResGroup70], (instregex "XOR(16|32|64)(mi8|mr)")>;
+def: InstRW<[BWWriteResGroup70], (instregex "XOR8(mi|mr)")>;
+
+def BWWriteResGroup71 : SchedWriteRes<[BWPort6,BWPort0156]> {
+  let Latency = 6;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,5];
+}
+def: InstRW<[BWWriteResGroup71], (instregex "STD")>;
+
+def BWWriteResGroup72 : SchedWriteRes<[BWPort5]> {
+  let Latency = 7;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup72], (instregex "(V?)AES(DEC|ENC)(LAST)?rr")>;
+
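+// For 256-bit memory operands the model appears to charge a 6-cycle load
+// (vs. 5 for 128-bit), so these single-cycle YMM ops come out at latency 7.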
"VPSLLWYrm")>; +def: InstRW<[BWWriteResGroup73], (instregex "VPSRADYrm")>; +def: InstRW<[BWWriteResGroup73], (instregex "VPSRAWYrm")>; +def: InstRW<[BWWriteResGroup73], (instregex "VPSRLDYrm")>; +def: InstRW<[BWWriteResGroup73], (instregex "VPSRLQYrm")>; +def: InstRW<[BWWriteResGroup73], (instregex "VPSRLVQYrm")>; +def: InstRW<[BWWriteResGroup73], (instregex "VPSRLWYrm")>; +def: InstRW<[BWWriteResGroup73], (instregex "VTESTPDYrm")>; +def: InstRW<[BWWriteResGroup73], (instregex "VTESTPSYrm")>; + +def BWWriteResGroup74 : SchedWriteRes<[BWPort1,BWPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup74], (instregex "FCOM32m")>; +def: InstRW<[BWWriteResGroup74], (instregex "FCOM64m")>; +def: InstRW<[BWWriteResGroup74], (instregex "FCOMP32m")>; +def: InstRW<[BWWriteResGroup74], (instregex "FCOMP64m")>; + +def BWWriteResGroup75 : SchedWriteRes<[BWPort5,BWPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup75], (instregex "VANDNPDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VANDNPSYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VANDPDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VANDPSYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VORPDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VORPSYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPACKSSDWYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPACKSSWBYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPACKUSDWYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPACKUSWBYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPALIGNRYrmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPBLENDWYrmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPERMILPDYmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPERMILPDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPERMILPSYmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPERMILPSYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPSHUFBYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPSHUFDYmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPSHUFHWYmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPSHUFLWYmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKHBWYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKHDQYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKHQDQYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKHWDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKLBWYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKLDQYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKLQDQYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VPUNPCKLWDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VSHUFPDYrmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VSHUFPSYrmi")>; +def: InstRW<[BWWriteResGroup75], (instregex "VUNPCKHPDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VUNPCKHPSYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VUNPCKLPDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VUNPCKLPSYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VXORPDYrm")>; +def: InstRW<[BWWriteResGroup75], (instregex "VXORPSYrm")>; + +def BWWriteResGroup76 : SchedWriteRes<[BWPort23,BWPort15]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup76], (instregex "VPABSBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPABSDYrm")>; +def: InstRW<[BWWriteResGroup76], 
(instregex "VPABSWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPADDBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPADDDYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPADDQYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPADDSBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPADDSWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPADDUSBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPADDUSWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPADDWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPAVGBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPAVGWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPCMPEQBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPCMPEQDYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPCMPEQQYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPCMPEQWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPCMPGTBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPCMPGTDYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPCMPGTWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMAXSBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMAXSDYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMAXSWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMAXUBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMAXUDYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMAXUWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMINSBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMINSDYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMINSWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMINUBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMINUDYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPMINUWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPSIGNBYrm256")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPSIGNDYrm256")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPSIGNWYrm256")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPSUBBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPSUBDYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPSUBQYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPSUBSBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPSUBSWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPSUBUSBYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPSUBUSWYrm")>; +def: InstRW<[BWWriteResGroup76], (instregex "VPSUBWYrm")>; + +def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup77], (instregex "VBLENDPDYrmi")>; +def: InstRW<[BWWriteResGroup77], (instregex "VBLENDPSYrmi")>; +def: InstRW<[BWWriteResGroup77], (instregex "VPANDNYrm")>; +def: InstRW<[BWWriteResGroup77], (instregex "VPANDYrm")>; +def: InstRW<[BWWriteResGroup77], (instregex "VPBLENDDYrmi")>; +def: InstRW<[BWWriteResGroup77], (instregex "VPORYrm")>; +def: InstRW<[BWWriteResGroup77], (instregex "VPXORYrm")>; + +def BWWriteResGroup78 : SchedWriteRes<[BWPort0,BWPort5]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[BWWriteResGroup78], (instregex "MPSADBWrri")>; +def: InstRW<[BWWriteResGroup78], (instregex "VMPSADBWYrri")>; +def: InstRW<[BWWriteResGroup78], (instregex "VMPSADBWrri")>; + +def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup79], (instregex 
"BLENDVPDrm0")>; +def: InstRW<[BWWriteResGroup79], (instregex "BLENDVPSrm0")>; +def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKSSDWirm")>; +def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKSSWBirm")>; +def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKUSWBirm")>; +def: InstRW<[BWWriteResGroup79], (instregex "PBLENDVBrm0")>; +def: InstRW<[BWWriteResGroup79], (instregex "VBLENDVPDrm")>; +def: InstRW<[BWWriteResGroup79], (instregex "VBLENDVPSrm")>; +def: InstRW<[BWWriteResGroup79], (instregex "VMASKMOVPDrm")>; +def: InstRW<[BWWriteResGroup79], (instregex "VMASKMOVPSrm")>; +def: InstRW<[BWWriteResGroup79], (instregex "VPBLENDVBrm")>; +def: InstRW<[BWWriteResGroup79], (instregex "VPMASKMOVDrm")>; +def: InstRW<[BWWriteResGroup79], (instregex "VPMASKMOVQrm")>; + +def BWWriteResGroup80 : SchedWriteRes<[BWPort23,BWPort0156]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[BWWriteResGroup80], (instregex "LEAVE64")>; +def: InstRW<[BWWriteResGroup80], (instregex "SCASB")>; +def: InstRW<[BWWriteResGroup80], (instregex "SCASL")>; +def: InstRW<[BWWriteResGroup80], (instregex "SCASQ")>; +def: InstRW<[BWWriteResGroup80], (instregex "SCASW")>; + +def BWWriteResGroup81 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup81], (instregex "PSLLDrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "PSLLQrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "PSLLWrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "PSRADrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "PSRAWrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "PSRLDrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "PSRLQrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "PSRLWrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "PTESTrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "VPSLLDrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "VPSLLQrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "VPSLLWrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "VPSRADrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "VPSRAWrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "VPSRLDrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "VPSRLQrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "VPSRLWrm")>; +def: InstRW<[BWWriteResGroup81], (instregex "VPTESTrm")>; + +def BWWriteResGroup82 : SchedWriteRes<[BWPort0,BWPort01,BWPort23]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup82], (instregex "FLDCW16m")>; + +def BWWriteResGroup83 : SchedWriteRes<[BWPort0,BWPort23,BWPort0156]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup83], (instregex "LDMXCSR")>; +def: InstRW<[BWWriteResGroup83], (instregex "VLDMXCSR")>; + +def BWWriteResGroup84 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup84], (instregex "LRETQ")>; +def: InstRW<[BWWriteResGroup84], (instregex "RETQ")>; + +def BWWriteResGroup85 : SchedWriteRes<[BWPort23,BWPort06,BWPort15]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup85], (instregex "BEXTR32rm")>; +def: InstRW<[BWWriteResGroup85], (instregex "BEXTR64rm")>; + +def BWWriteResGroup86 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = 
+def BWWriteResGroup86 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup86], (instregex "CMOV(A|BE)(16|32|64)rm")>;
+
+def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[BWWriteResGroup87], (instregex "RO(L|R)(8|16|32|64)m(1|i)")>;
+
+def BWWriteResGroup88 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[BWWriteResGroup88], (instregex "XADD(8|16|32|64)rm")>;
+
+def BWWriteResGroup89 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,1,1];
+}
+def: InstRW<[BWWriteResGroup89], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[BWWriteResGroup89], (instregex "FARCALL64")>;
+
+def BWWriteResGroup90 : SchedWriteRes<[BWPort6,BWPort06,BWPort15,BWPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 7;
+  let ResourceCycles = [2,2,1,2];
+}
+def: InstRW<[BWWriteResGroup90], (instregex "LOOP")>;
+
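+// Three-cycle Port1 operations (FP add/sub/compare, bit scans, multiplies)
+// with a folded load; 5 + 3 matches the 8-cycle latency below.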
"MMX_CVTPS2PIirm")>; +def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTTPS2PIirm")>; +def: InstRW<[BWWriteResGroup91], (instregex "MUL64m")>; +def: InstRW<[BWWriteResGroup91], (instregex "MUL8m")>; +def: InstRW<[BWWriteResGroup91], (instregex "PDEP32rm")>; +def: InstRW<[BWWriteResGroup91], (instregex "PDEP64rm")>; +def: InstRW<[BWWriteResGroup91], (instregex "PEXT32rm")>; +def: InstRW<[BWWriteResGroup91], (instregex "PEXT64rm")>; +def: InstRW<[BWWriteResGroup91], (instregex "POPCNT(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup91], (instregex "SUBPDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "SUBPSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "SUBSDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "SUBSSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "TZCNT(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup91], (instregex "UCOMISDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "UCOMISSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VADDPDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VADDPSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VADDSDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VADDSSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VADDSUBPDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VADDSUBPSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VCMPPDrmi")>; +def: InstRW<[BWWriteResGroup91], (instregex "VCMPPSrmi")>; +def: InstRW<[BWWriteResGroup91], (instregex "VCMPSDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VCMPSSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VCOMISDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VCOMISSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VCVTDQ2PSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VCVTPS2DQrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VCVTTPS2DQrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMAXPDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMAXPSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMAXSDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMAXSSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMINPDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMINPSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMINSDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VMINSSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VSUBPDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VSUBPSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VSUBSDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VSUBSSrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VUCOMISDrm")>; +def: InstRW<[BWWriteResGroup91], (instregex "VUCOMISSrm")>; + +def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup91_16], (instregex "IMUL16rm(i8?)")>; + +def BWWriteResGroup91_16_2 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { + let Latency = 8; + let NumMicroOps = 5; +} +def: InstRW<[BWWriteResGroup91_16_2], (instregex "IMUL16m")>; +def: InstRW<[BWWriteResGroup91_16_2], (instregex "MUL16m")>; + +def BWWriteResGroup91_32 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup91_32], (instregex "IMUL32m")>; +def: InstRW<[BWWriteResGroup91_32], (instregex "MUL32m")>; + +def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} 
+def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup92], (instregex "VPMOVSX(BD|BQ|BW|DQ|WD|WQ)Yrm")>;
+def: InstRW<[BWWriteResGroup92], (instregex "VPMOVZXWDYrm")>;
+
+def BWWriteResGroup93 : SchedWriteRes<[BWPort01,BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup93], (instregex "(V?)MUL(P|S)(D|S)rm")>;
+
+def BWWriteResGroup94 : SchedWriteRes<[BWPort5,BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup94], (instregex "VBLENDVP(D|S)Yrm")>;
+def: InstRW<[BWWriteResGroup94], (instregex "VMASKMOVP(D|S)Yrm")>;
+def: InstRW<[BWWriteResGroup94], (instregex "VPBLENDVBYrm")>;
+def: InstRW<[BWWriteResGroup94], (instregex "VPMASKMOV(D|Q)Yrm")>;
+
+def BWWriteResGroup95 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup95], (instregex "VPS(LL|RA|RL)VDrm")>;
+
+def BWWriteResGroup96 : SchedWriteRes<[BWPort5,BWPort23,BWPort15]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup96], (instregex "MMX_PHADD(SW|W)?rm64")>;
+def: InstRW<[BWWriteResGroup96], (instregex "MMX_PHSUB(D|SW|W)rm64")>;
+def: InstRW<[BWWriteResGroup96], (instregex "(V?)PH(ADD|SUB)(D|W)rm")>;
+def: InstRW<[BWWriteResGroup96], (instregex "(V?)PH(ADD|SUB)SWrm128")>;
+
+def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[BWWriteResGroup97], (instregex "RC(L|R)(8|16|32|64)m(1|i)")>;
+
+def BWWriteResGroup98 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,2,1];
+}
+def: InstRW<[BWWriteResGroup98], (instregex "ROR(8|16|32|64)mCL")>;
+
+def BWWriteResGroup99 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,1,1,3];
+}
+def: InstRW<[BWWriteResGroup99], (instregex "ADC(16|32|64)mi8")>;
+def: InstRW<[BWWriteResGroup99], (instregex "ADC8mi")>;
+def: InstRW<[BWWriteResGroup99], (instregex "(ADD|AND|OR|SUB|XOR)8mi")>;
+def: InstRW<[BWWriteResGroup99], (instregex "XCHG(8|16|32|64)rm")>;
+
+def BWWriteResGroup100 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,1,1,2,1];
+}
+def: InstRW<[BWWriteResGroup100], (instregex "ADC(16|32|64)mr")>;
+def: InstRW<[BWWriteResGroup100], (instregex "ADC8mr")>;
+def: InstRW<[BWWriteResGroup100], (instregex "CMPXCHG(8|16|32|64)rm")>;
+def: InstRW<[BWWriteResGroup100], (instregex "(ROL|SAR|SHL|SHR)(8|16|32|64)mCL")>;
+def: InstRW<[BWWriteResGroup100], (instregex "SBB(16|32|64)(mi8|mr)")>;
+def: InstRW<[BWWriteResGroup100], (instregex "SBB8(mi|mr)")>;
+
+def BWWriteResGroup101 : SchedWriteRes<[BWPort1,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup101], (instregex "ADD_F(32|64)m")>;
+def: InstRW<[BWWriteResGroup101], (instregex "ILD_F(16|32|64)m")>;
+def: InstRW<[BWWriteResGroup101], (instregex "SUB(R?)_F(32|64)m")>;
+def: InstRW<[BWWriteResGroup101], (instregex "VADD(SUB)?P(D|S)Yrm")>;
+def: InstRW<[BWWriteResGroup101], (instregex "VCMPP(D|S)Yrmi")>;
+def: InstRW<[BWWriteResGroup101], (instregex "VCVT(DQ2PS|PS2DQ|TPS2DQ)Yrm")>;
+def: InstRW<[BWWriteResGroup101], (instregex "V(MAX|MIN)P(D|S)Yrm")>;
+def: InstRW<[BWWriteResGroup101], (instregex "VSUBP(D|S)Yrm")>;
+
+def BWWriteResGroup102 : SchedWriteRes<[BWPort5,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup102], (instregex "VPERM2(F|I)128rm")>;
+def: InstRW<[BWWriteResGroup102], (instregex "VPERM(D|PS)Yrm")>;
+def: InstRW<[BWWriteResGroup102], (instregex "VPERM(PD|Q)Ymi")>;
+def: InstRW<[BWWriteResGroup102], (instregex "VPMOVZX(BD|BQ|BW|DQ|WQ)Yrm")>;
+
+def BWWriteResGroup103 : SchedWriteRes<[BWPort01,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup103], (instregex "VMULP(D|S)Yrm")>;
+
+def BWWriteResGroup104 : SchedWriteRes<[BWPort0,BWPort1,BWPort5]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup104], (instregex "(V?)DPPDrri")>;
+
+def BWWriteResGroup105 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup105], (instregex "CVTS(D|S)2SI(64)?rm")>;
+def: InstRW<[BWWriteResGroup105], (instregex "CVTTSD2SI(64)?rm")>;
+def: InstRW<[BWWriteResGroup105], (instregex "CVTTSS2SIrm")>;
+def: InstRW<[BWWriteResGroup105], (instregex "VCVT(T?)S(D|S)2SI(64)?rm")>;
+
+def BWWriteResGroup106 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup106], (instregex "VCVTPS2PDYrm")>;
+
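+// The Port1+Port5+load pattern below covers the packed double-precision
+// conversions (and MULX64rm, which happens to share the same profile).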
"CVTPD2DQrm")>; +def: InstRW<[BWWriteResGroup107], (instregex "CVTPD2PSrm")>; +def: InstRW<[BWWriteResGroup107], (instregex "CVTSD2SSrm")>; +def: InstRW<[BWWriteResGroup107], (instregex "CVTTPD2DQrm")>; +def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVTPD2PIirm")>; +def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVTPI2PDirm")>; +def: InstRW<[BWWriteResGroup107], (instregex "MMX_CVTTPD2PIirm")>; +def: InstRW<[BWWriteResGroup107], (instregex "MULX64rm")>; +def: InstRW<[BWWriteResGroup107], (instregex "VCVTDQ2PDrm")>; +def: InstRW<[BWWriteResGroup107], (instregex "VCVTSD2SSrm")>; + +def BWWriteResGroup108 : SchedWriteRes<[BWPort5,BWPort23,BWPort015]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTBYrm")>; +def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTBrm")>; +def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTWYrm")>; +def: InstRW<[BWWriteResGroup108], (instregex "VPBROADCASTWrm")>; + +def BWWriteResGroup109 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[BWWriteResGroup109], (instregex "VPSLLVDYrm")>; +def: InstRW<[BWWriteResGroup109], (instregex "VPSRAVDYrm")>; +def: InstRW<[BWWriteResGroup109], (instregex "VPSRLVDYrm")>; + +def BWWriteResGroup110 : SchedWriteRes<[BWPort5,BWPort23,BWPort15]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[BWWriteResGroup110], (instregex "VPHADDDYrm")>; +def: InstRW<[BWWriteResGroup110], (instregex "VPHADDSWrm256")>; +def: InstRW<[BWWriteResGroup110], (instregex "VPHADDWYrm")>; +def: InstRW<[BWWriteResGroup110], (instregex "VPHSUBDYrm")>; +def: InstRW<[BWWriteResGroup110], (instregex "VPHSUBSWrm256")>; +def: InstRW<[BWWriteResGroup110], (instregex "VPHSUBWYrm")>; + +def BWWriteResGroup111 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort0156]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[BWWriteResGroup111], (instregex "SHLD(16|32|64)mri8")>; +def: InstRW<[BWWriteResGroup111], (instregex "SHRD(16|32|64)mri8")>; + +def BWWriteResGroup112 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> { + let Latency = 9; + let NumMicroOps = 5; + let ResourceCycles = [1,1,3]; +} +def: InstRW<[BWWriteResGroup112], (instregex "RDRAND(16|32|64)r")>; + +def BWWriteResGroup113 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> { + let Latency = 9; + let NumMicroOps = 5; + let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[BWWriteResGroup113], (instregex "LAR(16|32|64)rm")>; +def: InstRW<[BWWriteResGroup113], (instregex "LSL(16|32|64)rm")>; + +def BWWriteResGroup114 : SchedWriteRes<[BWPort0]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[BWWriteResGroup114], (instregex "PMULLDrr")>; +def: InstRW<[BWWriteResGroup114], (instregex "VPMULLDYrr")>; +def: InstRW<[BWWriteResGroup114], (instregex "VPMULLDrr")>; + +def BWWriteResGroup115 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup115], (instregex "MMX_PMADDUBSWrm64")>; +def: InstRW<[BWWriteResGroup115], (instregex "MMX_PMADDWDirm")>; +def: InstRW<[BWWriteResGroup115], (instregex "MMX_PMULHRSWrm64")>; +def: InstRW<[BWWriteResGroup115], (instregex "MMX_PMULHUWirm")>; +def: InstRW<[BWWriteResGroup115], (instregex "MMX_PMULHWirm")>; +def: InstRW<[BWWriteResGroup115], (instregex "MMX_PMULLWirm")>; +def: 
+def BWWriteResGroup115 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup115], (instregex "MMX_PMADDUBSWrm64")>;
+def: InstRW<[BWWriteResGroup115], (instregex "MMX_PMADDWDirm")>;
+def: InstRW<[BWWriteResGroup115], (instregex "MMX_PMULHRSWrm64")>;
+def: InstRW<[BWWriteResGroup115], (instregex "MMX_PMUL(HUW|HW|LW|UDQ)irm")>;
+def: InstRW<[BWWriteResGroup115], (instregex "MMX_PSADBWirm")>;
+def: InstRW<[BWWriteResGroup115], (instregex "(V?)PCLMULQDQrm")>;
+def: InstRW<[BWWriteResGroup115], (instregex "(V?)PCMPGTQrm")>;
+def: InstRW<[BWWriteResGroup115], (instregex "(V?)PHMINPOSUWrm128")>;
+def: InstRW<[BWWriteResGroup115], (instregex "(V?)PMADD(UBSW|WD)rm")>;
+def: InstRW<[BWWriteResGroup115], (instregex "(V?)PMUL(DQ|HRSW|HUW|HW|LW|UDQ)rm")>;
+def: InstRW<[BWWriteResGroup115], (instregex "(V?)PSADBWrm")>;
+def: InstRW<[BWWriteResGroup115], (instregex "(V?)R(CP|SQRT)(PS|SS)m")>;
+
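+// FMA memory forms; the two regexes cover every 132/213/231 variant. Latency
+// is presumably the 5-cycle FMA plus the 5-cycle load.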
+def BWWriteResGroup116 : SchedWriteRes<[BWPort01,BWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup116], (instregex "VF(N?)M(ADD|SUB)(132|213|231)(PD|PS|SD|SS)m")>;
+def: InstRW<[BWWriteResGroup116], (instregex "VFM(ADDSUB|SUBADD)(132|213|231)(PD|PS)m")>;
+
+def BWWriteResGroup117 : SchedWriteRes<[BWPort1,BWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup117], (instregex "FICOM(P?)(16|32)m")>;
+
+def BWWriteResGroup118 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup118], (instregex "VPTESTYrm")>;
+
+def BWWriteResGroup119 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[BWWriteResGroup119], (instregex "(V?)H(ADD|SUB)P(D|S)rm")>;
+
(instregex "HSUBPSrm")>; +def: InstRW<[BWWriteResGroup119], (instregex "VHADDPDrm")>; +def: InstRW<[BWWriteResGroup119], (instregex "VHADDPSrm")>; +def: InstRW<[BWWriteResGroup119], (instregex "VHSUBPDrm")>; +def: InstRW<[BWWriteResGroup119], (instregex "VHSUBPSrm")>; + +def BWWriteResGroup120 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { + let Latency = 10; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[BWWriteResGroup120], (instregex "CVTTSS2SI64rm")>; + +def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> { + let Latency = 10; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[BWWriteResGroup121], (instregex "MULX32rm")>; + +def BWWriteResGroup122 : SchedWriteRes<[BWPort0]> { + let Latency = 11; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[BWWriteResGroup122], (instregex "DIVPSrr")>; +def: InstRW<[BWWriteResGroup122], (instregex "DIVSSrr")>; +def: InstRW<[BWWriteResGroup122], (instregex "VDIVPSrr")>; +def: InstRW<[BWWriteResGroup122], (instregex "VDIVSSrr")>; + +def BWWriteResGroup123 : SchedWriteRes<[BWPort0,BWPort23]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup123], (instregex "MUL_F32m")>; +def: InstRW<[BWWriteResGroup123], (instregex "MUL_F64m")>; +def: InstRW<[BWWriteResGroup123], (instregex "VPCMPGTQYrm")>; +def: InstRW<[BWWriteResGroup123], (instregex "VPMADDUBSWYrm")>; +def: InstRW<[BWWriteResGroup123], (instregex "VPMADDWDYrm")>; +def: InstRW<[BWWriteResGroup123], (instregex "VPMULDQYrm")>; +def: InstRW<[BWWriteResGroup123], (instregex "VPMULHRSWYrm")>; +def: InstRW<[BWWriteResGroup123], (instregex "VPMULHUWYrm")>; +def: InstRW<[BWWriteResGroup123], (instregex "VPMULHWYrm")>; +def: InstRW<[BWWriteResGroup123], (instregex "VPMULLWYrm")>; +def: InstRW<[BWWriteResGroup123], (instregex "VPMULUDQYrm")>; +def: InstRW<[BWWriteResGroup123], (instregex "VPSADBWYrm")>; + +def BWWriteResGroup124 : SchedWriteRes<[BWPort01,BWPort23]> { + let Latency = 11; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[BWWriteResGroup124], (instregex "VFMADD132PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMADD132PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMADD213PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMADD213PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMADD231PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMADD231PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB132PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB132PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB213PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB213PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB231PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMADDSUB231PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB132PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB132PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB213PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB213PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB231PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMSUB231PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMSUBADD132PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMSUBADD132PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMSUBADD213PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex 
"VFMSUBADD213PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMSUBADD231PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFMSUBADD231PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD132PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD132PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD213PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD213PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD231PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMADD231PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB132PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB132PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB213PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB213PSYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB231PDYm")>; +def: InstRW<[BWWriteResGroup124], (instregex "VFNMSUB231PSYm")>; + +def BWWriteResGroup125 : SchedWriteRes<[BWPort0]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[BWWriteResGroup125], (instregex "PCMPISTRIrr")>; +def: InstRW<[BWWriteResGroup125], (instregex "PCMPISTRM128rr")>; +def: InstRW<[BWWriteResGroup125], (instregex "VPCMPISTRIrr")>; +def: InstRW<[BWWriteResGroup125], (instregex "VPCMPISTRM128rr")>; + +def BWWriteResGroup126 : SchedWriteRes<[BWPort0,BWPort015]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup126], (instregex "VRCPPSYr")>; +def: InstRW<[BWWriteResGroup126], (instregex "VRSQRTPSYr")>; + +def BWWriteResGroup127 : SchedWriteRes<[BWPort1,BWPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[BWWriteResGroup127], (instregex "ROUNDPDm")>; +def: InstRW<[BWWriteResGroup127], (instregex "ROUNDPSm")>; +def: InstRW<[BWWriteResGroup127], (instregex "ROUNDSDm")>; +def: InstRW<[BWWriteResGroup127], (instregex "ROUNDSSm")>; +def: InstRW<[BWWriteResGroup127], (instregex "VROUNDPDm")>; +def: InstRW<[BWWriteResGroup127], (instregex "VROUNDPSm")>; +def: InstRW<[BWWriteResGroup127], (instregex "VROUNDSDm")>; +def: InstRW<[BWWriteResGroup127], (instregex "VROUNDSSm")>; + +def BWWriteResGroup128 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[BWWriteResGroup128], (instregex "VCVTDQ2PDYrm")>; + +def BWWriteResGroup129 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[BWWriteResGroup129], (instregex "VHADDPDYrm")>; +def: InstRW<[BWWriteResGroup129], (instregex "VHADDPSYrm")>; +def: InstRW<[BWWriteResGroup129], (instregex "VHSUBPDYrm")>; +def: InstRW<[BWWriteResGroup129], (instregex "VHSUBPSYrm")>; + +def BWWriteResGroup130 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156]> { + let Latency = 11; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,1,2]; +} +def: InstRW<[BWWriteResGroup130], (instregex "SHLD(16|32|64)mrCL")>; +def: InstRW<[BWWriteResGroup130], (instregex "SHRD(16|32|64)mrCL")>; + +def BWWriteResGroup131 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { + let Latency = 11; + let NumMicroOps = 7; + let ResourceCycles = [2,2,3]; +} +def: InstRW<[BWWriteResGroup131], (instregex "RCL(16|32|64)rCL")>; +def: InstRW<[BWWriteResGroup131], (instregex "RCR(16|32|64)rCL")>; + +def BWWriteResGroup132 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> { + let Latency = 11; + let 
+def BWWriteResGroup133 : SchedWriteRes<[BWPort06,BWPort0156]> {
+  let Latency = 11;
+  let NumMicroOps = 11;
+  let ResourceCycles = [2,9];
+}
+def: InstRW<[BWWriteResGroup133], (instregex "LOOPE")>;
+def: InstRW<[BWWriteResGroup133], (instregex "LOOPNE")>;
+
+def BWWriteResGroup134 : SchedWriteRes<[BWPort5,BWPort23]> {
+  let Latency = 12;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup134], (instregex "(V?)AES(DEC|ENC)(LAST)?rm")>;
+
+def BWWriteResGroup135 : SchedWriteRes<[BWPort1,BWPort23]> {
+  let Latency = 12;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup135], (instregex "ADD_FI(16|32)m")>;
+def: InstRW<[BWWriteResGroup135], (instregex "SUB(R?)_FI(16|32)m")>;
+def: InstRW<[BWWriteResGroup135], (instregex "VROUNDYP(D|S)m")>;
+
+def BWWriteResGroup136 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
+  let Latency = 12;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[BWWriteResGroup136], (instregex "(V?)MPSADBWrmi")>;
+
+def BWWriteResGroup137 : SchedWriteRes<[BWPort0]> {
+  let Latency = 13;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup137], (instregex "SQRT(PS|SS)r")>;
+
+def BWWriteResGroup138 : SchedWriteRes<[BWPort0,BWPort5,BWPort23]> {
+  let Latency = 13;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[BWWriteResGroup138], (instregex "VMPSADBWYrmi")>;
+
+def BWWriteResGroup139 : SchedWriteRes<[BWPort0]> {
+  let Latency = 14;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup139], (instregex "(V?)DIV(PD|SD)rr")>;
+def: InstRW<[BWWriteResGroup139], (instregex "VSQRT(PS|SS)r")>;
+
+def BWWriteResGroup140 : SchedWriteRes<[BWPort5]> {
+  let Latency = 14;
+  let NumMicroOps = 2;
+  let ResourceCycles = [2];
+}
+def: InstRW<[BWWriteResGroup140], (instregex "(V?)AESIMCrr")>;
+
+def BWWriteResGroup141 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
+  let Latency = 14;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup141], (instregex "MUL_FI(16|32)m")>;
+
+def BWWriteResGroup142 : SchedWriteRes<[BWPort0,BWPort1,BWPort5]> {
+  let Latency = 14;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup142], (instregex "DPPSrri")>;
+def: InstRW<[BWWriteResGroup142], (instregex "VDPPS(Y?)rri")>;
+
+def BWWriteResGroup143 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
+  let Latency = 14;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[BWWriteResGroup143], (instregex "(V?)DPPDrmi")>;
+
+def BWWriteResGroup144 : SchedWriteRes<[BWPort1,BWPort6,BWPort23,BWPort0156]> {
+  let Latency = 14;
+  let NumMicroOps = 8;
+  let ResourceCycles = [2,2,1,3];
+}
+def: InstRW<[BWWriteResGroup144], (instregex "LAR(16|32|64)rr")>;
+
+def BWWriteResGroup145 : SchedWriteRes<[BWPort1,BWPort06,BWPort15,BWPort0156]> {
+  let Latency = 14;
+  let NumMicroOps = 10;
+  let ResourceCycles = [2,3,1,4];
+}
+def: InstRW<[BWWriteResGroup145], (instregex "RCR8rCL")>;
+
+def BWWriteResGroup146 : SchedWriteRes<[BWPort0,BWPort1,BWPort6,BWPort0156]> {
+  let Latency = 14;
+  let NumMicroOps = 12;
+  let ResourceCycles = [2,1,4,5];
+}
+def: InstRW<[BWWriteResGroup146], (instregex "XCH_F")>;
+
+def BWWriteResGroup147 : SchedWriteRes<[BWPort0]> {
+  let Latency = 15;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup147], (instregex "DIVR_F(PrST0|ST0r|rST0)")>;
+
+def BWWriteResGroup148 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 15;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup148], (instregex "(V?)PMULLDrm")>;
+
+def BWWriteResGroup149 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
+  let Latency = 15;
+  let NumMicroOps = 10;
+  let ResourceCycles = [1,1,1,4,1,2];
+}
+def: InstRW<[BWWriteResGroup149], (instregex "RCL(8|16|32|64)mCL")>;
+
+def BWWriteResGroup150 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 16;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup150], (instregex "(V?)DIV(PS|SS)rm")>;
+
+def BWWriteResGroup151 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 16;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup151], (instregex "VPMULLDYrm")>;
+
+def BWWriteResGroup152 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 16;
+  let NumMicroOps = 4;
+  let ResourceCycles = [3,1];
+}
+def: InstRW<[BWWriteResGroup152], (instregex "(V?)PCMPISTRIrm")>;
+def: InstRW<[BWWriteResGroup152], (instregex "(V?)PCMPISTRM128rm")>;
+
+def BWWriteResGroup153 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
+  let Latency = 16;
+  let NumMicroOps = 14;
+  let ResourceCycles = [1,1,1,4,2,5];
+}
+def: InstRW<[BWWriteResGroup153], (instregex "CMPXCHG8B")>;
+
+def BWWriteResGroup154 : SchedWriteRes<[BWPort5]> {
+  let Latency = 16;
+  let NumMicroOps = 16;
+  let ResourceCycles = [16];
+}
+def: InstRW<[BWWriteResGroup154], (instregex "VZEROALL")>;
+
+def BWWriteResGroup155 : SchedWriteRes<[BWPort0,BWPort015]> {
+  let Latency = 17;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup155], (instregex "VDIVPSYrr")>;
+
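+// VRCPPSYm / VRSQRTPSYm: the 11-cycle register form (BWWriteResGroup126 above)
+// plus what appears to be a 6-cycle 256-bit load, giving 17.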
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup155], (instregex "VDIVPSYrr")>;
+
+def BWWriteResGroup156 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> {
+  let Latency = 17;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup156], (instregex "VRCPPSYm")>;
+def: InstRW<[BWWriteResGroup156], (instregex "VRSQRTPSYm")>;
+
+def BWWriteResGroup157 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 18;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup157], (instregex "SQRTPSm")>;
+def: InstRW<[BWWriteResGroup157], (instregex "SQRTSSm")>;
+
+def BWWriteResGroup158 : SchedWriteRes<[BWPort0,BWPort5,BWPort0156]> {
+  let Latency = 18;
+  let NumMicroOps = 8;
+  let ResourceCycles = [4,3,1];
+}
+def: InstRW<[BWWriteResGroup158], (instregex "PCMPESTRIrr")>;
+def: InstRW<[BWWriteResGroup158], (instregex "VPCMPESTRIrr")>;
+
+def BWWriteResGroup159 : SchedWriteRes<[BWPort5,BWPort6,BWPort06,BWPort0156]> {
+  let Latency = 18;
+  let NumMicroOps = 8;
+  let ResourceCycles = [1,1,1,5];
+}
+def: InstRW<[BWWriteResGroup159], (instregex "CPUID")>;
+def: InstRW<[BWWriteResGroup159], (instregex "RDTSC")>;
+
+def BWWriteResGroup160 : SchedWriteRes<[BWPort1,BWPort23,BWPort237,BWPort06,BWPort15,BWPort0156]> {
+  let Latency = 18;
+  let NumMicroOps = 11;
+  let ResourceCycles = [2,1,1,3,1,3];
+}
+def: InstRW<[BWWriteResGroup160], (instregex "RCR(16|32|64)mCL")>;
+def: InstRW<[BWWriteResGroup160], (instregex "RCR8mCL")>;
+
+def BWWriteResGroup161 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 19;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup161], (instregex "DIVPDrm")>;
+def: InstRW<[BWWriteResGroup161], (instregex "DIVSDrm")>;
+def: InstRW<[BWWriteResGroup161], (instregex "VDIVPDrm")>;
+def: InstRW<[BWWriteResGroup161], (instregex "VDIVSDrm")>;
+def: InstRW<[BWWriteResGroup161], (instregex "VSQRTPSm")>;
+def: InstRW<[BWWriteResGroup161], (instregex "VSQRTSSm")>;
+
+def BWWriteResGroup162 : SchedWriteRes<[BWPort5,BWPort23]> {
+  let Latency = 19;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup162], (instregex "AESIMCrm")>;
+def: InstRW<[BWWriteResGroup162], (instregex "VAESIMCrm")>;
+
+def BWWriteResGroup163 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
+  let Latency = 19;
+  let NumMicroOps = 5;
+  let ResourceCycles = [2,1,1,1];
+}
+def: InstRW<[BWWriteResGroup163], (instregex "DPPSrmi")>;
+def: InstRW<[BWWriteResGroup163], (instregex "VDPPSrmi")>;
+
+def BWWriteResGroup164 : SchedWriteRes<[BWPort0,BWPort5,BWPort015,BWPort0156]> {
+  let Latency = 19;
+  let NumMicroOps = 9;
+  let ResourceCycles = [4,3,1,1];
+}
+def: InstRW<[BWWriteResGroup164], (instregex "PCMPESTRM128rr")>;
+def: InstRW<[BWWriteResGroup164], (instregex "VPCMPESTRM128rr")>;
+
+def BWWriteResGroup165 : SchedWriteRes<[BWPort0]> {
+  let Latency = 20;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup165], (instregex "DIV_FPrST0")>;
+def: InstRW<[BWWriteResGroup165], (instregex "DIV_FST0r")>;
+def: InstRW<[BWWriteResGroup165], (instregex "DIV_FrST0")>;
+def: InstRW<[BWWriteResGroup165], (instregex "SQRTPDr")>;
+def: InstRW<[BWWriteResGroup165], (instregex "SQRTSDr")>;
+
+def BWWriteResGroup166 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> {
+  let Latency = 20;
+  let NumMicroOps = 5;
+  let ResourceCycles = [2,1,1,1];
+}
+def: InstRW<[BWWriteResGroup166], (instregex "VDPPSYrmi")>;
+
+def BWWriteResGroup167 : SchedWriteRes<[BWPort4,BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 20;
+  let NumMicroOps = 8;
+  let ResourceCycles = [1,1,1,1,1,1,2];
+}
+def: InstRW<[BWWriteResGroup167], (instregex "INSB")>;
+def: InstRW<[BWWriteResGroup167], (instregex "INSL")>;
+def: InstRW<[BWWriteResGroup167], (instregex "INSW")>;
+
+def BWWriteResGroup168 : SchedWriteRes<[BWPort0]> {
+  let Latency = 21;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[BWWriteResGroup168], (instregex "VSQRTPDr")>;
+def: InstRW<[BWWriteResGroup168], (instregex "VSQRTSDr")>;
+
+def BWWriteResGroup169 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 21;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup169], (instregex "DIV_F32m")>;
+def: InstRW<[BWWriteResGroup169], (instregex "DIV_F64m")>;
+
+def BWWriteResGroup170 : SchedWriteRes<[BWPort0,BWPort015]> {
+  let Latency = 21;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup170], (instregex "VSQRTPSYr")>;
+
+def BWWriteResGroup171 : SchedWriteRes<[BWPort0,BWPort4,BWPort5,BWPort23,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 21;
+  let NumMicroOps = 19;
+  let ResourceCycles = [2,1,4,1,1,4,6];
+}
+def: InstRW<[BWWriteResGroup171], (instregex "CMPXCHG16B")>;
+
+def BWWriteResGroup172 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
+  let Latency = 22;
+  let NumMicroOps = 18;
+  let ResourceCycles = [1,1,16];
+}
+def: InstRW<[BWWriteResGroup172], (instregex "POPF64")>;
+
+def BWWriteResGroup173 : SchedWriteRes<[BWPort0,BWPort015]> {
+  let Latency = 23;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup173], (instregex "VDIVPDYrr")>;
+
+def BWWriteResGroup174 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> {
+  let Latency = 23;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup174], (instregex "VDIVPSYrm")>;
+
+def BWWriteResGroup175 : SchedWriteRes<[BWPort0,BWPort5,BWPort23,BWPort0156]> {
+  let Latency = 23;
+  let NumMicroOps = 9;
+  let ResourceCycles = [4,3,1,1];
+}
+def: InstRW<[BWWriteResGroup175], (instregex "PCMPESTRIrm")>;
+def: InstRW<[BWWriteResGroup175], (instregex "VPCMPESTRIrm")>;
+
+def BWWriteResGroup176 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
+  let Latency = 23;
+  let NumMicroOps = 19;
+  let ResourceCycles = [3,1,15];
+}
+def: InstRW<[BWWriteResGroup176], (instregex "XRSTOR(64?)")>;
+
+def BWWriteResGroup177 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
+  let Latency = 24;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup177], (instregex "DIV_FI16m")>;
+def: InstRW<[BWWriteResGroup177], (instregex "DIV_FI32m")>;
+
+def BWWriteResGroup178 : SchedWriteRes<[BWPort0,BWPort5,BWPort23,BWPort015,BWPort0156]> {
+  let Latency = 24;
+  let NumMicroOps = 10;
+  let ResourceCycles = [4,3,1,1,1];
+}
+def: InstRW<[BWWriteResGroup178], (instregex "PCMPESTRM128rm")>;
+def: InstRW<[BWWriteResGroup178], (instregex "VPCMPESTRM128rm")>;
+
+def BWWriteResGroup179 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 25;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup179], (instregex "SQRTPDm")>;
+def: InstRW<[BWWriteResGroup179], (instregex "SQRTSDm")>;
+
+def BWWriteResGroup180 : SchedWriteRes<[BWPort0,BWPort23]> {
+  let Latency = 26;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[BWWriteResGroup180], (instregex "DIVR_F32m")>;
+def: InstRW<[BWWriteResGroup180], (instregex "DIVR_F64m")>;
+def: InstRW<[BWWriteResGroup180], (instregex "VSQRTPDm")>;
+def: InstRW<[BWWriteResGroup180], (instregex "VSQRTSDm")>;
+
+def BWWriteResGroup181 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> {
+  let Latency = 27;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup181], (instregex "VSQRTPSYm")>;
+
+def BWWriteResGroup182 : SchedWriteRes<[BWPort0,BWPort1,BWPort23]> {
+  let Latency = 29;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI16m")>;
+def: InstRW<[BWWriteResGroup182], (instregex "DIVR_FI32m")>;
+
+def BWWriteResGroup183 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> {
+  let Latency = 29;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup183], (instregex "VDIVPDYrm")>;
+
+def BWWriteResGroup183_1 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
+  let Latency = 22;
+  let NumMicroOps = 7;
+  let ResourceCycles = [1,3,2,1];
+}
+def: InstRW<[BWWriteResGroup183_1], (instregex "VGATHERQPDrm")>;
+
+def BWWriteResGroup183_2 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
+  let Latency = 23;
+  let NumMicroOps = 9;
+  let ResourceCycles = [1,3,4,1];
+}
+def: InstRW<[BWWriteResGroup183_2], (instregex "VGATHERQPDYrm")>;
+
+def BWWriteResGroup183_3 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
+  let Latency = 24;
+  let NumMicroOps = 9;
+  let ResourceCycles = [1,5,2,1];
+}
+def: InstRW<[BWWriteResGroup183_3], (instregex "VGATHERQPSYrm")>;
+
+def BWWriteResGroup183_4 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
+  let Latency = 25;
+  let NumMicroOps = 7;
+  let ResourceCycles = [1,3,2,1];
+}
+def: InstRW<[BWWriteResGroup183_4], (instregex "VGATHERDPDrm")>;
+def: InstRW<[BWWriteResGroup183_4], (instregex "VGATHERDPSrm")>;
+
+def BWWriteResGroup183_5 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
+  let Latency = 26;
+  let NumMicroOps = 9;
+  let ResourceCycles = [1,5,2,1];
+}
+def: InstRW<[BWWriteResGroup183_5], (instregex "VGATHERDPDYrm")>;
+
+def BWWriteResGroup183_6 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
+  let Latency = 26;
+  let NumMicroOps = 14;
+  let ResourceCycles = [1,4,8,1];
+}
+def: InstRW<[BWWriteResGroup183_6], (instregex "VGATHERDPSYrm")>;
+
+def BWWriteResGroup183_7 : SchedWriteRes<[BWPort4, BWPort5, BWPort23, BWPort0156]> {
+  let Latency = 27;
+  let NumMicroOps = 9;
+  let ResourceCycles = [1,5,2,1];
+}
+def: InstRW<[BWWriteResGroup183_7], (instregex "VGATHERQPSrm")>;
+
+def BWWriteResGroup184 : SchedWriteRes<[BWPort0,BWPort5,BWPort015]> {
+  let Latency = 29;
+  let NumMicroOps = 11;
+  let ResourceCycles = [2,7,2];
+}
+def: InstRW<[BWWriteResGroup184], (instregex "AESKEYGENASSIST128rr")>;
+def: InstRW<[BWWriteResGroup184], (instregex "VAESKEYGENASSIST128rr")>;
+
+def BWWriteResGroup185 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
+  let Latency = 29;
+  let NumMicroOps = 27;
+  let ResourceCycles = [1,5,1,1,19];
+}
+def: InstRW<[BWWriteResGroup185], (instregex "XSAVE64")>;
+
+def BWWriteResGroup186 : SchedWriteRes<[BWPort4,BWPort6,BWPort23,BWPort237,BWPort0156]> {
+  let Latency = 30;
+  let NumMicroOps = 28;
+  let ResourceCycles = [1,6,1,1,19];
+}
+def: InstRW<[BWWriteResGroup186], (instregex "XSAVE(OPT?)")>;
+
+def BWWriteResGroup187 : SchedWriteRes<[BWPort01,BWPort15,BWPort015,BWPort0156]> {
+  let Latency = 31;
+  let NumMicroOps = 31;
+  let ResourceCycles = [8,1,21,1];
+}
+def: InstRW<[BWWriteResGroup187], (instregex "MMX_EMMS")>;
+
+def BWWriteResGroup188 : SchedWriteRes<[BWPort0,BWPort5,BWPort23,BWPort015]> {
+  let Latency = 33;
+  let NumMicroOps = 11;
+  let ResourceCycles = [2,7,1,1];
+}
+def: InstRW<[BWWriteResGroup188], (instregex "AESKEYGENASSIST128rm")>;
+def: InstRW<[BWWriteResGroup188], (instregex "VAESKEYGENASSIST128rm")>;
+
+def BWWriteResGroup189 : SchedWriteRes<[BWPort0,BWPort015]> {
+  let Latency = 34;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[BWWriteResGroup189], (instregex "VSQRTPDYr")>;
+
+def BWWriteResGroup190 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> {
+  let Latency = 34;
+  let NumMicroOps = 8;
+  let ResourceCycles = [2,2,2,1,1];
+}
+def: InstRW<[BWWriteResGroup190], (instregex "DIV(16|32|64)m")>;
+def: InstRW<[BWWriteResGroup190], (instregex "DIV8m")>;
+
+def BWWriteResGroup191 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort06,BWPort0156]> {
+  let Latency = 34;
+  let NumMicroOps = 23;
+  let ResourceCycles = [1,5,3,4,10];
+}
+def: InstRW<[BWWriteResGroup191], (instregex "IN32ri")>;
+def: InstRW<[BWWriteResGroup191], (instregex "IN32rr")>;
+def: InstRW<[BWWriteResGroup191], (instregex "IN8ri")>;
+def: InstRW<[BWWriteResGroup191], (instregex "IN8rr")>;
+
+def BWWriteResGroup193 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156]> {
+  let Latency = 35;
+  let NumMicroOps = 8;
+  let ResourceCycles = [2,2,2,1,1];
+}
+def: InstRW<[BWWriteResGroup193], (instregex "IDIV(16|32|64)m")>;
+def: InstRW<[BWWriteResGroup193], (instregex "IDIV8m")>;
+
+def BWWriteResGroup194 : SchedWriteRes<[BWPort5,BWPort6,BWPort23,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 35;
+  let NumMicroOps = 23;
+  let ResourceCycles = [1,5,2,1,4,10];
+}
+def: InstRW<[BWWriteResGroup194], (instregex "OUT32ir")>;
+def: InstRW<[BWWriteResGroup194], (instregex "OUT32rr")>;
+def: InstRW<[BWWriteResGroup194], (instregex "OUT8ir")>;
+def: InstRW<[BWWriteResGroup194], (instregex "OUT8rr")>;
+
+def BWWriteResGroup195 : SchedWriteRes<[BWPort0,BWPort23,BWPort015]> {
+  let Latency = 40;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[BWWriteResGroup195], (instregex "VSQRTPDYm")>;
+
+def BWWriteResGroup196 : SchedWriteRes<[BWPort5,BWPort0156]> {
+  let Latency = 42;
+  let NumMicroOps = 22;
+  let ResourceCycles = [2,20];
+}
+def: InstRW<[BWWriteResGroup196], (instregex "RDTSCP")>;
+
+def BWWriteResGroup197 : SchedWriteRes<[BWPort0,BWPort01,BWPort23,BWPort05,BWPort06,BWPort015,BWPort0156]> {
+  let Latency = 60;
+  let NumMicroOps = 64;
+  let ResourceCycles = [2,2,8,1,10,2,39];
+}
+def: InstRW<[BWWriteResGroup197], (instregex "FLDENVm")>;
+def: InstRW<[BWWriteResGroup197], (instregex "FLDENVm")>;
+
+def BWWriteResGroup198 : SchedWriteRes<[BWPort0,BWPort6,BWPort23,BWPort05,BWPort06,BWPort15,BWPort0156]> {
+  let Latency = 63;
+  let NumMicroOps = 88;
+  let ResourceCycles = [4,4,31,1,2,1,45];
+}
+def: InstRW<[BWWriteResGroup198], (instregex "FXRSTOR64")>;
+
+def BWWriteResGroup199 : SchedWriteRes<[BWPort0,BWPort6,BWPort23,BWPort05,BWPort06,BWPort15,BWPort0156]> {
+  let Latency = 63;
+  let NumMicroOps = 90;
+  let ResourceCycles = [4,2,33,1,2,1,47];
+}
+def: InstRW<[BWWriteResGroup199], (instregex "FXRSTOR")>;
+
+def BWWriteResGroup200 : SchedWriteRes<[BWPort5,BWPort01,BWPort0156]> {
+  let Latency = 75;
+  let NumMicroOps = 15;
+  let ResourceCycles = [6,3,6];
+}
+def: InstRW<[BWWriteResGroup200], (instregex "FNINIT")>;
+
+def BWWriteResGroup201 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156]> {
+  let Latency = 80;
+  let NumMicroOps = 32;
+  let ResourceCycles = [7,7,3,3,1,11];
+}
+def: InstRW<[BWWriteResGroup201], (instregex "DIV(16|32|64)r")>;
+
+def BWWriteResGroup202 : SchedWriteRes<[BWPort0,BWPort1,BWPort4,BWPort5,BWPort6,BWPort237,BWPort06,BWPort0156]> {
+  let Latency = 115;
+  let NumMicroOps = 100;
+  let ResourceCycles = [9,9,11,8,1,11,21,30];
+}
+def: InstRW<[BWWriteResGroup202], (instregex "FSTENVm")>;
+def: InstRW<[BWWriteResGroup202], (instregex "FSTENVm")>;
+
+} // SchedModel
+
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index 3523601a4bd63..c2b188b8846a5 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -2099,9 +2099,9 @@ def: InstRW<[HWWriteResGroup18], (instregex "OR8rm")>;
 def: InstRW<[HWWriteResGroup18], (instregex "POP(16|32|64)r(mr?)")>;
 def: InstRW<[HWWriteResGroup18], (instregex "SUB(16|32|64)rm")>;
 def: InstRW<[HWWriteResGroup18], (instregex "SUB8rm")>;
-def: InstRW<[HWWriteResGroup18], (instregex "TEST(16|32|64)rm")>;
+def: InstRW<[HWWriteResGroup18], (instregex "TEST(16|32|64)mr")>;
 def: InstRW<[HWWriteResGroup18], (instregex "TEST8mi")>;
-def: InstRW<[HWWriteResGroup18], (instregex "TEST8rm")>;
+def: InstRW<[HWWriteResGroup18], (instregex "TEST8mr")>;
 def: InstRW<[HWWriteResGroup18], (instregex "XOR(16|32|64)rm")>;
 def: InstRW<[HWWriteResGroup18], (instregex "XOR8rm")>;
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index 2717a409ef6bc..593e9b33aacdf 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -2010,9 +2010,9 @@ def: InstRW<[SBWriteResGroup70], (instregex "SUB(16|32|64)mi8")>;
 def: InstRW<[SBWriteResGroup70], (instregex "SUB(16|32|64)mr")>;
 def: InstRW<[SBWriteResGroup70], (instregex "SUB8mi")>;
 def: InstRW<[SBWriteResGroup70], (instregex "SUB8mr")>;
-def: InstRW<[SBWriteResGroup70], (instregex "TEST(16|32|64)rm")>;
+def: InstRW<[SBWriteResGroup70], (instregex "TEST(16|32|64)mr")>;
 def: InstRW<[SBWriteResGroup70], (instregex "TEST8mi")>;
-def: InstRW<[SBWriteResGroup70], (instregex "TEST8rm")>;
+def: InstRW<[SBWriteResGroup70], (instregex "TEST8mr")>;
 def: InstRW<[SBWriteResGroup70], (instregex "XOR(16|32|64)mi8")>;
 def: InstRW<[SBWriteResGroup70], (instregex "XOR(16|32|64)mr")>;
 def: InstRW<[SBWriteResGroup70], (instregex "XOR8mi")>;
diff --git a/lib/Target/X86/X86SchedSkylakeClient.td b/lib/Target/X86/X86SchedSkylakeClient.td
index 2aaa56e02dd97..aabb45be87c15 100644
--- a/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/lib/Target/X86/X86SchedSkylakeClient.td
@@ -307,3705 +307,3904 @@ def : WriteRes { // Remaining instrs.
-def SKLWriteResGroup0 : SchedWriteRes<[SKLPort23]> {
+def SKLWriteResGroup1 : SchedWriteRes<[SKLPort0]> {
   let Latency = 1;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup0], (instregex "LDDQUrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "LD_F32m")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "LD_F64m")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "LD_F80m")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MMX_MOVD64from64rm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MMX_MOVD64rm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MMX_MOVD64to64rm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MMX_MOVQ64rm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOV(16|32|64)rm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOV64toPQIrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOV8rm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVAPDrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVAPSrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVDDUPrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVDI2PDIrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVDQArm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVDQUrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVNTDQArm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVSHDUPrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVSLDUPrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVSSrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVSX(16|32|64)rm16")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVSX(16|32|64)rm32")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVSX(16|32|64)rm8")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVUPDrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVUPSrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVZX(16|32|64)rm16")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "MOVZX(16|32|64)rm8")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "PREFETCHNTA")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "PREFETCHT0")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "PREFETCHT1")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "PREFETCHT2")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VBROADCASTF128")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VBROADCASTI128")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VBROADCASTSDYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VBROADCASTSSYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VBROADCASTSSrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VLDDQUYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VLDDQUrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOV64toPQIrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVAPDYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVAPDrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVAPSYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVAPSrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDDUPYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDDUPrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDI2PDIrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDQAYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDQArm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDQUYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVDQUrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVNTDQAYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVNTDQArm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVQI2PQIrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSDrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSHDUPYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSHDUPrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSLDUPYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSLDUPrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVSSrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVUPDYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVUPDrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVUPSYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VMOVUPSrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VPBROADCASTDYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VPBROADCASTDrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VPBROADCASTQYrm")>;
-def: InstRW<[SKLWriteResGroup0], (instregex "VPBROADCASTQrm")>;
-
-def SKLWriteResGroup1 : SchedWriteRes<[SKLPort4,SKLPort237]> {
-  let Latency = 1;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup1], (instregex "FBSTPm")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MMX_MOVD64from64rm")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MMX_MOVD64mr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MMX_MOVNTQmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MMX_MOVQ64mr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOV(16|32|64)mr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOV8mi")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOV8mr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVAPDmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVAPSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVDQAmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVDQUmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVHPDmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVHPSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVLPDmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVLPSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVNTDQmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVNTI_64mr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVNTImr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVNTPDmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVNTPSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVPDI2DImr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVPQI2QImr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVPQIto64mr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVSSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVUPDmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "MOVUPSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "ST_FP32m")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "ST_FP64m")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "ST_FP80m")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VEXTRACTF128mr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VEXTRACTI128mr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVAPDYmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVAPDmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVAPSYmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVAPSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVDQAYmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVDQAmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVDQUYmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVDQUmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVHPDmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVHPSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVLPDmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVLPSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTDQYmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTDQmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTPDYmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTPDmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTPSYmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVNTPSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVPDI2DImr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVPQI2QImr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVPQIto64mr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVSDmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVSSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVUPDYmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVUPDmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVUPSYmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMOVUPSmr")>;
-def: InstRW<[SKLWriteResGroup1], (instregex "VMPTRSTm")>;
-
-def SKLWriteResGroup2 : SchedWriteRes<[SKLPort0]> {
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDSBirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDSWirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDUSBirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PADDUSWirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PAVGBirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PAVGWirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPEQBirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPEQDirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPEQWirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPGTBirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPGTDirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PCMPGTWirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PMAXSWirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PMAXUBirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PMINSWirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PMINUBirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLDri")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLDrr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLQri")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLQrr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLWri")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSLLWrr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRADri")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRADrr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRAWri")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRAWrr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLDri")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLDrr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLQri")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLQrr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLWri")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSRLWrr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSUBSBirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSUBSWirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSUBUSBirr")>;
+def: InstRW<[SKLWriteResGroup1], (instregex "MMX_PSUBUSWirr")>;
+
+def SKLWriteResGroup2 : SchedWriteRes<[SKLPort1]> {
   let Latency = 1;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PADDSBirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PADDSWirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PADDUSBirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PADDUSWirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PAVGBirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PAVGWirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PCMPEQBirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PCMPEQDirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PCMPEQWirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PCMPGTBirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PCMPGTDirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PCMPGTWirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PMAXSWirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PMAXUBirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PMINSWirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PMINUBirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLDri")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLDrr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLQri")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLQrr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLWri")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSLLWrr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRADri")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRADrr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRAWri")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRAWrr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLDri")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLDrr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLQri")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLQrr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLWri")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSRLWrr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSUBSBirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSUBSWirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSUBUSBirr")>;
-def: InstRW<[SKLWriteResGroup2], (instregex "MMX_PSUBUSWirr")>;
-
-def SKLWriteResGroup3 : SchedWriteRes<[SKLPort1]> {
+def: InstRW<[SKLWriteResGroup2], (instregex "MMX_MASKMOVQ64")>;
+
+def SKLWriteResGroup3 : SchedWriteRes<[SKLPort5]> {
   let Latency = 1;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup3], (instregex "MMX_MASKMOVQ64")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PABSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PABSDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PABSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PADDSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PADDSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PADDUSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PADDUSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PAVGBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PAVGWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PCMPEQBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PCMPEQDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PCMPEQQrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PCMPEQWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PCMPGTBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PCMPGTDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PCMPGTWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMAXSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMAXSDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMAXSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMAXUBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMAXUDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMAXUWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMINSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMINSDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMINSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMINUBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMINUDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PMINUWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSIGNBrr128")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSIGNDrr128")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSIGNWrr128")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSLLDri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSLLQri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSLLWri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSRADri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSRAWri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSRLDri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSRLQri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSRLWri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSUBSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSUBSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSUBUSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "PSUBUSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPABSBYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPABSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPABSDYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPABSDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPABSWYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPABSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPADDSBYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPADDSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPADDSWYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPADDSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPADDUSBYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPADDUSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPADDUSWYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPADDUSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPAVGBYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPAVGBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPAVGWYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPAVGWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQBYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQDYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQQYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQQrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQWYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPEQWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTBYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTDYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTWYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPCMPGTWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSBYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSDYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSWYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUBYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUDYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUWYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMAXUWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSBYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSDYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSWYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINSWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUBYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUBrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUDYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUWYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPMINUWrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSIGNBYrr256")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSIGNBrr128")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSIGNDYrr256")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSIGNDrr128")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSIGNWYrr256")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSIGNWrr128")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLDYri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLDri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLQYri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLQri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLVDYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLVDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLVQYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLVQrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLWYri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLWri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRADYri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRADri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRAVDYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRAVDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRAWYri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRAWri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLDYri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLDri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLQYri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLQri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLVDYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLVDrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLVQYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLVQrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLWYri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLWri")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBSBYrr")>;
-def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBSBrr")>;
"VPSUBSWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBSWrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBUSBYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBUSBrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBUSWYrr")>; -def: InstRW<[SKLWriteResGroup3], (instregex "VPSUBUSWrr")>; - -def SKLWriteResGroup4 : SchedWriteRes<[SKLPort5]> { +def: InstRW<[SKLWriteResGroup3], (instregex "COMP_FST0r")>; +def: InstRW<[SKLWriteResGroup3], (instregex "COM_FST0r")>; +def: InstRW<[SKLWriteResGroup3], (instregex "INSERTPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_MOVD64rr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_MOVD64to64rr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PALIGNR64irr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PSHUFBrr64")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PSHUFWri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKHBWirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKHDQirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKHWDirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKLBWirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKLDQirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MMX_PUNPCKLWDirr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOV64toPQIrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVDDUPrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVDI2PDIrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVHLPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVLHPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVSDrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVSHDUPrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVSLDUPrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVUPDrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "MOVUPSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PACKSSDWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PACKSSWBrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PACKUSDWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PACKUSWBrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PALIGNRrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PBLENDWrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXBDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXBQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVSXWQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXBDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXBQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PMOVZXWQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PSHUFBrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PSHUFDri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PSHUFHWri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PSHUFLWri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PSLLDQri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PSRLDQri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKHBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKHDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKHQDQrr")>; 
+def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKHWDrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKLBWrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKLDQrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKLQDQrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "PUNPCKLWDrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "SHUFPDrri")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "SHUFPSrri")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "UCOM_FPr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "UCOM_Fr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "UNPCKHPDrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "UNPCKHPSrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "UNPCKLPDrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "UNPCKLPSrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VBROADCASTSSrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VINSERTPSrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOV64toPQIrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVDDUPYrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVDDUPrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVDI2PDIrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVHLPSrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVLHPSrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSDrr(_REV?)")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSHDUPYrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSHDUPrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSLDUPYrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVSLDUPrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPDYrr(_REV?)")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPDrr(_REV?)")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPSYrr(_REV?)")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VMOVUPSrr(_REV?)")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPACKSSDWYrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPACKSSDWrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPACKSSWBYrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPACKSSWBrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPACKUSDWYrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPACKUSDWrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPACKUSWBYrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPACKUSWBrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPALIGNRYrri")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPALIGNRrri")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPBLENDWYrri")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPBLENDWrri")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPBROADCASTDrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPBROADCASTQrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPDYri")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPDYrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPDri")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPDrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPSYri")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPSYrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPSri")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPERMILPSrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVSXBDrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVSXBQrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVSXBWrr")>;
+def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVSXDQrr")>;
"VPMOVSXWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVSXWQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVZXBDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVZXBQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVZXBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVZXDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVZXWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPMOVZXWQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFBYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFBrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFDYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFDri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFHWYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFHWri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFLWYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSHUFLWri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLDQYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSLLDQri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLDQYri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPSRLDQri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHBWYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHDQYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHQDQYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHQDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHWDYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKHWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLBWYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLBWrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLDQYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLQDQYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLQDQrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLWDYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VPUNPCKLWDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VSHUFPDYrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VSHUFPDrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VSHUFPSYrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VSHUFPSrri")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKHPDYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKHPDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKHPSYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKHPSrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKLPDYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKLPDrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKLPSYrr")>; +def: InstRW<[SKLWriteResGroup3], (instregex "VUNPCKLPSrr")>; + +def SKLWriteResGroup4 : SchedWriteRes<[SKLPort6]> { let Latency = 1; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup4], (instregex "COMP_FST0r")>; -def: InstRW<[SKLWriteResGroup4], (instregex "COM_FST0r")>; -def: InstRW<[SKLWriteResGroup4], (instregex "FINCSTP")>; -def: InstRW<[SKLWriteResGroup4], (instregex "FNOP")>; -def: InstRW<[SKLWriteResGroup4], (instregex "INSERTPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_MOVD64rr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_MOVD64to64rr")>; -def: InstRW<[SKLWriteResGroup4], (instregex 
"MMX_MOVQ64rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PABSBrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PABSDrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PABSWrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PADDBirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PADDDirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PADDQirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PADDWirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PALIGNR64irr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PANDNirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PANDirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PORirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSHUFBrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSHUFWri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSIGNBrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSIGNDrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSIGNWrr64")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSUBBirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSUBDirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSUBQirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PSUBWirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKHBWirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKHDQirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKHWDirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKLBWirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKLDQirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PUNPCKLWDirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MMX_PXORirr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOV64toPQIrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVDDUPrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVDI2PDIrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVHLPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVLHPSrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVSDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVSHDUPrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVSLDUPrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVUPDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "MOVUPSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PACKSSDWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PACKSSWBrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PACKUSDWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PACKUSWBrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PALIGNRrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PBLENDWrri")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVSXBDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVSXBQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVSXBWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVSXDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVSXWDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVSXWQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXBDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXBQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXBWrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXDQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXWDrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PMOVZXWQrr")>; -def: InstRW<[SKLWriteResGroup4], (instregex "PSHUFBrr")>; -def: 
-def: InstRW<[SKLWriteResGroup4], (instregex "PSHUFDri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PSHUFHWri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PSHUFLWri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PSLLDQri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PSRLDQri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKHBWrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKHDQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKHQDQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKHWDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKLBWrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKLDQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKLQDQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "PUNPCKLWDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "SHUFPDrri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "SHUFPSrri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "UCOM_FPr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "UCOM_Fr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "UNPCKHPDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "UNPCKHPSrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "UNPCKLPDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "UNPCKLPSrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VBROADCASTSSrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VINSERTPSrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOV64toPQIrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVDDUPYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVDDUPrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVDI2PDIrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVHLPSrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVLHPSrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVSDrr(_REV?)")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVSHDUPYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVSHDUPrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVSLDUPYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVSLDUPrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVUPDYrr(_REV?)")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVUPDrr(_REV?)")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVUPSYrr(_REV?)")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VMOVUPSrr(_REV?)")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPACKSSDWYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPACKSSDWrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPACKSSWBYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPACKSSWBrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPACKUSDWYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPACKUSDWrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPACKUSWBYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPACKUSWBrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPALIGNRYrri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPALIGNRrri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPBLENDWYrri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPBLENDWrri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPBROADCASTDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPBROADCASTQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPDYri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPDYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPDri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPSYri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPSYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPSri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPERMILPSrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXBDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXBQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXBWrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXDQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXWDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVSXWQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXBDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXBQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXBWrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXDQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXWDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPMOVZXWQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFBYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFBrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFDYri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFDri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFHWYri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFHWri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFLWYri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSHUFLWri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSLLDQYri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSLLDQri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSRLDQYri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPSRLDQri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHBWYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHBWrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHDQYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHDQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHQDQYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHQDQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHWDYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKHWDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLBWYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLBWrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLDQYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLDQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLQDQYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLQDQrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLWDYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VPUNPCKLWDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VSHUFPDYrri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VSHUFPDrri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VSHUFPSYrri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VSHUFPSrri")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKHPDYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKHPDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKHPSYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKHPSrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKLPDYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKLPDrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKLPSYrr")>;
-def: InstRW<[SKLWriteResGroup4], (instregex "VUNPCKLPSrr")>;
-
-def SKLWriteResGroup5 : SchedWriteRes<[SKLPort6]> {
+def: InstRW<[SKLWriteResGroup4], (instregex "JMP(16|32|64)r")>;
+
SKLWriteResGroup5 : SchedWriteRes<[SKLPort01]> { let Latency = 1; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup5], (instregex "ADC(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "ADC(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup5], (instregex "ADC8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup5], (instregex "ADCX32rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "ADCX64rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "ADOX32rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "ADOX64rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BT(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BT(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTC(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTC(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTR(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTR(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTS(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "BTS(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CDQ")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CLAC")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVAE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVB(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVG(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVGE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVL(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVLE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVNE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVNO(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVNP(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVNS(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVO(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVP(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CMOVS(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "CQO")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JAE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JAE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JA_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JA_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JBE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JBE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JB_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JB_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JGE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JGE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JG_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JG_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JLE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JLE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JL_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JL_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JMP(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JMP_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JMP_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNE_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNE_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNO_1")>; 
-def: InstRW<[SKLWriteResGroup5], (instregex "JNO_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNP_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNP_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNS_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JNS_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JO_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JO_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JP_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JP_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JS_1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "JS_4")>; -def: InstRW<[SKLWriteResGroup5], (instregex "RORX32ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "RORX64ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SAR(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SAR(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SAR8r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SAR8ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SARX32rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SARX64rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SBB(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SBB(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SBB8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETAEr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETBr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETEr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETGEr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETGr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETLEr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETLr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETNEr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETNOr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETNPr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETNSr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETOr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETPr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SETSr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHL(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHL(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHL8r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHL8ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHLX32rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHLX64rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHR(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHR(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHR8r1")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHR8ri")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHRX32rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "SHRX64rr")>; -def: InstRW<[SKLWriteResGroup5], (instregex "STAC")>; - -def SKLWriteResGroup6 : SchedWriteRes<[SKLPort15]> { +def: InstRW<[SKLWriteResGroup5], (instregex "PABSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PABSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PABSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PADDSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PADDSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PADDUSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PADDUSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PAVGBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PAVGWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPEQBrr")>; +def: InstRW<[SKLWriteResGroup5], 
(instregex "PCMPEQDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPEQQrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPEQWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPGTBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPGTDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PCMPGTWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXUBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXUDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMAXUWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINUBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINUDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PMINUWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSIGNBrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSIGNDrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSIGNWrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSLLDri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSLLQri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSLLWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSRADri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSRAWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSRLDri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSRLQri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSRLWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSUBSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSUBSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSUBUSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "PSUBUSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPABSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPABSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPABSDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPABSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPABSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPABSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDUSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDUSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDUSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPADDUSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPAVGBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPAVGBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPAVGWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPAVGWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQQYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQQrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPEQWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex 
"VPCMPGTBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPGTBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPGTDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPGTDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPGTWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPCMPGTWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXUBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXUBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXUDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXUDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXUWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMAXUWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPMINUWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNBYrr256")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNBrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNDYrr256")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNDrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNWYrr256")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSIGNWrr128")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLDYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLDri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLQYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLQri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLVDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLVDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLVQYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLVQrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLWYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSLLWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRADYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRADri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRAVDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRAVDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRAWYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRAWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLDYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLDri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLQYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLQri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLVDYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLVDrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLVQYrr")>; +def: 
InstRW<[SKLWriteResGroup5], (instregex "VPSRLVQrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLWYri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSRLWri")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBSWrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBUSBYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBUSBrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBUSWYrr")>; +def: InstRW<[SKLWriteResGroup5], (instregex "VPSUBUSWrr")>; + +def SKLWriteResGroup6 : SchedWriteRes<[SKLPort05]> { let Latency = 1; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup6], (instregex "ANDN32rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ANDN64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ANDNPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ANDNPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ANDPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ANDPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLENDPDrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLENDPSrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSI32rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSI64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSMSK32rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSMSK64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSR32rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BLSR64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BZHI32rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "BZHI64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "LEA(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVAPDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVAPSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVDQArr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVDQUrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVPQI2QIrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "MOVSSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ORPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "ORPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PADDBrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PADDDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PADDQrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PADDWrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PANDNrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PANDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PORrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PSUBBrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PSUBDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PSUBQrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PSUBWrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "PXORrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDNPDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDNPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDNPSYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDNPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDPDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDPSYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VANDPSrr")>; -def: 
InstRW<[SKLWriteResGroup6], (instregex "VBLENDPDYrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VBLENDPDrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VBLENDPSYrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VBLENDPSrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVAPDYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVAPDrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVAPSYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVAPSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVDQAYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVDQArr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVDQUYrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVDQUrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVPQI2QIrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVSSrr(_REV?)")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VMOVZPQILo2PQIrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VORPDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VORPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VORPSYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VORPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDBYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDBrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDQYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDQrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDWYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPADDWrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPANDNYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPANDNrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPANDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPANDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPBLENDDYrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPBLENDDrri")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPORYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPORrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBBYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBBrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBQYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBQrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBWYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPSUBWrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPXORYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VPXORrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VXORPDYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VXORPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VXORPSYrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "VXORPSrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "XORPDrr")>; -def: InstRW<[SKLWriteResGroup6], (instregex "XORPSrr")>; - -def SKLWriteResGroup7 : SchedWriteRes<[SKLPort0156]> { +def: InstRW<[SKLWriteResGroup6], (instregex "FINCSTP")>; +def: InstRW<[SKLWriteResGroup6], (instregex "FNOP")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PABSBrr64")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PABSDrr64")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PABSWrr64")>; +def: 
InstRW<[SKLWriteResGroup6], (instregex "MMX_PADDBirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PADDDirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PADDQirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PADDWirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PANDNirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PANDirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PORirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSIGNBrr64")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSIGNDrr64")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSIGNWrr64")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSUBBirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSUBDirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSUBQirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PSUBWirr")>; +def: InstRW<[SKLWriteResGroup6], (instregex "MMX_PXORirr")>; + +def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> { let Latency = 1; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup7], (instregex "ADD(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADD(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADD8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADD8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "ADD8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "AND(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "AND(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "AND8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "AND8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "AND8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CBW")>; -//def: InstRW<[SKLWriteResGroup7], (instregex "CDQE")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CLC")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMC")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMP(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMP(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMP8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMP8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CMP8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "CWDE")>; -def: InstRW<[SKLWriteResGroup7], (instregex "DEC(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "DEC8r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "INC(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "INC8r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "LAHF")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOV(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOV8ri(_alt?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOV8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOVSX(16|32|64)rr16")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOVSX(16|32|64)rr32")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOVSX(16|32|64)rr8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOVZX(16|32|64)rr16")>; -def: InstRW<[SKLWriteResGroup7], (instregex "MOVZX(16|32|64)rr8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "NEG(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "NEG8r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "NOOP")>; -def: InstRW<[SKLWriteResGroup7], (instregex "NOT(16|32|64)r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "NOT8r")>; -def: InstRW<[SKLWriteResGroup7], (instregex "OR(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex 
"OR(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "OR8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "OR8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "OR8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SAHF")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SGDT64m")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SIDT64m")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SLDT64m")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SMSW16m")>; -def: InstRW<[SKLWriteResGroup7], (instregex "STC")>; -def: InstRW<[SKLWriteResGroup7], (instregex "STRm")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SUB(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SUB(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SUB8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SUB8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SUB8rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "SYSCALL")>; -def: InstRW<[SKLWriteResGroup7], (instregex "TEST(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "TEST8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "TEST8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "TEST8rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "XCHG(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup7], (instregex "XOR(16|32|64)ri8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "XOR(16|32|64)rr(_REV?)")>; -def: InstRW<[SKLWriteResGroup7], (instregex "XOR8i8")>; -def: InstRW<[SKLWriteResGroup7], (instregex "XOR8ri")>; -def: InstRW<[SKLWriteResGroup7], (instregex "XOR8rr(_REV?)")>; - -def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0,SKLPort23]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PADDSBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PADDSWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PADDUSBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PADDUSWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PAVGBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PAVGWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPEQBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPEQDirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPEQWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPGTBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPGTDirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PCMPGTWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PMAXSWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PMAXUBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PMINSWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PMINUBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSLLDrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSLLQrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSLLWrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSRADrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSRAWrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSRLDrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSRLQrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSRLWrm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSUBSBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSUBSWirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSUBUSBirm")>; -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PSUBUSWirm")>; - 
-def SKLWriteResGroup13 : SchedWriteRes<[SKLPort0,SKLPort237]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MASKMOVQ64")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VMASKMOVDQU")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VMASKMOVPDYmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VMASKMOVPDmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VMASKMOVPSYmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VMASKMOVPSmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VPMASKMOVDYmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VPMASKMOVDmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VPMASKMOVQYmr")>; -def: InstRW<[SKLWriteResGroup13], (instregex "VPMASKMOVQmr")>; - -def SKLWriteResGroup14 : SchedWriteRes<[SKLPort5,SKLPort23]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup14], (instregex "FCOM32m")>; -def: InstRW<[SKLWriteResGroup14], (instregex "FCOM64m")>; -def: InstRW<[SKLWriteResGroup14], (instregex "FCOMP32m")>; -def: InstRW<[SKLWriteResGroup14], (instregex "FCOMP64m")>; -def: InstRW<[SKLWriteResGroup14], (instregex "INSERTPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PALIGNR64irm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PINSRWirmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PSHUFBrm64")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PSHUFWmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKHBWirm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKHDQirm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKHWDirm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKLBWirm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKLDQirm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MMX_PUNPCKLWDirm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MOVHPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MOVHPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MOVLPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "MOVLPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PACKSSDWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PACKSSWBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PACKUSDWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PACKUSWBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PALIGNRrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PBLENDWrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PINSRBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PINSRDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PINSRQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PINSRWrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXBDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXBQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVSXWQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXBDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXBQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PMOVZXWQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PSHUFBrm")>; 
-def: InstRW<[SKLWriteResGroup14], (instregex "PSHUFDmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PSHUFHWmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PSHUFLWmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKHBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKHDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKHQDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKHWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKLBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKLDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKLQDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "PUNPCKLWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "SHUFPDrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "SHUFPSrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "UNPCKHPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "UNPCKHPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "UNPCKLPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "UNPCKLPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VINSERTPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VMOVHPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VMOVHPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VMOVLPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VMOVLPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKSSDWYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKSSDWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKSSWBYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKSSWBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKUSDWYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKUSDWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKUSWBYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPACKUSWBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPALIGNRYrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPALIGNRrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPBLENDWYrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPBLENDWrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPBROADCASTBYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPBROADCASTBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPBROADCASTWYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPBROADCASTWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPDYmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPDYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPDmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPSYmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPSYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPSmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPERMILPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPINSRBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPINSRDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPINSRQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPINSRWrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXBDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXBQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVSXWQrm")>; -def: 
InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXBDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXBQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPMOVZXWQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFBYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFBrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFDYmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFDmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFHWYmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFHWmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFLWYmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPSHUFLWmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHBWYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHDQYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHQDQYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHQDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHWDYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKHWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLBWYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLBWrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLDQYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLQDQYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLQDQrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLWDYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VPUNPCKLWDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VSHUFPDYrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VSHUFPDrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VSHUFPSYrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VSHUFPSrmi")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKHPDYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKHPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKHPSYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKHPSrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKLPDYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKLPDrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKLPSYrm")>; -def: InstRW<[SKLWriteResGroup14], (instregex "VUNPCKLPSrm")>; - -def SKLWriteResGroup15 : SchedWriteRes<[SKLPort6,SKLPort23]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup15], (instregex "FARJMP64")>; -def: InstRW<[SKLWriteResGroup15], (instregex "JMP(16|32|64)m")>; - -def SKLWriteResGroup16 : SchedWriteRes<[SKLPort01,SKLPort23]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup16], (instregex "PABSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PABSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PABSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PADDSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PADDSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PADDUSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PADDUSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PAVGBrm")>; -def: 
InstRW<[SKLWriteResGroup16], (instregex "PAVGWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPEQBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPEQDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPEQQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPEQWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPGTBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPGTDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PCMPGTWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXUBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXUDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMAXUWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMINSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMINSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMINSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMINUBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMINUDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PMINUWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSIGNBrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSIGNDrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSIGNWrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSLLDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSLLQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSLLWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSRADrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSRAWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSRLDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSRLQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSRLWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSUBSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSUBSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSUBUSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "PSUBUSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPABSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDUSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDUSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDUSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPADDUSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPAVGBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPAVGBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPAVGWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPAVGWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQQYrm")>; -def: InstRW<[SKLWriteResGroup16], 
(instregex "VPCMPEQQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPEQWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPCMPGTWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMAXUWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINSDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINSDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPMINUWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNBYrm256")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNBrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNDYrm256")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNDrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNWYrm256")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSIGNWrm128")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLQYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLVDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLVDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLVQYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLVQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSLLWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRADYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRADrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRAVDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRAVDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRAWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRAWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLQYrm")>; -def: 
InstRW<[SKLWriteResGroup16], (instregex "VPSRLQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLVDYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLVDrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLVQYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLVQrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSRLWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBSWrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBUSBYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBUSBrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBUSWYrm")>; -def: InstRW<[SKLWriteResGroup16], (instregex "VPSUBUSWrm")>; - -def SKLWriteResGroup17 : SchedWriteRes<[SKLPort23,SKLPort05]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PABSBrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PABSDrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PABSWrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PADDBirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PADDDirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PADDQirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PADDWirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PANDNirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PANDirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PORirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSIGNBrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSIGNDrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSIGNWrm64")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSUBBirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSUBDirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSUBQirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PSUBWirm")>; -def: InstRW<[SKLWriteResGroup17], (instregex "MMX_PXORirm")>; - -def SKLWriteResGroup18 : SchedWriteRes<[SKLPort23,SKLPort06]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup18], (instregex "ADC(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "ADC8rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "ADCX32rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "ADCX64rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "ADOX32rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "ADOX64rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "BT(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVAE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVB(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVG(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVGE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVL(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVLE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVNE(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVNO(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVNP(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex 
"CMOVNS(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVO(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVP(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "CMOVS(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "RORX32mi")>; -def: InstRW<[SKLWriteResGroup18], (instregex "RORX64mi")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SARX32rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SARX64rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SBB(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SBB8rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SHLX32rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SHLX64rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SHRX32rm")>; -def: InstRW<[SKLWriteResGroup18], (instregex "SHRX64rm")>; - -def SKLWriteResGroup19 : SchedWriteRes<[SKLPort23,SKLPort15]> { +def: InstRW<[SKLWriteResGroup7], (instregex "ADC(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADC(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADC8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADCX32rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADCX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADOX32rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "ADOX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BTC(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BTC(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BTR(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BTR(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BTS(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup7], (instregex "BTS(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CDQ")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CLAC")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVAE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVB(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVG(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVGE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVL(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVLE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNO(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNP(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVNS(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVO(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVP(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CMOVS(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "CQO")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JAE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JAE_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JA_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JA_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JBE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JBE_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JB_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JB_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JE_4")>; +def: 
InstRW<[SKLWriteResGroup7], (instregex "JGE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JGE_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JG_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JG_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JLE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JLE_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JL_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JL_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JMP_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JMP_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNE_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNE_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNO_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNO_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNP_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNP_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNS_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JNS_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JO_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JO_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JP_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JP_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JS_1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "JS_4")>; +def: InstRW<[SKLWriteResGroup7], (instregex "RORX32ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "RORX64ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SAR(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SAR(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SAR8r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SAR8ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SARX32rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SARX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SBB(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SBB(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SBB8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETAEr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETBr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETEr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETGEr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETGr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETLEr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETLr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETNEr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETNOr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETNPr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETNSr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETOr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETPr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SETSr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHL(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHL(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHL8r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHL8ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHLX32rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHLX64rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHR(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHR(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHR8r1")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHR8ri")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHRX32rr")>; +def: InstRW<[SKLWriteResGroup7], (instregex "SHRX64rr")>; +def: 
InstRW<[SKLWriteResGroup7], (instregex "STAC")>; + +def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> { let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup19], (instregex "ANDN32rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "ANDN64rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSI32rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSI64rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSMSK32rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSMSK64rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSR32rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BLSR64rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BZHI32rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "BZHI64rm")>; -def: InstRW<[SKLWriteResGroup19], (instregex "MOVBE(16|32|64)rm")>; - -def SKLWriteResGroup20 : SchedWriteRes<[SKLPort23,SKLPort015]> { +def: InstRW<[SKLWriteResGroup8], (instregex "ANDN32rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "ANDN64rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSI32rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSI64rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSMSK32rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSMSK64rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSR32rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BLSR64rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BZHI32rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "BZHI64rr")>; +def: InstRW<[SKLWriteResGroup8], (instregex "LEA(16|32|64)r")>; + +def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> { let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup20], (instregex "ANDNPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "ANDNPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "ANDPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "ANDPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "BLENDPDrmi")>; -def: InstRW<[SKLWriteResGroup20], (instregex "BLENDPSrmi")>; -def: InstRW<[SKLWriteResGroup20], (instregex "ORPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "ORPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PADDBrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PADDDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PADDQrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PADDWrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PANDNrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PANDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PORrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PSUBBrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PSUBDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PSUBQrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PSUBWrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "PXORrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDNPDYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDNPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDNPSYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDNPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDPDYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDPDrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDPSYrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VANDPSrm")>; -def: InstRW<[SKLWriteResGroup20], (instregex "VBLENDPDYrmi")>; -def: 
-def: InstRW<[SKLWriteResGroup20], (instregex "VBLENDPDrmi")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VBLENDPSYrmi")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VBLENDPSrmi")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VINSERTF128rm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VINSERTI128rm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VMASKMOVPDYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VMASKMOVPDrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VMASKMOVPSYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VMASKMOVPSrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VORPDYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VORPDrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VORPSYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VORPSrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPADDBYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPADDBrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPADDDYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPADDDrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPADDQYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPADDQrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPADDWYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPADDWrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPANDNYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPANDNrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPANDYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPANDrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPBLENDDYrmi")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPBLENDDrmi")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPMASKMOVDYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPMASKMOVDrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPMASKMOVQYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPMASKMOVQrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPORYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPORrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBBYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBBrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBDYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBDrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBQYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBQrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBWYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPSUBWrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPXORYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VPXORrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VXORPDYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VXORPDrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VXORPSYrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "VXORPSrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "XORPDrm")>;
-def: InstRW<[SKLWriteResGroup20], (instregex "XORPSrm")>;
-
-def SKLWriteResGroup21 : SchedWriteRes<[SKLPort23,SKLPort0156]> {
+def: InstRW<[SKLWriteResGroup9], (instregex "ANDNPDrr")>;
+def: InstRW<[SKLWriteResGroup9], (instregex "ANDNPSrr")>;
+def: InstRW<[SKLWriteResGroup9], (instregex "ANDPDrr")>;
+def: InstRW<[SKLWriteResGroup9], (instregex "ANDPSrr")>;
+def: InstRW<[SKLWriteResGroup9], (instregex "BLENDPDrri")>;
+def: InstRW<[SKLWriteResGroup9], (instregex "BLENDPSrri")>;
+def: InstRW<[SKLWriteResGroup9], (instregex "MMX_MOVD64from64rr")>;
"MOVAPDrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVAPSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVDQArr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVDQUrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVPQI2QIrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "MOVSSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "ORPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "ORPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PADDBrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PADDDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PADDQrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PADDWrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PANDNrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PANDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PORrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PSUBBrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PSUBDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PSUBQrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PSUBWrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "PXORrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDNPDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDNPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDNPSYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDNPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDPDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDPSYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VANDPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPDYrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPDrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPSYrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VBLENDPSrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPDYrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPDrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPSYrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVAPSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQAYrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQArr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQUYrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVDQUrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVPQI2QIrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVSSrr(_REV?)")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VMOVZPQILo2PQIrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VORPDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VORPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VORPSYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VORPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDBYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDBrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDQYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDQrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDWYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPADDWrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPANDNYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPANDNrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPANDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex 
"VPANDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPBLENDDYrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPBLENDDrri")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPORYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPORrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBBYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBBrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBQYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBQrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBWYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPSUBWrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPXORYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VPXORrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VXORPDYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VXORPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VXORPSYrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "VXORPSrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "XORPDrr")>; +def: InstRW<[SKLWriteResGroup9], (instregex "XORPSrr")>; + +def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> { let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup21], (instregex "ADD(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "ADD8rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "AND(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "AND8rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP8mi")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP8mr")>; -def: InstRW<[SKLWriteResGroup21], (instregex "CMP8rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "OR(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "OR8rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "POP(16|32|64)r(mr?)")>; -def: InstRW<[SKLWriteResGroup21], (instregex "SUB(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "SUB8rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "TEST(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "TEST8mi")>; -def: InstRW<[SKLWriteResGroup21], (instregex "TEST8rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "XOR(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup21], (instregex "XOR8rm")>; - -def SKLWriteResGroup22 : SchedWriteRes<[SKLPort237,SKLPort0156]> { +def: InstRW<[SKLWriteResGroup10], (instregex "ADD(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "ADD(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "ADD8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "ADD8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "ADD8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "AND8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CBW")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CLC")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMC")>; +def: InstRW<[SKLWriteResGroup10], 
(instregex "CMP(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CMP8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "CWDE")>; +def: InstRW<[SKLWriteResGroup10], (instregex "DEC(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "DEC8r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "INC(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "INC8r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "LAHF")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOV(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOV8ri(_alt?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOV8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOVSX(16|32|64)rr16")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOVSX(16|32|64)rr32")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOVSX(16|32|64)rr8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOVZX(16|32|64)rr16")>; +def: InstRW<[SKLWriteResGroup10], (instregex "MOVZX(16|32|64)rr8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "NEG(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "NEG8r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "NOOP")>; +def: InstRW<[SKLWriteResGroup10], (instregex "NOT(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "NOT8r")>; +def: InstRW<[SKLWriteResGroup10], (instregex "OR(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "OR(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "OR8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "OR8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "OR8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SAHF")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SGDT64m")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SIDT64m")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SLDT64m")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SMSW16m")>; +def: InstRW<[SKLWriteResGroup10], (instregex "STC")>; +def: InstRW<[SKLWriteResGroup10], (instregex "STRm")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SUB8rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "SYSCALL")>; +def: InstRW<[SKLWriteResGroup10], (instregex "TEST(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup10], (instregex "TEST8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "TEST8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "TEST8rr")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XCHG(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR(16|32|64)ri8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR(16|32|64)rr(_REV?)")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR8i8")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR8ri")>; +def: InstRW<[SKLWriteResGroup10], (instregex "XOR8rr(_REV?)")>; + +def SKLWriteResGroup11 : SchedWriteRes<[SKLPort4,SKLPort237]> { let Latency = 1; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup22], (instregex "SFENCE")>; - -def SKLWriteResGroup23 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> { - let Latency = 1; - let NumMicroOps = 3; 
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup23], (instregex "EXTRACTPSmr")>;
-def: InstRW<[SKLWriteResGroup23], (instregex "PEXTRBmr")>;
-def: InstRW<[SKLWriteResGroup23], (instregex "PEXTRDmr")>;
-def: InstRW<[SKLWriteResGroup23], (instregex "PEXTRQmr")>;
-def: InstRW<[SKLWriteResGroup23], (instregex "PEXTRWmr")>;
-def: InstRW<[SKLWriteResGroup23], (instregex "STMXCSR")>;
-def: InstRW<[SKLWriteResGroup23], (instregex "VEXTRACTPSmr")>;
-def: InstRW<[SKLWriteResGroup23], (instregex "VPEXTRBmr")>;
-def: InstRW<[SKLWriteResGroup23], (instregex "VPEXTRDmr")>;
-def: InstRW<[SKLWriteResGroup23], (instregex "VPEXTRQmr")>;
-def: InstRW<[SKLWriteResGroup23], (instregex "VPEXTRWmr")>;
-def: InstRW<[SKLWriteResGroup23], (instregex "VSTMXCSR")>;
-
-def SKLWriteResGroup24 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> {
-  let Latency = 1;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup24], (instregex "FNSTCW16m")>;
-
-def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> {
-  let Latency = 1;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup25], (instregex "SETAEm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETBm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETEm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETGEm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETGm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETLEm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETLm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETNEm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETNOm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETNPm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETNSm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETOm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETPm")>;
-def: InstRW<[SKLWriteResGroup25], (instregex "SETSm")>;
-
-def SKLWriteResGroup26 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort15]> {
-  let Latency = 1;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup26], (instregex "MOVBE(16|32|64)mr")>;
-
-def SKLWriteResGroup27 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> {
-  let Latency = 1;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup27], (instregex "PUSH(16|32|64)r(mr?)")>;
-def: InstRW<[SKLWriteResGroup27], (instregex "PUSH64i8")>;
-def: InstRW<[SKLWriteResGroup27], (instregex "STOSB")>;
-def: InstRW<[SKLWriteResGroup27], (instregex "STOSL")>;
-def: InstRW<[SKLWriteResGroup27], (instregex "STOSQ")>;
-def: InstRW<[SKLWriteResGroup27], (instregex "STOSW")>;
-
-def SKLWriteResGroup28 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
-  let Latency = 1;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup28], (instregex "BTC(16|32|64)mi8")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "BTR(16|32|64)mi8")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "BTS(16|32|64)mi8")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SAR(16|32|64)m1")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SAR(16|32|64)mi")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SAR8m1")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SAR8mi")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SHL(16|32|64)m1")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SHL(16|32|64)mi")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SHL8m1")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SHL8mi")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SHR(16|32|64)m1")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SHR(16|32|64)mi")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SHR8m1")>;
-def: InstRW<[SKLWriteResGroup28], (instregex "SHR8mi")>;
-
-def SKLWriteResGroup29 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
-  let Latency = 1;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
-}
-def: InstRW<[SKLWriteResGroup29], (instregex "ADD(16|32|64)mi8")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "ADD(16|32|64)mr")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "ADD8mi")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "ADD8mr")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "AND(16|32|64)mi8")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "AND(16|32|64)mr")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "AND8mi")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "AND8mr")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "DEC(16|32|64)m")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "DEC8m")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "INC(16|32|64)m")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "INC8m")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "NEG(16|32|64)m")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "NEG8m")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "NOT(16|32|64)m")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "NOT8m")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "OR(16|32|64)mi8")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "OR(16|32|64)mr")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "OR8mi")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "OR8mr")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "POP(16|32|64)rmm")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "PUSH(16|32|64)rmm")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "SUB(16|32|64)mi8")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "SUB(16|32|64)mr")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "SUB8mi")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "SUB8mr")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "XOR(16|32|64)mi8")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "XOR(16|32|64)mr")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "XOR8mi")>;
-def: InstRW<[SKLWriteResGroup29], (instregex "XOR8mr")>;
-
-def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0]> {
+def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MMX_MOVD64from64rm")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MMX_MOVD64mr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MMX_MOVNTQmr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MMX_MOVQ64mr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOV(16|32|64)mr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOV8mi")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOV8mr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOVAPDmr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOVAPSmr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOVDQAmr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOVDQUmr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOVHPDmr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOVHPSmr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOVLPDmr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOVLPSmr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOVNTDQmr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOVNTI_64mr")>;
+def: InstRW<[SKLWriteResGroup11], (instregex "MOVNTImr")>;
(instregex "MOVNTPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVNTPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVPDI2DImr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVPQI2QImr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVPQIto64mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVSSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVUPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "MOVUPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "ST_FP32m")>; +def: InstRW<[SKLWriteResGroup11], (instregex "ST_FP64m")>; +def: InstRW<[SKLWriteResGroup11], (instregex "ST_FP80m")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VEXTRACTF128mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VEXTRACTI128mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVAPDYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVAPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVAPSYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVAPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVDQAYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVDQAmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVDQUYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVDQUmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVHPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVHPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVLPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVLPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTDQYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTDQmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTPDYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTPSYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVNTPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVPDI2DImr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVPQI2QImr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVPQIto64mr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVSDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVSSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVUPDYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVUPDmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVUPSYmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMOVUPSmr")>; +def: InstRW<[SKLWriteResGroup11], (instregex "VMPTRSTm")>; + +def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0]> { let Latency = 2; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup31], (instregex "COMISDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "COMISSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MMX_MOVD64grr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MMX_PMOVMSKBrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MOVMSKPDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MOVMSKPSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MOVPDI2DIrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "MOVPQIto64rr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "PMOVMSKBrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "UCOMISDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "UCOMISSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VCOMISDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VCOMISSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex 
"VMOVMSKPDYrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VMOVMSKPDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VMOVMSKPSYrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VMOVMSKPSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VMOVPDI2DIrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VMOVPQIto64rr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VPMOVMSKBYrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VPMOVMSKBrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VTESTPDYrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VTESTPDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VTESTPSYrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VTESTPSrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VUCOMISDrr")>; -def: InstRW<[SKLWriteResGroup31], (instregex "VUCOMISSrr")>; - -def SKLWriteResGroup32 : SchedWriteRes<[SKLPort5]> { +def: InstRW<[SKLWriteResGroup12], (instregex "COMISDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "COMISSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64grr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MMX_PMOVMSKBrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MOVMSKPDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MOVMSKPSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MOVPDI2DIrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "MOVPQIto64rr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "PMOVMSKBrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "UCOMISDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "UCOMISSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VCOMISDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VCOMISSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVMSKPDYrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVMSKPDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVMSKPSYrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVMSKPSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVPDI2DIrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VMOVPQIto64rr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VPMOVMSKBYrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VPMOVMSKBrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VTESTPDYrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VTESTPDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VTESTPSYrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VTESTPSrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VUCOMISDrr")>; +def: InstRW<[SKLWriteResGroup12], (instregex "VUCOMISSrr")>; + +def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup32], (instregex "MMX_MOVQ2DQrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "MMX_PINSRWirri")>; -def: InstRW<[SKLWriteResGroup32], (instregex "PINSRBrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "PINSRDrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "PINSRQrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "PINSRWrri")>; -def: InstRW<[SKLWriteResGroup32], (instregex "VPINSRBrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "VPINSRDrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "VPINSRQrr")>; -def: InstRW<[SKLWriteResGroup32], (instregex "VPINSRWrri")>; - -def SKLWriteResGroup33 : SchedWriteRes<[SKLPort05]> { +def: InstRW<[SKLWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex 
"MMX_PINSRWirri")>; +def: InstRW<[SKLWriteResGroup13], (instregex "PINSRBrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "PINSRDrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "PINSRQrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "PINSRWrri")>; +def: InstRW<[SKLWriteResGroup13], (instregex "VPINSRBrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "VPINSRDrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "VPINSRQrr")>; +def: InstRW<[SKLWriteResGroup13], (instregex "VPINSRWrri")>; + +def SKLWriteResGroup14 : SchedWriteRes<[SKLPort05]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup33], (instregex "FDECSTP")>; -def: InstRW<[SKLWriteResGroup33], (instregex "MMX_MOVDQ2Qrr")>; +def: InstRW<[SKLWriteResGroup14], (instregex "FDECSTP")>; +def: InstRW<[SKLWriteResGroup14], (instregex "MMX_MOVDQ2Qrr")>; -def SKLWriteResGroup34 : SchedWriteRes<[SKLPort06]> { +def SKLWriteResGroup15 : SchedWriteRes<[SKLPort06]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup34], (instregex "CMOVA(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup34], (instregex "CMOVBE(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROL(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROL(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROL8r1")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROL8ri")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROR(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROR(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROR8r1")>; -def: InstRW<[SKLWriteResGroup34], (instregex "ROR8ri")>; -def: InstRW<[SKLWriteResGroup34], (instregex "SETAr")>; -def: InstRW<[SKLWriteResGroup34], (instregex "SETBEr")>; - -def SKLWriteResGroup35 : SchedWriteRes<[SKLPort015]> { +def: InstRW<[SKLWriteResGroup15], (instregex "CMOVA(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup15], (instregex "CMOVBE(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROL(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROL(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROL8r1")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROL8ri")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROR(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROR(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROR8r1")>; +def: InstRW<[SKLWriteResGroup15], (instregex "ROR8ri")>; +def: InstRW<[SKLWriteResGroup15], (instregex "SETAr")>; +def: InstRW<[SKLWriteResGroup15], (instregex "SETBEr")>; + +def SKLWriteResGroup16 : SchedWriteRes<[SKLPort015]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup35], (instregex "BLENDVPDrr0")>; -def: InstRW<[SKLWriteResGroup35], (instregex "BLENDVPSrr0")>; -def: InstRW<[SKLWriteResGroup35], (instregex "PBLENDVBrr0")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VBLENDVPDYrr")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VBLENDVPDrr")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VBLENDVPSYrr")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VBLENDVPSrr")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VPBLENDVBYrr")>; -def: InstRW<[SKLWriteResGroup35], (instregex "VPBLENDVBrr")>; - -def SKLWriteResGroup36 : SchedWriteRes<[SKLPort0156]> { +def: InstRW<[SKLWriteResGroup16], (instregex "BLENDVPDrr0")>; +def: InstRW<[SKLWriteResGroup16], (instregex "BLENDVPSrr0")>; +def: InstRW<[SKLWriteResGroup16], (instregex 
"PBLENDVBrr0")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VBLENDVPDYrr")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VBLENDVPDrr")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VBLENDVPSYrr")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VBLENDVPSrr")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VPBLENDVBYrr")>; +def: InstRW<[SKLWriteResGroup16], (instregex "VPBLENDVBrr")>; + +def SKLWriteResGroup17 : SchedWriteRes<[SKLPort0156]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup36], (instregex "LFENCE")>; -def: InstRW<[SKLWriteResGroup36], (instregex "WAIT")>; -def: InstRW<[SKLWriteResGroup36], (instregex "XGETBV")>; +def: InstRW<[SKLWriteResGroup17], (instregex "LFENCE")>; +def: InstRW<[SKLWriteResGroup17], (instregex "WAIT")>; +def: InstRW<[SKLWriteResGroup17], (instregex "XGETBV")>; -def SKLWriteResGroup37 : SchedWriteRes<[SKLPort0,SKLPort23]> { +def SKLWriteResGroup18 : SchedWriteRes<[SKLPort0,SKLPort237]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup37], (instregex "COMISDrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "COMISSrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "UCOMISDrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "UCOMISSrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VCOMISDrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VCOMISSrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VTESTPDYrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VTESTPDrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VTESTPSYrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VTESTPSrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VUCOMISDrm")>; -def: InstRW<[SKLWriteResGroup37], (instregex "VUCOMISSrm")>; - -def SKLWriteResGroup38 : SchedWriteRes<[SKLPort5,SKLPort01]> { +def: InstRW<[SKLWriteResGroup18], (instregex "MMX_MASKMOVQ64")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVDQU")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPDYmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPDmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPSYmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPSmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VPMASKMOVDYmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VPMASKMOVDmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VPMASKMOVQYmr")>; +def: InstRW<[SKLWriteResGroup18], (instregex "VPMASKMOVQmr")>; + +def SKLWriteResGroup19 : SchedWriteRes<[SKLPort5,SKLPort01]> { let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup38], (instregex "PSLLDrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSLLQrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSLLWrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSRADrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSRAWrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSRLDrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSRLQrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "PSRLWrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSLLDrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSLLQrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSLLWrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSRADrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSRAWrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSRLDrr")>; -def: InstRW<[SKLWriteResGroup38], (instregex "VPSRLQrr")>; -def: 
-def: InstRW<[SKLWriteResGroup38], (instregex "VPSRLWrr")>;
-
-def SKLWriteResGroup39 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
+def: InstRW<[SKLWriteResGroup19], (instregex "PSLLDrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "PSLLQrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "PSLLWrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "PSRADrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "PSRAWrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "PSRLDrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "PSRLQrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "PSRLWrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "VPSLLDrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "VPSLLQrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "VPSLLWrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "VPSRADrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "VPSRAWrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "VPSRLDrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "VPSRLQrr")>;
+def: InstRW<[SKLWriteResGroup19], (instregex "VPSRLWrr")>;
+
+def SKLWriteResGroup20 : SchedWriteRes<[SKLPort6,SKLPort0156]> {
   let Latency = 2;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup39], (instregex "CLFLUSH")>;
+def: InstRW<[SKLWriteResGroup20], (instregex "CLFLUSH")>;

-def SKLWriteResGroup40 : SchedWriteRes<[SKLPort06,SKLPort15]> {
+def SKLWriteResGroup21 : SchedWriteRes<[SKLPort237,SKLPort0156]> {
   let Latency = 2;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup40], (instregex "BEXTR32rr")>;
-def: InstRW<[SKLWriteResGroup40], (instregex "BEXTR64rr")>;
-def: InstRW<[SKLWriteResGroup40], (instregex "BSWAP(16|32|64)r")>;
+def: InstRW<[SKLWriteResGroup21], (instregex "SFENCE")>;

-def SKLWriteResGroup41 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
+def SKLWriteResGroup22 : SchedWriteRes<[SKLPort06,SKLPort15]> {
   let Latency = 2;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup41], (instregex "ADC8i8")>;
-def: InstRW<[SKLWriteResGroup41], (instregex "ADC8ri")>;
-def: InstRW<[SKLWriteResGroup41], (instregex "CWD")>;
-def: InstRW<[SKLWriteResGroup41], (instregex "JRCXZ")>;
-def: InstRW<[SKLWriteResGroup41], (instregex "SBB8i8")>;
-def: InstRW<[SKLWriteResGroup41], (instregex "SBB8ri")>;
-
-def SKLWriteResGroup42 : SchedWriteRes<[SKLPort5,SKLPort23]> {
-  let Latency = 2;
-  let NumMicroOps = 3;
-  let ResourceCycles = [2,1];
-}
-def: InstRW<[SKLWriteResGroup42], (instregex "MMX_PACKSSDWirm")>;
-def: InstRW<[SKLWriteResGroup42], (instregex "MMX_PACKSSWBirm")>;
-def: InstRW<[SKLWriteResGroup42], (instregex "MMX_PACKUSWBirm")>;
-
-def SKLWriteResGroup43 : SchedWriteRes<[SKLPort23,SKLPort06]> {
-  let Latency = 2;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,2];
-}
-def: InstRW<[SKLWriteResGroup43], (instregex "CMOVA(16|32|64)rm")>;
-def: InstRW<[SKLWriteResGroup43], (instregex "CMOVBE(16|32|64)rm")>;
-
-def SKLWriteResGroup44 : SchedWriteRes<[SKLPort23,SKLPort015]> {
-  let Latency = 2;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,2];
-}
-def: InstRW<[SKLWriteResGroup44], (instregex "BLENDVPDrm0")>;
-def: InstRW<[SKLWriteResGroup44], (instregex "BLENDVPSrm0")>;
-def: InstRW<[SKLWriteResGroup44], (instregex "PBLENDVBrm0")>;
-def: InstRW<[SKLWriteResGroup44], (instregex "VBLENDVPDYrm")>;
-def: InstRW<[SKLWriteResGroup44], (instregex "VBLENDVPDrm")>;
-def: InstRW<[SKLWriteResGroup44], (instregex "VBLENDVPSYrm")>;
-def: InstRW<[SKLWriteResGroup44], (instregex "VBLENDVPSrm")>;
-def: InstRW<[SKLWriteResGroup44], (instregex "VPBLENDVBYrm")>;
-def: InstRW<[SKLWriteResGroup44], (instregex "VPBLENDVBrm")>;
-
-def SKLWriteResGroup45 : SchedWriteRes<[SKLPort23,SKLPort0156]> {
-  let Latency = 2;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,2];
-}
-def: InstRW<[SKLWriteResGroup45], (instregex "LEAVE64")>;
-def: InstRW<[SKLWriteResGroup45], (instregex "SCASB")>;
-def: InstRW<[SKLWriteResGroup45], (instregex "SCASL")>;
-def: InstRW<[SKLWriteResGroup45], (instregex "SCASQ")>;
-def: InstRW<[SKLWriteResGroup45], (instregex "SCASW")>;
+def: InstRW<[SKLWriteResGroup22], (instregex "BEXTR32rr")>;
+def: InstRW<[SKLWriteResGroup22], (instregex "BEXTR64rr")>;
+def: InstRW<[SKLWriteResGroup22], (instregex "BSWAP(16|32|64)r")>;

-def SKLWriteResGroup46 : SchedWriteRes<[SKLPort237,SKLPort0156]> {
+def SKLWriteResGroup23 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
   let Latency = 2;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,2];
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup46], (instregex "MFENCE")>;
+def: InstRW<[SKLWriteResGroup23], (instregex "ADC8i8")>;
+def: InstRW<[SKLWriteResGroup23], (instregex "ADC8ri")>;
+def: InstRW<[SKLWriteResGroup23], (instregex "CWD")>;
+def: InstRW<[SKLWriteResGroup23], (instregex "JRCXZ")>;
+def: InstRW<[SKLWriteResGroup23], (instregex "SBB8i8")>;
+def: InstRW<[SKLWriteResGroup23], (instregex "SBB8ri")>;

-def SKLWriteResGroup47 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> {
+def SKLWriteResGroup24 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> {
   let Latency = 2;
   let NumMicroOps = 3;
   let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SKLWriteResGroup47], (instregex "FNSTSWm")>;
-
-def SKLWriteResGroup48 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05]> {
+def: InstRW<[SKLWriteResGroup24], (instregex "EXTRACTPSmr")>;
+def: InstRW<[SKLWriteResGroup24], (instregex "PEXTRBmr")>;
+def: InstRW<[SKLWriteResGroup24], (instregex "PEXTRDmr")>;
+def: InstRW<[SKLWriteResGroup24], (instregex "PEXTRQmr")>;
+def: InstRW<[SKLWriteResGroup24], (instregex "PEXTRWmr")>;
+def: InstRW<[SKLWriteResGroup24], (instregex "STMXCSR")>;
+def: InstRW<[SKLWriteResGroup24], (instregex "VEXTRACTPSmr")>;
+def: InstRW<[SKLWriteResGroup24], (instregex "VPEXTRBmr")>;
+def: InstRW<[SKLWriteResGroup24], (instregex "VPEXTRDmr")>;
+def: InstRW<[SKLWriteResGroup24], (instregex "VPEXTRQmr")>;
+def: InstRW<[SKLWriteResGroup24], (instregex "VPEXTRWmr")>;
+def: InstRW<[SKLWriteResGroup24], (instregex "VSTMXCSR")>;
+
+def SKLWriteResGroup25 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237]> {
   let Latency = 2;
   let NumMicroOps = 3;
   let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SKLWriteResGroup48], (instregex "FLDCW16m")>;
+def: InstRW<[SKLWriteResGroup25], (instregex "FNSTCW16m")>;

-def SKLWriteResGroup49 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort0156]> {
+def SKLWriteResGroup26 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> {
   let Latency = 2;
   let NumMicroOps = 3;
   let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SKLWriteResGroup49], (instregex "LDMXCSR")>;
-def: InstRW<[SKLWriteResGroup49], (instregex "VLDMXCSR")>;
-
-def SKLWriteResGroup51 : SchedWriteRes<[SKLPort6,SKLPort23,SKLPort0156]> {
+def: InstRW<[SKLWriteResGroup26], (instregex "SETAEm")>;
+def: InstRW<[SKLWriteResGroup26], (instregex "SETBm")>;
+def: InstRW<[SKLWriteResGroup26], (instregex "SETEm")>;
+def: InstRW<[SKLWriteResGroup26], (instregex "SETGEm")>;
+def: InstRW<[SKLWriteResGroup26], (instregex "SETGm")>;
+def: InstRW<[SKLWriteResGroup26], (instregex "SETLEm")>;
(instregex "SETLm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETNEm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETNOm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETNPm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETNSm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETOm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETPm")>; +def: InstRW<[SKLWriteResGroup26], (instregex "SETSm")>; + +def SKLWriteResGroup27 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort15]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup51], (instregex "LRETQ")>; -def: InstRW<[SKLWriteResGroup51], (instregex "RETQ")>; +def: InstRW<[SKLWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>; -def SKLWriteResGroup52 : SchedWriteRes<[SKLPort23,SKLPort06,SKLPort15]> { +def SKLWriteResGroup28 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup52], (instregex "BEXTR32rm")>; -def: InstRW<[SKLWriteResGroup52], (instregex "BEXTR64rm")>; - -def SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> { - let Latency = 2; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SKLWriteResGroup53], (instregex "SETAm")>; -def: InstRW<[SKLWriteResGroup53], (instregex "SETBEm")>; - -def SKLWriteResGroup54 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237,SKLPort0156]> { - let Latency = 2; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup54], (instregex "CALL(16|32|64)r")>; - -def SKLWriteResGroup55 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 2; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup55], (instregex "CALL64pcrel32")>; - -def SKLWriteResGroup56 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { - let Latency = 2; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,2]; -} -def: InstRW<[SKLWriteResGroup56], (instregex "ROL(16|32|64)m1")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROL(16|32|64)mi")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROL8m1")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROL8mi")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROR(16|32|64)m1")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROR(16|32|64)mi")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROR8m1")>; -def: InstRW<[SKLWriteResGroup56], (instregex "ROR8mi")>; - -def SKLWriteResGroup57 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 2; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,2]; -} -def: InstRW<[SKLWriteResGroup57], (instregex "XADD(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup57], (instregex "XADD8rm")>; - -def SKLWriteResGroup58 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 2; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup58], (instregex "CALL(16|32|64)m")>; -def: InstRW<[SKLWriteResGroup58], (instregex "FARCALL64")>; +def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>; +def: InstRW<[SKLWriteResGroup28], (instregex "PUSH64i8")>; +def: InstRW<[SKLWriteResGroup28], (instregex "STOSB")>; +def: InstRW<[SKLWriteResGroup28], (instregex "STOSL")>; +def: InstRW<[SKLWriteResGroup28], (instregex "STOSQ")>; +def: InstRW<[SKLWriteResGroup28], (instregex "STOSW")>; -def SKLWriteResGroup60 : 

-def SKLWriteResGroup60 : SchedWriteRes<[SKLPort1]> {
+def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> {
   let Latency = 3;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup60], (instregex "BSF(16|32|64)rr")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "BSR(16|32|64)rr")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "IMUL64rr(i8?)")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "IMUL8r")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "LZCNT(16|32|64)rr")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "MUL8r")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "PDEP32rr")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "PDEP64rr")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "PEXT32rr")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "PEXT64rr")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "POPCNT(16|32|64)rr")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "SHLD(16|32|64)rri8")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "SHRD(16|32|64)rri8")>;
-def: InstRW<[SKLWriteResGroup60], (instregex "TZCNT(16|32|64)rr")>;
-
-def SKLWriteResGroup60_16 : SchedWriteRes<[SKLPort1, SKLPort0156]> {
+def: InstRW<[SKLWriteResGroup29], (instregex "BSF(16|32|64)rr")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "BSR(16|32|64)rr")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "IMUL64rr(i8?)")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "IMUL8r")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "LZCNT(16|32|64)rr")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "MUL8r")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "PDEP32rr")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "PDEP64rr")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "PEXT32rr")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "PEXT64rr")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "POPCNT(16|32|64)rr")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "SHLD(16|32|64)rri8")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "SHRD(16|32|64)rri8")>;
+def: InstRW<[SKLWriteResGroup29], (instregex "TZCNT(16|32|64)rr")>;
+
+def SKLWriteResGroup29_16 : SchedWriteRes<[SKLPort1, SKLPort0156]> {
   let Latency = 3;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup60_16], (instregex "IMUL16rr(i8?)")>;
+def: InstRW<[SKLWriteResGroup29_16], (instregex "IMUL16rr(i8?)")>;

-def SKLWriteResGroup60_32 : SchedWriteRes<[SKLPort1]> {
+def SKLWriteResGroup29_32 : SchedWriteRes<[SKLPort1]> {
   let Latency = 3;
   let NumMicroOps = 1;
 }
-def: InstRW<[SKLWriteResGroup60_32], (instregex "IMUL32rr(i8?)")>;
+def: InstRW<[SKLWriteResGroup29_32], (instregex "IMUL32rr(i8?)")>;

-def SKLWriteResGroup61 : SchedWriteRes<[SKLPort5]> {
+def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> {
   let Latency = 3;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup61], (instregex "ADD_FPrST0")>;
-def: InstRW<[SKLWriteResGroup61], (instregex "ADD_FST0r")>;
-def: InstRW<[SKLWriteResGroup61], (instregex "ADD_FrST0")>;
-def: InstRW<[SKLWriteResGroup61], (instregex "MMX_PSADBWirr")>;
-def: InstRW<[SKLWriteResGroup61], (instregex "PCMPGTQrr")>;
-def: InstRW<[SKLWriteResGroup61], (instregex "PSADBWrr")>;
-def: InstRW<[SKLWriteResGroup61], (instregex "SUBR_FPrST0")>;
-def: InstRW<[SKLWriteResGroup61], (instregex "SUBR_FST0r")>;
-def: InstRW<[SKLWriteResGroup61], (instregex "SUBR_FrST0")>;
-def: InstRW<[SKLWriteResGroup61], (instregex "SUB_FPrST0")>;
-def: InstRW<[SKLWriteResGroup61], (instregex "SUB_FST0r")>;
-def: InstRW<[SKLWriteResGroup61], (instregex "SUB_FrST0")>;
"VBROADCASTSDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VBROADCASTSSYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VEXTRACTF128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VEXTRACTI128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VINSERTF128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VINSERTI128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTBYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTBrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTWYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPBROADCASTWrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPCMPGTQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPCMPGTQrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERM2F128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERM2I128rr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERMDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERMPDYri")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERMPSYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPERMQYri")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXBDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXBQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXBWYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXDQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXWDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVSXWQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVZXBDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVZXBQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVZXBWYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVZXDQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVZXWDYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPMOVZXWQYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPSADBWYrr")>; -def: InstRW<[SKLWriteResGroup61], (instregex "VPSADBWrr")>; - -def SKLWriteResGroup62 : SchedWriteRes<[SKLPort0,SKLPort5]> { - let Latency = 3; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup62], (instregex "EXTRACTPSrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "MMX_PEXTRWirri")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PEXTRBrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PEXTRDrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PEXTRQrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PEXTRWri")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PEXTRWrr_REV")>; -def: InstRW<[SKLWriteResGroup62], (instregex "PTESTrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VEXTRACTPSrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPEXTRBrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPEXTRDrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPEXTRQrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPEXTRWri")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPEXTRWrr_REV")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPTESTYrr")>; -def: InstRW<[SKLWriteResGroup62], (instregex "VPTESTrr")>; - -def SKLWriteResGroup63 : SchedWriteRes<[SKLPort0,SKLPort0156]> { +def: InstRW<[SKLWriteResGroup30], (instregex "ADD_FPrST0")>; +def: InstRW<[SKLWriteResGroup30], (instregex "ADD_FST0r")>; +def: InstRW<[SKLWriteResGroup30], (instregex "ADD_FrST0")>; +def: InstRW<[SKLWriteResGroup30], 
(instregex "MMX_PSADBWirr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "PCMPGTQrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "PSADBWrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "SUBR_FPrST0")>; +def: InstRW<[SKLWriteResGroup30], (instregex "SUBR_FST0r")>; +def: InstRW<[SKLWriteResGroup30], (instregex "SUBR_FrST0")>; +def: InstRW<[SKLWriteResGroup30], (instregex "SUB_FPrST0")>; +def: InstRW<[SKLWriteResGroup30], (instregex "SUB_FST0r")>; +def: InstRW<[SKLWriteResGroup30], (instregex "SUB_FrST0")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VBROADCASTSDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VBROADCASTSSYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VEXTRACTF128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VEXTRACTI128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VINSERTF128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VINSERTI128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTBYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTBrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTWYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPBROADCASTWrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPCMPGTQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPCMPGTQrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERM2F128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERM2I128rr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERMDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERMPDYri")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERMPSYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPERMQYri")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXBDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXBQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXBWYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXDQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXWDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVSXWQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXBDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXBQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXBWYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXDQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXWDYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPMOVZXWQYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPSADBWYrr")>; +def: InstRW<[SKLWriteResGroup30], (instregex "VPSADBWrr")>; + +def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0,SKLPort5]> { let Latency = 3; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup63], (instregex "FNSTSW16r")>; - -def SKLWriteResGroup64 : SchedWriteRes<[SKLPort1,SKLPort23]> { +def: InstRW<[SKLWriteResGroup31], (instregex "EXTRACTPSrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "MMX_PEXTRWirri")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PEXTRBrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PEXTRDrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PEXTRQrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PEXTRWri")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PEXTRWrr_REV")>; +def: InstRW<[SKLWriteResGroup31], (instregex "PTESTrr")>; +def: InstRW<[SKLWriteResGroup31], (instregex "VEXTRACTPSrr")>; +def: 
+def: InstRW<[SKLWriteResGroup31], (instregex "VPEXTRBrr")>;
+def: InstRW<[SKLWriteResGroup31], (instregex "VPEXTRDrr")>;
+def: InstRW<[SKLWriteResGroup31], (instregex "VPEXTRQrr")>;
+def: InstRW<[SKLWriteResGroup31], (instregex "VPEXTRWri")>;
+def: InstRW<[SKLWriteResGroup31], (instregex "VPEXTRWrr_REV")>;
+def: InstRW<[SKLWriteResGroup31], (instregex "VPTESTYrr")>;
+def: InstRW<[SKLWriteResGroup31], (instregex "VPTESTrr")>;
+
+def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> {
   let Latency = 3;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup64], (instregex "BSF(16|32|64)rm")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "BSR(16|32|64)rm")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "IMUL64m")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "IMUL(32|64)rm(i8?)")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "IMUL8m")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "LZCNT(16|32|64)rm")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "MUL64m")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "MUL8m")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "PDEP32rm")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "PDEP64rm")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "PEXT32rm")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "PEXT64rm")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "POPCNT(16|32|64)rm")>;
-def: InstRW<[SKLWriteResGroup64], (instregex "TZCNT(16|32|64)rm")>;
-
-def SKLWriteResGroup64_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
-  let Latency = 3;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup64_16], (instregex "IMUL16rm(i8?)")>;
-
-def SKLWriteResGroup64_16_2 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
-  let Latency = 3;
-  let NumMicroOps = 5;
-}
-def: InstRW<[SKLWriteResGroup64_16_2], (instregex "IMUL16m")>;
-def: InstRW<[SKLWriteResGroup64_16_2], (instregex "MUL16m")>;
-
-def SKLWriteResGroup64_32 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
-  let Latency = 3;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup64_32], (instregex "IMUL32m")>;
-def: InstRW<[SKLWriteResGroup64_32], (instregex "MUL32m")>;
+def: InstRW<[SKLWriteResGroup32], (instregex "FNSTSW16r")>;

-def SKLWriteResGroup65 : SchedWriteRes<[SKLPort5,SKLPort23]> {
-  let Latency = 3;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
-}
-def: InstRW<[SKLWriteResGroup65], (instregex "ADD_F32m")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "ADD_F64m")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "ILD_F16m")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "ILD_F32m")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "ILD_F64m")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "MMX_PSADBWirm")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "PCMPGTQrm")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "PSADBWrm")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "SUBR_F32m")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "SUBR_F64m")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "SUB_F32m")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "SUB_F64m")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "VPCMPGTQYrm")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "VPCMPGTQrm")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "VPERM2F128rm")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "VPERM2I128rm")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "VPERMDYrm")>;
-def: InstRW<[SKLWriteResGroup65], (instregex "VPERMPDYmi")>;
(instregex "VPERMPSYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPERMQYmi")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXBDYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXBQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXBWYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXDQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXWDYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVSXWQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXBDYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXBQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXBWYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXDQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXWDYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPMOVZXWQYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPSADBWYrm")>; -def: InstRW<[SKLWriteResGroup65], (instregex "VPSADBWrm")>; - -def SKLWriteResGroup66 : SchedWriteRes<[SKLPort06]> { +def SKLWriteResGroup33 : SchedWriteRes<[SKLPort06]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [3]; } -def: InstRW<[SKLWriteResGroup66], (instregex "ROL(16|32|64)rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "ROL8rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "ROR(16|32|64)rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "ROR8rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SAR(16|32|64)rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SAR8rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SHL(16|32|64)rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SHL8rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SHR(16|32|64)rCL")>; -def: InstRW<[SKLWriteResGroup66], (instregex "SHR8rCL")>; - -def SKLWriteResGroup67 : SchedWriteRes<[SKLPort0156]> { +def: InstRW<[SKLWriteResGroup33], (instregex "ROL(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "ROL8rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "ROR(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "ROR8rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SAR(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SAR8rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SHL(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SHL8rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SHR(16|32|64)rCL")>; +def: InstRW<[SKLWriteResGroup33], (instregex "SHR8rCL")>; + +def SKLWriteResGroup34 : SchedWriteRes<[SKLPort0156]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [3]; } -def: InstRW<[SKLWriteResGroup67], (instregex "XADD(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup67], (instregex "XADD8rr")>; -def: InstRW<[SKLWriteResGroup67], (instregex "XCHG8rr")>; +def: InstRW<[SKLWriteResGroup34], (instregex "XADD(16|32|64)rr")>; +def: InstRW<[SKLWriteResGroup34], (instregex "XADD8rr")>; +def: InstRW<[SKLWriteResGroup34], (instregex "XCHG8rr")>; -def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0,SKLPort5]> { +def SKLWriteResGroup35 : SchedWriteRes<[SKLPort0,SKLPort5]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SKLWriteResGroup68], (instregex "MMX_PHADDSWrr64")>; -def: InstRW<[SKLWriteResGroup68], (instregex "MMX_PHSUBSWrr64")>; +def: InstRW<[SKLWriteResGroup35], (instregex "MMX_PHADDSWrr64")>; +def: InstRW<[SKLWriteResGroup35], (instregex "MMX_PHSUBSWrr64")>; -def SKLWriteResGroup69 : SchedWriteRes<[SKLPort5,SKLPort01]> { +def SKLWriteResGroup36 : 
SchedWriteRes<[SKLPort5,SKLPort01]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup69], (instregex "PHADDSWrr128")>; -def: InstRW<[SKLWriteResGroup69], (instregex "PHSUBSWrr128")>; -def: InstRW<[SKLWriteResGroup69], (instregex "VPHADDSWrr128")>; -def: InstRW<[SKLWriteResGroup69], (instregex "VPHADDSWrr256")>; -def: InstRW<[SKLWriteResGroup69], (instregex "VPHSUBSWrr128")>; -def: InstRW<[SKLWriteResGroup69], (instregex "VPHSUBSWrr256")>; +def: InstRW<[SKLWriteResGroup36], (instregex "PHADDSWrr128")>; +def: InstRW<[SKLWriteResGroup36], (instregex "PHSUBSWrr128")>; +def: InstRW<[SKLWriteResGroup36], (instregex "VPHADDSWrr128")>; +def: InstRW<[SKLWriteResGroup36], (instregex "VPHADDSWrr256")>; +def: InstRW<[SKLWriteResGroup36], (instregex "VPHSUBSWrr128")>; +def: InstRW<[SKLWriteResGroup36], (instregex "VPHSUBSWrr256")>; -def SKLWriteResGroup70 : SchedWriteRes<[SKLPort5,SKLPort05]> { +def SKLWriteResGroup37 : SchedWriteRes<[SKLPort5,SKLPort05]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup70], (instregex "MMX_PHADDWrr64")>; -def: InstRW<[SKLWriteResGroup70], (instregex "MMX_PHADDrr64")>; -def: InstRW<[SKLWriteResGroup70], (instregex "MMX_PHSUBDrr64")>; -def: InstRW<[SKLWriteResGroup70], (instregex "MMX_PHSUBWrr64")>; +def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PHADDWrr64")>; +def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PHADDrr64")>; +def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PHSUBDrr64")>; +def: InstRW<[SKLWriteResGroup37], (instregex "MMX_PHSUBWrr64")>; -def SKLWriteResGroup71 : SchedWriteRes<[SKLPort5,SKLPort015]> { +def SKLWriteResGroup38 : SchedWriteRes<[SKLPort5,SKLPort015]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup71], (instregex "PHADDDrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "PHADDWrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "PHSUBDrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "PHSUBWrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHADDDYrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHADDDrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHADDWYrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHADDWrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHSUBDYrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHSUBDrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHSUBWYrr")>; -def: InstRW<[SKLWriteResGroup71], (instregex "VPHSUBWrr")>; - -def SKLWriteResGroup72 : SchedWriteRes<[SKLPort5,SKLPort0156]> { +def: InstRW<[SKLWriteResGroup38], (instregex "PHADDDrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "PHADDWrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "PHSUBDrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "PHSUBWrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHADDDYrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHADDDrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHADDWYrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHADDWrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHSUBDYrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHSUBDrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHSUBWYrr")>; +def: InstRW<[SKLWriteResGroup38], (instregex "VPHSUBWrr")>; + +def SKLWriteResGroup39 : SchedWriteRes<[SKLPort5,SKLPort0156]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup72], (instregex "MMX_PACKSSDWirr")>; -def: 
InstRW<[SKLWriteResGroup72], (instregex "MMX_PACKSSWBirr")>; -def: InstRW<[SKLWriteResGroup72], (instregex "MMX_PACKUSWBirr")>; +def: InstRW<[SKLWriteResGroup39], (instregex "MMX_PACKSSDWirr")>; +def: InstRW<[SKLWriteResGroup39], (instregex "MMX_PACKSSWBirr")>; +def: InstRW<[SKLWriteResGroup39], (instregex "MMX_PACKUSWBirr")>; -def SKLWriteResGroup73 : SchedWriteRes<[SKLPort6,SKLPort0156]> { +def SKLWriteResGroup40 : SchedWriteRes<[SKLPort6,SKLPort0156]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SKLWriteResGroup73], (instregex "CLD")>; +def: InstRW<[SKLWriteResGroup40], (instregex "CLD")>; -def SKLWriteResGroup74 : SchedWriteRes<[SKLPort06,SKLPort0156]> { +def SKLWriteResGroup41 : SchedWriteRes<[SKLPort237,SKLPort0156]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [1,2]; } -def: InstRW<[SKLWriteResGroup74], (instregex "RCL(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCL(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCL8r1")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCL8ri")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCR(16|32|64)r1")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCR(16|32|64)ri")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCR8r1")>; -def: InstRW<[SKLWriteResGroup74], (instregex "RCR8ri")>; +def: InstRW<[SKLWriteResGroup41], (instregex "MFENCE")>; -def SKLWriteResGroup75 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { +def SKLWriteResGroup42 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 3; let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let ResourceCycles = [1,2]; } -def: InstRW<[SKLWriteResGroup75], (instregex "PTESTrm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "VPTESTYrm")>; -def: InstRW<[SKLWriteResGroup75], (instregex "VPTESTrm")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCL(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCL(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCL8r1")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCL8ri")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCR(16|32|64)r1")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCR(16|32|64)ri")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCR8r1")>; +def: InstRW<[SKLWriteResGroup42], (instregex "RCR8ri")>; -def SKLWriteResGroup76 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> { +def SKLWriteResGroup43 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort237]> { let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup76], (instregex "ISTT_FP16m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "ISTT_FP32m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "ISTT_FP64m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "IST_F16m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "IST_F32m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "IST_FP16m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "IST_FP32m")>; -def: InstRW<[SKLWriteResGroup76], (instregex "IST_FP64m")>; - -def SKLWriteResGroup77 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { - let Latency = 3; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SKLWriteResGroup77], (instregex "MMX_PHADDSWrm64")>; -def: InstRW<[SKLWriteResGroup77], (instregex "MMX_PHSUBSWrm64")>; +def: InstRW<[SKLWriteResGroup43], (instregex "FNSTSWm")>; -def SKLWriteResGroup78 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { +def SKLWriteResGroup44 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06]> { let Latency = 3; let 
NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [1,1,2]; } -def: InstRW<[SKLWriteResGroup78], (instregex "PHADDSWrm128")>; -def: InstRW<[SKLWriteResGroup78], (instregex "PHSUBSWrm128")>; -def: InstRW<[SKLWriteResGroup78], (instregex "VPHADDSWrm128")>; -def: InstRW<[SKLWriteResGroup78], (instregex "VPHADDSWrm256")>; -def: InstRW<[SKLWriteResGroup78], (instregex "VPHSUBSWrm128")>; -def: InstRW<[SKLWriteResGroup78], (instregex "VPHSUBSWrm256")>; +def: InstRW<[SKLWriteResGroup44], (instregex "SETAm")>; +def: InstRW<[SKLWriteResGroup44], (instregex "SETBEm")>; -def SKLWriteResGroup79 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort05]> { +def SKLWriteResGroup45 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort237,SKLPort0156]> { let Latency = 3; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup79], (instregex "MMX_PHADDWrm64")>; -def: InstRW<[SKLWriteResGroup79], (instregex "MMX_PHADDrm64")>; -def: InstRW<[SKLWriteResGroup79], (instregex "MMX_PHSUBDrm64")>; -def: InstRW<[SKLWriteResGroup79], (instregex "MMX_PHSUBWrm64")>; +def: InstRW<[SKLWriteResGroup45], (instregex "CALL(16|32|64)r")>; -def SKLWriteResGroup80 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { +def SKLWriteResGroup46 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 3; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SKLWriteResGroup80], (instregex "PHADDDrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "PHADDWrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "PHSUBDrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "PHSUBWrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHADDDYrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHADDDrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHADDWYrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHADDWrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHSUBDYrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHSUBDrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHSUBWYrm")>; -def: InstRW<[SKLWriteResGroup80], (instregex "VPHSUBWrm")>; - -def SKLWriteResGroup81 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06]> { - let Latency = 3; - let NumMicroOps = 5; - let ResourceCycles = [1,1,3]; -} -def: InstRW<[SKLWriteResGroup81], (instregex "ROR(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup81], (instregex "ROR8mCL")>; - -def SKLWriteResGroup82 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 3; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,2]; -} -def: InstRW<[SKLWriteResGroup82], (instregex "RCL(16|32|64)m1")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCL(16|32|64)mi")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCL8m1")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCL8mi")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCR(16|32|64)m1")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCR(16|32|64)mi")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCR8m1")>; -def: InstRW<[SKLWriteResGroup82], (instregex "RCR8mi")>; - -def SKLWriteResGroup83 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { - let Latency = 3; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,3]; -} -def: InstRW<[SKLWriteResGroup83], (instregex "ROL(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "ROL8mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SAR(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SAR8mCL")>; -def: InstRW<[SKLWriteResGroup83], 
(instregex "SHL(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SHL8mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SHR(16|32|64)mCL")>; -def: InstRW<[SKLWriteResGroup83], (instregex "SHR8mCL")>; - -def SKLWriteResGroup84 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 3; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,3]; -} -def: InstRW<[SKLWriteResGroup84], (instregex "ADC(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup84], (instregex "ADC8mi")>; - -def SKLWriteResGroup85 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 3; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,2,1]; + let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup85], (instregex "ADC(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup85], (instregex "ADC8mr")>; -def: InstRW<[SKLWriteResGroup85], (instregex "CMPXCHG(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup85], (instregex "CMPXCHG8rm")>; -def: InstRW<[SKLWriteResGroup85], (instregex "SBB(16|32|64)mi8")>; -def: InstRW<[SKLWriteResGroup85], (instregex "SBB(16|32|64)mr")>; -def: InstRW<[SKLWriteResGroup85], (instregex "SBB8mi")>; -def: InstRW<[SKLWriteResGroup85], (instregex "SBB8mr")>; +def: InstRW<[SKLWriteResGroup46], (instregex "CALL64pcrel32")>; -def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup47 : SchedWriteRes<[SKLPort0]> { let Latency = 4; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup86], (instregex "AESDECLASTrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "AESDECrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "AESENCLASTrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "AESENCrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMADDUBSWrr64")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMADDWDirr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMULHRSWrr64")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMULHUWirr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMULHWirr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMULLWirr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MMX_PMULUDQirr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MUL_FPrST0")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MUL_FST0r")>; -def: InstRW<[SKLWriteResGroup86], (instregex "MUL_FrST0")>; -def: InstRW<[SKLWriteResGroup86], (instregex "RCPPSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "RCPSSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "RSQRTPSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "RSQRTSSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VAESDECLASTrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VAESDECrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VAESENCLASTrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VAESENCrr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VRCPPSYr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VRCPPSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VRCPSSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VRSQRTPSYr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VRSQRTPSr")>; -def: InstRW<[SKLWriteResGroup86], (instregex "VRSQRTSSr")>; - -def SKLWriteResGroup87 : SchedWriteRes<[SKLPort01]> { +def: InstRW<[SKLWriteResGroup47], (instregex "AESDECLASTrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "AESDECrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "AESENCLASTrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "AESENCrr")>; +def: 
InstRW<[SKLWriteResGroup47], (instregex "MMX_PMADDUBSWrr64")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMADDWDirr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMULHRSWrr64")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMULHUWirr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMULHWirr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMULLWirr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MMX_PMULUDQirr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MUL_FPrST0")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MUL_FST0r")>; +def: InstRW<[SKLWriteResGroup47], (instregex "MUL_FrST0")>; +def: InstRW<[SKLWriteResGroup47], (instregex "RCPPSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "RCPSSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "RSQRTPSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "RSQRTSSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VAESDECLASTrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VAESDECrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VAESENCLASTrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VAESENCrr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRCPPSYr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRCPPSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRCPSSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRSQRTPSYr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRSQRTPSr")>; +def: InstRW<[SKLWriteResGroup47], (instregex "VRSQRTSSr")>; + +def SKLWriteResGroup48 : SchedWriteRes<[SKLPort01]> { let Latency = 4; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup87], (instregex "ADDPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "ADDPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "ADDSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "ADDSSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "ADDSUBPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "ADDSUBPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "MULPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "MULPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "MULSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "MULSSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "SUBPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "SUBPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "SUBSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "SUBSSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDPDYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDPSYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSUBPDYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSUBPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSUBPSYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VADDSUBPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD132SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD213PDYr")>; -def: InstRW<[SKLWriteResGroup87], 
(instregex "VFMADD213PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD213SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD213SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADD231SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB132PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB213PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB213PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMADDSUB231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB132SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB213SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUB231SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD132PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD213PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD213PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFMSUBADD231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD132PDYr")>; -def: InstRW<[SKLWriteResGroup87], 
(instregex "VFNMADD132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD132SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD132SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD213SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMADD231SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB132SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB213SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231PDYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231PDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231PSYr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231PSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231SDr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VFNMSUB231SSr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VMULPDYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VMULPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VMULPSYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VMULPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VMULSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VMULSSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBPDYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBPDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBPSYrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBPSrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBSDrr")>; -def: InstRW<[SKLWriteResGroup87], (instregex "VSUBSSrr")>; - -def SKLWriteResGroup89 : SchedWriteRes<[SKLPort015]> { +def: InstRW<[SKLWriteResGroup48], (instregex "ADDPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "ADDPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "ADDSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "ADDSSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "ADDSUBPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "ADDSUBPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "MULPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "MULPSrr")>; +def: InstRW<[SKLWriteResGroup48], 
(instregex "MULSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "MULSSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "SUBPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "SUBPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "SUBSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "SUBSSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDPDYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDPSYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDSSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDSUBPDYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDSUBPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDSUBPSYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VADDSUBPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD132SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD213SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADD231SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB213PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMADDSUB231PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB132SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213PSr")>; +def: 
InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB213SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUB231SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD213PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFMSUBADD231PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD132SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD213SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMADD231SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB132SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213PSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB213SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PDYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PSYr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231PSr")>; +def: 
InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231SDr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VFNMSUB231SSr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULPDYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULPSYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VMULSSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBPDYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBPDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBPSYrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBPSrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBSDrr")>; +def: InstRW<[SKLWriteResGroup48], (instregex "VSUBSSrr")>; + +def SKLWriteResGroup49 : SchedWriteRes<[SKLPort015]> { let Latency = 4; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup89], (instregex "CMPPDrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "CMPPSrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "CMPSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "CVTDQ2PSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "CVTPS2DQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "CVTTPS2DQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MAXPDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MAXPSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MAXSDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MAXSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MINPDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MINPSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MINSDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "MINSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PHMINPOSUWrr128")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMADDUBSWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMADDWDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULDQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULHRSWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULHUWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULHWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULLWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "PMULUDQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPPDYrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPPDrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPPSYrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPPSrri")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPSDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCMPSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTDQ2PSYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTDQ2PSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPS2DQYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPS2DQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTTPS2DQYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VCVTTPS2DQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXPDYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXPDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXPSYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXPSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXSDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMAXSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMINPDYrr")>; -def: 
InstRW<[SKLWriteResGroup89], (instregex "VMINPDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMINPSYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMINPSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMINSDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VMINSSrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPHMINPOSUWrr128")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMADDUBSWYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMADDUBSWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMADDWDYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMADDWDrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULDQYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULDQrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHRSWYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHRSWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHUWYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHUWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHWYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULHWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULLWYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULLWrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULUDQYrr")>; -def: InstRW<[SKLWriteResGroup89], (instregex "VPMULUDQrr")>; - -def SKLWriteResGroup90 : SchedWriteRes<[SKLPort5]> { +def: InstRW<[SKLWriteResGroup49], (instregex "CMPPDrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CMPPSrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CMPSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CVTDQ2PSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CVTPS2DQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "CVTTPS2DQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAXPDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAXPSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAXSDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MAXSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MINPDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MINPSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MINSDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "MINSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PHMINPOSUWrr128")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMADDUBSWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMADDWDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULDQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULHRSWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULHUWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULHWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULLWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "PMULUDQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPPDYrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPPDrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPPSYrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPPSrri")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPSDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCMPSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCVTDQ2PSYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCVTDQ2PSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCVTPS2DQYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCVTPS2DQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VCVTTPS2DQYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex 
"VCVTTPS2DQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPDYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPSYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXPSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXSDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMAXSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMINPDYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMINPDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMINPSYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMINPSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMINSDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VMINSSrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPHMINPOSUWrr128")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMADDUBSWYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMADDUBSWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMADDWDYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMADDWDrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULDQYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULDQrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHRSWYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHRSWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHUWYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHUWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHWYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULHWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULLWYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULLWrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULUDQYrr")>; +def: InstRW<[SKLWriteResGroup49], (instregex "VPMULUDQrr")>; + +def SKLWriteResGroup50 : SchedWriteRes<[SKLPort5]> { let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup90], (instregex "MPSADBWrri")>; -def: InstRW<[SKLWriteResGroup90], (instregex "VMPSADBWYrri")>; -def: InstRW<[SKLWriteResGroup90], (instregex "VMPSADBWrri")>; +def: InstRW<[SKLWriteResGroup50], (instregex "MPSADBWrri")>; +def: InstRW<[SKLWriteResGroup50], (instregex "VMPSADBWYrri")>; +def: InstRW<[SKLWriteResGroup50], (instregex "VMPSADBWrri")>; -def SKLWriteResGroup91 : SchedWriteRes<[SKLPort0,SKLPort23]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup91], (instregex "AESDECLASTrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "AESDECrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "AESENCLASTrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "AESENCrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_CVTPI2PSirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMADDUBSWrm64")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMADDWDirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMULHRSWrm64")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMULHUWirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMULHWirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMULLWirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MMX_PMULUDQirm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MUL_F32m")>; -def: InstRW<[SKLWriteResGroup91], (instregex "MUL_F64m")>; -def: InstRW<[SKLWriteResGroup91], (instregex "RCPPSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "RCPSSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "RSQRTPSm")>; -def: 
InstRW<[SKLWriteResGroup91], (instregex "RSQRTSSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VAESDECLASTrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VAESDECrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VAESENCLASTrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VAESENCrm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRCPPSYm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRCPPSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRCPSSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRSQRTPSYm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRSQRTPSm")>; -def: InstRW<[SKLWriteResGroup91], (instregex "VRSQRTSSm")>; - -def SKLWriteResGroup92 : SchedWriteRes<[SKLPort1,SKLPort5]> { +def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> { let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup92], (instregex "IMUL64r")>; -def: InstRW<[SKLWriteResGroup92], (instregex "MUL64r")>; -def: InstRW<[SKLWriteResGroup92], (instregex "MULX64rr")>; - -def SKLWriteResGroup92_16 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { - let Latency = 4; - let NumMicroOps = 4; -} -def: InstRW<[SKLWriteResGroup92_16], (instregex "IMUL16r")>; -def: InstRW<[SKLWriteResGroup92_16], (instregex "MUL16r")>; +def: InstRW<[SKLWriteResGroup51], (instregex "IMUL64r")>; +def: InstRW<[SKLWriteResGroup51], (instregex "MUL64r")>; +def: InstRW<[SKLWriteResGroup51], (instregex "MULX64rr")>; -def SKLWriteResGroup93 : SchedWriteRes<[SKLPort5,SKLPort01]> { +def SKLWriteResGroup51_16 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let NumMicroOps = 4; } -def: InstRW<[SKLWriteResGroup93], (instregex "VPSLLDYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSLLQYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSLLWYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSRADYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSRAWYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSRLDYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSRLQYrr")>; -def: InstRW<[SKLWriteResGroup93], (instregex "VPSRLWYrr")>; +def: InstRW<[SKLWriteResGroup51_16], (instregex "IMUL16r")>; +def: InstRW<[SKLWriteResGroup51_16], (instregex "MUL16r")>; -def SKLWriteResGroup94 : SchedWriteRes<[SKLPort01,SKLPort23]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup94], (instregex "ADDPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "ADDPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "ADDSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "ADDSSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "ADDSUBPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "ADDSUBPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "MULPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "MULPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "MULSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "MULSSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "SUBPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "SUBPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "SUBSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "SUBSSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDPDYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDPSYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDPSrm")>; -def: 
InstRW<[SKLWriteResGroup94], (instregex "VADDSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDSSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDSUBPDYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDSUBPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDSUBPSYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VADDSUBPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD132SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD213PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD213SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD213SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADD231SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB213PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMADDSUB231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB132SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB213SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUB231SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex 
"VFMSUBADD132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD213PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFMSUBADD231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD132SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD213SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMADD231SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB132SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB213SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231PDYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231PDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231PSYm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231PSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231SDm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VFNMSUB231SSm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VMULPDYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VMULPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VMULPSYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VMULPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VMULSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VMULSSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VSUBPDYrm")>; -def: 
InstRW<[SKLWriteResGroup94], (instregex "VSUBPDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VSUBPSYrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VSUBPSrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VSUBSDrm")>; -def: InstRW<[SKLWriteResGroup94], (instregex "VSUBSSrm")>; - -def SKLWriteResGroup96 : SchedWriteRes<[SKLPort23,SKLPort015]> { +def SKLWriteResGroup52 : SchedWriteRes<[SKLPort5,SKLPort01]> { let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup96], (instregex "CMPPDrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CMPPSrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CMPSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CVTDQ2PSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CVTPS2DQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CVTPS2PDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CVTSS2SDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "CVTTPS2DQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MAXPDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MAXPSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MAXSDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MAXSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MINPDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MINPSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MINSDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MINSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MMX_CVTPS2PIirm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "MMX_CVTTPS2PIirm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PHMINPOSUWrm128")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMADDUBSWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMADDWDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULDQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULHRSWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULHUWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULHWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULLWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "PMULUDQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPPDYrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPPDrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPPSYrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPPSrmi")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPSDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCMPSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTDQ2PSYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTDQ2PSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPH2PSYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPH2PSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPS2DQYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPS2DQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPS2PDYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTPS2PDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTSS2SDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTTPS2DQYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VCVTTPS2DQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMAXPDYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMAXPDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMAXPSYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMAXPSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMAXSDrm")>; -def: InstRW<[SKLWriteResGroup96], 
(instregex "VMAXSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINPDYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINPDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINPSYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINPSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINSDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VMINSSrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPHMINPOSUWrm128")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMADDUBSWYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMADDUBSWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMADDWDYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMADDWDrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULDQYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULDQrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHRSWYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHRSWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHUWYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHUWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHWYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULHWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULLWYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULLWrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULUDQYrm")>; -def: InstRW<[SKLWriteResGroup96], (instregex "VPMULUDQrm")>; - -def SKLWriteResGroup97 : SchedWriteRes<[SKLPort5,SKLPort23]> { - let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SKLWriteResGroup97], (instregex "FICOM16m")>; -def: InstRW<[SKLWriteResGroup97], (instregex "FICOM32m")>; -def: InstRW<[SKLWriteResGroup97], (instregex "FICOMP16m")>; -def: InstRW<[SKLWriteResGroup97], (instregex "FICOMP32m")>; -def: InstRW<[SKLWriteResGroup97], (instregex "MPSADBWrmi")>; -def: InstRW<[SKLWriteResGroup97], (instregex "VMPSADBWYrmi")>; -def: InstRW<[SKLWriteResGroup97], (instregex "VMPSADBWrmi")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSLLDYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSLLQYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSLLWYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSRADYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSRAWYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSRLDYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSRLQYrr")>; +def: InstRW<[SKLWriteResGroup52], (instregex "VPSRLWYrr")>; -def SKLWriteResGroup98 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> { +def SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> { let Latency = 4; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup98], (instregex "MULX64rm")>; +def: InstRW<[SKLWriteResGroup53], (instregex "ISTT_FP16m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "ISTT_FP32m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "ISTT_FP64m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "IST_F16m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "IST_F32m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "IST_FP16m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "IST_FP32m")>; +def: InstRW<[SKLWriteResGroup53], (instregex "IST_FP64m")>; -def SKLWriteResGroup100 : SchedWriteRes<[SKLPort0156]> { +def SKLWriteResGroup54 : SchedWriteRes<[SKLPort0156]> { let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [4]; } -def: InstRW<[SKLWriteResGroup100], (instregex "FNCLEX")>; +def: 
InstRW<[SKLWriteResGroup54], (instregex "FNCLEX")>; -def SKLWriteResGroup101 : SchedWriteRes<[SKLPort6,SKLPort0156]> { +def SKLWriteResGroup55 : SchedWriteRes<[SKLPort6,SKLPort0156]> { let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [1,3]; } -def: InstRW<[SKLWriteResGroup101], (instregex "PAUSE")>; +def: InstRW<[SKLWriteResGroup55], (instregex "PAUSE")>; -def SKLWriteResGroup102 : SchedWriteRes<[SKLPort015,SKLPort0156]> { +def SKLWriteResGroup56 : SchedWriteRes<[SKLPort015,SKLPort0156]> { let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [1,3]; } -def: InstRW<[SKLWriteResGroup102], (instregex "VZEROUPPER")>; +def: InstRW<[SKLWriteResGroup56], (instregex "VZEROUPPER")>; -def SKLWriteResGroup103 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> { +def SKLWriteResGroup57 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort0156]> { let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [1,1,2]; } -def: InstRW<[SKLWriteResGroup103], (instregex "LAR(16|32|64)rr")>; - -def SKLWriteResGroup105 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup105], (instregex "SHLD(16|32|64)mri8")>; -def: InstRW<[SKLWriteResGroup105], (instregex "SHRD(16|32|64)mri8")>; +def: InstRW<[SKLWriteResGroup57], (instregex "LAR(16|32|64)rr")>; -def SKLWriteResGroup106 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> { - let Latency = 4; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SKLWriteResGroup106], (instregex "LAR(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup106], (instregex "LSL(16|32|64)rm")>; - -def SKLWriteResGroup107 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> { - let Latency = 4; - let NumMicroOps = 6; - let ResourceCycles = [1,1,4]; +def SKLWriteResGroup58 : SchedWriteRes<[SKLPort23]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup107], (instregex "PUSHF16")>; -def: InstRW<[SKLWriteResGroup107], (instregex "PUSHF64")>; - -def SKLWriteResGroup109 : SchedWriteRes<[SKLPort0,SKLPort5]> { +def: InstRW<[SKLWriteResGroup58], (instregex "MMX_MOVD64from64rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MMX_MOVD64rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MMX_MOVD64to64rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MMX_MOVQ64rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOV(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOV64toPQIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOV8rm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVDDUPrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVDI2PDIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVSSrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm16")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm32")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVSX(16|32|64)rm8")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVZX(16|32|64)rm16")>; +def: InstRW<[SKLWriteResGroup58], (instregex "MOVZX(16|32|64)rm8")>; +def: InstRW<[SKLWriteResGroup58], (instregex "PREFETCHNTA")>; +def: InstRW<[SKLWriteResGroup58], (instregex "PREFETCHT0")>; +def: InstRW<[SKLWriteResGroup58], (instregex "PREFETCHT1")>; +def: InstRW<[SKLWriteResGroup58], (instregex "PREFETCHT2")>; +def: InstRW<[SKLWriteResGroup58], (instregex "VMOV64toPQIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "VMOVDDUPrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex 
"VMOVDI2PDIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "VMOVQI2PQIrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "VMOVSDrm")>; +def: InstRW<[SKLWriteResGroup58], (instregex "VMOVSSrm")>; + +def SKLWriteResGroup59 : SchedWriteRes<[SKLPort0,SKLPort5]> { let Latency = 5; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup109], (instregex "CVTDQ2PDrr")>; -def: InstRW<[SKLWriteResGroup109], (instregex "MMX_CVTPI2PDirr")>; -def: InstRW<[SKLWriteResGroup109], (instregex "VCVTDQ2PDrr")>; +def: InstRW<[SKLWriteResGroup59], (instregex "CVTDQ2PDrr")>; +def: InstRW<[SKLWriteResGroup59], (instregex "MMX_CVTPI2PDirr")>; +def: InstRW<[SKLWriteResGroup59], (instregex "VCVTDQ2PDrr")>; -def SKLWriteResGroup110 : SchedWriteRes<[SKLPort5,SKLPort015]> { +def SKLWriteResGroup60 : SchedWriteRes<[SKLPort5,SKLPort015]> { let Latency = 5; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup110], (instregex "CVTPD2DQrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTPD2PSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTPS2PDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTSD2SSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTSI2SD64rr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTSI2SDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTSI2SSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTSS2SDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "CVTTPD2DQrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "MMX_CVTPD2PIirr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "MMX_CVTPS2PIirr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "MMX_CVTTPD2PIirr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "MMX_CVTTPS2PIirr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTPD2DQrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTPD2PSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTPH2PSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTPS2PDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTPS2PHrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTSD2SSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTSI2SD64rr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTSI2SDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTSI2SSrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTSS2SDrr")>; -def: InstRW<[SKLWriteResGroup110], (instregex "VCVTTPD2DQrr")>; - -def SKLWriteResGroup113 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup113], (instregex "CVTDQ2PDrm")>; -def: InstRW<[SKLWriteResGroup113], (instregex "MMX_CVTPI2PDirm")>; -def: InstRW<[SKLWriteResGroup113], (instregex "VCVTDQ2PDrm")>; - -def SKLWriteResGroup114 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup114], (instregex "STR(16|32|64)r")>; - -def SKLWriteResGroup115 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { +def: InstRW<[SKLWriteResGroup60], (instregex "CVTPD2DQrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTPD2PSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTPS2PDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTSD2SSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTSI2SD64rr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTSI2SDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTSI2SSrr")>; 
+def: InstRW<[SKLWriteResGroup60], (instregex "CVTSS2SDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "CVTTPD2DQrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVTPD2PIirr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVTPS2PIirr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVTTPD2PIirr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "MMX_CVTTPS2PIirr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPD2DQrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPD2PSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPH2PSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPS2PDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTPS2PHrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSD2SSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSI2SD64rr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSI2SDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSI2SSrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTSS2SDrr")>; +def: InstRW<[SKLWriteResGroup60], (instregex "VCVTTPD2DQrr")>; + +def SKLWriteResGroup61 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> { let Latency = 5; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup115], (instregex "IMUL32r")>; -def: InstRW<[SKLWriteResGroup115], (instregex "MUL32r")>; -def: InstRW<[SKLWriteResGroup115], (instregex "MULX32rr")>; +def: InstRW<[SKLWriteResGroup61], (instregex "STR(16|32|64)r")>; -def SKLWriteResGroup116 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> { +def SKLWriteResGroup62 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 5; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup116], (instregex "CVTPD2DQrm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "CVTPD2PSrm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "CVTSD2SSrm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "CVTTPD2DQrm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "MMX_CVTPD2PIirm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "MMX_CVTTPD2PIirm")>; -def: InstRW<[SKLWriteResGroup116], (instregex "VCVTSD2SSrm")>; - -def SKLWriteResGroup118 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup118], (instregex "MULX32rm")>; - -def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort015]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup119], (instregex "VCVTPS2PHmr")>; +def: InstRW<[SKLWriteResGroup62], (instregex "IMUL32r")>; +def: InstRW<[SKLWriteResGroup62], (instregex "MUL32r")>; +def: InstRW<[SKLWriteResGroup62], (instregex "MULX32rr")>; -def SKLWriteResGroup120 : SchedWriteRes<[SKLPort06,SKLPort0156]> { +def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 5; let NumMicroOps = 5; let ResourceCycles = [1,4]; } -def: InstRW<[SKLWriteResGroup120], (instregex "XSETBV")>; +def: InstRW<[SKLWriteResGroup63], (instregex "XSETBV")>; -def SKLWriteResGroup121 : SchedWriteRes<[SKLPort06,SKLPort0156]> { +def SKLWriteResGroup64 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 5; let NumMicroOps = 5; let ResourceCycles = [2,3]; } -def: InstRW<[SKLWriteResGroup121], (instregex "CMPXCHG(16|32|64)rr")>; -def: InstRW<[SKLWriteResGroup121], (instregex "CMPXCHG8rr")>; +def: InstRW<[SKLWriteResGroup64], (instregex "CMPXCHG(16|32|64)rr")>; +def: 
InstRW<[SKLWriteResGroup64], (instregex "CMPXCHG8rr")>; -def SKLWriteResGroup122 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup65 : SchedWriteRes<[SKLPort4,SKLPort237,SKLPort0156]> { let Latency = 5; - let NumMicroOps = 8; - let ResourceCycles = [1,1,1,1,1,3]; + let NumMicroOps = 6; + let ResourceCycles = [1,1,4]; } -def: InstRW<[SKLWriteResGroup122], (instregex "ADD8mi")>; -def: InstRW<[SKLWriteResGroup122], (instregex "AND8mi")>; -def: InstRW<[SKLWriteResGroup122], (instregex "OR8mi")>; -def: InstRW<[SKLWriteResGroup122], (instregex "SUB8mi")>; -def: InstRW<[SKLWriteResGroup122], (instregex "XCHG(16|32|64)rm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "XCHG8rm")>; -def: InstRW<[SKLWriteResGroup122], (instregex "XOR8mi")>; +def: InstRW<[SKLWriteResGroup65], (instregex "PUSHF16")>; +def: InstRW<[SKLWriteResGroup65], (instregex "PUSHF64")>; -def SKLWriteResGroup123 : SchedWriteRes<[SKLPort5]> { +def SKLWriteResGroup66 : SchedWriteRes<[SKLPort5]> { let Latency = 6; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup123], (instregex "PCLMULQDQrr")>; -def: InstRW<[SKLWriteResGroup123], (instregex "VPCLMULQDQrr")>; +def: InstRW<[SKLWriteResGroup66], (instregex "PCLMULQDQrr")>; +def: InstRW<[SKLWriteResGroup66], (instregex "VPCLMULQDQrr")>; -def SKLWriteResGroup124 : SchedWriteRes<[SKLPort0]> { +def SKLWriteResGroup67 : SchedWriteRes<[SKLPort23]> { + let Latency = 6; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup67], (instregex "LDDQUrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVAPDrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVAPSrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVDQArm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVDQUrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVNTDQArm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVSHDUPrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVSLDUPrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVUPDrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "MOVUPSrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VBROADCASTSSrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VLDDQUrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVAPDrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVAPSrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVDQArm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVDQUrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVNTDQArm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVSHDUPrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVSLDUPrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVUPDrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VMOVUPSrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VPBROADCASTDrm")>; +def: InstRW<[SKLWriteResGroup67], (instregex "VPBROADCASTQrm")>; + +def SKLWriteResGroup68 : SchedWriteRes<[SKLPort0]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[SKLWriteResGroup124], (instregex "MMX_CVTPI2PSirr")>; +def: InstRW<[SKLWriteResGroup68], (instregex "MMX_CVTPI2PSirr")>; -def SKLWriteResGroup125 : SchedWriteRes<[SKLPort0,SKLPort015]> { +def SKLWriteResGroup69 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup125], (instregex "CVTSD2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "CVTSD2SIrr")>; -def: 
InstRW<[SKLWriteResGroup125], (instregex "CVTSS2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "CVTSS2SIrr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "CVTTSD2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "CVTTSD2SIrr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTSD2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTSD2SIrr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTSS2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTSS2SIrr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTTSD2SI64rr")>; -def: InstRW<[SKLWriteResGroup125], (instregex "VCVTTSD2SIrr")>; - -def SKLWriteResGroup126 : SchedWriteRes<[SKLPort5,SKLPort23]> { +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PADDSBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PADDSWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PADDUSBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PADDUSWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PAVGBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PAVGWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPEQBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPEQDirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPEQWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPGTBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPGTDirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PCMPGTWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PMAXSWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PMAXUBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PMINSWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PMINUBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSLLDrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSLLQrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSLLWrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSRADrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSRAWrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSRLDrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSRLQrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSRLWrm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSUBSBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSUBSWirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSUBUSBirm")>; +def: InstRW<[SKLWriteResGroup69], (instregex "MMX_PSUBUSWirm")>; + +def SKLWriteResGroup70 : SchedWriteRes<[SKLPort0,SKLPort015]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup126], (instregex "PCLMULQDQrm")>; -def: InstRW<[SKLWriteResGroup126], (instregex "VPCLMULQDQrm")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTSD2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTSD2SIrr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTSS2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTSS2SIrr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTTSD2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "CVTTSD2SIrr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTSD2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTSD2SIrr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTSS2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTSS2SIrr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTTSD2SI64rr")>; +def: InstRW<[SKLWriteResGroup70], (instregex "VCVTTSD2SIrr")>; + 
+def SKLWriteResGroup71 : SchedWriteRes<[SKLPort5,SKLPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PALIGNR64irm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PINSRWirmi")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PSHUFBrm64")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PSHUFWmi")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKHBWirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKHDQirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKHWDirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKLBWirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKLDQirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MMX_PUNPCKLWDirm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MOVHPDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MOVHPSrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MOVLPDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "MOVLPSrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PINSRBrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PINSRDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PINSRQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PINSRWrmi")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXBDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXBQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXBWrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXDQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXWDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVSXWQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXBDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXBQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXBWrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXDQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXWDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "PMOVZXWQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VMOVHPDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VMOVHPSrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VMOVLPDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VMOVLPSrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPINSRBrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPINSRDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPINSRQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPINSRWrmi")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXBDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXBQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXBWrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXDQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXWDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVSXWQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXBDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXBQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXBWrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXDQrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXWDrm")>; +def: InstRW<[SKLWriteResGroup71], (instregex "VPMOVZXWQrm")>; + +def SKLWriteResGroup72 : SchedWriteRes<[SKLPort6,SKLPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup72], (instregex "FARJMP64")>; +def: InstRW<[SKLWriteResGroup72], (instregex "JMP(16|32|64)m")>; -def 
SKLWriteResGroup127 : SchedWriteRes<[SKLPort5,SKLPort01]> { +def SKLWriteResGroup73 : SchedWriteRes<[SKLPort23,SKLPort05]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PABSBrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PABSDrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PABSWrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PADDBirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PADDDirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PADDQirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PADDWirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PANDNirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PANDirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PORirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSIGNBrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSIGNDrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSIGNWrm64")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSUBBirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSUBDirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSUBQirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PSUBWirm")>; +def: InstRW<[SKLWriteResGroup73], (instregex "MMX_PXORirm")>; + +def SKLWriteResGroup74 : SchedWriteRes<[SKLPort23,SKLPort06]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup74], (instregex "ADC(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADC8rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADCX32rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADCX64rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADOX32rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "ADOX64rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "BT(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVAE(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVB(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVE(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVG(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVGE(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVL(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVLE(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNE(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNO(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNP(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVNS(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVO(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVP(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "CMOVS(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "RORX32mi")>; +def: InstRW<[SKLWriteResGroup74], (instregex "RORX64mi")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SARX32rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SARX64rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SBB(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SBB8rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SHLX32rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SHLX64rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SHRX32rm")>; +def: InstRW<[SKLWriteResGroup74], (instregex "SHRX64rm")>; + +def SKLWriteResGroup75 : 
SchedWriteRes<[SKLPort23,SKLPort15]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup75], (instregex "ANDN32rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "ANDN64rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSI32rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSI64rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSMSK32rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSMSK64rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSR32rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BLSR64rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BZHI32rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "BZHI64rm")>; +def: InstRW<[SKLWriteResGroup75], (instregex "MOVBE(16|32|64)rm")>; + +def SKLWriteResGroup76 : SchedWriteRes<[SKLPort23,SKLPort0156]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup76], (instregex "ADD(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "ADD8rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "AND(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "AND8rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP8mi")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP8mr")>; +def: InstRW<[SKLWriteResGroup76], (instregex "CMP8rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "OR(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "OR8rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "POP(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup76], (instregex "POP(16|32|64)rmr")>; +def: InstRW<[SKLWriteResGroup76], (instregex "SUB(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "SUB8rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "TEST(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup76], (instregex "TEST8mi")>; +def: InstRW<[SKLWriteResGroup76], (instregex "TEST8mr")>; +def: InstRW<[SKLWriteResGroup76], (instregex "XOR(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup76], (instregex "XOR8rm")>; + +def SKLWriteResGroup77 : SchedWriteRes<[SKLPort5,SKLPort01]> { let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup127], (instregex "HADDPDrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "HADDPSrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "HSUBPDrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "HSUBPSrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHADDPDYrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHADDPDrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHADDPSYrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHADDPSrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHSUBPDYrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHSUBPDrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHSUBPSYrr")>; -def: InstRW<[SKLWriteResGroup127], (instregex "VHSUBPSrr")>; - -def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort23]> { +def: InstRW<[SKLWriteResGroup77], (instregex "HADDPDrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "HADDPSrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "HSUBPDrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "HSUBPSrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHADDPDYrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex 
"VHADDPDrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHADDPSYrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHADDPSrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHSUBPDYrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHSUBPDrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHSUBPSYrr")>; +def: InstRW<[SKLWriteResGroup77], (instregex "VHSUBPSrr")>; + +def SKLWriteResGroup78 : SchedWriteRes<[SKLPort5,SKLPort015]> { let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2,1]; } -def: InstRW<[SKLWriteResGroup128], (instregex "ADD_FI16m")>; -def: InstRW<[SKLWriteResGroup128], (instregex "ADD_FI32m")>; -def: InstRW<[SKLWriteResGroup128], (instregex "SUBR_FI16m")>; -def: InstRW<[SKLWriteResGroup128], (instregex "SUBR_FI32m")>; -def: InstRW<[SKLWriteResGroup128], (instregex "SUB_FI16m")>; -def: InstRW<[SKLWriteResGroup128], (instregex "SUB_FI32m")>; +def: InstRW<[SKLWriteResGroup78], (instregex "CVTSI2SS64rr")>; +def: InstRW<[SKLWriteResGroup78], (instregex "VCVTSI2SS64rr")>; -def SKLWriteResGroup129 : SchedWriteRes<[SKLPort5,SKLPort015]> { +def SKLWriteResGroup79 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; } -def: InstRW<[SKLWriteResGroup129], (instregex "CVTSI2SS64rr")>; -def: InstRW<[SKLWriteResGroup129], (instregex "VCVTSI2SS64rr")>; +def: InstRW<[SKLWriteResGroup79], (instregex "SHLD(16|32|64)rrCL")>; +def: InstRW<[SKLWriteResGroup79], (instregex "SHRD(16|32|64)rrCL")>; -def SKLWriteResGroup130 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort015]> { +def SKLWriteResGroup80 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06,SKLPort0156]> { let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup130], (instregex "CVTSD2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTSD2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTSS2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTSS2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTTSD2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTTSD2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "CVTTSS2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTSD2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTSD2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTSS2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTSS2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTTSD2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTTSD2SIrm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTTSS2SI64rm")>; -def: InstRW<[SKLWriteResGroup130], (instregex "VCVTTSS2SIrm")>; - -def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { +def: InstRW<[SKLWriteResGroup80], (instregex "SLDT(16|32|64)r")>; + +def SKLWriteResGroup81 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort015]> { let Latency = 6; let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; + let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup131], (instregex "SHLD(16|32|64)rrCL")>; -def: InstRW<[SKLWriteResGroup131], (instregex "SHRD(16|32|64)rrCL")>; +def: InstRW<[SKLWriteResGroup81], (instregex "VCVTPS2PHmr")>; -def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { +def SKLWriteResGroup82 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { let Latency = 6; let 
NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup133], (instregex "HADDPDrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "HADDPSrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "HSUBPDrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "HSUBPSrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHADDPDYrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHADDPDrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHADDPSYrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHADDPSrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHSUBPDYrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHSUBPDrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHSUBPSYrm")>; -def: InstRW<[SKLWriteResGroup133], (instregex "VHSUBPSrm")>; - -def SKLWriteResGroup134 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06,SKLPort0156]> { +def: InstRW<[SKLWriteResGroup82], (instregex "BTC(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup82], (instregex "BTR(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup82], (instregex "BTS(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SAR(16|32|64)m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SAR(16|32|64)mi")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SAR8m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SAR8mi")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHL(16|32|64)m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHL(16|32|64)mi")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHL8m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHL8mi")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHR(16|32|64)m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHR(16|32|64)mi")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHR8m1")>; +def: InstRW<[SKLWriteResGroup82], (instregex "SHR8mi")>; + +def SKLWriteResGroup83 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> { let Latency = 6; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup134], (instregex "SLDT(16|32|64)r")>; - -def SKLWriteResGroup136 : SchedWriteRes<[SKLPort6,SKLPort0156]> { +def: InstRW<[SKLWriteResGroup83], (instregex "ADD(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "ADD(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "ADD8mi")>; +def: InstRW<[SKLWriteResGroup83], (instregex "ADD8mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "AND(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "AND(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "AND8mi")>; +def: InstRW<[SKLWriteResGroup83], (instregex "AND8mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "DEC(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "DEC8m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "INC(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "INC8m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "NEG(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "NEG8m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "NOT(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "NOT8m")>; +def: InstRW<[SKLWriteResGroup83], (instregex "OR(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "OR(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "OR8mi")>; +def: InstRW<[SKLWriteResGroup83], (instregex "OR8mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "POP(16|32|64)rmm")>; +def: InstRW<[SKLWriteResGroup83], (instregex "PUSH(16|32|64)rmm")>; +def: 
InstRW<[SKLWriteResGroup83], (instregex "SUB(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "SUB(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "SUB8mi")>; +def: InstRW<[SKLWriteResGroup83], (instregex "SUB8mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "XOR(16|32|64)mi8")>; +def: InstRW<[SKLWriteResGroup83], (instregex "XOR(16|32|64)mr")>; +def: InstRW<[SKLWriteResGroup83], (instregex "XOR8mi")>; +def: InstRW<[SKLWriteResGroup83], (instregex "XOR8mr")>; + +def SKLWriteResGroup84 : SchedWriteRes<[SKLPort6,SKLPort0156]> { let Latency = 6; let NumMicroOps = 6; let ResourceCycles = [1,5]; } -def: InstRW<[SKLWriteResGroup136], (instregex "STD")>; +def: InstRW<[SKLWriteResGroup84], (instregex "STD")>; -def SKLWriteResGroup137 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 6; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,2,1]; +def SKLWriteResGroup85 : SchedWriteRes<[SKLPort23]> { + let Latency = 7; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKLWriteResGroup85], (instregex "LD_F32m")>; +def: InstRW<[SKLWriteResGroup85], (instregex "LD_F64m")>; +def: InstRW<[SKLWriteResGroup85], (instregex "LD_F80m")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VBROADCASTF128")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VBROADCASTI128")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VBROADCASTSDYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VBROADCASTSSYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VLDDQUYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVAPDYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVAPSYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVDDUPYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVDQAYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVDQUYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVNTDQAYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVSHDUPYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVSLDUPYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVUPDYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VMOVUPSYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VPBROADCASTDYrm")>; +def: InstRW<[SKLWriteResGroup85], (instregex "VPBROADCASTQYrm")>; + +def SKLWriteResGroup86 : SchedWriteRes<[SKLPort0,SKLPort5]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup137], (instregex "SHLD(16|32|64)mrCL")>; -def: InstRW<[SKLWriteResGroup137], (instregex "SHRD(16|32|64)mrCL")>; +def: InstRW<[SKLWriteResGroup86], (instregex "VCVTDQ2PDYrr")>; -def SKLWriteResGroup142 : SchedWriteRes<[SKLPort0,SKLPort5]> { +def SKLWriteResGroup87 : SchedWriteRes<[SKLPort0,SKLPort23]> { let Latency = 7; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup142], (instregex "VCVTDQ2PDYrr")>; +def: InstRW<[SKLWriteResGroup87], (instregex "COMISDrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "COMISSrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "UCOMISDrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "UCOMISSrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "VCOMISDrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "VCOMISSrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "VUCOMISDrm")>; +def: InstRW<[SKLWriteResGroup87], (instregex "VUCOMISSrm")>; -def SKLWriteResGroup143 : SchedWriteRes<[SKLPort5,SKLPort015]> { +def SKLWriteResGroup88 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 7; 
let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTPD2DQYrr")>; -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTPD2PSYrr")>; -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTPH2PSYrr")>; -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTPS2PDYrr")>; -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTPS2PHYrr")>; -def: InstRW<[SKLWriteResGroup143], (instregex "VCVTTPD2DQYrr")>; +def: InstRW<[SKLWriteResGroup88], (instregex "INSERTPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PACKSSDWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PACKSSWBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PACKUSDWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PACKUSWBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PALIGNRrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PBLENDWrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PSHUFBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PSHUFDmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PSHUFHWmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PSHUFLWmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKHBWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKHDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKHQDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKHWDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKLBWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKLDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKLQDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "PUNPCKLWDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "SHUFPDrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "SHUFPSrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "UNPCKHPDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "UNPCKHPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "UNPCKLPDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "UNPCKLPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VINSERTPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPACKSSDWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPACKSSWBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPACKUSDWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPACKUSWBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPALIGNRrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPBLENDWrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPBROADCASTBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPBROADCASTWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPERMILPDmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPERMILPDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPERMILPSmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPERMILPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPSHUFBrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPSHUFDmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPSHUFHWmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPSHUFLWmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKHBWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKHDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKHQDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKHWDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKLBWrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKLDQrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VPUNPCKLQDQrm")>; +def: InstRW<[SKLWriteResGroup88], 
(instregex "VPUNPCKLWDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VSHUFPDrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VSHUFPSrmi")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VUNPCKHPDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VUNPCKHPSrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VUNPCKLPDrm")>; +def: InstRW<[SKLWriteResGroup88], (instregex "VUNPCKLPSrm")>; + +def SKLWriteResGroup89 : SchedWriteRes<[SKLPort5,SKLPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPD2DQYrr")>; +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPD2PSYrr")>; +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPH2PSYrr")>; +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPS2PDYrr")>; +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTPS2PHYrr")>; +def: InstRW<[SKLWriteResGroup89], (instregex "VCVTTPD2DQYrr")>; + +def SKLWriteResGroup90 : SchedWriteRes<[SKLPort01,SKLPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup90], (instregex "PABSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PABSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PABSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PADDSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PADDSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PADDUSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PADDUSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PAVGBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PAVGWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPEQBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPEQDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPEQQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPEQWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPGTBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPGTDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PCMPGTWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMAXSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMAXSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMAXSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMAXUBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMAXUDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMAXUWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINUBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINUDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PMINUWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSIGNBrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSIGNDrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSIGNWrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSLLDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSLLQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSLLWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSRADrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSRAWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSRLDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSRLQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSRLWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSUBSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSUBSWrm")>; +def: 
InstRW<[SKLWriteResGroup90], (instregex "PSUBUSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "PSUBUSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPABSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPABSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPABSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPADDSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPADDSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPADDUSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPADDUSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPAVGBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPAVGWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPEQBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPEQDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPEQQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPEQWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPGTBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPGTDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPCMPGTWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXUBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXUDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMAXUWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINSDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINUBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINUDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPMINUWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSIGNBrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSIGNDrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSIGNWrm128")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSLLDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSLLQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSLLVDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSLLVQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSLLWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRADrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRAVDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRAWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRLDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRLQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRLVDrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRLVQrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSRLWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSUBSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSUBSWrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSUBUSBrm")>; +def: InstRW<[SKLWriteResGroup90], (instregex "VPSUBUSWrm")>; + +def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup91], (instregex "ANDNPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "ANDNPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "ANDPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "ANDPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "BLENDPDrmi")>; +def: InstRW<[SKLWriteResGroup91], (instregex 
"BLENDPSrmi")>; +def: InstRW<[SKLWriteResGroup91], (instregex "ORPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "ORPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PADDBrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PADDDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PADDQrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PADDWrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PANDNrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PANDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PORrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PSUBBrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PSUBDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PSUBQrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PSUBWrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "PXORrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VANDNPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VANDNPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VANDPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VANDPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VBLENDPDrmi")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VBLENDPSrmi")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VINSERTF128rm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VINSERTI128rm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VMASKMOVPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VMASKMOVPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VORPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VORPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPADDBrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPADDDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPADDQrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPADDWrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPANDNrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPANDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPBLENDDrmi")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPMASKMOVDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPMASKMOVQrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPORrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPSUBBrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPSUBDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPSUBQrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPSUBWrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VPXORrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VXORPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "VXORPSrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "XORPDrm")>; +def: InstRW<[SKLWriteResGroup91], (instregex "XORPSrm")>; + +def SKLWriteResGroup92 : SchedWriteRes<[SKLPort5,SKLPort23]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKLWriteResGroup92], (instregex "MMX_PACKSSDWirm")>; +def: InstRW<[SKLWriteResGroup92], (instregex "MMX_PACKSSWBirm")>; +def: InstRW<[SKLWriteResGroup92], (instregex "MMX_PACKUSWBirm")>; + +def SKLWriteResGroup93 : SchedWriteRes<[SKLPort23,SKLPort06]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKLWriteResGroup93], (instregex "CMOVA(16|32|64)rm")>; +def: InstRW<[SKLWriteResGroup93], (instregex "CMOVBE(16|32|64)rm")>; + +def SKLWriteResGroup94 : SchedWriteRes<[SKLPort23,SKLPort0156]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKLWriteResGroup94], (instregex "LEAVE64")>; 
+def: InstRW<[SKLWriteResGroup94], (instregex "SCASB")>;
+def: InstRW<[SKLWriteResGroup94], (instregex "SCASL")>;
+def: InstRW<[SKLWriteResGroup94], (instregex "SCASQ")>;
+def: InstRW<[SKLWriteResGroup94], (instregex "SCASW")>;
-def SKLWriteResGroup145 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
+def SKLWriteResGroup95 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015]> {
   let Latency = 7;
   let NumMicroOps = 3;
   let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SKLWriteResGroup145], (instregex "MUL_FI16m")>;
-def: InstRW<[SKLWriteResGroup145], (instregex "MUL_FI32m")>;
-def: InstRW<[SKLWriteResGroup145], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[SKLWriteResGroup95], (instregex "CVTTSS2SI64rr")>;
+def: InstRW<[SKLWriteResGroup95], (instregex "CVTTSS2SIrr")>;
+def: InstRW<[SKLWriteResGroup95], (instregex "VCVTTSS2SI64rr")>;
+def: InstRW<[SKLWriteResGroup95], (instregex "VCVTTSS2SIrr")>;
-def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015]> {
+def SKLWriteResGroup96 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05]> {
   let Latency = 7;
   let NumMicroOps = 3;
   let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SKLWriteResGroup146], (instregex "CVTTSS2SI64rr")>;
-def: InstRW<[SKLWriteResGroup146], (instregex "CVTTSS2SIrr")>;
-def: InstRW<[SKLWriteResGroup146], (instregex "VCVTTSS2SI64rr")>;
-def: InstRW<[SKLWriteResGroup146], (instregex "VCVTTSS2SIrr")>;
+def: InstRW<[SKLWriteResGroup96], (instregex "FLDCW16m")>;
-def SKLWriteResGroup149 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015]> {
+def SKLWriteResGroup97 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort0156]> {
   let Latency = 7;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SKLWriteResGroup149], (instregex "CVTTSS2SI64rm")>;
+def: InstRW<[SKLWriteResGroup97], (instregex "LDMXCSR")>;
+def: InstRW<[SKLWriteResGroup97], (instregex "VLDMXCSR")>;
-def SKLWriteResGroup150 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort015]> {
+def SKLWriteResGroup98 : SchedWriteRes<[SKLPort6,SKLPort23,SKLPort0156]> {
   let Latency = 7;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,1,1,1];
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup98], (instregex "LRETQ")>;
+def: InstRW<[SKLWriteResGroup98], (instregex "RETQ")>;
+
+def SKLWriteResGroup99 : SchedWriteRes<[SKLPort23,SKLPort06,SKLPort15]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup99], (instregex "BEXTR32rm")>;
+def: InstRW<[SKLWriteResGroup99], (instregex "BEXTR64rm")>;
+
+def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,2];
 }
-def: InstRW<[SKLWriteResGroup150], (instregex "VCVTPS2PHYmr")>;
+def: InstRW<[SKLWriteResGroup100], (instregex "ROL(16|32|64)m1")>;
+def: InstRW<[SKLWriteResGroup100], (instregex "ROL(16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup100], (instregex "ROL8m1")>;
+def: InstRW<[SKLWriteResGroup100], (instregex "ROL8mi")>;
+def: InstRW<[SKLWriteResGroup100], (instregex "ROR(16|32|64)m1")>;
+def: InstRW<[SKLWriteResGroup100], (instregex "ROR(16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup100], (instregex "ROR8m1")>;
+def: InstRW<[SKLWriteResGroup100], (instregex "ROR8mi")>;
-def SKLWriteResGroup151 : SchedWriteRes<[SKLPort6,SKLPort06,SKLPort15,SKLPort0156]> {
+def SKLWriteResGroup101 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[SKLWriteResGroup101], (instregex "XADD(16|32|64)rm")>;
+def: InstRW<[SKLWriteResGroup101], (instregex "XADD8rm")>;
+
+def SKLWriteResGroup102 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> {
+  let Latency = 7;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,1,1];
+}
+def: InstRW<[SKLWriteResGroup102], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[SKLWriteResGroup102], (instregex "FARCALL64")>;
+
+def SKLWriteResGroup103 : SchedWriteRes<[SKLPort6,SKLPort06,SKLPort15,SKLPort0156]> {
   let Latency = 7;
   let NumMicroOps = 7;
   let ResourceCycles = [1,3,1,2];
 }
-def: InstRW<[SKLWriteResGroup151], (instregex "LOOP")>;
+def: InstRW<[SKLWriteResGroup103], (instregex "LOOP")>;
-def SKLWriteResGroup156 : SchedWriteRes<[SKLPort0]> {
+def SKLWriteResGroup104 : SchedWriteRes<[SKLPort0]> {
   let Latency = 8;
   let NumMicroOps = 2;
   let ResourceCycles = [2];
 }
-def: InstRW<[SKLWriteResGroup156], (instregex "AESIMCrr")>;
-def: InstRW<[SKLWriteResGroup156], (instregex "VAESIMCrr")>;
+def: InstRW<[SKLWriteResGroup104], (instregex "AESIMCrr")>;
+def: InstRW<[SKLWriteResGroup104], (instregex "VAESIMCrr")>;
-def SKLWriteResGroup157 : SchedWriteRes<[SKLPort015]> {
+def SKLWriteResGroup105 : SchedWriteRes<[SKLPort015]> {
   let Latency = 8;
   let NumMicroOps = 2;
   let ResourceCycles = [2];
 }
-def: InstRW<[SKLWriteResGroup157], (instregex "PMULLDrr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "ROUNDPDr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "ROUNDPSr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "ROUNDSDr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "ROUNDSSr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "VPMULLDYrr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "VPMULLDrr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "VROUNDPDr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "VROUNDPSr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "VROUNDSDr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "VROUNDSSr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "VROUNDYPDr")>;
-def: InstRW<[SKLWriteResGroup157], (instregex "VROUNDYPSr")>;
-
-def SKLWriteResGroup160 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+def: InstRW<[SKLWriteResGroup105], (instregex "PMULLDrr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDPDr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDPSr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDSDr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "ROUNDSSr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "VPMULLDYrr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "VPMULLDrr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDPDr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDPSr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDSDr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDSSr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDYPDr")>;
+def: InstRW<[SKLWriteResGroup105], (instregex "VROUNDYPSr")>;
+
+def SKLWriteResGroup106 : SchedWriteRes<[SKLPort0,SKLPort23]> {
   let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup106], (instregex "VTESTPDrm")>;
+def: InstRW<[SKLWriteResGroup106], (instregex "VTESTPSrm")>;
+
+def SKLWriteResGroup107 : SchedWriteRes<[SKLPort1,SKLPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup107], (instregex "BSF(16|32|64)rm")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "BSR(16|32|64)rm")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "IMUL64m")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "IMUL(32|64)rm(i8?)")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "IMUL8m")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "LZCNT(16|32|64)rm")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "MUL(16|32|64)m")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "MUL8m")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "PDEP32rm")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "PDEP64rm")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "PEXT32rm")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "PEXT64rm")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "POPCNT(16|32|64)rm")>;
+def: InstRW<[SKLWriteResGroup107], (instregex "TZCNT(16|32|64)rm")>;
+
+def SKLWriteResGroup107_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
+  let Latency = 3;
   let NumMicroOps = 3;
-  let ResourceCycles = [2,1];
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup107_16], (instregex "IMUL16rm(i8?)")>;
+
+def SKLWriteResGroup107_16_2 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
+  let Latency = 3;
+  let NumMicroOps = 5;
+}
+def: InstRW<[SKLWriteResGroup107_16_2], (instregex "IMUL16m")>;
+def: InstRW<[SKLWriteResGroup107_16_2], (instregex "MUL16m")>;
+
+def SKLWriteResGroup107_32 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup107_32], (instregex "IMUL32m")>;
+def: InstRW<[SKLWriteResGroup107_32], (instregex "MUL32m")>;
+
+def SKLWriteResGroup108 : SchedWriteRes<[SKLPort5,SKLPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup108], (instregex "FCOM32m")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "FCOM64m")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "FCOMP32m")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "FCOMP64m")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "MMX_PSADBWirm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPACKSSDWYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPACKSSWBYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPACKUSDWYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPACKUSWBYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPALIGNRYrmi")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPBLENDWYrmi")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPBROADCASTBYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPBROADCASTWYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPERMILPDYmi")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPERMILPDYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPERMILPSYmi")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPERMILPSYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPMOVSXBDYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPMOVSXBQYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPMOVSXWQYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPSHUFBYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPSHUFDYmi")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPSHUFHWYmi")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPSHUFLWYmi")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKHBWYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKHDQYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKHQDQYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKHWDYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKLBWYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKLDQYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKLQDQYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VPUNPCKLWDYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VSHUFPDYrmi")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VSHUFPSYrmi")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VUNPCKHPDYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VUNPCKHPSYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VUNPCKLPDYrm")>;
+def: InstRW<[SKLWriteResGroup108], (instregex "VUNPCKLPSYrm")>;
+
+def SKLWriteResGroup109 : SchedWriteRes<[SKLPort01,SKLPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup109], (instregex "VPABSBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPABSDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPABSWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPADDSBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPADDSWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPADDUSBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPADDUSWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPAVGBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPAVGWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPEQBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPEQDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPEQQYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPEQWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPGTBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPGTDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPCMPGTWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXSBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXSDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXSWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXUBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXUDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMAXUWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMINSBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMINSDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMINSWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMINUBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMINUDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPMINUWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSIGNBYrm256")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSIGNDYrm256")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSIGNWYrm256")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLQYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLVDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLVQYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSLLWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSRADYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSRAVDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSRAWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSRLDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSRLQYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSRLVDYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSRLVQYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSRLWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSUBSBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSUBSWYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSUBUSBYrm")>;
+def: InstRW<[SKLWriteResGroup109], (instregex "VPSUBUSWYrm")>;
+
+def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup110], (instregex "VANDNPDYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VANDNPSYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VANDPDYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VANDPSYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VBLENDPDYrmi")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VBLENDPSYrmi")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPDYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPSYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VORPDYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VORPSYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPADDBYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPADDDYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPADDQYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPADDWYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPANDNYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPANDYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPBLENDDYrmi")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPMASKMOVDYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPMASKMOVQYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPORYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPSUBBYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPSUBDYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPSUBQYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPSUBWYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VPXORYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VXORPDYrm")>;
+def: InstRW<[SKLWriteResGroup110], (instregex "VXORPSYrm")>;
+
+def SKLWriteResGroup111 : SchedWriteRes<[SKLPort23,SKLPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SKLWriteResGroup111], (instregex "BLENDVPDrm0")>;
+def: InstRW<[SKLWriteResGroup111], (instregex "BLENDVPSrm0")>;
+def: InstRW<[SKLWriteResGroup111], (instregex "PBLENDVBrm0")>;
+def: InstRW<[SKLWriteResGroup111], (instregex "VBLENDVPDrm")>;
+def: InstRW<[SKLWriteResGroup111], (instregex "VBLENDVPSrm")>;
+def: InstRW<[SKLWriteResGroup111], (instregex "VPBLENDVBYrm")>;
+def: InstRW<[SKLWriteResGroup111], (instregex "VPBLENDVBrm")>;
+
+def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PHADDSWrm64")>;
+def: InstRW<[SKLWriteResGroup112], (instregex "MMX_PHSUBSWrm64")>;
+
+def SKLWriteResGroup113 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort05]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
 }
-def: InstRW<[SKLWriteResGroup160], (instregex "AESIMCrm")>;
-def: InstRW<[SKLWriteResGroup160], (instregex "VAESIMCrm")>;
+def: InstRW<[SKLWriteResGroup113], (instregex "MMX_PHADDWrm64")>;
+def: InstRW<[SKLWriteResGroup113], (instregex "MMX_PHADDrm64")>;
+def: InstRW<[SKLWriteResGroup113], (instregex "MMX_PHSUBDrm64")>;
+def: InstRW<[SKLWriteResGroup113], (instregex "MMX_PHSUBWrm64")>;
-def SKLWriteResGroup161 : SchedWriteRes<[SKLPort23,SKLPort015]> {
+def SKLWriteResGroup114 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237,SKLPort015]> {
   let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SKLWriteResGroup114], (instregex "VCVTPS2PHYmr")>;
+
+def SKLWriteResGroup115 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,3];
+}
+def: InstRW<[SKLWriteResGroup115], (instregex "ROR(16|32|64)mCL")>;
+def: InstRW<[SKLWriteResGroup115], (instregex "ROR8mCL")>;
+
+def SKLWriteResGroup116 : SchedWriteRes<[SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[SKLWriteResGroup116], (instregex "RCL(16|32|64)m1")>;
+def: InstRW<[SKLWriteResGroup116], (instregex "RCL(16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup116], (instregex "RCL8m1")>;
+def: InstRW<[SKLWriteResGroup116], (instregex "RCL8mi")>;
+def: InstRW<[SKLWriteResGroup116], (instregex "RCR(16|32|64)m1")>;
+def: InstRW<[SKLWriteResGroup116], (instregex "RCR(16|32|64)mi")>;
+def: InstRW<[SKLWriteResGroup116], (instregex "RCR8m1")>;
+def: InstRW<[SKLWriteResGroup116], (instregex "RCR8mi")>;
+
+def SKLWriteResGroup117 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
+  let Latency = 8;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,1,1,3];
+}
+def: InstRW<[SKLWriteResGroup117], (instregex "ROL(16|32|64)mCL")>;
+def: InstRW<[SKLWriteResGroup117], (instregex "ROL8mCL")>;
+def: InstRW<[SKLWriteResGroup117], (instregex "SAR(16|32|64)mCL")>;
+def: InstRW<[SKLWriteResGroup117], (instregex "SAR8mCL")>;
+def: InstRW<[SKLWriteResGroup117], (instregex "SHL(16|32|64)mCL")>;
+def: InstRW<[SKLWriteResGroup117], (instregex "SHL8mCL")>;
+def: InstRW<[SKLWriteResGroup117], (instregex "SHR(16|32|64)mCL")>;
+def: InstRW<[SKLWriteResGroup117], (instregex "SHR8mCL")>;
+
+def SKLWriteResGroup118 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,1,1,3];
+}
+def: InstRW<[SKLWriteResGroup118], (instregex "ADC(16|32|64)mi8")>;
+def: InstRW<[SKLWriteResGroup118], (instregex "ADC8mi")>;
+
+def SKLWriteResGroup119 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
+  let Latency = 8;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,1,1,2,1];
+}
+def: InstRW<[SKLWriteResGroup119], (instregex "ADC(16|32|64)mr")>;
+def: InstRW<[SKLWriteResGroup119], (instregex "ADC8mr")>;
+def: InstRW<[SKLWriteResGroup119], (instregex "CMPXCHG(16|32|64)rm")>;
+def: InstRW<[SKLWriteResGroup119], (instregex "CMPXCHG8rm")>;
+def: InstRW<[SKLWriteResGroup119], (instregex "SBB(16|32|64)mi8")>;
+def: InstRW<[SKLWriteResGroup119], (instregex "SBB(16|32|64)mr")>;
+def: InstRW<[SKLWriteResGroup119], (instregex "SBB8mi")>;
+def: InstRW<[SKLWriteResGroup119], (instregex "SBB8mr")>;
+
+def SKLWriteResGroup120 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup120], (instregex "MMX_CVTPI2PSirm")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMADDUBSWrm64")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMADDWDirm")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMULHRSWrm64")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMULHUWirm")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMULHWirm")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMULLWirm")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "MMX_PMULUDQirm")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "RCPSSm")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "RSQRTSSm")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "VRCPSSm")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "VRSQRTSSm")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "VTESTPDYrm")>;
+def: InstRW<[SKLWriteResGroup120], (instregex "VTESTPSYrm")>;
+
+def SKLWriteResGroup121 : SchedWriteRes<[SKLPort5,SKLPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup121], (instregex "PCMPGTQrm")>;
+def: InstRW<[SKLWriteResGroup121], (instregex "PSADBWrm")>;
+def: InstRW<[SKLWriteResGroup121], (instregex "VPCMPGTQrm")>;
+def: InstRW<[SKLWriteResGroup121], (instregex "VPMOVSXBWYrm")>;
+def: InstRW<[SKLWriteResGroup121], (instregex "VPMOVSXDQYrm")>;
+def: InstRW<[SKLWriteResGroup121], (instregex "VPMOVSXWDYrm")>;
+def: InstRW<[SKLWriteResGroup121], (instregex "VPMOVZXWDYrm")>;
+def: InstRW<[SKLWriteResGroup121], (instregex "VPSADBWrm")>;
+
+def SKLWriteResGroup122 : SchedWriteRes<[SKLPort01,SKLPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup122], (instregex "ADDSDrm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "ADDSSrm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "MULSDrm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "MULSSrm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "SUBSDrm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "SUBSSrm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VADDSDrm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VADDSSrm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD132SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD132SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD213SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD213SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD231SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMADD231SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB132SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB132SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB213SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB213SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB231SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFMSUB231SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD132SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD132SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD213SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD213SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD231SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMADD231SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB132SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB132SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB213SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB213SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB231SDm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VFNMSUB231SSm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VMULSDrm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VMULSSrm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VSUBSDrm")>;
+def: InstRW<[SKLWriteResGroup122], (instregex "VSUBSSrm")>;
+
+def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort015]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup123], (instregex "CMPSSrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "CVTPS2PDrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "MAXSDrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "MAXSSrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "MINSDrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "MINSSrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVTPS2PIirm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVTTPS2PIirm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "VCMPSDrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "VCMPSSrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "VCVTPH2PSrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "VCVTPS2PDrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "VMAXSDrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "VMAXSSrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "VMINSDrm")>;
+def: InstRW<[SKLWriteResGroup123], (instregex "VMINSSrm")>;
+
+def SKLWriteResGroup124 : SchedWriteRes<[SKLPort5,SKLPort015]> {
+  let Latency = 9;
   let NumMicroOps = 3;
   let ResourceCycles = [1,2];
 }
-def: InstRW<[SKLWriteResGroup161], (instregex "PMULLDrm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "ROUNDPDm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "ROUNDPSm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "ROUNDSDm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "ROUNDSSm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "VPMULLDYrm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "VPMULLDrm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDPDm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDPSm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDSDm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDSSm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDYPDm")>;
-def: InstRW<[SKLWriteResGroup161], (instregex "VROUNDYPSm")>;
-
-def SKLWriteResGroup165 : SchedWriteRes<[SKLPort5,SKLPort015]> {
+def: InstRW<[SKLWriteResGroup124], (instregex "DPPDrri")>;
+def: InstRW<[SKLWriteResGroup124], (instregex "VDPPDrri")>;
+
+def SKLWriteResGroup125 : SchedWriteRes<[SKLPort23,SKLPort015]> {
   let Latency = 9;
   let NumMicroOps = 3;
   let ResourceCycles = [1,2];
 }
-def: InstRW<[SKLWriteResGroup165], (instregex "DPPDrri")>;
-def: InstRW<[SKLWriteResGroup165], (instregex "VDPPDrri")>;
+def: InstRW<[SKLWriteResGroup125], (instregex "VBLENDVPDYrm")>;
+def: InstRW<[SKLWriteResGroup125], (instregex "VBLENDVPSYrm")>;
+
+def SKLWriteResGroup126 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup126], (instregex "PTESTrm")>;
+def: InstRW<[SKLWriteResGroup126], (instregex "VPTESTrm")>;
+
+def SKLWriteResGroup127 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup127], (instregex "MULX64rm")>;
-def SKLWriteResGroup167 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
+def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
   let Latency = 9;
   let NumMicroOps = 4;
-  let ResourceCycles = [1,1,2];
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SKLWriteResGroup128], (instregex "PHADDSWrm128")>;
+def: InstRW<[SKLWriteResGroup128], (instregex "PHSUBSWrm128")>;
+def: InstRW<[SKLWriteResGroup128], (instregex "VPHADDSWrm128")>;
+def: InstRW<[SKLWriteResGroup128], (instregex "VPHSUBSWrm128")>;
+
+def SKLWriteResGroup129 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SKLWriteResGroup129], (instregex "PHADDDrm")>;
+def: InstRW<[SKLWriteResGroup129], (instregex "PHADDWrm")>;
+def: InstRW<[SKLWriteResGroup129], (instregex "PHSUBDrm")>;
+def: InstRW<[SKLWriteResGroup129], (instregex "PHSUBWrm")>;
+def: InstRW<[SKLWriteResGroup129], (instregex "VPHADDDrm")>;
+def: InstRW<[SKLWriteResGroup129], (instregex "VPHADDWrm")>;
+def: InstRW<[SKLWriteResGroup129], (instregex "VPHSUBDrm")>;
+def: InstRW<[SKLWriteResGroup129], (instregex "VPHSUBWrm")>;
+
+def SKLWriteResGroup130 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort0156]> {
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
 }
-def: InstRW<[SKLWriteResGroup167], (instregex "DPPDrmi")>;
-def: InstRW<[SKLWriteResGroup167], (instregex "VDPPDrmi")>;
+def: InstRW<[SKLWriteResGroup130], (instregex "SHLD(16|32|64)mri8")>;
+def: InstRW<[SKLWriteResGroup130], (instregex "SHRD(16|32|64)mri8")>;
-def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0]> {
+def SKLWriteResGroup131 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> {
+  let Latency = 9;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SKLWriteResGroup131], (instregex "LAR(16|32|64)rm")>;
+def: InstRW<[SKLWriteResGroup131], (instregex "LSL(16|32|64)rm")>;
+
+def SKLWriteResGroup132 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup132], (instregex "AESDECLASTrm")>;
+def: InstRW<[SKLWriteResGroup132], (instregex "AESDECrm")>;
+def: InstRW<[SKLWriteResGroup132], (instregex "AESENCLASTrm")>;
+def: InstRW<[SKLWriteResGroup132], (instregex "AESENCrm")>;
+def: InstRW<[SKLWriteResGroup132], (instregex "RCPPSm")>;
+def: InstRW<[SKLWriteResGroup132], (instregex "RSQRTPSm")>;
+def: InstRW<[SKLWriteResGroup132], (instregex "VAESDECLASTrm")>;
+def: InstRW<[SKLWriteResGroup132], (instregex "VAESDECrm")>;
+def: InstRW<[SKLWriteResGroup132], (instregex "VAESENCLASTrm")>;
+def: InstRW<[SKLWriteResGroup132], (instregex "VAESENCrm")>;
+def: InstRW<[SKLWriteResGroup132], (instregex "VRCPPSm")>;
+def: InstRW<[SKLWriteResGroup132], (instregex "VRSQRTPSm")>;
+
+def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup133], (instregex "ADD_F32m")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "ADD_F64m")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "ILD_F16m")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "ILD_F32m")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "ILD_F64m")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "SUBR_F32m")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "SUBR_F64m")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "SUB_F32m")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "SUB_F64m")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPCMPGTQYrm")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPERM2F128rm")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPERM2I128rm")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPERMDYrm")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPERMPDYmi")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPERMPSYrm")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPERMQYmi")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPMOVZXBDYrm")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPMOVZXBQYrm")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPMOVZXBWYrm")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPMOVZXDQYrm")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPMOVZXWQYrm")>;
+def: InstRW<[SKLWriteResGroup133], (instregex "VPSADBWYrm")>;
+
+def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup134], (instregex "ADDPDrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "ADDPSrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "ADDSUBPDrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "ADDSUBPSrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "MULPDrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "MULPSrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "SUBPDrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "SUBPSrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VADDPDrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VADDPSrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VADDSUBPDrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VADDSUBPSrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD132PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD132PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD213PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD213PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD231PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADD231PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB132PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB132PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB213PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB213PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB231PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMADDSUB231PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB132PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB132PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB213PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB213PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB231PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUB231PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD132PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD132PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD213PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD213PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD231PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFMSUBADD231PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD132PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD132PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD213PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD213PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD231PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMADD231PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB132PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB132PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB213PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB213PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB231PDm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VFNMSUB231PSm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VMULPDrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VMULPSrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VSUBPDrm")>;
+def: InstRW<[SKLWriteResGroup134], (instregex "VSUBPSrm")>;
+
+def SKLWriteResGroup135 : SchedWriteRes<[SKLPort23,SKLPort015]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup135], (instregex "CMPPDrmi")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "CMPPSrmi")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "CVTDQ2PSrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "CVTPS2DQrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "CVTSS2SDrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "CVTTPS2DQrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "MAXPDrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "MAXPSrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "MINPDrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "MINPSrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "PHMINPOSUWrm128")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "PMADDUBSWrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "PMADDWDrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "PMULDQrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "PMULHRSWrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "PMULHUWrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "PMULHWrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "PMULLWrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "PMULUDQrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VCMPPDrmi")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VCMPPSrmi")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VCVTDQ2PSrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VCVTPH2PSYrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VCVTPS2DQrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VCVTSS2SDrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VCVTTPS2DQrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VMAXPDrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VMAXPSrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VMINPDrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VMINPSrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VPHMINPOSUWrm128")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VPMADDUBSWrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VPMADDWDrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VPMULDQrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VPMULHRSWrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VPMULHUWrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VPMULHWrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VPMULLWrm")>;
+def: InstRW<[SKLWriteResGroup135], (instregex "VPMULUDQrm")>;
+
+def SKLWriteResGroup136 : SchedWriteRes<[SKLPort0]> {
   let Latency = 10;
   let NumMicroOps = 3;
   let ResourceCycles = [3];
 }
-def: InstRW<[SKLWriteResGroup169], (instregex "PCMPISTRIrr")>;
-def: InstRW<[SKLWriteResGroup169], (instregex "PCMPISTRM128rr")>;
-def: InstRW<[SKLWriteResGroup169], (instregex "VPCMPISTRIrr")>;
-def: InstRW<[SKLWriteResGroup169], (instregex "VPCMPISTRM128rr")>;
+def: InstRW<[SKLWriteResGroup136], (instregex "PCMPISTRIrr")>;
+def: InstRW<[SKLWriteResGroup136], (instregex "PCMPISTRM128rr")>;
+def: InstRW<[SKLWriteResGroup136], (instregex "VPCMPISTRIrr")>;
+def: InstRW<[SKLWriteResGroup136], (instregex "VPCMPISTRM128rr")>;
-def SKLWriteResGroup170 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+def SKLWriteResGroup137 : SchedWriteRes<[SKLPort5,SKLPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SKLWriteResGroup137], (instregex "MPSADBWrmi")>;
+def: InstRW<[SKLWriteResGroup137], (instregex "VMPSADBWrmi")>;
+
+def SKLWriteResGroup138 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup138], (instregex "MMX_CVTPI2PDirm")>;
+def: InstRW<[SKLWriteResGroup138], (instregex "VPTESTYrm")>;
+
+def SKLWriteResGroup139 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
+  let Latency = 10;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup139], (instregex "CVTSD2SSrm")>;
+def: InstRW<[SKLWriteResGroup139], (instregex "VCVTSD2SSrm")>;
+
+def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
   let Latency = 10;
   let NumMicroOps = 4;
-  let ResourceCycles = [3,1];
+  let ResourceCycles = [2,1,1];
 }
-def: InstRW<[SKLWriteResGroup170], (instregex "PCMPISTRIrm")>;
-def: InstRW<[SKLWriteResGroup170], (instregex "PCMPISTRM128rm")>;
-def: InstRW<[SKLWriteResGroup170], (instregex "VPCMPISTRIrm")>;
-def: InstRW<[SKLWriteResGroup170], (instregex "VPCMPISTRM128rm")>;
+def: InstRW<[SKLWriteResGroup140], (instregex "VPHADDSWrm256")>;
+def: InstRW<[SKLWriteResGroup140], (instregex "VPHSUBSWrm256")>;
-def SKLWriteResGroup171 : SchedWriteRes<[SKLPort05,SKLPort0156]> {
+def SKLWriteResGroup141 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
   let Latency = 10;
-  let NumMicroOps = 10;
-  let ResourceCycles = [9,1];
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SKLWriteResGroup141], (instregex "VPHADDDYrm")>;
+def: InstRW<[SKLWriteResGroup141], (instregex "VPHADDWYrm")>;
+def: InstRW<[SKLWriteResGroup141], (instregex "VPHSUBDYrm")>;
+def: InstRW<[SKLWriteResGroup141], (instregex "VPHSUBWYrm")>;
+
+def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> {
+  let Latency = 10;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SKLWriteResGroup142], (instregex "MULX32rm")>;
+
+def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
+  let Latency = 10;
+  let NumMicroOps = 8;
+  let ResourceCycles = [1,1,1,1,1,3];
 }
-def: InstRW<[SKLWriteResGroup171], (instregex "MMX_EMMS")>;
+def: InstRW<[SKLWriteResGroup143], (instregex "ADD8mi")>;
+def: InstRW<[SKLWriteResGroup143], (instregex "AND8mi")>;
+def: InstRW<[SKLWriteResGroup143], (instregex "OR8mi")>;
+def: InstRW<[SKLWriteResGroup143], (instregex "SUB8mi")>;
+def: InstRW<[SKLWriteResGroup143], (instregex "XCHG(16|32|64)rm")>;
+def: InstRW<[SKLWriteResGroup143], (instregex "XCHG8rm")>;
+def: InstRW<[SKLWriteResGroup143], (instregex "XOR8mi")>;
-def SKLWriteResGroup172 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
+def SKLWriteResGroup144 : SchedWriteRes<[SKLPort05,SKLPort0156]> {
   let Latency = 10;
   let NumMicroOps = 10;
-  let ResourceCycles = [1,1,1,5,1,1];
+  let ResourceCycles = [9,1];
 }
-def: InstRW<[SKLWriteResGroup172], (instregex "RCL(16|32|64)mCL")>;
-def: InstRW<[SKLWriteResGroup172], (instregex "RCL8mCL")>;
+def: InstRW<[SKLWriteResGroup144], (instregex "MMX_EMMS")>;
-def SKLWriteResGroup173 : SchedWriteRes<[SKLPort0]> {
+def SKLWriteResGroup145 : SchedWriteRes<[SKLPort0]> {
   let Latency = 11;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup173], (instregex "DIVPSrr")>;
-def: InstRW<[SKLWriteResGroup173], (instregex "DIVSSrr")>;
-def: InstRW<[SKLWriteResGroup173], (instregex "VDIVPSYrr")>;
-def: InstRW<[SKLWriteResGroup173], (instregex "VDIVPSrr")>;
-def: InstRW<[SKLWriteResGroup173], (instregex "VDIVSSrr")>;
+def: InstRW<[SKLWriteResGroup145], (instregex "DIVPSrr")>;
+def: InstRW<[SKLWriteResGroup145], (instregex "DIVSSrr")>;
+def: InstRW<[SKLWriteResGroup145], (instregex "VDIVPSYrr")>;
+def: InstRW<[SKLWriteResGroup145], (instregex "VDIVPSrr")>;
+def: InstRW<[SKLWriteResGroup145], (instregex "VDIVSSrr")>;
-def SKLWriteResGroup174 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+def SKLWriteResGroup146 : SchedWriteRes<[SKLPort0,SKLPort23]> {
   let Latency = 11;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup174], (instregex "DIVPSrm")>;
-def: InstRW<[SKLWriteResGroup174], (instregex "DIVSSrm")>;
-def: InstRW<[SKLWriteResGroup174], (instregex "VDIVPSYrm")>;
-def: InstRW<[SKLWriteResGroup174], (instregex "VDIVPSrm")>;
-def: InstRW<[SKLWriteResGroup174], (instregex "VDIVSSrm")>;
+def: InstRW<[SKLWriteResGroup146], (instregex "MUL_F32m")>;
+def: InstRW<[SKLWriteResGroup146], (instregex "MUL_F64m")>;
+def: InstRW<[SKLWriteResGroup146], (instregex "VRCPPSYm")>;
+def: InstRW<[SKLWriteResGroup146], (instregex "VRSQRTPSYm")>;
-def SKLWriteResGroup175 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
+def SKLWriteResGroup147 : SchedWriteRes<[SKLPort01,SKLPort23]> {
+  let Latency = 11;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup147], (instregex "VADDPDYrm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VADDPSYrm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VADDSUBPDYrm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VADDSUBPSYrm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD132PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD132PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD213PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD213PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD231PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADD231PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB132PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB132PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB213PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB213PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB231PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMADDSUB231PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB132PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB132PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB213PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB213PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB231PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUB231PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD132PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD132PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD213PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD213PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD231PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFMSUBADD231PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD132PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD132PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD213PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD213PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD231PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMADD231PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB132PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB132PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB213PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB213PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB231PDYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VFNMSUB231PSYm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VMULPDYrm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VMULPSYrm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VSUBPDYrm")>;
+def: InstRW<[SKLWriteResGroup147], (instregex "VSUBPSYrm")>;
+
+def SKLWriteResGroup148 : SchedWriteRes<[SKLPort23,SKLPort015]> {
+  let Latency = 11;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup148], (instregex "VCMPPDYrmi")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VCMPPSYrmi")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VCVTDQ2PSYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VCVTPS2DQYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VCVTPS2PDYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VCVTTPS2DQYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VMAXPDYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VMAXPSYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VMINPDYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VMINPSYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VPMADDUBSWYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VPMADDWDYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VPMULDQYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VPMULHRSWYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VPMULHUWYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VPMULHWYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VPMULLWYrm")>;
+def: InstRW<[SKLWriteResGroup148], (instregex "VPMULUDQYrm")>;
+
+def SKLWriteResGroup149 : SchedWriteRes<[SKLPort5,SKLPort23]> {
+  let Latency = 11;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SKLWriteResGroup149], (instregex "FICOM16m")>;
+def: InstRW<[SKLWriteResGroup149], (instregex "FICOM32m")>;
+def: InstRW<[SKLWriteResGroup149], (instregex "FICOMP16m")>;
+def: InstRW<[SKLWriteResGroup149], (instregex "FICOMP32m")>;
+def: InstRW<[SKLWriteResGroup149], (instregex "VMPSADBWYrmi")>;
+
+def SKLWriteResGroup150 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
+  let Latency = 11;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup150], (instregex "CVTDQ2PDrm")>;
+def: InstRW<[SKLWriteResGroup150], (instregex "VCVTDQ2PDrm")>;
+
+def SKLWriteResGroup151 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort015]> {
+  let Latency = 11;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup151], (instregex "CVTSD2SI64rm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "CVTSD2SIrm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "CVTSS2SI64rm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "CVTSS2SIrm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "CVTTSD2SI64rm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "CVTTSD2SIrm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "CVTTSS2SIrm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "VCVTSD2SI64rm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "VCVTSD2SIrm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "VCVTSS2SI64rm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "VCVTSS2SIrm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "VCVTTSD2SI64rm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "VCVTTSD2SIrm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "VCVTTSS2SI64rm")>;
+def: InstRW<[SKLWriteResGroup151], (instregex "VCVTTSS2SIrm")>;
+
+def SKLWriteResGroup152 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
+  let Latency = 11;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup152], (instregex "CVTPD2DQrm")>;
+def: InstRW<[SKLWriteResGroup152], (instregex "CVTPD2PSrm")>;
+def: InstRW<[SKLWriteResGroup152], (instregex "CVTTPD2DQrm")>;
+def: InstRW<[SKLWriteResGroup152], (instregex "MMX_CVTPD2PIirm")>;
+def: InstRW<[SKLWriteResGroup152], (instregex "MMX_CVTTPD2PIirm")>;
+
+def SKLWriteResGroup153 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
+  let Latency = 11;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,1,1,2,1];
+}
+def: InstRW<[SKLWriteResGroup153], (instregex "SHLD(16|32|64)mrCL")>;
+def: InstRW<[SKLWriteResGroup153], (instregex "SHRD(16|32|64)mrCL")>;
+
+def SKLWriteResGroup154 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> {
   let Latency = 11;
   let NumMicroOps = 7;
   let ResourceCycles = [2,3,2];
 }
-def: InstRW<[SKLWriteResGroup175], (instregex "RCL(16|32|64)rCL")>;
-def: InstRW<[SKLWriteResGroup175], (instregex "RCR(16|32|64)rCL")>;
+def: InstRW<[SKLWriteResGroup154], (instregex "RCL(16|32|64)rCL")>;
+def: InstRW<[SKLWriteResGroup154], (instregex "RCR(16|32|64)rCL")>;
-def SKLWriteResGroup176 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort0156]> {
+def SKLWriteResGroup155 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort0156]> {
   let Latency = 11;
   let NumMicroOps = 9;
   let ResourceCycles = [1,5,1,2];
 }
-def: InstRW<[SKLWriteResGroup176], (instregex "RCL8rCL")>;
+def: InstRW<[SKLWriteResGroup155], (instregex "RCL8rCL")>;
-def SKLWriteResGroup177 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
+def SKLWriteResGroup156 : SchedWriteRes<[SKLPort06,SKLPort0156]> {
   let Latency = 11;
   let NumMicroOps = 11;
   let ResourceCycles = [2,9];
 }
-def: InstRW<[SKLWriteResGroup177], (instregex "LOOPE")>;
-def: InstRW<[SKLWriteResGroup177], (instregex "LOOPNE")>;
+def: InstRW<[SKLWriteResGroup156], (instregex "LOOPE")>;
+def: InstRW<[SKLWriteResGroup156], (instregex "LOOPNE")>;
-def SKLWriteResGroup178 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
-  let Latency = 11;
-  let NumMicroOps = 14;
-  let ResourceCycles = [1,1,1,4,2,5];
-}
-def: InstRW<[SKLWriteResGroup178], (instregex "CMPXCHG8B")>;
-
-def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0]> {
+def SKLWriteResGroup157 : SchedWriteRes<[SKLPort0]> {
   let Latency = 12;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup179], (instregex "VSQRTPSYr")>;
-def: InstRW<[SKLWriteResGroup179], (instregex "VSQRTPSr")>;
-def: InstRW<[SKLWriteResGroup179], (instregex "VSQRTSSr")>;
+def: InstRW<[SKLWriteResGroup157], (instregex "VSQRTPSYr")>;
+def: InstRW<[SKLWriteResGroup157], (instregex "VSQRTPSr")>;
+def: InstRW<[SKLWriteResGroup157], (instregex "VSQRTSSr")>;
-def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+def SKLWriteResGroup158 : SchedWriteRes<[SKLPort5,SKLPort23]> {
   let Latency = 12;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup180], (instregex "VSQRTPSYm")>;
-def: InstRW<[SKLWriteResGroup180], (instregex "VSQRTPSm")>;
-def: InstRW<[SKLWriteResGroup180], (instregex "VSQRTSSm")>;
+def: InstRW<[SKLWriteResGroup158], (instregex "PCLMULQDQrm")>;
+def: InstRW<[SKLWriteResGroup158], (instregex "VPCLMULQDQrm")>;
-def SKLWriteResGroup181 : SchedWriteRes<[SKLPort0]> {
+def SKLWriteResGroup159 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
+  let Latency = 12;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SKLWriteResGroup159], (instregex "HADDPDrm")>;
+def: InstRW<[SKLWriteResGroup159], (instregex "HADDPSrm")>;
+def: InstRW<[SKLWriteResGroup159], (instregex "HSUBPDrm")>;
+def: InstRW<[SKLWriteResGroup159], (instregex "HSUBPSrm")>;
+def: InstRW<[SKLWriteResGroup159], (instregex "VHADDPDrm")>;
+def: InstRW<[SKLWriteResGroup159], (instregex "VHADDPSrm")>;
+def: InstRW<[SKLWriteResGroup159], (instregex "VHSUBPDrm")>;
+def: InstRW<[SKLWriteResGroup159], (instregex "VHSUBPSrm")>;
+
+def SKLWriteResGroup160 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015]> {
+  let Latency = 12;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SKLWriteResGroup160], (instregex "CVTTSS2SI64rm")>;
+
+def SKLWriteResGroup161 : SchedWriteRes<[SKLPort0]> {
   let Latency = 13;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup181], (instregex "SQRTPSr")>;
-def: InstRW<[SKLWriteResGroup181], (instregex "SQRTSSr")>;
+def: InstRW<[SKLWriteResGroup161], (instregex "SQRTPSr")>;
+def: InstRW<[SKLWriteResGroup161], (instregex "SQRTSSr")>;
-def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+def SKLWriteResGroup162 : SchedWriteRes<[SKLPort5,SKLPort23]> {
   let Latency = 13;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
 }
-def: InstRW<[SKLWriteResGroup182], (instregex "SQRTPSm")>;
-def: InstRW<[SKLWriteResGroup182], (instregex "SQRTSSm")>;
+def: InstRW<[SKLWriteResGroup162], (instregex "ADD_FI16m")>;
+def: InstRW<[SKLWriteResGroup162], (instregex "ADD_FI32m")>;
+def: InstRW<[SKLWriteResGroup162], (instregex "SUBR_FI16m")>;
+def: InstRW<[SKLWriteResGroup162], (instregex "SUBR_FI32m")>;
+def: InstRW<[SKLWriteResGroup162], (instregex "SUB_FI16m")>;
+def: InstRW<[SKLWriteResGroup162], (instregex "SUB_FI32m")>;
-def SKLWriteResGroup187 : SchedWriteRes<[SKLPort5,SKLPort015]> {
+def SKLWriteResGroup163 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
   let Latency = 13;
-  let NumMicroOps = 4;
-  let ResourceCycles = [1,3];
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
 }
-def: InstRW<[SKLWriteResGroup187], (instregex "DPPSrri")>;
-def: InstRW<[SKLWriteResGroup187], (instregex "VDPPSYrri")>;
-def: InstRW<[SKLWriteResGroup187], (instregex "VDPPSrri")>;
+def: InstRW<[SKLWriteResGroup163], (instregex "VCVTDQ2PDYrm")>;
-def SKLWriteResGroup188 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
+def SKLWriteResGroup164 : SchedWriteRes<[SKLPort5,SKLPort015]> {
   let Latency = 13;
-  let NumMicroOps = 5;
-  let ResourceCycles = [1,1,3];
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
 }
-def: InstRW<[SKLWriteResGroup188], (instregex "DPPSrmi")>;
-def: InstRW<[SKLWriteResGroup188], (instregex "VDPPSYrmi")>;
-def: InstRW<[SKLWriteResGroup188], (instregex "VDPPSrmi")>;
+def: InstRW<[SKLWriteResGroup164], (instregex "DPPSrri")>;
+def: InstRW<[SKLWriteResGroup164], (instregex "VDPPSYrri")>;
+def: InstRW<[SKLWriteResGroup164], (instregex "VDPPSrri")>;
-def SKLWriteResGroup189 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
+def SKLWriteResGroup165 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> {
   let Latency = 13;
-  let NumMicroOps = 11;
-  let ResourceCycles = [2,1,1,4,1,2];
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
 }
-def: InstRW<[SKLWriteResGroup189], (instregex "RCR(16|32|64)mCL")>;
-def: InstRW<[SKLWriteResGroup189], (instregex "RCR8mCL")>;
+def: InstRW<[SKLWriteResGroup165], (instregex "VHADDPDYrm")>;
+def: InstRW<[SKLWriteResGroup165], (instregex "VHADDPSYrm")>;
+def: InstRW<[SKLWriteResGroup165], (instregex "VHSUBPDYrm")>;
+def: InstRW<[SKLWriteResGroup165], (instregex "VHSUBPSYrm")>;
-def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0]> {
+def SKLWriteResGroup166 : SchedWriteRes<[SKLPort0]> {
   let Latency = 14;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup190], (instregex "DIVPDrr")>;
-def: InstRW<[SKLWriteResGroup190], (instregex "DIVSDrr")>;
-def: InstRW<[SKLWriteResGroup190], (instregex "VDIVPDYrr")>;
-def: InstRW<[SKLWriteResGroup190], (instregex "VDIVPDrr")>;
-def: InstRW<[SKLWriteResGroup190], (instregex "VDIVSDrr")>;
+def: InstRW<[SKLWriteResGroup166], (instregex "DIVPDrr")>;
+def: InstRW<[SKLWriteResGroup166], (instregex "DIVSDrr")>;
+def: InstRW<[SKLWriteResGroup166], (instregex "VDIVPDYrr")>;
+def: InstRW<[SKLWriteResGroup166], (instregex "VDIVPDrr")>;
+def: InstRW<[SKLWriteResGroup166], (instregex "VDIVSDrr")>;
-def SKLWriteResGroup191 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+def SKLWriteResGroup167 : SchedWriteRes<[SKLPort0,SKLPort23]> {
   let Latency = 14;
-  let NumMicroOps = 2;
-  let ResourceCycles = [1,1];
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
 }
-def: InstRW<[SKLWriteResGroup191], (instregex "DIVPDrm")>;
-def: InstRW<[SKLWriteResGroup191], (instregex "DIVSDrm")>;
-def: InstRW<[SKLWriteResGroup191], (instregex "VDIVPDYrm")>;
-def: InstRW<[SKLWriteResGroup191], (instregex "VDIVPDrm")>;
-def: InstRW<[SKLWriteResGroup191], (instregex "VDIVSDrm")>;
+def: InstRW<[SKLWriteResGroup167], (instregex "AESIMCrm")>;
+def: InstRW<[SKLWriteResGroup167], (instregex "VAESIMCrm")>;
-def SKLWriteResGroup192 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort0156]> {
+def SKLWriteResGroup168 : SchedWriteRes<[SKLPort23,SKLPort015]> {
+  let Latency = 14;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SKLWriteResGroup168], (instregex "PMULLDrm")>;
+def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDPDm")>;
+def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDPSm")>;
+def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDSDm")>;
+def: InstRW<[SKLWriteResGroup168], (instregex "ROUNDSSm")>;
+def: InstRW<[SKLWriteResGroup168], (instregex "VPMULLDrm")>;
+def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDPDm")>;
+def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDPSm")>;
+def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDSDm")>;
+def: InstRW<[SKLWriteResGroup168], (instregex "VROUNDSSm")>;
+
+def SKLWriteResGroup169 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
+  let Latency = 14;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKLWriteResGroup169], (instregex "MUL_FI16m")>;
+def: InstRW<[SKLWriteResGroup169], (instregex "MUL_FI32m")>;
+
+def SKLWriteResGroup170 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort15,SKLPort0156]> {
   let Latency = 14;
   let NumMicroOps = 10;
   let ResourceCycles = [2,4,1,3];
 }
-def: InstRW<[SKLWriteResGroup192], (instregex "RCR8rCL")>;
+def: InstRW<[SKLWriteResGroup170], (instregex "RCR8rCL")>;
-def SKLWriteResGroup193 : SchedWriteRes<[SKLPort0]> {
+def SKLWriteResGroup171 : SchedWriteRes<[SKLPort0]> {
   let Latency = 15;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup193], (instregex "DIVR_FPrST0")>;
-def: InstRW<[SKLWriteResGroup193], (instregex "DIVR_FST0r")>;
-def: InstRW<[SKLWriteResGroup193], (instregex "DIVR_FrST0")>;
+def: InstRW<[SKLWriteResGroup171], (instregex "DIVR_FPrST0")>;
+def: InstRW<[SKLWriteResGroup171], (instregex "DIVR_FST0r")>;
+def: InstRW<[SKLWriteResGroup171], (instregex "DIVR_FrST0")>;
+
+def SKLWriteResGroup172 : SchedWriteRes<[SKLPort23,SKLPort015]> {
+  let Latency = 15;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SKLWriteResGroup172], (instregex "VPMULLDYrm")>;
+def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDYPDm")>;
+def: InstRW<[SKLWriteResGroup172], (instregex "VROUNDYPSm")>;
+
+def SKLWriteResGroup173 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
+  let Latency = 15;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SKLWriteResGroup173], (instregex "DPPDrmi")>;
+def: InstRW<[SKLWriteResGroup173], (instregex "VDPPDrmi")>;
-def SKLWriteResGroup194 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+def SKLWriteResGroup174 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
   let Latency = 15;
+  let NumMicroOps = 10;
+  let ResourceCycles = [1,1,1,5,1,1];
+}
+def: InstRW<[SKLWriteResGroup174], (instregex "RCL(16|32|64)mCL")>;
+def: InstRW<[SKLWriteResGroup174], (instregex "RCL8mCL")>;
+
+def SKLWriteResGroup175 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+  let Latency = 16;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup194], (instregex "DIV_F32m")>;
-def: InstRW<[SKLWriteResGroup194], (instregex "DIV_F64m")>;
+def: InstRW<[SKLWriteResGroup175], (instregex "DIVSSrm")>;
+def: InstRW<[SKLWriteResGroup175], (instregex "VDIVSSrm")>;
-def SKLWriteResGroup195 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
-  let Latency = 15;
-  let NumMicroOps = 8;
-  let ResourceCycles = [1,1,1,1,1,1,2];
+def SKLWriteResGroup176 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+  let Latency = 16;
+  let NumMicroOps = 4;
+  let ResourceCycles = [3,1];
 }
-def: InstRW<[SKLWriteResGroup195], (instregex "INSB")>;
-def: InstRW<[SKLWriteResGroup195], (instregex "INSL")>;
-def: InstRW<[SKLWriteResGroup195], (instregex "INSW")>;
+def: InstRW<[SKLWriteResGroup176], (instregex "PCMPISTRIrm")>;
+def: InstRW<[SKLWriteResGroup176], (instregex "PCMPISTRM128rm")>;
+def: InstRW<[SKLWriteResGroup176], (instregex "VPCMPISTRIrm")>;
+def: InstRW<[SKLWriteResGroup176], (instregex "VPCMPISTRM128rm")>;
-def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0156]> {
+def SKLWriteResGroup177 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
+  let Latency = 16;
+  let NumMicroOps = 14;
+  let ResourceCycles = [1,1,1,4,2,5];
+}
+def: InstRW<[SKLWriteResGroup177], (instregex "CMPXCHG8B")>;
+
+def SKLWriteResGroup178 : SchedWriteRes<[SKLPort0156]> {
   let Latency = 16;
   let NumMicroOps = 16;
   let ResourceCycles = [16];
 }
-def: InstRW<[SKLWriteResGroup196], (instregex "VZEROALL")>;
+def: InstRW<[SKLWriteResGroup178], (instregex "VZEROALL")>;
+
+def SKLWriteResGroup179 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+  let Latency = 17;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKLWriteResGroup179], (instregex "DIVPSrm")>;
+def: InstRW<[SKLWriteResGroup179], (instregex "VDIVPSrm")>;
+def: InstRW<[SKLWriteResGroup179], (instregex "VSQRTSSm")>;
-def SKLWriteResGroup197 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
+def SKLWriteResGroup180 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> {
   let Latency = 17;
   let NumMicroOps = 15;
   let ResourceCycles = [2,1,2,4,2,4];
 }
-def: InstRW<[SKLWriteResGroup197], (instregex "XCH_F")>;
+def: InstRW<[SKLWriteResGroup180], (instregex "XCH_F")>;
-def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0]> {
+def SKLWriteResGroup181 : SchedWriteRes<[SKLPort0]> {
   let Latency = 18;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup198], (instregex "VSQRTPDYr")>;
-def: InstRW<[SKLWriteResGroup198], (instregex "VSQRTPDr")>;
-def: InstRW<[SKLWriteResGroup198], (instregex "VSQRTSDr")>;
+def: InstRW<[SKLWriteResGroup181], (instregex "VSQRTPDYr")>;
+def: InstRW<[SKLWriteResGroup181], (instregex "VSQRTPDr")>;
+def: InstRW<[SKLWriteResGroup181], (instregex "VSQRTSDr")>;
-def SKLWriteResGroup199 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+def SKLWriteResGroup182 : SchedWriteRes<[SKLPort0,SKLPort23]> {
   let Latency = 18;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup199], (instregex "VSQRTPDYm")>;
-def: InstRW<[SKLWriteResGroup199], (instregex "VSQRTPDm")>;
-def: InstRW<[SKLWriteResGroup199], (instregex "VSQRTSDm")>;
-
-def SKLWriteResGroup200 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> {
-  let Latency = 18;
-  let NumMicroOps = 3;
-  let ResourceCycles = [1,1,1];
-}
-def: InstRW<[SKLWriteResGroup200], (instregex "DIV_FI16m")>;
-def: InstRW<[SKLWriteResGroup200], (instregex "DIV_FI32m")>;
+def: InstRW<[SKLWriteResGroup182], (instregex "SQRTSSm")>;
+def: InstRW<[SKLWriteResGroup182], (instregex "VDIVPSYrm")>;
+def: InstRW<[SKLWriteResGroup182], (instregex "VSQRTPSm")>;
-def SKLWriteResGroup201 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort0156]> {
+def SKLWriteResGroup183 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort0156]> {
   let Latency = 18;
   let NumMicroOps = 8;
   let ResourceCycles = [4,3,1];
 }
-def: InstRW<[SKLWriteResGroup201], (instregex "PCMPESTRIrr")>;
-def: InstRW<[SKLWriteResGroup201], (instregex "VPCMPESTRIrr")>;
+def: InstRW<[SKLWriteResGroup183], (instregex "PCMPESTRIrr")>;
+def: InstRW<[SKLWriteResGroup183], (instregex "VPCMPESTRIrr")>;
-def SKLWriteResGroup202 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> {
+def SKLWriteResGroup184 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> {
  let Latency = 18;
   let NumMicroOps = 8;
   let ResourceCycles = [1,1,1,5];
 }
-def: InstRW<[SKLWriteResGroup202], (instregex "CPUID")>;
-def: InstRW<[SKLWriteResGroup202], (instregex "RDTSC")>;
+def: InstRW<[SKLWriteResGroup184], (instregex "CPUID")>;
+def: InstRW<[SKLWriteResGroup184], (instregex "RDTSC")>;
-def SKLWriteResGroup203 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort0156]> {
+def SKLWriteResGroup185 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort237,SKLPort06,SKLPort15,SKLPort0156]> {
   let Latency = 18;
-  let NumMicroOps = 9;
-  let ResourceCycles = [4,3,1,1];
-}
-def: InstRW<[SKLWriteResGroup203], (instregex "PCMPESTRIrm")>;
-def: InstRW<[SKLWriteResGroup203], (instregex "VPCMPESTRIrm")>;
-
-def SKLWriteResGroup204 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> {
-  let Latency = 18;
-  let NumMicroOps = 19;
-  let ResourceCycles = [2,1,4,1,1,4,6];
+  let NumMicroOps = 11;
+  let ResourceCycles = [2,1,1,4,1,2];
 }
-def: InstRW<[SKLWriteResGroup204], (instregex "CMPXCHG16B")>;
+def: InstRW<[SKLWriteResGroup185], (instregex "RCR(16|32|64)mCL")>;
+def: InstRW<[SKLWriteResGroup185], (instregex "RCR8mCL")>;
-def SKLWriteResGroup205 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015,SKLPort0156]> {
+def SKLWriteResGroup186 : SchedWriteRes<[SKLPort0,SKLPort23]> {
   let Latency = 19;
-  let NumMicroOps = 9;
-  let ResourceCycles = [4,3,1,1];
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup205], (instregex "PCMPESTRM128rr")>;
-def: InstRW<[SKLWriteResGroup205], (instregex "VPCMPESTRM128rr")>;
+def: InstRW<[SKLWriteResGroup186], (instregex "DIVSDrm")>;
+def: InstRW<[SKLWriteResGroup186], (instregex "SQRTPSm")>;
+def: InstRW<[SKLWriteResGroup186], (instregex "VDIVSDrm")>;
+def: InstRW<[SKLWriteResGroup186], (instregex "VSQRTPSYm")>;
-def SKLWriteResGroup206 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015,SKLPort0156]> {
+def SKLWriteResGroup187 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
   let Latency = 19;
-  let NumMicroOps = 10;
-  let ResourceCycles = [4,3,1,1,1];
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,3];
 }
-def: InstRW<[SKLWriteResGroup206], (instregex "PCMPESTRM128rm")>;
-def: InstRW<[SKLWriteResGroup206], (instregex "VPCMPESTRM128rm")>;
+def: InstRW<[SKLWriteResGroup187], (instregex "DPPSrmi")>;
+def: InstRW<[SKLWriteResGroup187], (instregex "VDPPSrmi")>;
-def SKLWriteResGroup207 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015]> {
+def SKLWriteResGroup188 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015,SKLPort0156]> {
   let Latency = 19;
-  let NumMicroOps = 11;
-  let ResourceCycles = [3,6,1,1];
+  let NumMicroOps = 9;
+  let ResourceCycles = [4,3,1,1];
 }
-def: InstRW<[SKLWriteResGroup207], (instregex "AESKEYGENASSIST128rm")>;
-def: InstRW<[SKLWriteResGroup207], (instregex "VAESKEYGENASSIST128rm")>;
+def: InstRW<[SKLWriteResGroup188], (instregex "PCMPESTRM128rr")>;
+def: InstRW<[SKLWriteResGroup188], (instregex "VPCMPESTRM128rr")>;
-def SKLWriteResGroup208 : SchedWriteRes<[SKLPort0]> {
+def SKLWriteResGroup189 : SchedWriteRes<[SKLPort0]> {
   let Latency = 20;
   let NumMicroOps = 1;
   let ResourceCycles = [1];
 }
-def: InstRW<[SKLWriteResGroup208], (instregex "DIV_FPrST0")>;
-def: InstRW<[SKLWriteResGroup208], (instregex "DIV_FST0r")>;
-def: InstRW<[SKLWriteResGroup208], (instregex "DIV_FrST0")>;
-def: InstRW<[SKLWriteResGroup208], (instregex "SQRTPDr")>;
-def: InstRW<[SKLWriteResGroup208], (instregex "SQRTSDr")>;
+def: InstRW<[SKLWriteResGroup189], (instregex "DIV_FPrST0")>;
+def: InstRW<[SKLWriteResGroup189], (instregex "DIV_FST0r")>;
+def: InstRW<[SKLWriteResGroup189], (instregex "DIV_FrST0")>;
+def: InstRW<[SKLWriteResGroup189], (instregex "SQRTPDr")>;
+def: InstRW<[SKLWriteResGroup189], (instregex "SQRTSDr")>;
-def SKLWriteResGroup209 : SchedWriteRes<[SKLPort0,SKLPort23]> {
+def SKLWriteResGroup190 : SchedWriteRes<[SKLPort0,SKLPort23]> {
   let Latency = 20;
   let NumMicroOps = 2;
   let ResourceCycles = [1,1];
 }
-def: InstRW<[SKLWriteResGroup209], (instregex "DIVR_F32m")>;
-def: InstRW<[SKLWriteResGroup209], (instregex "DIVR_F64m")>;
-def: InstRW<[SKLWriteResGroup209], (instregex "SQRTPDm")>;
-def: InstRW<[SKLWriteResGroup209], (instregex "SQRTSDm")>;
+def: InstRW<[SKLWriteResGroup190], (instregex "DIVPDrm")>;
+def: InstRW<[SKLWriteResGroup190], (instregex "VDIVPDrm")>;
+
+def SKLWriteResGroup191 : SchedWriteRes<[SKLPort5,SKLPort23,SKLPort015]> {
+  let Latency = 20;
+  let NumMicroOps = 5;
+  let
ResourceCycles = [1,1,3]; +} +def: InstRW<[SKLWriteResGroup191], (instregex "VDPPSYrmi")>; + +def SKLWriteResGroup192 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { + let Latency = 20; + let NumMicroOps = 8; + let ResourceCycles = [1,1,1,1,1,1,2]; +} +def: InstRW<[SKLWriteResGroup192], (instregex "INSB")>; +def: InstRW<[SKLWriteResGroup192], (instregex "INSL")>; +def: InstRW<[SKLWriteResGroup192], (instregex "INSW")>; -def SKLWriteResGroup210 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort0156]> { +def SKLWriteResGroup193 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort0156]> { let Latency = 20; let NumMicroOps = 10; let ResourceCycles = [1,2,7]; } -def: InstRW<[SKLWriteResGroup210], (instregex "MWAITrr")>; +def: InstRW<[SKLWriteResGroup193], (instregex "MWAITrr")>; -def SKLWriteResGroup211 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015]> { +def SKLWriteResGroup194 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort015]> { let Latency = 20; let NumMicroOps = 11; let ResourceCycles = [3,6,2]; } -def: InstRW<[SKLWriteResGroup211], (instregex "AESKEYGENASSIST128rr")>; -def: InstRW<[SKLWriteResGroup211], (instregex "VAESKEYGENASSIST128rr")>; +def: InstRW<[SKLWriteResGroup194], (instregex "AESKEYGENASSIST128rr")>; +def: InstRW<[SKLWriteResGroup194], (instregex "VAESKEYGENASSIST128rr")>; -def SKLWriteResGroup212 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { - let Latency = 17; +def SKLWriteResGroup195 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 21; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup195], (instregex "VDIVPDYrm")>; + +def SKLWriteResGroup196 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 22; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup196], (instregex "DIV_F32m")>; +def: InstRW<[SKLWriteResGroup196], (instregex "DIV_F64m")>; + +def SKLWriteResGroup196_1 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { + let Latency = 22; let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERDPSrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERDPDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERQPDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERQPSrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERDDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERDQrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERQDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERQQrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERDDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERQDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERDQrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VPGATHERQQrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERDPSrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERQPSrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERDPDrm")>; -def: InstRW<[SKLWriteResGroup212], (instregex "VGATHERQPDrm")>; - -def SKLWriteResGroup213 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { - let Latency = 20; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPSrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPSrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDDrm")>; 
+def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDQrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQQrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERDQrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VPGATHERQQrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPSrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPSrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERDPDrm")>; +def: InstRW<[SKLWriteResGroup196_1], (instregex "VGATHERQPDrm")>; + +def SKLWriteResGroup196_2 : SchedWriteRes<[SKLPort0, SKLPort23, SKLPort5, SKLPort015]> { + let Latency = 25; let NumMicroOps = 5; let ResourceCycles = [1,2,1,1]; } -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERDPSYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERQPDYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERQPSYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERDDYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERDQYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERQDYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERQQYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERDDYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERQDYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERDQYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VPGATHERQQYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERDPSYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERQPSYrm")>; -def: InstRW<[SKLWriteResGroup213], (instregex "VGATHERDPDYrm")>; - -def SKLWriteResGroup215 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { +def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERDPSYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERQPDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERQPSYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDQYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQQYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQDYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERDQYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VPGATHERQQYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERDPSYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERQPSYrm")>; +def: InstRW<[SKLWriteResGroup196_2], (instregex "VGATHERDPDYrm")>; + +def SKLWriteResGroup197 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 23; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup197], (instregex "VSQRTSDm")>; + +def SKLWriteResGroup198 : SchedWriteRes<[SKLPort0,SKLPort4,SKLPort5,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 23; + let NumMicroOps = 19; + let ResourceCycles = [2,1,4,1,1,4,6]; +} +def: InstRW<[SKLWriteResGroup198], (instregex "CMPXCHG16B")>; + +def SKLWriteResGroup199 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 24; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup199], (instregex "VSQRTPDm")>; + +def 
SKLWriteResGroup200 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort0156]> { + let Latency = 24; + let NumMicroOps = 9; + let ResourceCycles = [4,3,1,1]; +} +def: InstRW<[SKLWriteResGroup200], (instregex "PCMPESTRIrm")>; +def: InstRW<[SKLWriteResGroup200], (instregex "VPCMPESTRIrm")>; + +def SKLWriteResGroup201 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 25; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup201], (instregex "SQRTSDm")>; +def: InstRW<[SKLWriteResGroup201], (instregex "VSQRTPDYm")>; + +def SKLWriteResGroup202 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { + let Latency = 25; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup215], (instregex "DIVR_FI16m")>; -def: InstRW<[SKLWriteResGroup215], (instregex "DIVR_FI32m")>; +def: InstRW<[SKLWriteResGroup202], (instregex "DIV_FI16m")>; +def: InstRW<[SKLWriteResGroup202], (instregex "DIV_FI32m")>; -def SKLWriteResGroup217 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort0156]> { - let Latency = 23; +def SKLWriteResGroup203 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015,SKLPort0156]> { + let Latency = 25; + let NumMicroOps = 10; + let ResourceCycles = [4,3,1,1,1]; +} +def: InstRW<[SKLWriteResGroup203], (instregex "PCMPESTRM128rm")>; +def: InstRW<[SKLWriteResGroup203], (instregex "VPCMPESTRM128rm")>; + +def SKLWriteResGroup204 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort015]> { + let Latency = 25; + let NumMicroOps = 11; + let ResourceCycles = [3,6,1,1]; +} +def: InstRW<[SKLWriteResGroup204], (instregex "AESKEYGENASSIST128rm")>; +def: InstRW<[SKLWriteResGroup204], (instregex "VAESKEYGENASSIST128rm")>; + +def SKLWriteResGroup205 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 26; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup205], (instregex "SQRTPDm")>; + +def SKLWriteResGroup206 : SchedWriteRes<[SKLPort0,SKLPort23]> { + let Latency = 27; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKLWriteResGroup206], (instregex "DIVR_F32m")>; +def: InstRW<[SKLWriteResGroup206], (instregex "DIVR_F64m")>; + +def SKLWriteResGroup207 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23,SKLPort0156]> { + let Latency = 28; let NumMicroOps = 8; let ResourceCycles = [2,4,1,1]; } -def: InstRW<[SKLWriteResGroup217], (instregex "IDIV(16|32|64)m")>; -def: InstRW<[SKLWriteResGroup217], (instregex "IDIV8m")>; +def: InstRW<[SKLWriteResGroup207], (instregex "IDIV(16|32|64)m")>; +def: InstRW<[SKLWriteResGroup207], (instregex "IDIV8m")>; -def SKLWriteResGroup222 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup208 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 30; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKLWriteResGroup208], (instregex "DIVR_FI16m")>; +def: InstRW<[SKLWriteResGroup208], (instregex "DIVR_FI32m")>; + +def SKLWriteResGroup209 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort06,SKLPort0156]> { + let Latency = 35; let NumMicroOps = 23; let ResourceCycles = [1,5,3,4,10]; } -def: InstRW<[SKLWriteResGroup222], (instregex "IN32ri")>; -def: InstRW<[SKLWriteResGroup222], (instregex "IN32rr")>; -def: InstRW<[SKLWriteResGroup222], (instregex "IN8ri")>; -def: InstRW<[SKLWriteResGroup222], (instregex "IN8rr")>; +def: InstRW<[SKLWriteResGroup209], (instregex "IN32ri")>; +def: InstRW<[SKLWriteResGroup209], (instregex "IN32rr")>; +def: InstRW<[SKLWriteResGroup209], (instregex "IN8ri")>; +def: 
InstRW<[SKLWriteResGroup209], (instregex "IN8rr")>; -def SKLWriteResGroup223 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 30; +def SKLWriteResGroup210 : SchedWriteRes<[SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { + let Latency = 35; let NumMicroOps = 23; let ResourceCycles = [1,5,2,1,4,10]; } -def: InstRW<[SKLWriteResGroup223], (instregex "OUT32ir")>; -def: InstRW<[SKLWriteResGroup223], (instregex "OUT32rr")>; -def: InstRW<[SKLWriteResGroup223], (instregex "OUT8ir")>; -def: InstRW<[SKLWriteResGroup223], (instregex "OUT8rr")>; +def: InstRW<[SKLWriteResGroup210], (instregex "OUT32ir")>; +def: InstRW<[SKLWriteResGroup210], (instregex "OUT32rr")>; +def: InstRW<[SKLWriteResGroup210], (instregex "OUT8ir")>; +def: InstRW<[SKLWriteResGroup210], (instregex "OUT8rr")>; -def SKLWriteResGroup224 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> { - let Latency = 32; +def SKLWriteResGroup211 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort23,SKLPort0156]> { + let Latency = 37; let NumMicroOps = 31; let ResourceCycles = [1,8,1,21]; } -def: InstRW<[SKLWriteResGroup224], (instregex "XRSTOR(64?)")>; +def: InstRW<[SKLWriteResGroup211], (instregex "XRSTOR(64?)")>; -def SKLWriteResGroup225 : SchedWriteRes<[SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort15,SKLPort0156]> { - let Latency = 35; +def SKLWriteResGroup212 : SchedWriteRes<[SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort23,SKLPort237,SKLPort15,SKLPort0156]> { + let Latency = 40; let NumMicroOps = 18; let ResourceCycles = [1,1,2,3,1,1,1,8]; } -def: InstRW<[SKLWriteResGroup225], (instregex "VMCLEARm")>; +def: InstRW<[SKLWriteResGroup212], (instregex "VMCLEARm")>; -def SKLWriteResGroup226 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 36; +def SKLWriteResGroup213 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { + let Latency = 41; let NumMicroOps = 39; let ResourceCycles = [1,10,1,1,26]; } -def: InstRW<[SKLWriteResGroup226], (instregex "XSAVE64")>; +def: InstRW<[SKLWriteResGroup213], (instregex "XSAVE64")>; -def SKLWriteResGroup231 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 37; +def SKLWriteResGroup214 : SchedWriteRes<[SKLPort5,SKLPort0156]> { + let Latency = 42; + let NumMicroOps = 22; + let ResourceCycles = [2,20]; +} +def: InstRW<[SKLWriteResGroup214], (instregex "RDTSCP")>; + +def SKLWriteResGroup215 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { + let Latency = 42; let NumMicroOps = 40; let ResourceCycles = [1,11,1,1,26]; } -def: InstRW<[SKLWriteResGroup231], (instregex "XSAVE")>; +def: InstRW<[SKLWriteResGroup215], (instregex "XSAVE")>; -def SKLWriteResGroup232 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { - let Latency = 41; +def SKLWriteResGroup216 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort0156]> { + let Latency = 46; let NumMicroOps = 44; let ResourceCycles = [1,11,1,1,30]; } -def: InstRW<[SKLWriteResGroup232], (instregex "XSAVEOPT")>; - -def SKLWriteResGroup233 : SchedWriteRes<[SKLPort5,SKLPort0156]> { - let Latency = 42; - let NumMicroOps = 22; - let ResourceCycles = [2,20]; -} -def: InstRW<[SKLWriteResGroup233], (instregex "RDTSCP")>; +def: InstRW<[SKLWriteResGroup216], (instregex "XSAVEOPT")>; -def SKLWriteResGroup234 : SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05,SKLPort06,SKLPort0156]> { - let Latency = 57; +def SKLWriteResGroup217 : 
SchedWriteRes<[SKLPort0,SKLPort23,SKLPort05,SKLPort06,SKLPort0156]> { + let Latency = 62; let NumMicroOps = 64; let ResourceCycles = [2,8,5,10,39]; } -def: InstRW<[SKLWriteResGroup234], (instregex "FLDENVm")>; -def: InstRW<[SKLWriteResGroup234], (instregex "FLDENVm")>; +def: InstRW<[SKLWriteResGroup217], (instregex "FLDENVm")>; +def: InstRW<[SKLWriteResGroup217], (instregex "FLDENVm")>; -def SKLWriteResGroup235 : SchedWriteRes<[SKLPort0,SKLPort6,SKLPort23,SKLPort05,SKLPort06,SKLPort15,SKLPort0156]> { - let Latency = 58; +def SKLWriteResGroup218 : SchedWriteRes<[SKLPort0,SKLPort6,SKLPort23,SKLPort05,SKLPort06,SKLPort15,SKLPort0156]> { + let Latency = 63; let NumMicroOps = 88; let ResourceCycles = [4,4,31,1,2,1,45]; } -def: InstRW<[SKLWriteResGroup235], (instregex "FXRSTOR64")>; +def: InstRW<[SKLWriteResGroup218], (instregex "FXRSTOR64")>; -def SKLWriteResGroup236 : SchedWriteRes<[SKLPort0,SKLPort6,SKLPort23,SKLPort05,SKLPort06,SKLPort15,SKLPort0156]> { - let Latency = 58; +def SKLWriteResGroup219 : SchedWriteRes<[SKLPort0,SKLPort6,SKLPort23,SKLPort05,SKLPort06,SKLPort15,SKLPort0156]> { + let Latency = 63; let NumMicroOps = 90; let ResourceCycles = [4,2,33,1,2,1,47]; } -def: InstRW<[SKLWriteResGroup236], (instregex "FXRSTOR")>; +def: InstRW<[SKLWriteResGroup219], (instregex "FXRSTOR")>; -def SKLWriteResGroup239 : SchedWriteRes<[SKLPort5,SKLPort05,SKLPort0156]> { +def SKLWriteResGroup220 : SchedWriteRes<[SKLPort5,SKLPort05,SKLPort0156]> { let Latency = 75; let NumMicroOps = 15; let ResourceCycles = [6,3,6]; } -def: InstRW<[SKLWriteResGroup239], (instregex "FNINIT")>; +def: InstRW<[SKLWriteResGroup220], (instregex "FNINIT")>; -def SKLWriteResGroup240 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> { +def SKLWriteResGroup221 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort05,SKLPort0156]> { let Latency = 76; let NumMicroOps = 32; let ResourceCycles = [7,2,8,3,1,11]; } -def: InstRW<[SKLWriteResGroup240], (instregex "DIV(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup221], (instregex "DIV(16|32|64)r")>; -def SKLWriteResGroup241 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> { +def SKLWriteResGroup222 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort5,SKLPort6,SKLPort06,SKLPort0156]> { let Latency = 102; let NumMicroOps = 66; let ResourceCycles = [4,2,4,8,14,34]; } -def: InstRW<[SKLWriteResGroup241], (instregex "IDIV(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup222], (instregex "IDIV(16|32|64)r")>; -def SKLWriteResGroup242 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort237,SKLPort06,SKLPort0156]> { - let Latency = 105; +def SKLWriteResGroup223 : SchedWriteRes<[SKLPort0,SKLPort1,SKLPort4,SKLPort5,SKLPort6,SKLPort237,SKLPort06,SKLPort0156]> { + let Latency = 106; let NumMicroOps = 100; let ResourceCycles = [9,1,11,16,1,11,21,30]; } -def: InstRW<[SKLWriteResGroup242], (instregex "FSTENVm")>; -def: InstRW<[SKLWriteResGroup242], (instregex "FSTENVm")>; +def: InstRW<[SKLWriteResGroup223], (instregex "FSTENVm")>; +def: InstRW<[SKLWriteResGroup223], (instregex "FSTENVm")>; } // SchedModel diff --git a/lib/Target/X86/X86SchedSkylakeServer.td b/lib/Target/X86/X86SchedSkylakeServer.td new file mode 100755 index 0000000000000..bd80102e096f7 --- /dev/null +++ b/lib/Target/X86/X86SchedSkylakeServer.td @@ -0,0 +1,6949 @@ +//=- X86SchedSkylake.td - X86 Skylake Server Scheduling ------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Skylake Server to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def SkylakeServerModel : SchedMachineModel {
+  // All x86 instructions are modeled as a single micro-op, and Skylake can
+  // decode 6 instructions per cycle.
+  let IssueWidth = 6;
+  let MicroOpBufferSize = 224; // Based on the reorder buffer.
+  let LoadLatency = 5;
+  let MispredictPenalty = 14;
+
+  // Based on the LSD (loop-stream detector) queue size and benchmarking data.
+  let LoopMicroOpBufferSize = 50;
+
+  // This flag is set to allow the scheduler to assign a default model to
+  // unrecognized opcodes.
+  let CompleteModel = 0;
+}
+
+let SchedModel = SkylakeServerModel in {
+
+// Skylake Server can issue micro-ops to 8 different ports in one cycle.
+
+// Ports 0, 1, 5, and 6 handle all computation.
+// Port 4 gets the data half of stores. Store data can be available later than
+// the store address, but since we don't model the latency of stores, we can
+// ignore that.
+// Ports 2 and 3 are identical. They handle loads and the address half of
+// stores. Port 7 can handle address calculations.
+def SKXPort0 : ProcResource<1>;
+def SKXPort1 : ProcResource<1>;
+def SKXPort2 : ProcResource<1>;
+def SKXPort3 : ProcResource<1>;
+def SKXPort4 : ProcResource<1>;
+def SKXPort5 : ProcResource<1>;
+def SKXPort6 : ProcResource<1>;
+def SKXPort7 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def SKXPort01 : ProcResGroup<[SKXPort0, SKXPort1]>;
+def SKXPort23 : ProcResGroup<[SKXPort2, SKXPort3]>;
+def SKXPort237 : ProcResGroup<[SKXPort2, SKXPort3, SKXPort7]>;
+def SKXPort04 : ProcResGroup<[SKXPort0, SKXPort4]>;
+def SKXPort05 : ProcResGroup<[SKXPort0, SKXPort5]>;
+def SKXPort06 : ProcResGroup<[SKXPort0, SKXPort6]>;
+def SKXPort15 : ProcResGroup<[SKXPort1, SKXPort5]>;
+def SKXPort16 : ProcResGroup<[SKXPort1, SKXPort6]>;
+def SKXPort56 : ProcResGroup<[SKXPort5, SKXPort6]>;
+def SKXPort015 : ProcResGroup<[SKXPort0, SKXPort1, SKXPort5]>;
+def SKXPort056 : ProcResGroup<[SKXPort0, SKXPort5, SKXPort6]>;
+def SKXPort0156: ProcResGroup<[SKXPort0, SKXPort1, SKXPort5, SKXPort6]>;
+
+// 60 Entry Unified Scheduler
+def SKXPortAny : ProcResGroup<[SKXPort0, SKXPort1, SKXPort2, SKXPort3, SKXPort4,
+                               SKXPort5, SKXPort6, SKXPort7]> {
+  let BufferSize=60;
+}
+
+// Loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 5>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW,
+                           ProcResourceKind ExePort,
+                           int Lat> {
+  // Register variant is using a single cycle on ExePort.
+  def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+  // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the
+  // latency.
+  def : WriteRes<SchedRW.Folded, [SKXPort23, ExePort]> {
+    let Latency = !add(Lat, 5);
+  }
+}
+
+// A folded store needs a cycle on port 4 for the store data, but it does not
+// need an extra port 2/3 cycle to recompute the address.
+def : WriteRes<WriteRMW, [SKXPort4]>;
+
+// Arithmetic.
+defm : SKXWriteResPair<WriteALU, SKXPort0156, 1>; // Simple integer ALU op.
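To make the pair convention concrete, the WriteALU defm above expands to roughly the following two records (a sketch of the multiclass instantiation; the X86SchedWritePair machinery names the folded-load write WriteALULd):

  def : WriteRes<WriteALU, [SKXPort0156]> { let Latency = 1; }              // register form: one cycle on any ALU port
  def : WriteRes<WriteALULd, [SKXPort23, SKXPort0156]> { let Latency = 6; } // folded load: a port 2/3 cycle, latency 1 + 5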
+defm : SKXWriteResPair<WriteIMul, SKXPort1, 3>; // Integer multiplication.
+def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
+def SKXDivider : ProcResource<1>; // Integer division issued on port 0.
+def : WriteRes<WriteIDiv, [SKXPort0, SKXDivider]> { // Integer division.
+  let Latency = 25;
+  let ResourceCycles = [1, 10];
+}
+def : WriteRes<WriteIDivLd, [SKXPort23, SKXPort0, SKXDivider]> {
+  let Latency = 29;
+  let ResourceCycles = [1, 1, 10];
+}
+
+def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.
+
+// Integer shifts and rotates.
+defm : SKXWriteResPair<WriteShift, SKXPort06, 1>;
+
+// Loads, stores, and moves, not folded with other operations.
+def : WriteRes<WriteLoad, [SKXPort23]> { let Latency = 5; }
+def : WriteRes<WriteStore, [SKXPort237, SKXPort4]>;
+def : WriteRes<WriteMove, [SKXPort0156]>;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def : WriteRes<WriteZero, []>;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm : SKXWriteResPair<WriteJump, SKXPort06, 1>;
+
+// Floating point. This covers both scalar and vector operations.
+defm : SKXWriteResPair<WriteFAdd, SKXPort1, 3>; // Floating point add/sub/compare.
+defm : SKXWriteResPair<WriteFMul, SKXPort0, 5>; // Floating point multiplication.
+defm : SKXWriteResPair<WriteFDiv, SKXPort0, 12>; // 10-14 cycles. // Floating point division.
+defm : SKXWriteResPair<WriteFSqrt, SKXPort0, 15>; // Floating point square root.
+defm : SKXWriteResPair<WriteFRcp, SKXPort0, 5>; // Floating point reciprocal estimate.
+defm : SKXWriteResPair<WriteFRsqrt, SKXPort0, 5>; // Floating point reciprocal square root estimate.
+// defm WriteFMA : X86SchedWritePair; // Fused Multiply Add.
+defm : SKXWriteResPair<WriteFShuffle, SKXPort5, 1>; // Floating point vector shuffles.
+defm : SKXWriteResPair<WriteFBlend, SKXPort015, 1>; // Floating point vector blends.
+def : WriteRes<WriteFVarBlend, [SKXPort5]> { // Fp vector variable blends.
+  let Latency = 2;
+  let ResourceCycles = [2];
+}
+def : WriteRes<WriteFVarBlendLd, [SKXPort5, SKXPort23]> {
+  let Latency = 6;
+  let ResourceCycles = [2, 1];
+}
+
+// FMA Scheduling helper class.
+// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
+
+// Vector integer operations.
+defm : SKXWriteResPair<WriteVecALU, SKXPort15, 1>; // Vector integer ALU op, no logicals.
+defm : SKXWriteResPair<WriteVecShift, SKXPort0, 1>; // Vector integer shifts.
+defm : SKXWriteResPair<WriteVecIMul, SKXPort0, 5>; // Vector integer multiply.
+defm : SKXWriteResPair<WriteShuffle, SKXPort5, 1>; // Vector shuffles.
+defm : SKXWriteResPair<WriteBlend, SKXPort15, 1>; // Vector blends.
+
+def : WriteRes<WriteVarBlend, [SKXPort5]> { // Vector variable blends.
+  let Latency = 2;
+  let ResourceCycles = [2];
+}
+def : WriteRes<WriteVarBlendLd, [SKXPort5, SKXPort23]> {
+  let Latency = 6;
+  let ResourceCycles = [2, 1];
+}
+
+def : WriteRes<WriteMPSAD, [SKXPort0, SKXPort5]> { // Vector MPSAD.
+  let Latency = 6;
+  let ResourceCycles = [1, 2];
+}
+def : WriteRes<WriteMPSADLd, [SKXPort23, SKXPort0, SKXPort5]> {
+  let Latency = 6;
+  let ResourceCycles = [1, 1, 2];
+}
+
+// Vector bitwise operations.
+// These are often used on both floating point and integer vectors.
+defm : SKXWriteResPair<WriteVecLogic, SKXPort015, 1>; // Vector and/or/xor.
+
+// Conversion between integer and float.
+defm : SKXWriteResPair<WriteCvtF2I, SKXPort1, 3>; // Float -> Integer.
+defm : SKXWriteResPair<WriteCvtI2F, SKXPort1, 4>; // Integer -> Float.
+defm : SKXWriteResPair<WriteCvtF2F, SKXPort1, 3>; // Float -> Float size conversion.
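When reading the WriteRes entries that follow, keep in mind that ResourceCycles is positional occupancy, parallel to the port list, and independent of both Latency and NumMicroOps (which defaults to 1 for a WriteRes). A hypothetical entry illustrates the distinction (SomeWrite is not a real write type):

  def : WriteRes<SomeWrite, [SKXPort0, SKXPort5]> { // hypothetical
    let Latency = 10;            // dependent uses wait 10 cycles
    let ResourceCycles = [3, 4]; // port 0 busy 3 cycles, port 5 busy 4
  }

Back-to-back instances of such an instruction would be throughput-limited by port 5's four busy cycles, even though consumers of the result see a latency of 10.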
+
+// String instructions.
+// Packed Compare Implicit Length Strings, Return Mask
+def : WriteRes<WritePCmpIStrM, [SKXPort0]> {
+  let Latency = 10;
+  let ResourceCycles = [3];
+}
+def : WriteRes<WritePCmpIStrMLd, [SKXPort0, SKXPort23]> {
+  let Latency = 10;
+  let ResourceCycles = [3, 1];
+}
+// Packed Compare Explicit Length Strings, Return Mask
+def : WriteRes<WritePCmpEStrM, [SKXPort0, SKXPort16, SKXPort5]> {
+  let Latency = 10;
+  let ResourceCycles = [3, 2, 4];
+}
+def : WriteRes<WritePCmpEStrMLd, [SKXPort05, SKXPort16, SKXPort23]> {
+  let Latency = 10;
+  let ResourceCycles = [6, 2, 1];
+}
+// Packed Compare Implicit Length Strings, Return Index
+def : WriteRes<WritePCmpIStrI, [SKXPort0]> {
+  let Latency = 11;
+  let ResourceCycles = [3];
+}
+def : WriteRes<WritePCmpIStrILd, [SKXPort0, SKXPort23]> {
+  let Latency = 11;
+  let ResourceCycles = [3, 1];
+}
+// Packed Compare Explicit Length Strings, Return Index
+def : WriteRes<WritePCmpEStrI, [SKXPort05, SKXPort16]> {
+  let Latency = 11;
+  let ResourceCycles = [6, 2];
+}
+def : WriteRes<WritePCmpEStrILd, [SKXPort0, SKXPort16, SKXPort5, SKXPort23]> {
+  let Latency = 11;
+  let ResourceCycles = [3, 2, 2, 1];
+}
+
+// AES instructions.
+def : WriteRes<WriteAESDecEnc, [SKXPort5]> { // Decryption, encryption.
+  let Latency = 7;
+  let ResourceCycles = [1];
+}
+def : WriteRes<WriteAESDecEncLd, [SKXPort5, SKXPort23]> {
+  let Latency = 7;
+  let ResourceCycles = [1, 1];
+}
+def : WriteRes<WriteAESIMC, [SKXPort5]> { // InvMixColumn.
+  let Latency = 14;
+  let ResourceCycles = [2];
+}
+def : WriteRes<WriteAESIMCLd, [SKXPort5, SKXPort23]> {
+  let Latency = 14;
+  let ResourceCycles = [2, 1];
+}
+def : WriteRes<WriteAESKeyGen, [SKXPort0, SKXPort5]> { // Key Generation.
+  let Latency = 10;
+  let ResourceCycles = [2, 8];
+}
+def : WriteRes<WriteAESKeyGenLd, [SKXPort0, SKXPort5, SKXPort23]> {
+  let Latency = 10;
+  let ResourceCycles = [2, 7, 1];
+}
+
+// Carry-less multiplication instructions.
+def : WriteRes<WriteCLMul, [SKXPort0, SKXPort5]> {
+  let Latency = 7;
+  let ResourceCycles = [2, 1];
+}
+def : WriteRes<WriteCLMulLd, [SKXPort0, SKXPort5, SKXPort23]> {
+  let Latency = 7;
+  let ResourceCycles = [2, 1, 1];
+}
+
+// Catch-all for expensive system instructions.
+def : WriteRes<WriteSystem, [SKXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
+
+// AVX2.
+defm : SKXWriteResPair<WriteFShuffle256, SKXPort5, 3>; // Fp 256-bit width vector shuffles.
+defm : SKXWriteResPair<WriteShuffle256, SKXPort5, 3>; // 256-bit width vector shuffles.
+def : WriteRes<WriteVarVecShift, [SKXPort0, SKXPort5]> { // Variable vector shifts.
+  let Latency = 2;
+  let ResourceCycles = [2, 1];
+}
+def : WriteRes<WriteVarVecShiftLd, [SKXPort0, SKXPort5, SKXPort23]> {
+  let Latency = 6;
+  let ResourceCycles = [2, 1, 1];
+}
+
+// Old microcoded instructions that nobody uses.
+def : WriteRes<WriteMicrocoded, [SKXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
+
+// Fence instructions.
+def : WriteRes<WriteFence, [SKXPort23, SKXPort4]>;
+
+// Nop, not very useful except that it provides a model for nops!
+def : WriteRes<WriteNop, []>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+// HADD, HSUB PS/PD
+// x,x / v,v,v.
+def : WriteRes<WriteFHAdd, [SKXPort015]> {
+  let Latency = 3;
+}
+
+// x,m / v,v,m.
+def : WriteRes<WriteFHAddLd, [SKXPort015, SKXPort23]> {
+  let Latency = 7;
+  let ResourceCycles = [1, 1];
+}
+
+// PHADD|PHSUB (S) W/D.
+// v <- v,v.
+def : WriteRes<WritePHAdd, [SKXPort15]>;
+
+// v <- v,m.
+def : WriteRes<WritePHAddLd, [SKXPort15, SKXPort23]> {
+  let Latency = 5;
+  let ResourceCycles = [1, 1];
+}
+
+// Remaining instrs.
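Everything from here down overrides those defaults per instruction: each SKXWriteResGroupN record fixes a port list, a latency, a micro-op count, and per-port busy cycles, and InstRW bindings attach instructions to the group by opcode regex (an InstRW mapping takes precedence over an instruction's default SchedWrite). The recurring shape, shown with a hypothetical group and opcode pattern:

  def SKXWriteResGroupEx : SchedWriteRes<[SKXPort0, SKXPort23]> { // hypothetical
    let Latency = 7;            // cycles until the result is ready
    let NumMicroOps = 2;        // micro-ops queued in the reservation station
    let ResourceCycles = [1,1]; // one cycle on port 0, one on a load port
  }
  def: InstRW<[SKXWriteResGroupEx], (instregex "SOME_OPCODE")>; // hypothetical pattern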
+ +def SKXWriteResGroup1 : SchedWriteRes<[SKXPort0]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup1], (instregex "KANDBrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KANDDrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KANDNBrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KANDNDrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KANDNQrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KANDNWrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KANDQrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KANDWrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KMOVBkk")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KMOVDkk")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KMOVQkk")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KMOVWkk")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KNOTBrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KNOTDrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KNOTQrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KNOTWrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KORBrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KORDrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KORQrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KORWrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KXNORBrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KXNORDrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KXNORQrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KXNORWrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KXORBrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KXORDrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KXORQrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "KXORWrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PADDSBirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PADDSWirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PADDUSBirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PADDUSWirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PAVGBirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PAVGWirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPEQBirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPEQDirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPEQWirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPGTBirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPGTDirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PCMPGTWirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PMAXSWirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PMAXUBirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PMINSWirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PMINUBirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLDri")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLDrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLQri")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLQrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLWri")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSLLWrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRADri")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRADrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRAWri")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRAWrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLDri")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLDrr")>; +def: 
InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLQri")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLQrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLWri")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSRLWrr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSUBSBirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSUBSWirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSUBUSBirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "MMX_PSUBUSWirr")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVB2MZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVB2MZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVB2MZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVD2MZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVD2MZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVD2MZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVQ2MZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVQ2MZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVQ2MZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVW2MZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVW2MZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup1], (instregex "VPMOVW2MZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup2 : SchedWriteRes<[SKXPort1]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup2], (instregex "MMX_MASKMOVQ64")>; + +def SKXWriteResGroup3 : SchedWriteRes<[SKXPort5]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup3], (instregex "COMP_FST0r")>; +def: InstRW<[SKXWriteResGroup3], (instregex "COM_FST0r")>; +def: InstRW<[SKXWriteResGroup3], (instregex "INSERTPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "KMOVBkr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "KMOVDkr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "KMOVQkr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "KMOVWkr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MMX_MOVD64rr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MMX_MOVD64to64rr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PALIGNR64irr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PSHUFBrr64")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PSHUFWri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKHBWirr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKHDQirr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKHWDirr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKLBWirr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKLDQirr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MMX_PUNPCKLWDirr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOV64toPQIrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVDDUPrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVDI2PDIrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVHLPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVLHPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVSDrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVSHDUPrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVSLDUPrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVUPDrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "MOVUPSrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex 
"PACKSSDWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PACKSSWBrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PACKUSDWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PACKUSWBrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PALIGNRrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PBLENDWrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXBDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXBQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVSXWQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXBDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXBQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PMOVZXWQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PSHUFBrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PSHUFDri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PSHUFHWri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PSHUFLWri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PSLLDQri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PSRLDQri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKHBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKHDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKHQDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKHWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKLBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKLDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKLQDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "PUNPCKLWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "SHUFPDrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "SHUFPSrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "UCOM_FPr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "UCOM_Fr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "UNPCKHPDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "UNPCKHPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "UNPCKLPDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "UNPCKLPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VBROADCASTI32X2Z128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VBROADCASTSSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VINSERTPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VINSERTPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOV64toPQIZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOV64toPQIrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDDUPYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDDUPZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDDUPZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDDUPZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDDUPrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDI2PDIZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVDI2PDIrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVHLPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVHLPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVLHPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], 
(instregex "VMOVLHPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSDrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSHDUPrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSLDUPrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVSSZrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPDYrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPDrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPSYrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VMOVUPSrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSDWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSWBYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSWBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSWBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSWBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKSSWBrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSDWYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSDWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSDWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSDWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSDWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSWBYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSWBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSWBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSWBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPACKUSWBrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPALIGNRYrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPALIGNRZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPALIGNRZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPALIGNRZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPALIGNRrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPBLENDWYrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPBLENDWrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPBROADCASTDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPBROADCASTQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZ128rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPERMILPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXBDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXBQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVSXWQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXBDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXBQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPMOVZXWQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFBYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFBrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFDri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFHWri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSHUFLWri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex 
"VPSLLDQZ512rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSLLDQri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQYri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQZ512rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPSRLDQri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHDQYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHQDQYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHQDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHQDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHQDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHQDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHWDYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHWDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHWDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHWDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKHWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLBWYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLBWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLBWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLBWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLBWrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLDQYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLQDQYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLQDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLQDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLQDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLQDQrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VPUNPCKLWDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDYrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDZrri(b?)(k?)(z?)")>; 
+def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPDrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPSYrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VSHUFPSrri")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPDYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPSYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKHPSrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPDYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPDrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPSYrr")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup3], (instregex "VUNPCKLPSrr")>; + +def SKXWriteResGroup4 : SchedWriteRes<[SKXPort6]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup4], (instregex "JMP(16|32|64)r")>; + +def SKXWriteResGroup5 : SchedWriteRes<[SKXPort01]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup5], (instregex "PABSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PABSDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PABSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PADDSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PADDSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PADDUSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PADDUSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PAVGBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PAVGWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PCMPEQBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PCMPEQDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PCMPEQQrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PCMPEQWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PCMPGTBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PCMPGTDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PCMPGTWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PMAXSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PMAXSDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PMAXSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PMAXUBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PMAXUDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PMAXUWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PMINSBrr")>; +def: InstRW<[SKXWriteResGroup5], 
(instregex "PMINSDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PMINSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PMINUBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PMINUDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PMINUWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSIGNBrr128")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSIGNDrr128")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSIGNWrr128")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSLLDri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSLLQri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSLLWri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSRADri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSRAWri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSRLDri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSRLQri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSRLWri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSUBSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSUBSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSUBUSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "PSUBUSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSDYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPABSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDSBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDSBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDSBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDSBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDSWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDSWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDSWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDSWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDUSBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDUSBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDUSBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDUSBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDUSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDUSWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex 
"VPADDUSWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDUSWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDUSWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPADDUSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPAVGBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPAVGBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPAVGBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPAVGBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPAVGBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPAVGWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPAVGWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPAVGWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPAVGWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPAVGWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPEQBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPEQBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPEQDYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPEQDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPEQQYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPEQQrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPEQWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPEQWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPGTBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPGTBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPGTDYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPGTDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPGTWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPCMPGTWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSDYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUDYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUDrr")>; +def: InstRW<[SKXWriteResGroup5], 
(instregex "VPMAXUWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMAXUWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSDYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUDYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPMINUWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLDZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLDZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLDZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLQZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLQZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLQZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLVDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLVDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLVDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLVQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLVQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPROLVQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPRORDZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPRORDZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPRORDZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex 
"VPRORQZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPRORQZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPRORQZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPRORVDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPRORVDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPRORVDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPRORVQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPRORVQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPRORVQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSIGNBYrr256")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSIGNBrr128")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSIGNDYrr256")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSIGNDrr128")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSIGNWYrr256")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSIGNWrr128")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLDYri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLDZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLDZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLDZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLDri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLQYri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLQZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLQZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLQZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLQri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVDYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVQYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVQrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLVWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLWYri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLWZ128ri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLWZ256ri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLWZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSLLWri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRADYri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRADZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRADZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRADZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRADri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAQZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAQZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAQZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], 
(instregex "VPSRAVDYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAVDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAVDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAVDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAVDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAVQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAVQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAVQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAVWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAVWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAVWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAWYri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAWZ128ri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAWZ256ri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAWZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRAWri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLDYri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLDZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLDZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLDZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLDri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLQYri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLQZ128r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLQZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLQZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLQri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVDYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVDrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVQYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVQrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLVWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLWYri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLWZ128ri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLWZ256ri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLWZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSRLWri")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBSBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBSBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBSBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBSBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBSWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBSWZ128rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup5], (instregex "VPSUBSWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBSWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBSWrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBUSBYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBUSBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBUSBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBUSBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBUSBrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBUSWYrr")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBUSWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBUSWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBUSWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup5], (instregex "VPSUBUSWrr")>; + +def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup6], (instregex "FINCSTP")>; +def: InstRW<[SKXWriteResGroup6], (instregex "FNOP")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_MOVQ64rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PABSBrr64")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PABSDrr64")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PABSWrr64")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PADDBirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PADDDirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PADDQirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PADDWirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PANDNirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PANDirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PORirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PSIGNBrr64")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PSIGNDrr64")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PSIGNWrr64")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PSUBBirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PSUBDirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PSUBQirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PSUBWirr")>; +def: InstRW<[SKXWriteResGroup6], (instregex "MMX_PXORirr")>; + +def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup7], (instregex "ADC(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup7], (instregex "ADC(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup7], (instregex "ADC8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup7], (instregex "ADCX32rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "ADCX64rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "ADOX32rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "ADOX64rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "BTC(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup7], (instregex "BTC(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "BTR(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup7], (instregex "BTR(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "BTS(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup7], (instregex "BTS(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CDQ")>; +def: InstRW<[SKXWriteResGroup7], (instregex 
"CLAC")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVAE(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVB(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVE(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVG(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVGE(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVL(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVLE(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVNE(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVNO(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVNP(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVNS(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVO(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVP(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CMOVS(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "CQO")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JAE_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JAE_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JA_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JA_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JBE_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JBE_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JB_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JB_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JE_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JE_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JGE_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JGE_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JG_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JG_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JLE_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JLE_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JL_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JL_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JMP_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JMP_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JNE_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JNE_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JNO_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JNO_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JNP_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JNP_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JNS_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JNS_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JO_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JO_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JP_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JP_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JS_1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "JS_4")>; +def: InstRW<[SKXWriteResGroup7], (instregex "RORX32ri")>; +def: InstRW<[SKXWriteResGroup7], (instregex "RORX64ri")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SAR(16|32|64)r1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SAR(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SAR8r1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SAR8ri")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SARX32rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SARX64rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SBB(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SBB(16|32|64)rr(_REV?)")>; +def: 
InstRW<[SKXWriteResGroup7], (instregex "SBB8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETAEr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETBr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETEr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETGEr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETGr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETLEr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETLr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETNEr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETNOr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETNPr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETNSr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETOr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETPr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SETSr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHL(16|32|64)r1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHL(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHL8r1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHL8ri")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHLX32rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHLX64rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHR(16|32|64)r1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHR(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHR8r1")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHR8ri")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHRX32rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "SHRX64rr")>; +def: InstRW<[SKXWriteResGroup7], (instregex "STAC")>; + +def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup8], (instregex "ANDN32rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "ANDN64rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "BLSI32rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "BLSI64rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "BLSMSK32rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "BLSMSK64rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "BLSR32rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "BLSR64rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "BZHI32rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "BZHI64rr")>; +def: InstRW<[SKXWriteResGroup8], (instregex "LEA(16|32|64)r")>; + +def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup9], (instregex "ANDNPDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "ANDNPSrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "ANDPDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "ANDPSrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "BLENDPDrri")>; +def: InstRW<[SKXWriteResGroup9], (instregex "BLENDPSrri")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MOVAPDrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MOVAPSrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MOVDQArr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MOVDQUrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MOVPQI2QIrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "MOVSSrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "ORPDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "ORPSrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "PADDBrr")>; +def: 
InstRW<[SKXWriteResGroup9], (instregex "PADDDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "PADDQrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "PADDWrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "PANDNrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "PANDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "PORrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "PSUBBrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "PSUBDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "PSUBQrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "PSUBWrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "PXORrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDNPDYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDNPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDNPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDNPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDNPDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDNPSYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDNPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDNPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDNPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDNPSrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDPDYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDPDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDPSYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VANDPSrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDPDYrri")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDPDrri")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDPSYrri")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDPSrri")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDYrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDZ128rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDZ256rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDZrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPDrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSYrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVAPSrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA32Z128rr(b?)(k?)(z?)(_REV?)")>; +def: 
InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA32Z256rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA32Zrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA64Z128rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA64Z256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQA64Zrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQAYrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQArr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU16Z128rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU16Z256rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU16Zrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU32Z128rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU32Z256rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU32Zrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU64Z128rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU64Z256rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU64Zrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU8Z128rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU8Z256rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQU8Zrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQUYrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVDQUrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVPQI(2Q|Lo2PQ)IZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVPQI2QIrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVSSrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPDZ128rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPDZ256rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPDZrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPSZ128rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPSZ256rr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVUPSZrr(b?)(k?)(z?)(_REV?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VMOVZPQILo2PQIrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VORPDYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VORPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VORPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VORPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VORPDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VORPSYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VORPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VORPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VORPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VORPSrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDBYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDBrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDDYrr")>; +def: 
InstRW<[SKXWriteResGroup9], (instregex "VPADDDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDQYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDQrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDWYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPADDWrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDNDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDNDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDNDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDNQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDNQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDNQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDNYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDNrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPANDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDDYrri")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDDrri")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPBLENDMWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPORDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPORDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPORDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPORQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPORQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex 
"VPORQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPORYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPORrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBBYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBBrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBDYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBQYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBQrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBWYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPSUBWrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPTERNLOGDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPTERNLOGDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPTERNLOGDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPTERNLOGQZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPTERNLOGQZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPTERNLOGQZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPXORDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPXORDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPXORDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPXORQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPXORQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPXORQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPXORYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VPXORrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VXORPDYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VXORPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VXORPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VXORPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VXORPDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VXORPSYrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VXORPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VXORPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VXORPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup9], (instregex "VXORPSrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "XORPDrr")>; +def: InstRW<[SKXWriteResGroup9], (instregex "XORPSrr")>; + +def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup10], (instregex "ADD(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "ADD(16|32|64)rr(_REV?)")>; +def: 
InstRW<[SKXWriteResGroup10], (instregex "ADD8i8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "ADD8ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "ADD8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "AND(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "AND(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "AND8i8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "AND8ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "AND8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CBW")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CLC")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CMC")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CMP(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CMP(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CMP8i8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CMP8ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CMP8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "CWDE")>; +def: InstRW<[SKXWriteResGroup10], (instregex "DEC(16|32|64)r")>; +def: InstRW<[SKXWriteResGroup10], (instregex "DEC8r")>; +def: InstRW<[SKXWriteResGroup10], (instregex "INC(16|32|64)r")>; +def: InstRW<[SKXWriteResGroup10], (instregex "INC8r")>; +def: InstRW<[SKXWriteResGroup10], (instregex "LAHF")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOV(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOV8ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOV8ri_alt")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOV8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOVSX(16|32|64)rr16")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOVSX(16|32|64)rr32")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOVSX(16|32|64)rr8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOVZX(16|32|64)rr16")>; +def: InstRW<[SKXWriteResGroup10], (instregex "MOVZX(16|32|64)rr8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "NEG(16|32|64)r")>; +def: InstRW<[SKXWriteResGroup10], (instregex "NEG8r")>; +def: InstRW<[SKXWriteResGroup10], (instregex "NOOP")>; +def: InstRW<[SKXWriteResGroup10], (instregex "NOT(16|32|64)r")>; +def: InstRW<[SKXWriteResGroup10], (instregex "NOT8r")>; +def: InstRW<[SKXWriteResGroup10], (instregex "OR(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "OR(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "OR8i8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "OR8ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "OR8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SAHF")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SGDT64m")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SIDT64m")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SLDT64m")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SMSW16m")>; +def: InstRW<[SKXWriteResGroup10], (instregex "STC")>; +def: InstRW<[SKXWriteResGroup10], (instregex "STRm")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SUB(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SUB(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SUB8i8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SUB8ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SUB8rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "SYSCALL")>; +def: InstRW<[SKXWriteResGroup10], (instregex "TEST(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup10], (instregex "TEST8i8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "TEST8ri")>; +def: 
InstRW<[SKXWriteResGroup10], (instregex "TEST8rr")>; +def: InstRW<[SKXWriteResGroup10], (instregex "XCHG(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup10], (instregex "XOR(16|32|64)ri8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "XOR(16|32|64)rr(_REV?)")>; +def: InstRW<[SKXWriteResGroup10], (instregex "XOR8i8")>; +def: InstRW<[SKXWriteResGroup10], (instregex "XOR8ri")>; +def: InstRW<[SKXWriteResGroup10], (instregex "XOR8rr(_REV?)")>; + +def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> { + let Latency = 1; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm")>; +def: InstRW<[SKXWriteResGroup11], (instregex "KMOVBmk")>; +def: InstRW<[SKXWriteResGroup11], (instregex "KMOVDmk")>; +def: InstRW<[SKXWriteResGroup11], (instregex "KMOVQmk")>; +def: InstRW<[SKXWriteResGroup11], (instregex "KMOVWmk")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MMX_MOVD64from64rm")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MMX_MOVD64mr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MMX_MOVNTQmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MMX_MOVQ64mr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOV(16|32|64)mr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOV8mi")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOV8mr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVAPDmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVAPSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVDQAmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVDQUmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVHPDmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVHPSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVLPDmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVLPSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVNTDQmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVNTI_64mr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVNTImr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVNTPDmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVNTPSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVPDI2DImr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVPQI2QImr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVPQIto64mr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVSSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVUPDmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "MOVUPSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "ST_FP32m")>; +def: InstRW<[SKXWriteResGroup11], (instregex "ST_FP64m")>; +def: InstRW<[SKXWriteResGroup11], (instregex "ST_FP80m")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTF128mr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTF32x4Z256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTF32x4Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTF32x8Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTF64x2Z256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTF64x2Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTF64x4Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTI128mr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTI32x4Z256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTI32x4Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTI32x8Zmr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTI64x2Z256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTI64x2Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VEXTRACTI64x4Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVAPDYmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVAPDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVAPDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVAPDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVAPDmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVAPSYmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVAPSZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVAPSZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVAPSZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVAPSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQA32Z128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQA32Z256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQA32Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQA64Z128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQA64Z256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQA64Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQAYmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQAmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQU16Z128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQU16Z256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQU16Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQU32Z128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQU32Z256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQU32Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQU64Z128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQU64Z256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQU64Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQU8Z128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQU8Z256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQUYmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVDQUmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVHPDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVHPDmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVHPSZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVHPSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVLPDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVLPDmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVLPSZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVLPSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTDQYmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTDQZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTDQZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTDQZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTDQmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTPDYmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex 
"VMOVNTPDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTPDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTPDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTPDmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTPSYmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTPSZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTPSZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTPSZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVNTPSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVPDI2DIZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVPDI2DImr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVPQI(2QI|to64)Zmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVPQI2QImr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVPQIto64mr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVSDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVSDmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVSSZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVSSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVUPDYmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVUPDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVUPDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVUPDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVUPDmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVUPSYmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVUPSZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVUPSZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVUPSZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMOVUPSmr")>; +def: InstRW<[SKXWriteResGroup11], (instregex "VMPTRSTm")>; + +def SKXWriteResGroup12 : SchedWriteRes<[SKXPort0]> { + let Latency = 2; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup12], (instregex "COMISDrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "COMISSrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "MMX_MOVD64grr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "MMX_PMOVMSKBrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "MOVMSKPDrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "MOVMSKPSrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "MOVPDI2DIrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "MOVPQIto64rr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "PMOVMSKBrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "UCOMISDrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "UCOMISSrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VCOMISDZrb")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VCOMISDrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VCOMISSZrb")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VCOMISSrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VMOVMSKPDYrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VMOVMSKPDrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VMOVMSKPSYrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VMOVMSKPSrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VMOVPDI2DIZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VMOVPDI2DIrr")>; +def: InstRW<[SKXWriteResGroup12], 
(instregex "VMOVPQIto64Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VMOVPQIto64rr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VPMOVMSKBYrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VPMOVMSKBrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VTESTPDYrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VTESTPDrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VTESTPSYrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VTESTPSrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VUCOMISDZrb")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VUCOMISDrr")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VUCOMISSZrb")>; +def: InstRW<[SKXWriteResGroup12], (instregex "VUCOMISSrr")>; + +def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup13], (instregex "MMX_MOVQ2DQrr")>; +def: InstRW<[SKXWriteResGroup13], (instregex "MMX_PINSRWirri")>; +def: InstRW<[SKXWriteResGroup13], (instregex "PINSRBrr")>; +def: InstRW<[SKXWriteResGroup13], (instregex "PINSRDrr")>; +def: InstRW<[SKXWriteResGroup13], (instregex "PINSRQrr")>; +def: InstRW<[SKXWriteResGroup13], (instregex "PINSRWrri")>; +def: InstRW<[SKXWriteResGroup13], (instregex "VPINSRBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup13], (instregex "VPINSRBrr")>; +def: InstRW<[SKXWriteResGroup13], (instregex "VPINSRDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup13], (instregex "VPINSRDrr")>; +def: InstRW<[SKXWriteResGroup13], (instregex "VPINSRQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup13], (instregex "VPINSRQrr")>; +def: InstRW<[SKXWriteResGroup13], (instregex "VPINSRWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup13], (instregex "VPINSRWrri")>; + +def SKXWriteResGroup14 : SchedWriteRes<[SKXPort05]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup14], (instregex "FDECSTP")>; +def: InstRW<[SKXWriteResGroup14], (instregex "MMX_MOVDQ2Qrr")>; + +def SKXWriteResGroup15 : SchedWriteRes<[SKXPort06]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup15], (instregex "CMOVA(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup15], (instregex "CMOVBE(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup15], (instregex "ROL(16|32|64)r1")>; +def: InstRW<[SKXWriteResGroup15], (instregex "ROL(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup15], (instregex "ROL8r1")>; +def: InstRW<[SKXWriteResGroup15], (instregex "ROL8ri")>; +def: InstRW<[SKXWriteResGroup15], (instregex "ROR(16|32|64)r1")>; +def: InstRW<[SKXWriteResGroup15], (instregex "ROR(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup15], (instregex "ROR8r1")>; +def: InstRW<[SKXWriteResGroup15], (instregex "ROR8ri")>; +def: InstRW<[SKXWriteResGroup15], (instregex "SETAr")>; +def: InstRW<[SKXWriteResGroup15], (instregex "SETBEr")>; + +def SKXWriteResGroup16 : SchedWriteRes<[SKXPort015]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup16], (instregex "BLENDVPDrr0")>; +def: InstRW<[SKXWriteResGroup16], (instregex "BLENDVPSrr0")>; +def: InstRW<[SKXWriteResGroup16], (instregex "PBLENDVBrr0")>; +def: InstRW<[SKXWriteResGroup16], (instregex "VBLENDVPDYrr")>; +def: InstRW<[SKXWriteResGroup16], (instregex "VBLENDVPDrr")>; +def: InstRW<[SKXWriteResGroup16], (instregex "VBLENDVPSYrr")>; +def: InstRW<[SKXWriteResGroup16], (instregex "VBLENDVPSrr")>; +def: InstRW<[SKXWriteResGroup16], (instregex "VPBLENDVBYrr")>; +def: 
InstRW<[SKXWriteResGroup16], (instregex "VPBLENDVBrr")>; + +def SKXWriteResGroup17 : SchedWriteRes<[SKXPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup17], (instregex "LFENCE")>; +def: InstRW<[SKXWriteResGroup17], (instregex "WAIT")>; +def: InstRW<[SKXWriteResGroup17], (instregex "XGETBV")>; + +def SKXWriteResGroup18 : SchedWriteRes<[SKXPort0,SKXPort237]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup18], (instregex "MMX_MASKMOVQ64")>; +def: InstRW<[SKXWriteResGroup18], (instregex "VMASKMOVDQU")>; +def: InstRW<[SKXWriteResGroup18], (instregex "VMASKMOVPDYmr")>; +def: InstRW<[SKXWriteResGroup18], (instregex "VMASKMOVPDmr")>; +def: InstRW<[SKXWriteResGroup18], (instregex "VMASKMOVPSYmr")>; +def: InstRW<[SKXWriteResGroup18], (instregex "VMASKMOVPSmr")>; +def: InstRW<[SKXWriteResGroup18], (instregex "VPMASKMOVDYmr")>; +def: InstRW<[SKXWriteResGroup18], (instregex "VPMASKMOVDmr")>; +def: InstRW<[SKXWriteResGroup18], (instregex "VPMASKMOVQYmr")>; +def: InstRW<[SKXWriteResGroup18], (instregex "VPMASKMOVQmr")>; + +def SKXWriteResGroup19 : SchedWriteRes<[SKXPort5,SKXPort01]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup19], (instregex "PSLLDrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "PSLLQrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "PSLLWrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "PSRADrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "PSRAWrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "PSRLDrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "PSRLQrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "PSRLWrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSLLDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSLLDrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSLLQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSLLQrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSLLWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSLLWrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSRADZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSRADrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSRAQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSRAWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSRAWrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSRLDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSRLDrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSRLQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSRLQrr")>; +def: InstRW<[SKXWriteResGroup19], (instregex "VPSRLWrr")>; + +def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup20], (instregex "CLFLUSH")>; + +def SKXWriteResGroup21 : SchedWriteRes<[SKXPort237,SKXPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup21], (instregex "SFENCE")>; + +def SKXWriteResGroup22 : SchedWriteRes<[SKXPort06,SKXPort15]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup22], (instregex "BEXTR32rr")>; +def: InstRW<[SKXWriteResGroup22], (instregex "BEXTR64rr")>; +def: InstRW<[SKXWriteResGroup22], 
(instregex "BSWAP(16|32|64)r")>; + +def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup23], (instregex "ADC8i8")>; +def: InstRW<[SKXWriteResGroup23], (instregex "ADC8ri")>; +def: InstRW<[SKXWriteResGroup23], (instregex "CWD")>; +def: InstRW<[SKXWriteResGroup23], (instregex "JRCXZ")>; +def: InstRW<[SKXWriteResGroup23], (instregex "SBB8i8")>; +def: InstRW<[SKXWriteResGroup23], (instregex "SBB8ri")>; + +def SKXWriteResGroup24 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup24], (instregex "EXTRACTPSmr")>; +def: InstRW<[SKXWriteResGroup24], (instregex "PEXTRBmr")>; +def: InstRW<[SKXWriteResGroup24], (instregex "PEXTRDmr")>; +def: InstRW<[SKXWriteResGroup24], (instregex "PEXTRQmr")>; +def: InstRW<[SKXWriteResGroup24], (instregex "PEXTRWmr")>; +def: InstRW<[SKXWriteResGroup24], (instregex "STMXCSR")>; +def: InstRW<[SKXWriteResGroup24], (instregex "VEXTRACTPSZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup24], (instregex "VEXTRACTPSmr")>; +def: InstRW<[SKXWriteResGroup24], (instregex "VPEXTRBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup24], (instregex "VPEXTRBmr")>; +def: InstRW<[SKXWriteResGroup24], (instregex "VPEXTRDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup24], (instregex "VPEXTRDmr")>; +def: InstRW<[SKXWriteResGroup24], (instregex "VPEXTRQZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup24], (instregex "VPEXTRQmr")>; +def: InstRW<[SKXWriteResGroup24], (instregex "VPEXTRWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup24], (instregex "VPEXTRWmr")>; +def: InstRW<[SKXWriteResGroup24], (instregex "VSTMXCSR")>; + +def SKXWriteResGroup25 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup25], (instregex "FNSTCW16m")>; + +def SKXWriteResGroup26 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup26], (instregex "SETAEm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETBm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETEm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETGEm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETGm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETLEm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETLm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETNEm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETNOm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETNPm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETNSm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETOm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETPm")>; +def: InstRW<[SKXWriteResGroup26], (instregex "SETSm")>; + +def SKXWriteResGroup27 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>; + +def SKXWriteResGroup28 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)r")>; +def: InstRW<[SKXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>; +def: InstRW<[SKXWriteResGroup28], (instregex "PUSH64i8")>; +def: 
InstRW<[SKXWriteResGroup28], (instregex "STOSB")>; +def: InstRW<[SKXWriteResGroup28], (instregex "STOSL")>; +def: InstRW<[SKXWriteResGroup28], (instregex "STOSQ")>; +def: InstRW<[SKXWriteResGroup28], (instregex "STOSW")>; + +def SKXWriteResGroup29 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort15]> { + let Latency = 2; + let NumMicroOps = 5; + let ResourceCycles = [2,2,1]; +} +def: InstRW<[SKXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)(k?)(z?)")>; + +def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup30], (instregex "KADDBrr")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KADDDrr")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KADDQrr")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KADDWrr")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KMOVBrk")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KMOVDrk")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KMOVQrk")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KMOVWrk")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KORTESTBrr")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KORTESTDrr")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KORTESTQrr")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KORTESTWrr")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KTESTBrr")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KTESTDrr")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KTESTQrr")>; +def: InstRW<[SKXWriteResGroup30], (instregex "KTESTWrr")>; + +def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup31], (instregex "BSF(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup31], (instregex "BSR(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup31], (instregex "IMUL64rr(i8?)")>; +def: InstRW<[SKXWriteResGroup31], (instregex "IMUL8r")>; +def: InstRW<[SKXWriteResGroup31], (instregex "LZCNT(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup31], (instregex "MUL8r")>; +def: InstRW<[SKXWriteResGroup31], (instregex "PDEP32rr")>; +def: InstRW<[SKXWriteResGroup31], (instregex "PDEP64rr")>; +def: InstRW<[SKXWriteResGroup31], (instregex "PEXT32rr")>; +def: InstRW<[SKXWriteResGroup31], (instregex "PEXT64rr")>; +def: InstRW<[SKXWriteResGroup31], (instregex "POPCNT(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup31], (instregex "SHLD(16|32|64)rri8")>; +def: InstRW<[SKXWriteResGroup31], (instregex "SHRD(16|32|64)rri8")>; +def: InstRW<[SKXWriteResGroup31], (instregex "TZCNT(16|32|64)rr")>; + +def SKXWriteResGroup31_16 : SchedWriteRes<[SKXPort1, SKXPort0156]> { + let Latency = 3; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup31_16], (instregex "IMUL16rr(i8?)")>; + +def SKXWriteResGroup31_32 : SchedWriteRes<[SKXPort1]> { + let Latency = 3; + let NumMicroOps = 1; +} +def: InstRW<[SKXWriteResGroup31_32], (instregex "IMUL32rr(i8?)")>; + +def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup32], (instregex "ADD_FPrST0")>; +def: InstRW<[SKXWriteResGroup32], (instregex "ADD_FST0r")>; +def: InstRW<[SKXWriteResGroup32], (instregex "ADD_FrST0")>; +def: InstRW<[SKXWriteResGroup32], (instregex "KSHIFTLBri")>; +def: InstRW<[SKXWriteResGroup32], (instregex "KSHIFTLDri")>; +def: InstRW<[SKXWriteResGroup32], (instregex "KSHIFTLQri")>; +def: InstRW<[SKXWriteResGroup32], (instregex "KSHIFTLWri")>; +def: InstRW<[SKXWriteResGroup32], 
(instregex "KSHIFTRBri")>; +def: InstRW<[SKXWriteResGroup32], (instregex "KSHIFTRDri")>; +def: InstRW<[SKXWriteResGroup32], (instregex "KSHIFTRQri")>; +def: InstRW<[SKXWriteResGroup32], (instregex "KSHIFTRWri")>; +def: InstRW<[SKXWriteResGroup32], (instregex "KUNPCKBWrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "KUNPCKDQrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "KUNPCKWDrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "MMX_PSADBWirr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "PCMPGTQrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "PSADBWrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "SUBR_FPrST0")>; +def: InstRW<[SKXWriteResGroup32], (instregex "SUBR_FST0r")>; +def: InstRW<[SKXWriteResGroup32], (instregex "SUBR_FrST0")>; +def: InstRW<[SKXWriteResGroup32], (instregex "SUB_FPrST0")>; +def: InstRW<[SKXWriteResGroup32], (instregex "SUB_FST0r")>; +def: InstRW<[SKXWriteResGroup32], (instregex "SUB_FrST0")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VALIGNDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VALIGNDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VALIGNDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VALIGNQZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VALIGNQZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VALIGNQZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VBROADCASTF32X2Z256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VBROADCASTF32X2Zr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VBROADCASTI32X2Z256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VBROADCASTI32X2Zr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VBROADCASTSDYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VBROADCASTSDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VBROADCASTSDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VBROADCASTSSYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VBROADCASTSSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VBROADCASTSSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VBROADCASTSSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VCMPPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VCMPPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VCMPPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VCMPPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VCMPPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VCMPPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VCMPSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VCMPSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VDBPSADBWZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VDBPSADBWZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VDBPSADBWZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTF128rr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTF32x4Z256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTF32x4Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTF32x8Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTF64x2Z256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], 
(instregex "VEXTRACTF64x2Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTF64x4Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTI128rr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTI32x4Z256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTI32x4Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTI32x8Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTI64x2Z256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTI64x2Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VEXTRACTI64x4Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VFPCLASSPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VFPCLASSPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VFPCLASSPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VFPCLASSPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VFPCLASSPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VFPCLASSPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VFPCLASSSDrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VFPCLASSSSrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTF128rr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTF32x4Z256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTF32x4Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTF32x8Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTF64x2Z256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTF64x2Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTF64x4Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTI128rr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTI32x4Z256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTI32x4Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTI32x8Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTI64x2Z256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTI64x2Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VINSERTI64x4Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTBYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTBZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTBZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTBZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTBrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTDYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTDrZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTDrZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTDrZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTQYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTQZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTQZ256r(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTQZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTQrZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTQrZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTQrZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTWYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTWZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTWZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTWZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPBROADCASTWrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPBZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPBZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPBZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPEQWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTQYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTQrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPGTWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPQZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPQZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPQZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPUBZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPUBZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex 
"VPCMPUBZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPUDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPUDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPUDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPUQZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPUQZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPUQZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPUWZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPUWZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPUWZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPWZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPWZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPCMPWZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERM2F128rr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERM2I128rr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMDYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2D128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2D256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2Drr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2PD128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2PD256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2PDrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2PS128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2PS256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2PSrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2Q128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2Q256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMI2Qrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMPDYri")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMPDZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMPDZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMPSYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMQYri")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMQZ256r(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMQZri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMT2D128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMT2D256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMT2Drr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMT2PD128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMT2PD256rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup32], (instregex "VPERMT2PDrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMT2PS128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMT2PS256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMT2PSrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMT2Q128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMT2Q256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPERMT2Qrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMAXSQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMAXSQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMAXSQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMAXUQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMAXUQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMAXUQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMINSQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMINSQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMINSQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMINUQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMINUQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMINUQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVQDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVQDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVQDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBDYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBQYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBWYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXBWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXDQYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXWDYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXWDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXWDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXWDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXWQYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXWQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXWQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVSXWQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXBDYrr")>; +def: InstRW<[SKXWriteResGroup32], 
(instregex "VPMOVZXBDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXBDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXBDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXBQYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXBQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXBQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXBQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXBWYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXBWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXBWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXBWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXDQYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXWDYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXWDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXWDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXWDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXWQYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXWQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXWQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPMOVZXWQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPSADBWYrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPSADBWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPSADBWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPSADBWrr")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTMWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTNMBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTNMBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTNMBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTNMDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTNMDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTNMDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTNMQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTNMQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], 
(instregex "VPTESTNMQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTNMWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTNMWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VPTESTNMWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VSHUFF32X4Z256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VSHUFF32X4Zrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VSHUFF64X2Z256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VSHUFF64X2Zrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VSHUFI32X4Z256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VSHUFI32X4Zrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VSHUFI64X2Z256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup32], (instregex "VSHUFI64X2Zrri(b?)(k?)(z?)")>; + +def SKXWriteResGroup33 : SchedWriteRes<[SKXPort0,SKXPort5]> { + let Latency = 3; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup33], (instregex "EXTRACTPSrr")>; +def: InstRW<[SKXWriteResGroup33], (instregex "MMX_PEXTRWirri")>; +def: InstRW<[SKXWriteResGroup33], (instregex "PEXTRBrr")>; +def: InstRW<[SKXWriteResGroup33], (instregex "PEXTRDrr")>; +def: InstRW<[SKXWriteResGroup33], (instregex "PEXTRQrr")>; +def: InstRW<[SKXWriteResGroup33], (instregex "PEXTRWri")>; +def: InstRW<[SKXWriteResGroup33], (instregex "PEXTRWrr_REV")>; +def: InstRW<[SKXWriteResGroup33], (instregex "PTESTrr")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VEXTRACTPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VEXTRACTPSrr")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRBrr")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRDrr")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRQrr")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRWZrr(_REV?)")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRWri")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPEXTRWrr_REV")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPTESTYrr")>; +def: InstRW<[SKXWriteResGroup33], (instregex "VPTESTrr")>; + +def SKXWriteResGroup34 : SchedWriteRes<[SKXPort0,SKXPort0156]> { + let Latency = 3; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup34], (instregex "FNSTSW16r")>; + +def SKXWriteResGroup35 : SchedWriteRes<[SKXPort06]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SKXWriteResGroup35], (instregex "ROL(16|32|64)rCL")>; +def: InstRW<[SKXWriteResGroup35], (instregex "ROL8rCL")>; +def: InstRW<[SKXWriteResGroup35], (instregex "ROR(16|32|64)rCL")>; +def: InstRW<[SKXWriteResGroup35], (instregex "ROR8rCL")>; +def: InstRW<[SKXWriteResGroup35], (instregex "SAR(16|32|64)rCL")>; +def: InstRW<[SKXWriteResGroup35], (instregex "SAR8rCL")>; +def: InstRW<[SKXWriteResGroup35], (instregex "SHL(16|32|64)rCL")>; +def: InstRW<[SKXWriteResGroup35], (instregex "SHL8rCL")>; +def: InstRW<[SKXWriteResGroup35], (instregex "SHR(16|32|64)rCL")>; +def: InstRW<[SKXWriteResGroup35], (instregex "SHR8rCL")>; + +def SKXWriteResGroup36 : SchedWriteRes<[SKXPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SKXWriteResGroup36], (instregex 
"XADD(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup36], (instregex "XADD8rr")>; +def: InstRW<[SKXWriteResGroup36], (instregex "XCHG8rr")>; + +def SKXWriteResGroup37 : SchedWriteRes<[SKXPort0,SKXPort5]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup37], (instregex "MMX_PHADDSWrr64")>; +def: InstRW<[SKXWriteResGroup37], (instregex "MMX_PHSUBSWrr64")>; + +def SKXWriteResGroup38 : SchedWriteRes<[SKXPort5,SKXPort01]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup38], (instregex "PHADDSWrr128")>; +def: InstRW<[SKXWriteResGroup38], (instregex "PHSUBSWrr128")>; +def: InstRW<[SKXWriteResGroup38], (instregex "VPHADDSWrr128")>; +def: InstRW<[SKXWriteResGroup38], (instregex "VPHADDSWrr256")>; +def: InstRW<[SKXWriteResGroup38], (instregex "VPHSUBSWrr128")>; +def: InstRW<[SKXWriteResGroup38], (instregex "VPHSUBSWrr256")>; + +def SKXWriteResGroup39 : SchedWriteRes<[SKXPort5,SKXPort05]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup39], (instregex "MMX_PHADDWrr64")>; +def: InstRW<[SKXWriteResGroup39], (instregex "MMX_PHADDrr64")>; +def: InstRW<[SKXWriteResGroup39], (instregex "MMX_PHSUBDrr64")>; +def: InstRW<[SKXWriteResGroup39], (instregex "MMX_PHSUBWrr64")>; + +def SKXWriteResGroup40 : SchedWriteRes<[SKXPort5,SKXPort015]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup40], (instregex "PHADDDrr")>; +def: InstRW<[SKXWriteResGroup40], (instregex "PHADDWrr")>; +def: InstRW<[SKXWriteResGroup40], (instregex "PHSUBDrr")>; +def: InstRW<[SKXWriteResGroup40], (instregex "PHSUBWrr")>; +def: InstRW<[SKXWriteResGroup40], (instregex "VPHADDDYrr")>; +def: InstRW<[SKXWriteResGroup40], (instregex "VPHADDDrr")>; +def: InstRW<[SKXWriteResGroup40], (instregex "VPHADDWYrr")>; +def: InstRW<[SKXWriteResGroup40], (instregex "VPHADDWrr")>; +def: InstRW<[SKXWriteResGroup40], (instregex "VPHSUBDYrr")>; +def: InstRW<[SKXWriteResGroup40], (instregex "VPHSUBDrr")>; +def: InstRW<[SKXWriteResGroup40], (instregex "VPHSUBWYrr")>; +def: InstRW<[SKXWriteResGroup40], (instregex "VPHSUBWrr")>; + +def SKXWriteResGroup41 : SchedWriteRes<[SKXPort5,SKXPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup41], (instregex "MMX_PACKSSDWirr")>; +def: InstRW<[SKXWriteResGroup41], (instregex "MMX_PACKSSWBirr")>; +def: InstRW<[SKXWriteResGroup41], (instregex "MMX_PACKUSWBirr")>; + +def SKXWriteResGroup42 : SchedWriteRes<[SKXPort6,SKXPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup42], (instregex "CLD")>; + +def SKXWriteResGroup43 : SchedWriteRes<[SKXPort237,SKXPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup43], (instregex "MFENCE")>; + +def SKXWriteResGroup44 : SchedWriteRes<[SKXPort06,SKXPort0156]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup44], (instregex "RCL(16|32|64)r1")>; +def: InstRW<[SKXWriteResGroup44], (instregex "RCL(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup44], (instregex "RCL8r1")>; +def: InstRW<[SKXWriteResGroup44], (instregex "RCL8ri")>; +def: InstRW<[SKXWriteResGroup44], (instregex "RCR(16|32|64)r1")>; +def: InstRW<[SKXWriteResGroup44], (instregex "RCR(16|32|64)ri")>; +def: InstRW<[SKXWriteResGroup44], (instregex "RCR8r1")>; 
+def: InstRW<[SKXWriteResGroup44], (instregex "RCR8ri")>;
+
+def SKXWriteResGroup45 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKXWriteResGroup45], (instregex "FNSTSWm")>;
+
+def SKXWriteResGroup46 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06]> {
+  let Latency = 3;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SKXWriteResGroup46], (instregex "SETAm")>;
+def: InstRW<[SKXWriteResGroup46], (instregex "SETBEm")>;
+
+def SKXWriteResGroup47 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort237,SKXPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SKXWriteResGroup47], (instregex "CALL(16|32|64)r")>;
+
+def SKXWriteResGroup48 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort06,SKXPort0156]> {
+  let Latency = 3;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SKXWriteResGroup48], (instregex "CALL64pcrel32")>;
+
+def SKXWriteResGroup49 : SchedWriteRes<[SKXPort0]> {
+  let Latency = 4;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup49], (instregex "AESDECLASTrr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "AESDECrr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "AESENCLASTrr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "AESENCrr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "MMX_PMADDUBSWrr64")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "MMX_PMADDWDirr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "MMX_PMULHRSWrr64")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "MMX_PMULHUWirr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "MMX_PMULHWirr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "MMX_PMULLWirr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "MMX_PMULUDQirr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "MUL_FPrST0")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "MUL_FST0r")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "MUL_FrST0")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "RCPPSr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "RCPSSr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "RSQRTPSr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "RSQRTSSr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VAESDECLASTrr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VAESDECrr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VAESENCLASTrr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VAESENCrr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14PDZ128r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14PDZ256r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14PSZ128r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14PSZ256r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14SDrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCP14SSrr(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCPPSYr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCPPSr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRCPSSr")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14PDZ128r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14PDZ256r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14PSZ128r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14PSZ256r(b?)(k?)(z?)")>;
+def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14SDrr(b?)(k?)(z?)")>;
+def:
InstRW<[SKXWriteResGroup49], (instregex "VRSQRT14SSrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRTPSYr")>; +def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRTPSr")>; +def: InstRW<[SKXWriteResGroup49], (instregex "VRSQRTSSr")>; + +def SKXWriteResGroup50 : SchedWriteRes<[SKXPort015]> { + let Latency = 4; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup50], (instregex "ADDPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "ADDPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "ADDSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "ADDSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "ADDSUBPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "ADDSUBPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "CMPPDrri")>; +def: InstRW<[SKXWriteResGroup50], (instregex "CMPPSrri")>; +def: InstRW<[SKXWriteResGroup50], (instregex "CMPSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "CVTDQ2PSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "CVTPS2DQrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "CVTTPS2DQrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MAXPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MAXPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MAXSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MAXSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MINPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MINPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MINSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MINSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MULPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MULPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MULSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "MULSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "PHMINPOSUWrr128")>; +def: InstRW<[SKXWriteResGroup50], (instregex "PMADDUBSWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "PMADDWDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "PMULDQrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "PMULHRSWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "PMULHUWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "PMULHWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "PMULLWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "PMULUDQrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "SUBPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "SUBPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "SUBSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "SUBSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], 
(instregex "VADDSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSUBPDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSUBPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSUBPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VADDSUBPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPPDYrri")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPPDrri")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPPSYrri")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPPSrri")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCMPSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2QQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2QQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2QQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2UQQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2UQQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPD2UQQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2DQYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2DQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2DQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2DQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2DQrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2UDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2UDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTPS2UDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTQQ2PDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTQQ2PDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTQQ2PDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2QQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2QQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2QQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2UQQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2UQQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPD2UQQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2DQYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2DQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2DQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2DQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2DQrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2UDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2UDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTTPS2UDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUDQ2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUDQ2PSZ256rr(b?)(k?)(z?)")>; 
+def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUDQ2PSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUQQ2PDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUQQ2PDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTUQQ2PDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMSDrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFIXUPIMMSSrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD132SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD213SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], 
(instregex "VFMADD231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADD231SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMADDSUB231PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex 
"VFMSUB132SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB132SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB213SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUB231SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex 
"VFMSUBADD213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFMSUBADD231PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD132SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD213SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSYr")>; +def: 
InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMADD231SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB132SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB213SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PDr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSYr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231PSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SDZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SDr")>; 
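+// The (b?)(k?)(z?) tails in these instregex patterns are deliberate
+// over-matches: a single entry covers an AVX-512 instruction's base name
+// plus its broadcast/rounding ("b"), merge-masked ("k") and zero-masked
+// ("kz") suffixed variants, so every variant inherits the same
+// SKXWriteResGroup50 resources and latency.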
+def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VFNMSUB231SSr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPDZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPDZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPDr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPPSr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPSDr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETEXPSSr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTSDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VGETMANTSSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMAXSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMINSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPDZ256rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup50], (instregex "VMULPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VMULSSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPHMINPOSUWrr128")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPLZCNTQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDUBSWYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDUBSWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDUBSWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDUBSWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDUBSWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDWDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDWDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDWDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDWDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMADDWDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULDQYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULDQrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHRSWYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHRSWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHRSWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHRSWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHRSWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHUWYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHUWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHUWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHUWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHUWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULHWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex 
"VPMULLWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULLWrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULUDQYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULUDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULUDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULUDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VPMULUDQrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGEPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGEPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGEPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGEPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGEPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGEPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGESDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VRANGESSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCEPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCESDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VREDUCESSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFSDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSCALEFSSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPDYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSYrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBPSrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSDrr")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSSZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup50], (instregex "VSUBSSrr")>; + +def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup51], (instregex 
"MPSADBWrri")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VMPSADBWYrri")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VMPSADBWrri")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPEXPANDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVDBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVDBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVDBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVDWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVDWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVDWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVQWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSDWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSQWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSWBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSWBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVSWBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDBZ256rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSDWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSQWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSWBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSWBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVUSWBZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVWBZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVWBZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup51], (instregex "VPMOVWBZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup52 : SchedWriteRes<[SKXPort1,SKXPort5]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup52], (instregex "IMUL(32|64)r")>; +def: InstRW<[SKXWriteResGroup52], (instregex "MUL(32|64)r")>; +def: InstRW<[SKXWriteResGroup52], (instregex "MULX64rr")>; + +def SKXWriteResGroup52_16 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { + let Latency = 4; + let NumMicroOps = 4; +} +def: InstRW<[SKXWriteResGroup52_16], (instregex "IMUL16r")>; +def: InstRW<[SKXWriteResGroup52_16], (instregex "MUL16r")>; + +def SKXWriteResGroup53 : SchedWriteRes<[SKXPort5,SKXPort01]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLDYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLQYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLWYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSLLWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRADYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRADZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRADZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRAQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRAQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRAWYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRAWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRAWZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLDYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLDZ256rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup53], (instregex "VPSRLDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLQYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLWYrr")>; +def: InstRW<[SKXWriteResGroup53], (instregex "VPSRLWZ256rr(b?)(k?)(z?)")>; + +def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup54], (instregex "ISTT_FP16m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "ISTT_FP32m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "ISTT_FP64m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "IST_F16m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "IST_F32m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "IST_FP16m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "IST_FP32m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "IST_FP64m")>; +def: InstRW<[SKXWriteResGroup54], (instregex "VPMOVQDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup54], (instregex "VPMOVQDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup54], (instregex "VPMOVQDZmr(b?)(k?)(z?)")>; + +def SKXWriteResGroup55 : SchedWriteRes<[SKXPort0156]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [4]; +} +def: InstRW<[SKXWriteResGroup55], (instregex "FNCLEX")>; + +def SKXWriteResGroup56 : SchedWriteRes<[SKXPort015,SKXPort0156]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SKXWriteResGroup56], (instregex "VZEROUPPER")>; + +def SKXWriteResGroup57 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort0156]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SKXWriteResGroup57], (instregex "LAR(16|32|64)rr")>; + +def SKXWriteResGroup58 : SchedWriteRes<[SKXPort23]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup58], (instregex "MMX_MOVD64from64rm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MMX_MOVD64rm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MMX_MOVD64to64rm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MMX_MOVQ64rm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOV(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOV64toPQIrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOV8rm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVDDUPrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVDI2PDIrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVSSrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm16")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm32")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm8")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVZX(16|32|64)rm16")>; +def: InstRW<[SKXWriteResGroup58], (instregex "MOVZX(16|32|64)rm8")>; +def: InstRW<[SKXWriteResGroup58], (instregex "PREFETCHNTA")>; +def: InstRW<[SKXWriteResGroup58], (instregex "PREFETCHT0")>; +def: InstRW<[SKXWriteResGroup58], (instregex "PREFETCHT1")>; +def: InstRW<[SKXWriteResGroup58], (instregex "PREFETCHT2")>; +def: InstRW<[SKXWriteResGroup58], (instregex "VMOV64toPQIrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "VMOVDDUPrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "VMOVDI2PDIrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "VMOVQI2PQIrm")>; +def: 
InstRW<[SKXWriteResGroup58], (instregex "VMOVSDrm")>; +def: InstRW<[SKXWriteResGroup58], (instregex "VMOVSSrm")>; + +def SKXWriteResGroup59 : SchedWriteRes<[SKXPort015]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup59], (instregex "VCVTSD2SSZrr_Int(b?)(k?)(z?)")>; + +def SKXWriteResGroup60 : SchedWriteRes<[SKXPort0,SKXPort5]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup60], (instregex "CVTDQ2PDrr")>; +def: InstRW<[SKXWriteResGroup60], (instregex "MMX_CVTPI2PDirr")>; +def: InstRW<[SKXWriteResGroup60], (instregex "VCVTDQ2PDrr")>; + +def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup61], (instregex "CVTPD2DQrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTPD2PSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTPS2PDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTSD2SSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTSI2SD64rr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTSI2SDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTSI2SSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTSS2SDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "CVTTPD2DQrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVTPD2PIirr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVTPS2PIirr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVTTPD2PIirr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVTTPS2PIirr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTDQ2PDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPD2DQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPD2DQrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPD2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPD2PSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPD2UDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPH2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPH2PSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2PDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2PDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2PHZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2PHrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2QQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTPS2UQQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTQQ2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSD2SSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SD64rr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI2SSrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSI642SDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSS2SDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTSS2SDrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTTPD2DQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTTPD2DQrr")>; +def: InstRW<[SKXWriteResGroup61], (instregex 
"VCVTTPD2UDQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTTPS2QQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTTPS2UQQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTUDQ2PDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTUQQ2PSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTUSI2SDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTUSI2SSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup61], (instregex "VCVTUSI642SDZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup62 : SchedWriteRes<[SKXPort5,SKXPort015]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup62], (instregex "VPCONFLICTQZ128rr(b?)(k?)(z?)")>; + +def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>; + +def SKXWriteResGroup64 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup64], (instregex "MULX32rr")>; + +def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort015]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup65], (instregex "VCVTPS2PHZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup65], (instregex "VCVTPS2PHZmr(b?)(k?)(z?)")>; + +def SKXWriteResGroup66 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { + let Latency = 5; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVDWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVQWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSDBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSDBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSDBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSDWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSDWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSDWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQDZ256mr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSQWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSWBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSWBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVSWBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSDWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQWZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQWZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSQWZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSWBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSWBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVUSWBZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVWBZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVWBZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup66], (instregex "VPMOVWBZmr(b?)(k?)(z?)")>; + +def SKXWriteResGroup67 : SchedWriteRes<[SKXPort06,SKXPort0156]> { + let Latency = 5; + let NumMicroOps = 5; + let ResourceCycles = [1,4]; +} +def: InstRW<[SKXWriteResGroup67], (instregex "XSETBV")>; + +def SKXWriteResGroup68 : SchedWriteRes<[SKXPort06,SKXPort0156]> { + let Latency = 5; + let NumMicroOps = 5; + let ResourceCycles = [2,3]; +} +def: InstRW<[SKXWriteResGroup68], (instregex "CMPXCHG(16|32|64)rr")>; +def: InstRW<[SKXWriteResGroup68], (instregex "CMPXCHG8rr")>; + +def SKXWriteResGroup69 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort0156]> { + let Latency = 5; + let NumMicroOps = 6; + let ResourceCycles = [1,1,4]; +} +def: InstRW<[SKXWriteResGroup69], (instregex "PUSHF16")>; +def: InstRW<[SKXWriteResGroup69], (instregex "PUSHF64")>; + +def SKXWriteResGroup70 : SchedWriteRes<[SKXPort5]> { + let Latency = 6; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup70], (instregex "PCLMULQDQrr")>; +def: InstRW<[SKXWriteResGroup70], (instregex "VPCLMULQDQrr")>; + +def SKXWriteResGroup71 : SchedWriteRes<[SKXPort23]> { + let Latency = 6; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup71], (instregex "LDDQUrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVAPDrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVAPSrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVDQArm")>; +def: 
InstRW<[SKXWriteResGroup71], (instregex "MOVDQUrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVNTDQArm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVSHDUPrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVSLDUPrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVUPDrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "MOVUPSrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VBROADCASTSSrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VLDDQUrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVAPDrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVAPSrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVDQArm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVDQUrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVNTDQArm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVSHDUPrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVSLDUPrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVUPDrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VMOVUPSrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VPBROADCASTDrm")>; +def: InstRW<[SKXWriteResGroup71], (instregex "VPBROADCASTQrm")>; + +def SKXWriteResGroup72 : SchedWriteRes<[SKXPort0]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup72], (instregex "MMX_CVTPI2PSirr")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPSZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VCOMPRESSPSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPCOMPRESSQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPERMWZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPERMWZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup72], (instregex "VPERMWZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup73 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PADDSBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PADDSWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PADDUSBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PADDUSWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PAVGBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PAVGWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPEQBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPEQDirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPEQWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPGTBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPGTDirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PCMPGTWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PMAXSWirm")>; +def: 
InstRW<[SKXWriteResGroup73], (instregex "MMX_PMAXUBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PMINSWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PMINUBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSLLDrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSLLQrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSLLWrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSRADrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSRAWrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSRLDrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSRLQrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSRLWrm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSUBSBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSUBSWirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSUBUSBirm")>; +def: InstRW<[SKXWriteResGroup73], (instregex "MMX_PSUBUSWirm")>; + +def SKXWriteResGroup74 : SchedWriteRes<[SKXPort0,SKXPort015]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup74], (instregex "CVTSD2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "CVTSD2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "CVTSS2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "CVTSS2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "CVTTSD2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "CVTTSD2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2SI64Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2SIZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2USI64Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSD2USIZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2SI64Zrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2SIZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTSS2USIZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SI64Zrb")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SI64rr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SIZrb")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2SIrr")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2USI64Zrb")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSD2USIZrb")>; +def: InstRW<[SKXWriteResGroup74], (instregex "VCVTTSS2USIZrb")>; + +def SKXWriteResGroup75 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PALIGNR64irm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PINSRWirmi")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PSHUFBrm64")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PSHUFWmi")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKHBWirm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKHDQirm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKHWDirm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKLBWirm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKLDQirm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MMX_PUNPCKLWDirm")>; +def: 
InstRW<[SKXWriteResGroup75], (instregex "MOVHPDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MOVHPSrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MOVLPDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "MOVLPSrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PINSRBrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PINSRDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PINSRQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PINSRWrmi")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVSXBDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVSXBQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVSXBWrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVSXDQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVSXWDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVSXWQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVZXBDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVZXBQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVZXBWrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVZXDQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVZXWDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "PMOVZXWQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VMOVHPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VMOVHPDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VMOVHPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VMOVHPSrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VMOVLPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VMOVLPDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VMOVLPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VMOVLPSrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPINSRBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPINSRBrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPINSRDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPINSRDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPINSRQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPINSRQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPINSRWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPINSRWrmi")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVSXBDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVSXBQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVSXBWrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVSXDQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVSXWDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVSXWQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVZXBDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVZXBQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVZXBWrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVZXDQrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVZXWDrm")>; +def: InstRW<[SKXWriteResGroup75], (instregex "VPMOVZXWQrm")>; + +def SKXWriteResGroup76 : SchedWriteRes<[SKXPort6,SKXPort23]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup76], (instregex "FARJMP64")>; +def: InstRW<[SKXWriteResGroup76], (instregex "JMP(16|32|64)m")>; + +def SKXWriteResGroup77 : SchedWriteRes<[SKXPort23,SKXPort05]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PABSBrm64")>; +def: 
InstRW<[SKXWriteResGroup77], (instregex "MMX_PABSDrm64")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PABSWrm64")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PADDBirm")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PADDDirm")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PADDQirm")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PADDWirm")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PANDNirm")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PANDirm")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PORirm")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PSIGNBrm64")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PSIGNDrm64")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PSIGNWrm64")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PSUBBirm")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PSUBDirm")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PSUBQirm")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PSUBWirm")>; +def: InstRW<[SKXWriteResGroup77], (instregex "MMX_PXORirm")>; + +def SKXWriteResGroup78 : SchedWriteRes<[SKXPort23,SKXPort06]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup78], (instregex "ADC(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "ADC8rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "ADCX32rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "ADCX64rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "ADOX32rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "ADOX64rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "BT(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVAE(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVB(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVE(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVG(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVGE(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVL(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVLE(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVNE(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVNO(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVNP(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVNS(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVO(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVP(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "CMOVS(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "RORX32mi")>; +def: InstRW<[SKXWriteResGroup78], (instregex "RORX64mi")>; +def: InstRW<[SKXWriteResGroup78], (instregex "SARX32rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "SARX64rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "SBB(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "SBB8rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "SHLX32rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "SHLX64rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "SHRX32rm")>; +def: InstRW<[SKXWriteResGroup78], (instregex "SHRX64rm")>; + +def SKXWriteResGroup79 : SchedWriteRes<[SKXPort23,SKXPort15]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup79], (instregex "ANDN32rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "ANDN64rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex 
"BLSI32rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "BLSI64rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "BLSMSK32rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "BLSMSK64rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "BLSR32rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "BLSR64rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "BZHI32rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "BZHI64rm")>; +def: InstRW<[SKXWriteResGroup79], (instregex "MOVBE(16|32|64)rm")>; + +def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup80], (instregex "VMOVDI2PDIZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup81 : SchedWriteRes<[SKXPort23,SKXPort0156]> { + let Latency = 6; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup81], (instregex "ADD(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup81], (instregex "ADD8rm")>; +def: InstRW<[SKXWriteResGroup81], (instregex "AND(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup81], (instregex "AND8rm")>; +def: InstRW<[SKXWriteResGroup81], (instregex "CMP(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup81], (instregex "CMP(16|32|64)mr")>; +def: InstRW<[SKXWriteResGroup81], (instregex "CMP(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup81], (instregex "CMP8mi")>; +def: InstRW<[SKXWriteResGroup81], (instregex "CMP8mr")>; +def: InstRW<[SKXWriteResGroup81], (instregex "CMP8rm")>; +def: InstRW<[SKXWriteResGroup81], (instregex "OR(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup81], (instregex "OR8rm")>; +def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)r")>; +def: InstRW<[SKXWriteResGroup81], (instregex "POP(16|32|64)rmr")>; +def: InstRW<[SKXWriteResGroup81], (instregex "SUB(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup81], (instregex "SUB8rm")>; +def: InstRW<[SKXWriteResGroup81], (instregex "TEST(16|32|64)mr")>; +def: InstRW<[SKXWriteResGroup81], (instregex "TEST8mi")>; +def: InstRW<[SKXWriteResGroup81], (instregex "TEST8mr")>; +def: InstRW<[SKXWriteResGroup81], (instregex "XOR(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup81], (instregex "XOR8rm")>; + +def SKXWriteResGroup82 : SchedWriteRes<[SKXPort5,SKXPort015]> { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup82], (instregex "CVTSI2SS64rr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "HADDPDrr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "HADDPSrr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "HSUBPDrr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "HSUBPSrr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VCVTSI2SS64rr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VCVTSI642SSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VCVTUSI642SSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VHADDPDYrr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VHADDPDrr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VHADDPSYrr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VHADDPSrr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VHSUBPDYrr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VHSUBPDrr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VHSUBPSYrr")>; +def: InstRW<[SKXWriteResGroup82], (instregex "VHSUBPSrr")>; + +def SKXWriteResGroup83 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { + let Latency = 6; + let NumMicroOps = 4; 
+ let ResourceCycles = [1,2,1]; +} +def: InstRW<[SKXWriteResGroup83], (instregex "SHLD(16|32|64)rrCL")>; +def: InstRW<[SKXWriteResGroup83], (instregex "SHRD(16|32|64)rrCL")>; + +def SKXWriteResGroup84 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06,SKXPort0156]> { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKXWriteResGroup84], (instregex "SLDT(16|32|64)r")>; + +def SKXWriteResGroup85 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237,SKXPort015]> { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKXWriteResGroup85], (instregex "VCVTPS2PHmr")>; + +def SKXWriteResGroup86 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKXWriteResGroup86], (instregex "BTC(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup86], (instregex "BTR(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup86], (instregex "BTS(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SAR(16|32|64)m1")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SAR(16|32|64)mi")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SAR8m1")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SAR8mi")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SHL(16|32|64)m1")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SHL(16|32|64)mi")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SHL8m1")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SHL8mi")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SHR(16|32|64)m1")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SHR(16|32|64)mi")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SHR8m1")>; +def: InstRW<[SKXWriteResGroup86], (instregex "SHR8mi")>; + +def SKXWriteResGroup87 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> { + let Latency = 6; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKXWriteResGroup87], (instregex "ADD(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup87], (instregex "ADD(16|32|64)mr")>; +def: InstRW<[SKXWriteResGroup87], (instregex "ADD8mi")>; +def: InstRW<[SKXWriteResGroup87], (instregex "ADD8mr")>; +def: InstRW<[SKXWriteResGroup87], (instregex "AND(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup87], (instregex "AND(16|32|64)mr")>; +def: InstRW<[SKXWriteResGroup87], (instregex "AND8mi")>; +def: InstRW<[SKXWriteResGroup87], (instregex "AND8mr")>; +def: InstRW<[SKXWriteResGroup87], (instregex "DEC(16|32|64)m")>; +def: InstRW<[SKXWriteResGroup87], (instregex "DEC8m")>; +def: InstRW<[SKXWriteResGroup87], (instregex "INC(16|32|64)m")>; +def: InstRW<[SKXWriteResGroup87], (instregex "INC8m")>; +def: InstRW<[SKXWriteResGroup87], (instregex "NEG(16|32|64)m")>; +def: InstRW<[SKXWriteResGroup87], (instregex "NEG8m")>; +def: InstRW<[SKXWriteResGroup87], (instregex "NOT(16|32|64)m")>; +def: InstRW<[SKXWriteResGroup87], (instregex "NOT8m")>; +def: InstRW<[SKXWriteResGroup87], (instregex "OR(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup87], (instregex "OR(16|32|64)mr")>; +def: InstRW<[SKXWriteResGroup87], (instregex "OR8mi")>; +def: InstRW<[SKXWriteResGroup87], (instregex "OR8mr")>; +def: InstRW<[SKXWriteResGroup87], (instregex "POP(16|32|64)rmm")>; +def: InstRW<[SKXWriteResGroup87], (instregex "PUSH(16|32|64)rmm")>; +def: InstRW<[SKXWriteResGroup87], (instregex "SUB(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup87], (instregex "SUB(16|32|64)mr")>; +def: InstRW<[SKXWriteResGroup87], (instregex "SUB8mi")>; +def: InstRW<[SKXWriteResGroup87], (instregex "SUB8mr")>; 
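+// The read-modify-write ALU forms in SKXWriteResGroup87 decompose into
+// four uops: a load (SKXPort23), the ALU operation itself (SKXPort0156),
+// and a store split into store-address (SKXPort237) and store-data
+// (SKXPort4) uops, which is what the [1,1,1,1] ResourceCycles account for.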
+def: InstRW<[SKXWriteResGroup87], (instregex "XOR(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup87], (instregex "XOR(16|32|64)mr")>; +def: InstRW<[SKXWriteResGroup87], (instregex "XOR8mi")>; +def: InstRW<[SKXWriteResGroup87], (instregex "XOR8mr")>; + +def SKXWriteResGroup88 : SchedWriteRes<[SKXPort6,SKXPort0156]> { + let Latency = 6; + let NumMicroOps = 6; + let ResourceCycles = [1,5]; +} +def: InstRW<[SKXWriteResGroup88], (instregex "STD")>; + +def SKXWriteResGroup89 : SchedWriteRes<[SKXPort23]> { + let Latency = 7; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup89], (instregex "LD_F32m")>; +def: InstRW<[SKXWriteResGroup89], (instregex "LD_F64m")>; +def: InstRW<[SKXWriteResGroup89], (instregex "LD_F80m")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VBROADCASTF128")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VBROADCASTI128")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VBROADCASTSDYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VBROADCASTSSYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VLDDQUYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VMOVAPDYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VMOVAPSYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VMOVDDUPYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VMOVDQAYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VMOVDQUYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VMOVNTDQAYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VMOVNTDQAZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VMOVSHDUPYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VMOVSLDUPYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VMOVUPDYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VMOVUPSYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VPBROADCASTDYrm")>; +def: InstRW<[SKXWriteResGroup89], (instregex "VPBROADCASTQYrm")>; + +def SKXWriteResGroup90 : SchedWriteRes<[SKXPort0,SKXPort5]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup90], (instregex "VCVTDQ2PDYrr")>; + +def SKXWriteResGroup91 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup91], (instregex "COMISDrm")>; +def: InstRW<[SKXWriteResGroup91], (instregex "COMISSrm")>; +def: InstRW<[SKXWriteResGroup91], (instregex "UCOMISDrm")>; +def: InstRW<[SKXWriteResGroup91], (instregex "UCOMISSrm")>; +def: InstRW<[SKXWriteResGroup91], (instregex "VCOMISDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup91], (instregex "VCOMISDrm")>; +def: InstRW<[SKXWriteResGroup91], (instregex "VCOMISSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup91], (instregex "VCOMISSrm")>; +def: InstRW<[SKXWriteResGroup91], (instregex "VUCOMISDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup91], (instregex "VUCOMISDrm")>; +def: InstRW<[SKXWriteResGroup91], (instregex "VUCOMISSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup91], (instregex "VUCOMISSrm")>; + +def SKXWriteResGroup92 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup92], (instregex "INSERTPSrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PACKSSDWrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PACKSSWBrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PACKUSDWrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PACKUSWBrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex 
"PALIGNRrmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PBLENDWrmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PSHUFBrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PSHUFDmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PSHUFHWmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PSHUFLWmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PUNPCKHBWrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PUNPCKHDQrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PUNPCKHQDQrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PUNPCKHWDrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PUNPCKLBWrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PUNPCKLDQrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PUNPCKLQDQrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "PUNPCKLWDrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "SHUFPDrmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "SHUFPSrmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "UNPCKHPDrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "UNPCKHPSrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "UNPCKLPDrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "UNPCKLPSrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VINSERTPSrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VMOVSSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPACKSSDWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPACKSSDWrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPACKSSWBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPACKSSWBrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPACKUSDWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPACKUSDWrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPACKUSWBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPACKUSWBrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPALIGNRZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPALIGNRrmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPBLENDWrmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPBROADCASTBZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPBROADCASTBrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPBROADCASTWZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPBROADCASTWrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPERMILPDZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPERMILPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPERMILPDmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPERMILPDrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPERMILPSZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPERMILPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPERMILPSmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPERMILPSrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPSHUFBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPSHUFBrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPSHUFDZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPSHUFDmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPSHUFHWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPSHUFHWmi")>; +def: 
InstRW<[SKXWriteResGroup92], (instregex "VPSHUFLWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPSHUFLWmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPSLLDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPSRLDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKHBWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKHBWrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKHDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKHDQrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKHQDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKHQDQrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKHWDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKHWDrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKLBWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKLBWrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKLDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKLDQrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKLQDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKLQDQrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKLWDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VPUNPCKLWDrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VSHUFPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VSHUFPDrmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VSHUFPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VSHUFPSrmi")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VUNPCKHPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VUNPCKHPDrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VUNPCKHPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VUNPCKHPSrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VUNPCKLPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VUNPCKLPDrm")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VUNPCKLPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup92], (instregex "VUNPCKLPSrm")>; + +def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPD2DQYrr")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPD2DQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPD2DQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPD2PSYrr")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPD2PSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPD2PSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPD2UDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPD2UDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPH2PSYrr")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPH2PSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPH2PSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPS2PDYrr")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPS2PDZ256rr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup93], (instregex "VCVTPS2PDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPS2PHYrr")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPS2PHZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPS2PHZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPS2QQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPS2QQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPS2UQQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTPS2UQQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTQQ2PSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTQQ2PSZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTTPD2DQYrr")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTTPD2DQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTTPD2DQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTTPD2UDQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTTPD2UDQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTTPS2QQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTTPS2QQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTTPS2UQQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTTPS2UQQZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTUDQ2PDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTUDQ2PDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTUQQ2PSZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTUQQ2PSZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup94 : SchedWriteRes<[SKXPort01,SKXPort23]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup94], (instregex "PABSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PABSDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PABSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PADDSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PADDSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PADDUSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PADDUSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PAVGBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PAVGWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PCMPEQBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PCMPEQDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PCMPEQQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PCMPEQWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PCMPGTBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PCMPGTDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PCMPGTWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PMAXSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PMAXSDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PMAXSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PMAXUBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PMAXUDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PMAXUWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PMINSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PMINSDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PMINSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PMINUBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PMINUDrm")>; +def: 
InstRW<[SKXWriteResGroup94], (instregex "PMINUWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSIGNBrm128")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSIGNDrm128")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSIGNWrm128")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSLLDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSLLQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSLLWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSRADrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSRAWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSRLDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSRLQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSRLWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSUBSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSUBSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSUBUSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "PSUBUSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPABSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDUSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDUSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDUSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPADDUSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPAVGBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPAVGBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPAVGWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPAVGWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPEQBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPEQDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPEQQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPEQWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPGTBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPGTDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPCMPGTWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMAXUWrm")>; +def: 
InstRW<[SKXWriteResGroup94], (instregex "VPMINSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINSDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINSDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINUBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINUBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINUDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINUDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINUWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPMINUWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPROLDZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPROLQZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPROLVDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPROLVQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPRORDZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPRORQZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPRORVDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPRORVQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSIGNBrm128")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSIGNDrm128")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSIGNWrm128")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLDZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLQZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLVDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLVDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLVQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLVQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLVWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSLLWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRADZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRADZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRADrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAQZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAVDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAVDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAVQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAVWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRAWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex 
"VPSRLDZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLQZ128m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLVDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLVDrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLVQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLVQrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLVWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSRLWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSUBSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSUBSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSUBSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSUBSWrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSUBUSBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSUBUSBrm")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSUBUSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup94], (instregex "VPSUBUSWrm")>; + +def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup95], (instregex "ANDNPDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "ANDNPSrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "ANDPDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "ANDPSrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "BLENDPDrmi")>; +def: InstRW<[SKXWriteResGroup95], (instregex "BLENDPSrmi")>; +def: InstRW<[SKXWriteResGroup95], (instregex "ORPDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "ORPSrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PADDBrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PADDDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PADDQrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PADDWrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PANDNrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PANDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PORrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PSUBBrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PSUBDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PSUBQrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PSUBWrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "PXORrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VANDNPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VANDNPDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VANDNPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VANDNPSrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VANDPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VANDPDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VANDPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VANDPSrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPSZ128rm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup95], (instregex "VBLENDPDrmi")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDPSrmi")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VBROADCASTI32X2Z128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VBROADCASTSSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VINSERTF128rm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VINSERTI128rm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMASKMOVPDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMASKMOVPSrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVAPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVAPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVDDUPZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVDQA32Z128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVDQA64Z128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVDQU16Z128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVDQU32Z128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVDQU64Z128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVDQU8Z128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVNTDQAZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVSHDUPZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVSLDUPZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVUPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VMOVUPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VORPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VORPDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VORPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VORPSrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPADDBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPADDBrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPADDDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPADDDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPADDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPADDQrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPADDWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPADDWrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPANDDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPANDNDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPANDNQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPANDNrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPANDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPANDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPBLENDDrmi")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPBLENDMBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPBLENDMDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPBLENDMQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPBLENDMWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPBROADCASTDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPBROADCASTQZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPMASKMOVDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPMASKMOVQrm")>; +def: 
InstRW<[SKXWriteResGroup95], (instregex "VPORDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPORQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPORrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPSUBBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPSUBBrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPSUBDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPSUBDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPSUBQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPSUBQrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPSUBWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPSUBWrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPTERNLOGDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPTERNLOGQZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPXORDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPXORQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VPXORrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VXORPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VXORPDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VXORPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup95], (instregex "VXORPSrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "XORPDrm")>; +def: InstRW<[SKXWriteResGroup95], (instregex "XORPSrm")>; + +def SKXWriteResGroup96 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup96], (instregex "MMX_PACKSSDWirm")>; +def: InstRW<[SKXWriteResGroup96], (instregex "MMX_PACKSSWBirm")>; +def: InstRW<[SKXWriteResGroup96], (instregex "MMX_PACKUSWBirm")>; + +def SKXWriteResGroup97 : SchedWriteRes<[SKXPort5,SKXPort015]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup97], (instregex "VPERMI2W128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup97], (instregex "VPERMI2W256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup97], (instregex "VPERMI2Wrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup97], (instregex "VPERMT2W128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup97], (instregex "VPERMT2W256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup97], (instregex "VPERMT2Wrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup98 : SchedWriteRes<[SKXPort23,SKXPort06]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup98], (instregex "CMOVA(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup98], (instregex "CMOVBE(16|32|64)rm")>; + +def SKXWriteResGroup99 : SchedWriteRes<[SKXPort23,SKXPort0156]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup99], (instregex "LEAVE64")>; +def: InstRW<[SKXWriteResGroup99], (instregex "SCASB")>; +def: InstRW<[SKXWriteResGroup99], (instregex "SCASL")>; +def: InstRW<[SKXWriteResGroup99], (instregex "SCASQ")>; +def: InstRW<[SKXWriteResGroup99], (instregex "SCASW")>; + +def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort015]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup100], (instregex "CVTTSS2SI64rr")>; +def: InstRW<[SKXWriteResGroup100], (instregex "CVTTSS2SIrr")>; +def: InstRW<[SKXWriteResGroup100], (instregex "VCVTSS2USI64Zrr(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2SI64Zrb")>; +def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2SI64rr")>; +def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2SIZrb")>; +def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2SIrr")>; +def: InstRW<[SKXWriteResGroup100], (instregex "VCVTTSS2USI64Zrb")>; + +def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup101], (instregex "FLDCW16m")>; + +def SKXWriteResGroup102 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort0156]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup102], (instregex "LDMXCSR")>; +def: InstRW<[SKXWriteResGroup102], (instregex "VLDMXCSR")>; + +def SKXWriteResGroup103 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort0156]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup103], (instregex "KMOVBkm")>; +def: InstRW<[SKXWriteResGroup103], (instregex "KMOVDkm")>; +def: InstRW<[SKXWriteResGroup103], (instregex "KMOVQkm")>; +def: InstRW<[SKXWriteResGroup103], (instregex "KMOVWkm")>; + +def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6,SKXPort23,SKXPort0156]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup104], (instregex "LRETQ")>; +def: InstRW<[SKXWriteResGroup104], (instregex "RETQ")>; + +def SKXWriteResGroup105 : SchedWriteRes<[SKXPort23,SKXPort06,SKXPort15]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup105], (instregex "BEXTR32rm")>; +def: InstRW<[SKXWriteResGroup105], (instregex "BEXTR64rm")>; + +def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPSZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPSZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup106], (instregex "VCOMPRESSPSZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup106], (instregex "VPCOMPRESSDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup106], (instregex "VPCOMPRESSDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup106], (instregex "VPCOMPRESSDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup106], (instregex "VPCOMPRESSQZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup106], (instregex "VPCOMPRESSQZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup106], (instregex "VPCOMPRESSQZmr(b?)(k?)(z?)")>; + +def SKXWriteResGroup107 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> { + let Latency = 7; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,2]; +} +def: InstRW<[SKXWriteResGroup107], (instregex "ROL(16|32|64)m1")>; +def: InstRW<[SKXWriteResGroup107], (instregex "ROL(16|32|64)mi")>; +def: InstRW<[SKXWriteResGroup107], (instregex "ROL8m1")>; +def: InstRW<[SKXWriteResGroup107], (instregex "ROL8mi")>; +def: InstRW<[SKXWriteResGroup107], (instregex "ROR(16|32|64)m1")>; +def: InstRW<[SKXWriteResGroup107], (instregex "ROR(16|32|64)mi")>; +def: InstRW<[SKXWriteResGroup107], (instregex "ROR8m1")>; +def: InstRW<[SKXWriteResGroup107], 
(instregex "ROR8mi")>; + +def SKXWriteResGroup108 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> { + let Latency = 7; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,2]; +} +def: InstRW<[SKXWriteResGroup108], (instregex "XADD(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup108], (instregex "XADD8rm")>; + +def SKXWriteResGroup109 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> { + let Latency = 7; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,1,1]; +} +def: InstRW<[SKXWriteResGroup109], (instregex "CALL(16|32|64)m")>; +def: InstRW<[SKXWriteResGroup109], (instregex "FARCALL64")>; + +def SKXWriteResGroup110 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> { + let Latency = 7; + let NumMicroOps = 7; + let ResourceCycles = [1,2,2,2]; +} +def: InstRW<[SKXWriteResGroup110], (instregex "VPSCATTERDQZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup110], (instregex "VPSCATTERQQZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup110], (instregex "VSCATTERDPDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup110], (instregex "VSCATTERQPDZ128mr(b?)(k?)(z?)")>; + +def SKXWriteResGroup111 : SchedWriteRes<[SKXPort6,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 7; + let NumMicroOps = 7; + let ResourceCycles = [1,3,1,2]; +} +def: InstRW<[SKXWriteResGroup111], (instregex "LOOP")>; + +def SKXWriteResGroup112 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> { + let Latency = 7; + let NumMicroOps = 11; + let ResourceCycles = [1,4,4,2]; +} +def: InstRW<[SKXWriteResGroup112], (instregex "VPSCATTERDQZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup112], (instregex "VPSCATTERQQZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup112], (instregex "VSCATTERDPDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup112], (instregex "VSCATTERQPDZ256mr(b?)(k?)(z?)")>; + +def SKXWriteResGroup113 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort237,SKXPort0156]> { + let Latency = 7; + let NumMicroOps = 19; + let ResourceCycles = [1,8,8,2]; +} +def: InstRW<[SKXWriteResGroup113], (instregex "VPSCATTERDQZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup113], (instregex "VPSCATTERQQZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup113], (instregex "VSCATTERDPDZmr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup113], (instregex "VSCATTERQPDZmr(b?)(k?)(z?)")>; + +def SKXWriteResGroup114 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { + let Latency = 7; + let NumMicroOps = 36; + let ResourceCycles = [1,16,1,16,2]; +} +def: InstRW<[SKXWriteResGroup114], (instregex "VSCATTERDPSZmr(b?)(k?)(z?)")>; + +def SKXWriteResGroup115 : SchedWriteRes<[SKXPort0]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup115], (instregex "AESIMCrr")>; +def: InstRW<[SKXWriteResGroup115], (instregex "VAESIMCrr")>; + +def SKXWriteResGroup116 : SchedWriteRes<[SKXPort015]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SKXWriteResGroup116], (instregex "PMULLDrr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDPDr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDPSr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDSDr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "ROUNDSSr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDYrr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup116], (instregex 
"VPMULLDZrr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VPMULLDrr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPDZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPSZ128rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPSZ256rri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALEPSZrri(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALESDr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VRNDSCALESSr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VROUNDPDr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VROUNDPSr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VROUNDSDr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VROUNDSSr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VROUNDYPDr")>; +def: InstRW<[SKXWriteResGroup116], (instregex "VROUNDYPSr")>; + +def SKXWriteResGroup117 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup117], (instregex "VTESTPDrm")>; +def: InstRW<[SKXWriteResGroup117], (instregex "VTESTPSrm")>; + +def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup118], (instregex "BSF(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup118], (instregex "BSR(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup118], (instregex "IMUL64m")>; +def: InstRW<[SKXWriteResGroup118], (instregex "IMUL(32|64)rm(i8?)")>; +def: InstRW<[SKXWriteResGroup118], (instregex "IMUL8m")>; +def: InstRW<[SKXWriteResGroup118], (instregex "LZCNT(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup118], (instregex "MUL(16|32|64)m")>; +def: InstRW<[SKXWriteResGroup118], (instregex "MUL8m")>; +def: InstRW<[SKXWriteResGroup118], (instregex "PDEP32rm")>; +def: InstRW<[SKXWriteResGroup118], (instregex "PDEP64rm")>; +def: InstRW<[SKXWriteResGroup118], (instregex "PEXT32rm")>; +def: InstRW<[SKXWriteResGroup118], (instregex "PEXT64rm")>; +def: InstRW<[SKXWriteResGroup118], (instregex "POPCNT(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup118], (instregex "TZCNT(16|32|64)rm")>; + +def SKXWriteResGroup118_16_1 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup118_16_1], (instregex "IMUL16rm(i8?)")>; + +def SKXWriteResGroup118_16_2 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> { + let Latency = 8; + let NumMicroOps = 5; +} +def: InstRW<[SKXWriteResGroup118_16_2], (instregex "IMUL16m")>; +def: InstRW<[SKXWriteResGroup118_16_2], (instregex "MUL16m")>; + +def SKXWriteResGroup118_32 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup118_32], (instregex "IMUL32m")>; +def: InstRW<[SKXWriteResGroup118_32], (instregex "MUL32m")>; + +def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup119], (instregex "FCOM32m")>; +def: InstRW<[SKXWriteResGroup119], (instregex "FCOM64m")>; +def: InstRW<[SKXWriteResGroup119], (instregex "FCOMP32m")>; +def: 
InstRW<[SKXWriteResGroup119], (instregex "FCOMP64m")>; +def: InstRW<[SKXWriteResGroup119], (instregex "MMX_PSADBWirm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VFPCLASSSDrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKSSDWYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKSSDWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKSSDWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKSSWBYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKSSWBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKSSWBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKUSDWYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKUSDWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKUSDWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKUSWBYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKUSWBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPACKUSWBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPALIGNRYrmi")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPALIGNRZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPALIGNRZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPBLENDWYrmi")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPBROADCASTBYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPBROADCASTBZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPBROADCASTBZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPBROADCASTWYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPBROADCASTWZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPBROADCASTWZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPDYmi")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPDYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPSYmi")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPSYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPSZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPSZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPERMILPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPMOVSXBDYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPMOVSXBQYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPMOVSXWQYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFBYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFDYmi")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFHWYmi")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFHWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex 
"VPSHUFHWZmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFLWYmi")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFLWZ128mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSHUFLWZmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSLLDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSLLDQZ512rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSRLDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPSRLDQZ512rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHBWYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHBWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHBWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHDQYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHQDQYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHQDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHQDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHWDYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHWDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKHWDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLBWYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLBWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLBWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLDQYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLQDQYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLQDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLQDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLWDYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLWDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VPUNPCKLWDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VSHUFPDYrmi")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VSHUFPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VSHUFPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VSHUFPSYrmi")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VSHUFPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VSHUFPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VUNPCKHPDYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VUNPCKHPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VUNPCKHPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VUNPCKHPSYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VUNPCKHPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VUNPCKHPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VUNPCKLPDYrm")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VUNPCKLPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VUNPCKLPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VUNPCKLPSYrm")>; +def: 
InstRW<[SKXWriteResGroup119], (instregex "VUNPCKLPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup119], (instregex "VUNPCKLPSZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup120 : SchedWriteRes<[SKXPort01,SKXPort23]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPABSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPADDUSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPAVGWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPEQBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPEQDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPEQQYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPEQWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPGTBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPGTDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPCMPGTWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUBYrm")>; +def: 
InstRW<[SKXWriteResGroup120], (instregex "VPMAXUBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMAXUWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPMINUWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLVDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLVDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLVQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPROLVQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORVDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORVDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORVQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPRORVQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSIGNBYrm256")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSIGNDYrm256")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSIGNWYrm256")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex 
"VPSLLDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLQYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVQYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLVWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLWZ256mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLWZmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSLLWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRADYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRADZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRADZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRADZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRADZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAVWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAWZ256mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAWZmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRAWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLQYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLQZ256rm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup120], (instregex "VPSRLQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVDYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVQYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLVWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLWZ256mi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLWZmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSRLWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBUSBYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBUSBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBUSBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBUSWYrm")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBUSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup120], (instregex "VPSUBUSWZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup121 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPDYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPSYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VANDNPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VANDPDYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VANDPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VANDPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VANDPSYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VANDPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VANDPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDPDYrmi")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDPSYrmi")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTF32X2Z256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex 
"VBROADCASTF32X2Zm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTF32X4Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTF32X4rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTF32X8rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTF64X2Z128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTF64X2rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTF64X4rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTI32X2Z256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTI32X2Zm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTI32X4Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTI32X4rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTI32X8rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTI64X2Z128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTI64X2rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTI64X4rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTSDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTSDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTSSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VBROADCASTSSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTF32x4Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTF32x4Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTF32x8Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTF64x2Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTF64x2Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTF64x4Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTI32x4Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTI32x4Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTI32x8Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTI64x2Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTI64x2Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VINSERTI64x4Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMASKMOVPDYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMASKMOVPSYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVAPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVAPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVAPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVAPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDDUPZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDDUPZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDQA32Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDQA32Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDQA64Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDQA64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDQU16Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex 
"VMOVDQU16Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDQU32Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDQU32Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDQU64Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDQU64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDQU8Z256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVDQU8Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVNTDQAZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVSHDUPZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVSHDUPZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVSLDUPZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVSLDUPZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVUPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVUPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVUPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VMOVUPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VORPDYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VORPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VORPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VORPSYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VORPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VORPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDBYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDDYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDQYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDWYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPADDWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPANDDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPANDDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPANDNDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPANDNDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPANDNQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPANDNQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPANDNYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPANDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPANDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPANDYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBLENDDYrmi")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBLENDMBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBLENDMBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBLENDMDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex 
"VPBLENDMDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBLENDMQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBLENDMQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBLENDMWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBLENDMWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBROADCASTDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBROADCASTDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBROADCASTQZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPBROADCASTQZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPMASKMOVDYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPMASKMOVQYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPORDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPORDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPORQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPORQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPORYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPSUBBYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPSUBBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPSUBBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPSUBDYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPSUBDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPSUBDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPSUBQYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPSUBQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPSUBQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPSUBWYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPSUBWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPTERNLOGDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPTERNLOGDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPTERNLOGQZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPTERNLOGQZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPXORDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPXORDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPXORQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPXORQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VPXORYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VXORPDYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VXORPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VXORPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VXORPSYrm")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VXORPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup121], (instregex "VXORPSZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup122 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup122], (instregex "BLENDVPDrm0")>; +def: InstRW<[SKXWriteResGroup122], (instregex "BLENDVPSrm0")>; +def: InstRW<[SKXWriteResGroup122], (instregex "PBLENDVBrm0")>; +def: InstRW<[SKXWriteResGroup122], (instregex "VBLENDVPDrm")>; +def: InstRW<[SKXWriteResGroup122], (instregex "VBLENDVPSrm")>; +def: InstRW<[SKXWriteResGroup122], (instregex 
"VPBLENDVBYrm")>; +def: InstRW<[SKXWriteResGroup122], (instregex "VPBLENDVBrm")>; + +def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 8; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PHADDSWrm64")>; +def: InstRW<[SKXWriteResGroup123], (instregex "MMX_PHSUBSWrm64")>; + +def SKXWriteResGroup124 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort05]> { + let Latency = 8; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup124], (instregex "MMX_PHADDWrm64")>; +def: InstRW<[SKXWriteResGroup124], (instregex "MMX_PHADDrm64")>; +def: InstRW<[SKXWriteResGroup124], (instregex "MMX_PHSUBDrm64")>; +def: InstRW<[SKXWriteResGroup124], (instregex "MMX_PHSUBWrm64")>; + +def SKXWriteResGroup125 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237,SKXPort015]> { + let Latency = 8; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKXWriteResGroup125], (instregex "VCVTPS2PHYmr")>; + +def SKXWriteResGroup126 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,1,3]; +} +def: InstRW<[SKXWriteResGroup126], (instregex "ROR(16|32|64)mCL")>; +def: InstRW<[SKXWriteResGroup126], (instregex "ROR8mCL")>; + +def SKXWriteResGroup127 : SchedWriteRes<[SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { + let Latency = 8; + let NumMicroOps = 5; + let ResourceCycles = [1,1,1,2]; +} +def: InstRW<[SKXWriteResGroup127], (instregex "RCL(16|32|64)m1")>; +def: InstRW<[SKXWriteResGroup127], (instregex "RCL(16|32|64)mi")>; +def: InstRW<[SKXWriteResGroup127], (instregex "RCL8m1")>; +def: InstRW<[SKXWriteResGroup127], (instregex "RCL8mi")>; +def: InstRW<[SKXWriteResGroup127], (instregex "RCR(16|32|64)m1")>; +def: InstRW<[SKXWriteResGroup127], (instregex "RCR(16|32|64)mi")>; +def: InstRW<[SKXWriteResGroup127], (instregex "RCR8m1")>; +def: InstRW<[SKXWriteResGroup127], (instregex "RCR8mi")>; + +def SKXWriteResGroup128 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06]> { + let Latency = 8; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,3]; +} +def: InstRW<[SKXWriteResGroup128], (instregex "ROL(16|32|64)mCL")>; +def: InstRW<[SKXWriteResGroup128], (instregex "ROL8mCL")>; +def: InstRW<[SKXWriteResGroup128], (instregex "SAR(16|32|64)mCL")>; +def: InstRW<[SKXWriteResGroup128], (instregex "SAR8mCL")>; +def: InstRW<[SKXWriteResGroup128], (instregex "SHL(16|32|64)mCL")>; +def: InstRW<[SKXWriteResGroup128], (instregex "SHL8mCL")>; +def: InstRW<[SKXWriteResGroup128], (instregex "SHR(16|32|64)mCL")>; +def: InstRW<[SKXWriteResGroup128], (instregex "SHR8mCL")>; + +def SKXWriteResGroup129 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort0156]> { + let Latency = 8; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,3]; +} +def: InstRW<[SKXWriteResGroup129], (instregex "ADC(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup129], (instregex "ADC8mi")>; + +def SKXWriteResGroup130 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { + let Latency = 8; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,2,1]; +} +def: InstRW<[SKXWriteResGroup130], (instregex "ADC(16|32|64)mr")>; +def: InstRW<[SKXWriteResGroup130], (instregex "ADC8mr")>; +def: InstRW<[SKXWriteResGroup130], (instregex "CMPXCHG(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup130], (instregex "CMPXCHG8rm")>; +def: InstRW<[SKXWriteResGroup130], (instregex "SBB(16|32|64)mi8")>; +def: InstRW<[SKXWriteResGroup130], (instregex "SBB(16|32|64)mr")>; +def: 
InstRW<[SKXWriteResGroup130], (instregex "SBB8mi")>; +def: InstRW<[SKXWriteResGroup130], (instregex "SBB8mr")>; + +def SKXWriteResGroup131 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { + let Latency = 8; + let NumMicroOps = 8; + let ResourceCycles = [1,2,1,2,2]; +} +def: InstRW<[SKXWriteResGroup131], (instregex "VPSCATTERQDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup131], (instregex "VPSCATTERQDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup131], (instregex "VSCATTERQPSZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup131], (instregex "VSCATTERQPSZ256mr(b?)(k?)(z?)")>; + +def SKXWriteResGroup132 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { + let Latency = 8; + let NumMicroOps = 12; + let ResourceCycles = [1,4,1,4,2]; +} +def: InstRW<[SKXWriteResGroup132], (instregex "VPSCATTERDDZ128mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup132], (instregex "VSCATTERDPSZ128mr(b?)(k?)(z?)")>; + +def SKXWriteResGroup133 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { + let Latency = 8; + let NumMicroOps = 20; + let ResourceCycles = [1,8,1,8,2]; +} +def: InstRW<[SKXWriteResGroup133], (instregex "VPSCATTERDDZ256mr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup133], (instregex "VSCATTERDPSZ256mr(b?)(k?)(z?)")>; + +def SKXWriteResGroup134 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort237,SKXPort0156]> { + let Latency = 8; + let NumMicroOps = 36; + let ResourceCycles = [1,16,1,16,2]; +} +def: InstRW<[SKXWriteResGroup134], (instregex "VPSCATTERDDZmr(b?)(k?)(z?)")>; + +def SKXWriteResGroup135 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup135], (instregex "MMX_CVTPI2PSirm")>; +def: InstRW<[SKXWriteResGroup135], (instregex "MMX_PMADDUBSWrm64")>; +def: InstRW<[SKXWriteResGroup135], (instregex "MMX_PMADDWDirm")>; +def: InstRW<[SKXWriteResGroup135], (instregex "MMX_PMULHRSWrm64")>; +def: InstRW<[SKXWriteResGroup135], (instregex "MMX_PMULHUWirm")>; +def: InstRW<[SKXWriteResGroup135], (instregex "MMX_PMULHWirm")>; +def: InstRW<[SKXWriteResGroup135], (instregex "MMX_PMULLWirm")>; +def: InstRW<[SKXWriteResGroup135], (instregex "MMX_PMULUDQirm")>; +def: InstRW<[SKXWriteResGroup135], (instregex "RCPSSm")>; +def: InstRW<[SKXWriteResGroup135], (instregex "RSQRTSSm")>; +def: InstRW<[SKXWriteResGroup135], (instregex "VRCPSSm")>; +def: InstRW<[SKXWriteResGroup135], (instregex "VRSQRTSSm")>; +def: InstRW<[SKXWriteResGroup135], (instregex "VTESTPDYrm")>; +def: InstRW<[SKXWriteResGroup135], (instregex "VTESTPSYrm")>; + +def SKXWriteResGroup136 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup136], (instregex "PCMPGTQrm")>; +def: InstRW<[SKXWriteResGroup136], (instregex "PSADBWrm")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VALIGNQZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VCMPPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VCMPPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VCMPSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VCMPSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VDBPSADBWZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VFPCLASSSSrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], 
(instregex "VPCMPBZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPDZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPEQBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPEQDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPEQQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPEQWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPGTBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPGTDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPGTQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPGTQrm")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPGTWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPQZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPUBZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPUDZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPUQZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPUWZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPCMPWZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPERMI2D128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPERMI2PD128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPERMI2PS128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPERMI2Q128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPERMT2D128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPERMT2PD128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPERMT2PS128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPERMT2Q128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMAXSQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMAXUQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMINSQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMINUQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVSXBDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVSXBQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVSXBWYrm")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVSXBWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVSXDQYrm")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVSXDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVSXWDYrm")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVSXWDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVSXWQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVZXBDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVZXBQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVZXBWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVZXDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVZXWDYrm")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVZXWDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPMOVZXWQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPSADBWZ128rm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup136], (instregex "VPSADBWrm")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPTESTMBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPTESTMDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPTESTMQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPTESTMWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPTESTNMBZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPTESTNMDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPTESTNMQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup136], (instregex "VPTESTNMWZ128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup137], (instregex "ADDSDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "ADDSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "CMPSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "CVTPS2PDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "MAXSDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "MAXSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "MINSDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "MINSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVTPS2PIirm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVTTPS2PIirm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "MULSDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "MULSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "SUBSDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "SUBSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VADDSDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VADDSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VCMPSDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VCMPSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VCVTPH2PSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VCVTPS2PDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMADD132SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMADD132SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMADD213SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMADD213SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMADD231SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMADD231SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB132SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB132SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB213SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB213SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB231SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFMSUB231SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD132SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD132SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD213SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD213SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD231SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFNMADD231SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFNMSUB132SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFNMSUB132SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFNMSUB213SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFNMSUB213SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex 
"VFNMSUB231SDm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VFNMSUB231SSm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VMAXSDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VMAXSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VMINSDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VMINSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VMULSDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VMULSSrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VSUBSDrm")>; +def: InstRW<[SKXWriteResGroup137], (instregex "VSUBSSrm")>; + +def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup138], (instregex "VRCP14PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup138], (instregex "VRCP14PSZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup138], (instregex "VRSQRT14PDZr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup138], (instregex "VRSQRT14PSZr(b?)(k?)(z?)")>; + +def SKXWriteResGroup139 : SchedWriteRes<[SKXPort5,SKXPort015]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup139], (instregex "DPPDrri")>; +def: InstRW<[SKXWriteResGroup139], (instregex "VDPPDrri")>; + +def SKXWriteResGroup140 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup140], (instregex "VBLENDVPDYrm")>; +def: InstRW<[SKXWriteResGroup140], (instregex "VBLENDVPSYrm")>; + +def SKXWriteResGroup141 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup141], (instregex "PTESTrm")>; +def: InstRW<[SKXWriteResGroup141], (instregex "VPTESTrm")>; + +def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup142], (instregex "MULX64rm")>; + +def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup143], (instregex "PHADDSWrm128")>; +def: InstRW<[SKXWriteResGroup143], (instregex "PHSUBSWrm128")>; +def: InstRW<[SKXWriteResGroup143], (instregex "VPHADDSWrm128")>; +def: InstRW<[SKXWriteResGroup143], (instregex "VPHSUBSWrm128")>; + +def SKXWriteResGroup144 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup144], (instregex "PHADDDrm")>; +def: InstRW<[SKXWriteResGroup144], (instregex "PHADDWrm")>; +def: InstRW<[SKXWriteResGroup144], (instregex "PHSUBDrm")>; +def: InstRW<[SKXWriteResGroup144], (instregex "PHSUBWrm")>; +def: InstRW<[SKXWriteResGroup144], (instregex "VPHADDDrm")>; +def: InstRW<[SKXWriteResGroup144], (instregex "VPHADDWrm")>; +def: InstRW<[SKXWriteResGroup144], (instregex "VPHSUBDrm")>; +def: InstRW<[SKXWriteResGroup144], (instregex "VPHSUBWrm")>; + +def SKXWriteResGroup145 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort0156]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKXWriteResGroup145], (instregex "SHLD(16|32|64)mri8")>; +def: InstRW<[SKXWriteResGroup145], (instregex "SHRD(16|32|64)mri8")>; + +def SKXWriteResGroup146 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> { + let Latency = 9; + let NumMicroOps = 5; + 
let ResourceCycles = [1,2,1,1]; +} +def: InstRW<[SKXWriteResGroup146], (instregex "LAR(16|32|64)rm")>; +def: InstRW<[SKXWriteResGroup146], (instregex "LSL(16|32|64)rm")>; + +def SKXWriteResGroup147 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup147], (instregex "AESDECLASTrm")>; +def: InstRW<[SKXWriteResGroup147], (instregex "AESDECrm")>; +def: InstRW<[SKXWriteResGroup147], (instregex "AESENCLASTrm")>; +def: InstRW<[SKXWriteResGroup147], (instregex "AESENCrm")>; +def: InstRW<[SKXWriteResGroup147], (instregex "RCPPSm")>; +def: InstRW<[SKXWriteResGroup147], (instregex "RSQRTPSm")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VAESDECLASTrm")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VAESDECrm")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VAESENCLASTrm")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VAESENCrm")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VRCP14PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VRCP14PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VRCP14SDrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VRCP14SSrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VRCPPSm")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VRSQRT14PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VRSQRT14PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VRSQRT14SDrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VRSQRT14SSrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup147], (instregex "VRSQRTPSm")>; + +def SKXWriteResGroup148 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup148], (instregex "ADD_F32m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "ADD_F64m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "ILD_F16m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "ILD_F32m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "ILD_F64m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "SUBR_F32m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "SUBR_F64m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "SUB_F32m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "SUB_F64m")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VALIGNDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VALIGNDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VALIGNQZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VALIGNQZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VCMPPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VCMPPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VCMPPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VCMPPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VDBPSADBWZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VDBPSADBWZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPBZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPBZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPDZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPDZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQBZ256rm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPEQWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTQYrm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPGTWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPQZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPQZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUBZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUBZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUDZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUDZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUQZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUQZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUWZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPUWZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPWZ256rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPCMPWZrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERM2F128rm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERM2I128rm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMDYrm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2D256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2Drm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2PD256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2PDrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2PS256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2PSrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2Q256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMI2Qrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPDYmi")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPDZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPDZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPSYrm")>; +def: 
InstRW<[SKXWriteResGroup148], (instregex "VPERMPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMQYmi")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMQZ256m(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMQZm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2D256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2Drm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2PD256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2PDrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2PS256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2PSrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2Q256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPERMT2Qrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMAXSQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMAXSQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMAXUQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMAXUQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMINSQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMINSQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMINUQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMINUQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXBWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXWDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXWDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXWQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVSXWQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBDYrm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBQYrm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBWYrm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXBWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXDQYrm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex 
"VPMOVZXDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWQYrm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPMOVZXWQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPSADBWYrm")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPSADBWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTMWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTNMBZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTNMBZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTNMDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTNMDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTNMQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTNMQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTNMWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VPTESTNMWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VSHUFF32X4Z256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VSHUFF32X4Zrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VSHUFF64X2Z256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VSHUFF64X2Zrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VSHUFI32X4Z256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VSHUFI32X4Zrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VSHUFI64X2Z256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup148], (instregex "VSHUFI64X2Zrm(b?)i(k?)(z?)")>; + +def SKXWriteResGroup149 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup149], (instregex "ADDPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "ADDPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "ADDSUBPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "ADDSUBPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "CMPPDrmi")>; +def: InstRW<[SKXWriteResGroup149], (instregex "CMPPSrmi")>; +def: InstRW<[SKXWriteResGroup149], (instregex "CVTDQ2PSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "CVTPS2DQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "CVTSS2SDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "CVTTPS2DQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "MAXPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "MAXPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "MINPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "MINPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "MULPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex 
"MULPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PHMINPOSUWrm128")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMADDUBSWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMADDWDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULDQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULHRSWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULHUWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULHWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULLWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "PMULUDQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "SUBPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "SUBPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDSUBPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VADDSUBPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCMPPDrmi")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCMPPSrmi")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTDQ2PSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPD2QQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPD2UQQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPH2PSYrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPH2PSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2DQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2DQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2PDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2QQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2UDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTPS2UQQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTQQ2PDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTSS2SDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTSS2SDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPD2QQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPD2UQQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPS2DQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPS2DQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPS2QQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPS2UDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTTPS2UQQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTUDQ2PDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTUDQ2PSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VCVTUQQ2PDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFIXUPIMMPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex 
"VFIXUPIMMPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFIXUPIMMSDrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFIXUPIMMSSrmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD132SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD213SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADD231SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMADDSUB231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB132SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB213SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PDm")>; +def: 
InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUB231SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFMSUBADD231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD132SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD213SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMADD231SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB132SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB213SSZm_Int(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PDm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231PSm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231SDZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VFNMSUB231SSZm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETEXPPDZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETEXPPSZ128m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETEXPSDm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETEXPSSm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETMANTPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETMANTPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETMANTSDZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VGETMANTSSZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMAXSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMINSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VMULSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPHMINPOSUWrm128")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPLZCNTDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPLZCNTQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMADDUBSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMADDUBSWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMADDWDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMADDWDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULDQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHRSWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHRSWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHUWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHUWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHWZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULHWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULLWZ128rm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup149], (instregex "VPMULLWrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULUDQZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VPMULUDQrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VRANGEPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VRANGEPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VRANGESDZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VRANGESSZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VREDUCEPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VREDUCEPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VREDUCESDZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VREDUCESSZ128rmi(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSCALEFPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSCALEFPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSCALEFSDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSCALEFSSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPDrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBPSrm")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBSDZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup149], (instregex "VSUBSSZrm_Int(b?)(k?)(z?)")>; + +def SKXWriteResGroup150 : SchedWriteRes<[SKXPort0]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SKXWriteResGroup150], (instregex "PCMPISTRIrr")>; +def: InstRW<[SKXWriteResGroup150], (instregex "PCMPISTRM128rr")>; +def: InstRW<[SKXWriteResGroup150], (instregex "VPCMPISTRIrr")>; +def: InstRW<[SKXWriteResGroup150], (instregex "VPCMPISTRM128rr")>; + +def SKXWriteResGroup151 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup151], (instregex "MPSADBWrmi")>; +def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup151], (instregex "VEXPANDPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup151], (instregex "VMPSADBWrmi")>; +def: InstRW<[SKXWriteResGroup151], (instregex "VPEXPANDDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup151], (instregex "VPEXPANDQZ128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup152 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup152], (instregex "MMX_CVTPI2PDirm")>; +def: InstRW<[SKXWriteResGroup152], (instregex "VPTESTYrm")>; + +def SKXWriteResGroup153 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup153], (instregex "CVTSD2SSrm")>; +def: InstRW<[SKXWriteResGroup153], (instregex "VCVTSD2SSrm")>; + +def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { + let Latency = 10; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup154], (instregex "VPHADDSWrm256")>; +def: InstRW<[SKXWriteResGroup154], (instregex "VPHSUBSWrm256")>; + +def SKXWriteResGroup155 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 10; + let NumMicroOps = 4; 
+
+def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> {
+  let Latency = 10;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SKXWriteResGroup156], (instregex "MULX32rm")>;
+
+def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
+  let Latency = 10;
+  let NumMicroOps = 8;
+  let ResourceCycles = [1,1,1,1,1,3];
+}
+def: InstRW<[SKXWriteResGroup157], (instregex "ADD8mi", "AND8mi", "OR8mi",
+  "SUB8mi", "XCHG(16|32|64)rm", "XCHG8rm", "XOR8mi")>;
+
+def SKXWriteResGroup158 : SchedWriteRes<[SKXPort05,SKXPort0156]> {
+  let Latency = 10;
+  let NumMicroOps = 10;
+  let ResourceCycles = [9,1];
+}
+def: InstRW<[SKXWriteResGroup158], (instregex "MMX_EMMS")>;
+
+def SKXWriteResGroup159 : SchedWriteRes<[SKXPort0]> {
+  let Latency = 11;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup159], (instregex "DIVPSrr", "DIVSSrr", "VDIVPSYrr",
+  "VDIVPSZ128rr(b?)(k?)(z?)", "VDIVPSZ256rr(b?)(k?)(z?)", "VDIVPSrr",
+  "VDIVSSZrr_Int(b?)(k?)(z?)", "VDIVSSrr")>;
+
+def SKXWriteResGroup160 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let Latency = 11;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup160], (instregex "MUL_F32m", "MUL_F64m",
+  "VRCP14PDZ256m(b?)(k?)(z?)", "VRCP14PSZ256m(b?)(k?)(z?)", "VRCPPSYm",
+  "VRSQRT14PDZ256m(b?)(k?)(z?)", "VRSQRT14PSZ256m(b?)(k?)(z?)", "VRSQRTPSYm")>;
+
+def SKXWriteResGroup161 : SchedWriteRes<[SKXPort23,SKXPort015]> {
+  let Latency = 11;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup161], (instregex "VADDPDYrm",
+  "VADDPDZ256rm(b?)(k?)(z?)", "VADDPDZrm(b?)(k?)(z?)", "VADDPSYrm",
+  "VADDPSZ256rm(b?)(k?)(z?)", "VADDPSZrm(b?)(k?)(z?)",
+  "VADDSUBPDYrm", "VADDSUBPSYrm", "VCMPPDYrmi", "VCMPPSYrmi",
+  "VCVTDQ2PDZ256rm(b?)(k?)(z?)")>;
InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTDQ2PSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPD2QQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPD2QQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPD2UQQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPD2UQQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPH2PSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPH2PSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2DQYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2DQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2DQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2PDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2PDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2QQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2UDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2UDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTPS2UQQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTQQ2PDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTQQ2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPD2QQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPD2QQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPD2UQQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPD2UQQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2DQYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2DQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2DQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2QQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2UDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2UDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTTPS2UQQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUDQ2PDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUDQ2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUDQ2PSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUDQ2PSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUQQ2PDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VCVTUQQ2PDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFIXUPIMMPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex 
"VFMADD132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD213PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADD231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB213PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMADDSUB231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB213PSZm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUB231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD213PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFMSUBADD231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD213PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMADD231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PSYm")>; +def: InstRW<[SKXWriteResGroup161], 
(instregex "VFNMSUB132PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB132PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB213PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PDYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PSYm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VFNMSUB231PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETEXPPDZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETEXPPDm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETEXPPSZ256m(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETEXPPSm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETMANTPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETMANTPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETMANTPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VGETMANTPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMAXPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMINPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VMULPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPLZCNTDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPLZCNTDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPLZCNTQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPLZCNTQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMADDUBSWYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMADDUBSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMADDUBSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMADDWDYrm")>; +def: 
InstRW<[SKXWriteResGroup161], (instregex "VPMADDWDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMADDWDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULDQYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHRSWYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHRSWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHRSWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHUWYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHUWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHUWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHWYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULHWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULLWYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULLWZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULUDQYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULUDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VPMULUDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VRANGEPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VRANGEPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VRANGEPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VRANGEPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VREDUCEPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VREDUCEPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VREDUCEPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VREDUCEPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSCALEFPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSCALEFPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSCALEFPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSCALEFPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPDYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPSYrm")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup161], (instregex "VSUBPSZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup162 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup162], (instregex "FICOM16m")>; +def: InstRW<[SKXWriteResGroup162], (instregex "FICOM32m")>; +def: InstRW<[SKXWriteResGroup162], (instregex "FICOMP16m")>; +def: InstRW<[SKXWriteResGroup162], (instregex "FICOMP32m")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VEXPANDPDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VEXPANDPDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VEXPANDPSZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VEXPANDPSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VMPSADBWYrmi")>; +def: InstRW<[SKXWriteResGroup162], 
(instregex "VPEXPANDDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VPEXPANDDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VPEXPANDQZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup162], (instregex "VPEXPANDQZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup163 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup163], (instregex "VCVTSD2SSZrm_Int(b?)(k?)(z?)")>; + +def SKXWriteResGroup164 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup164], (instregex "CVTDQ2PDrm")>; +def: InstRW<[SKXWriteResGroup164], (instregex "VCVTDQ2PDrm")>; + +def SKXWriteResGroup165 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup165], (instregex "CVTSD2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTSD2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTSS2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTSS2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTTSD2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTTSD2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "CVTTSS2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSD2SI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSD2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSD2SIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSD2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSD2USI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSS2SI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSS2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSS2SIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSS2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTSS2USIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSD2SI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSD2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSD2SIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSD2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSD2USI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSS2SI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSS2SI64rm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSS2SIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSS2SIrm")>; +def: InstRW<[SKXWriteResGroup165], (instregex "VCVTTSS2USIZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup166 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup166], (instregex "CVTPD2DQrm")>; +def: InstRW<[SKXWriteResGroup166], (instregex "CVTPD2PSrm")>; +def: InstRW<[SKXWriteResGroup166], (instregex "CVTTPD2DQrm")>; +def: InstRW<[SKXWriteResGroup166], (instregex "MMX_CVTPD2PIirm")>; +def: InstRW<[SKXWriteResGroup166], (instregex "MMX_CVTTPD2PIirm")>; + +def SKXWriteResGroup167 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup167], (instregex 
"VPCONFLICTQZ128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup168 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { + let Latency = 11; + let NumMicroOps = 6; + let ResourceCycles = [1,1,1,2,1]; +} +def: InstRW<[SKXWriteResGroup168], (instregex "SHLD(16|32|64)mrCL")>; +def: InstRW<[SKXWriteResGroup168], (instregex "SHRD(16|32|64)mrCL")>; + +def SKXWriteResGroup169 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { + let Latency = 11; + let NumMicroOps = 7; + let ResourceCycles = [2,3,2]; +} +def: InstRW<[SKXWriteResGroup169], (instregex "RCL(16|32|64)rCL")>; +def: InstRW<[SKXWriteResGroup169], (instregex "RCR(16|32|64)rCL")>; + +def SKXWriteResGroup170 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 11; + let NumMicroOps = 9; + let ResourceCycles = [1,5,1,2]; +} +def: InstRW<[SKXWriteResGroup170], (instregex "RCL8rCL")>; + +def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> { + let Latency = 11; + let NumMicroOps = 11; + let ResourceCycles = [2,9]; +} +def: InstRW<[SKXWriteResGroup171], (instregex "LOOPE")>; +def: InstRW<[SKXWriteResGroup171], (instregex "LOOPNE")>; + +def SKXWriteResGroup172 : SchedWriteRes<[SKXPort0]> { + let Latency = 12; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup172], (instregex "SQRTPSr")>; +def: InstRW<[SKXWriteResGroup172], (instregex "SQRTSSr")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSYr")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSZ128r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSZ256r(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTPSr")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTSSZr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup172], (instregex "VSQRTSSr")>; + +def SKXWriteResGroup173 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 12; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup173], (instregex "PCLMULQDQrm")>; +def: InstRW<[SKXWriteResGroup173], (instregex "VPCLMULQDQrm")>; + +def SKXWriteResGroup174 : SchedWriteRes<[SKXPort015]> { + let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup175], (instregex "VPERMWZ128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup176 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { + let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup176], (instregex "VCVTSD2USIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup176], (instregex "VCVTSS2USI64Zrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup176], (instregex "VCVTTSD2USIZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup176], (instregex "VCVTTSS2USI64Zrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup177 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 12; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup177], (instregex "VCVTPS2QQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup177], (instregex "VCVTPS2UQQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup177], (instregex "VCVTTPS2QQZrm(b?)(k?)(z?)")>; +def: 
InstRW<[SKXWriteResGroup177], (instregex "VCVTTPS2UQQZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup178 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup178], (instregex "HADDPDrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "HADDPSrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "HSUBPDrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "HSUBPSrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "VHADDPDrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "VHADDPSrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "VHSUBPDrm")>; +def: InstRW<[SKXWriteResGroup178], (instregex "VHSUBPSrm")>; + +def SKXWriteResGroup179 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SKXWriteResGroup179], (instregex "CVTTSS2SI64rm")>; + +def SKXWriteResGroup180 : SchedWriteRes<[SKXPort5,SKXPort23]> { + let Latency = 13; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SKXWriteResGroup180], (instregex "ADD_FI16m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "ADD_FI32m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "SUBR_FI16m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "SUBR_FI32m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "SUB_FI16m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "SUB_FI32m")>; +def: InstRW<[SKXWriteResGroup180], (instregex "VPERMWZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup180], (instregex "VPERMWZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup181 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 13; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup181], (instregex "VCVTDQ2PDYrm")>; + +def SKXWriteResGroup182 : SchedWriteRes<[SKXPort5,SKXPort015]> { + let Latency = 13; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SKXWriteResGroup182], (instregex "DPPSrri")>; +def: InstRW<[SKXWriteResGroup182], (instregex "VDPPSYrri")>; +def: InstRW<[SKXWriteResGroup182], (instregex "VDPPSrri")>; + +def SKXWriteResGroup183 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 13; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup183], (instregex "VHADDPDYrm")>; +def: InstRW<[SKXWriteResGroup183], (instregex "VHADDPSYrm")>; +def: InstRW<[SKXWriteResGroup183], (instregex "VHSUBPDYrm")>; +def: InstRW<[SKXWriteResGroup183], (instregex "VHSUBPSYrm")>; +def: InstRW<[SKXWriteResGroup183], (instregex "VPERMI2W128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup183], (instregex "VPERMT2W128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup184 : SchedWriteRes<[SKXPort0]> { + let Latency = 14; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup184], (instregex "DIVPDrr")>; +def: InstRW<[SKXWriteResGroup184], (instregex "DIVSDrr")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVPDYrr")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVPDZ128rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVPDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVPDrr")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVSDZrr_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup184], (instregex "VDIVSDrr")>; + +def SKXWriteResGroup185 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: 
InstRW<[SKXWriteResGroup185], (instregex "AESIMCrm")>; +def: InstRW<[SKXWriteResGroup185], (instregex "VAESIMCrm")>; + +def SKXWriteResGroup186 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup186], (instregex "PMULLDrm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDPDm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDPSm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDSDm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "ROUNDSSm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VPMULLDZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VPMULLDrm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALEPDZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALEPSZ128rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALESDm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VRNDSCALESSm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDPDm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDPSm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDSDm")>; +def: InstRW<[SKXWriteResGroup186], (instregex "VROUNDSSm")>; + +def SKXWriteResGroup187 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup187], (instregex "MUL_FI16m")>; +def: InstRW<[SKXWriteResGroup187], (instregex "MUL_FI32m")>; + +def SKXWriteResGroup188 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 14; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2PSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTPD2UDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTQQ2PSZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTTPD2DQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTTPD2UDQZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup188], (instregex "VCVTUQQ2PSZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup189 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2W256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup189], (instregex "VPERMI2Wrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup189], (instregex "VPERMT2W256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup189], (instregex "VPERMT2Wrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup190 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 14; + let NumMicroOps = 10; + let ResourceCycles = [2,4,1,3]; +} +def: InstRW<[SKXWriteResGroup190], (instregex "RCR8rCL")>; + +def SKXWriteResGroup191 : SchedWriteRes<[SKXPort0]> { + let Latency = 15; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_FPrST0")>; +def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_FST0r")>; +def: InstRW<[SKXWriteResGroup191], (instregex "DIVR_FrST0")>; + +def SKXWriteResGroup192 : SchedWriteRes<[SKXPort23,SKXPort015]> { + let Latency = 15; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; +} +def: InstRW<[SKXWriteResGroup192], (instregex "VPMULLDYrm")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VPMULLDZ256rm(b?)(k?)(z?)")>; 
+def: InstRW<[SKXWriteResGroup192], (instregex "VPMULLDZrm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPDZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPSZ256rm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VRNDSCALEPSZrm(b?)i(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VROUNDYPDm")>; +def: InstRW<[SKXWriteResGroup192], (instregex "VROUNDYPSm")>; + +def SKXWriteResGroup193 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> { + let Latency = 15; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SKXWriteResGroup193], (instregex "DPPDrmi")>; +def: InstRW<[SKXWriteResGroup193], (instregex "VDPPDrmi")>; + +def SKXWriteResGroup194 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { + let Latency = 15; + let NumMicroOps = 8; + let ResourceCycles = [1,2,2,1,2]; +} +def: InstRW<[SKXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)(k?)(z?)")>; + +def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 15; + let NumMicroOps = 10; + let ResourceCycles = [1,1,1,5,1,1]; +} +def: InstRW<[SKXWriteResGroup195], (instregex "RCL(16|32|64)mCL")>; +def: InstRW<[SKXWriteResGroup195], (instregex "RCL8mCL")>; + +def SKXWriteResGroup196 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 16; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup196], (instregex "DIVSSrm")>; +def: InstRW<[SKXWriteResGroup196], (instregex "VDIVSSrm")>; + +def SKXWriteResGroup197 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 16; + let NumMicroOps = 4; + let ResourceCycles = [3,1]; +} +def: InstRW<[SKXWriteResGroup197], (instregex "PCMPISTRIrm")>; +def: InstRW<[SKXWriteResGroup197], (instregex "PCMPISTRM128rm")>; +def: InstRW<[SKXWriteResGroup197], (instregex "VPCMPISTRIrm")>; +def: InstRW<[SKXWriteResGroup197], (instregex "VPCMPISTRM128rm")>; + +def SKXWriteResGroup198 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { + let Latency = 16; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup198], (instregex "VRCP14PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup198], (instregex "VRCP14PSZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup198], (instregex "VRSQRT14PDZm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup198], (instregex "VRSQRT14PSZm(b?)(k?)(z?)")>; + +def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 16; + let NumMicroOps = 14; + let ResourceCycles = [1,1,1,4,2,5]; +} +def: InstRW<[SKXWriteResGroup199], (instregex "CMPXCHG8B")>; + +def SKXWriteResGroup200 : SchedWriteRes<[SKXPort0156]> { + let Latency = 16; + let NumMicroOps = 16; + let ResourceCycles = [16]; +} +def: InstRW<[SKXWriteResGroup200], (instregex "VZEROALL")>; + +def SKXWriteResGroup201 : SchedWriteRes<[SKXPort0,SKXPort23]> { + let Latency = 17; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup201], (instregex "DIVPSrm")>; +def: InstRW<[SKXWriteResGroup201], (instregex "SQRTSSm")>; +def: InstRW<[SKXWriteResGroup201], (instregex "VDIVPSZ128rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup201], (instregex "VDIVPSrm")>; +def: InstRW<[SKXWriteResGroup201], (instregex "VDIVSSZrm_Int(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup201], (instregex "VSQRTSSm")>; + +def SKXWriteResGroup202 : 
+
+def SKXWriteResGroup203 : SchedWriteRes<[SKXPort0]> {
+  let Latency = 18;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup203], (instregex "SQRTPDr", "SQRTSDr", "VSQRTPDYr",
+  "VSQRTPDZ128r(b?)(k?)(z?)", "VSQRTPDZ256r(b?)(k?)(z?)", "VSQRTPDr",
+  "VSQRTSDZr_Int(b?)(k?)(z?)", "VSQRTSDr")>;
+
+def SKXWriteResGroup204 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let Latency = 18;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup204], (instregex "SQRTPSm", "VDIVPSYrm",
+  "VDIVPSZ256rm(b?)(k?)(z?)", "VSQRTPSZ128m(b?)(k?)(z?)", "VSQRTPSm",
+  "VSQRTSSZm_Int(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup205 : SchedWriteRes<[SKXPort23,SKXPort015]> {
+  let Latency = 18;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SKXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup206 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort0156]> {
+  let Latency = 18;
+  let NumMicroOps = 8;
+  let ResourceCycles = [4,3,1];
+}
+def: InstRW<[SKXWriteResGroup206], (instregex "PCMPESTRIrr", "VPCMPESTRIrr")>;
+
+def SKXWriteResGroup207 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> {
+  let Latency = 18;
+  let NumMicroOps = 8;
+  let ResourceCycles = [1,1,1,5];
+}
+def: InstRW<[SKXWriteResGroup207], (instregex "CPUID", "RDTSC")>;
+
+def SKXWriteResGroup208 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> {
+  let Latency = 18;
+  let NumMicroOps = 11;
+  let ResourceCycles = [2,1,1,4,1,2];
+}
+def: InstRW<[SKXWriteResGroup208], (instregex "RCR(16|32|64)mCL", "RCR8mCL")>;
+
+def SKXWriteResGroup209 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let Latency = 19;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup209], (instregex "DIVSDrm", "VDIVSDrm",
+  "VSQRTPSYm", "VSQRTPSZ256m(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup210 : SchedWriteRes<[SKXPort0,SKXPort015]> {
+  let Latency = 19;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SKXWriteResGroup210], (instregex "VSQRTPSZr(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup211 : SchedWriteRes<[SKXPort23,SKXPort015]> {
+  let Latency = 19;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SKXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)(k?)(z?)",
+                                              "VPMULLQZrm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup212 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
+  let Latency = 19;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,3];
+}
+def: InstRW<[SKXWriteResGroup212], (instregex "DPPSrmi", "VDPPSrmi")>;
+
+def SKXWriteResGroup213 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort015,SKXPort0156]> {
+  let Latency = 19;
+  let NumMicroOps = 9;
+  let ResourceCycles = [4,3,1,1];
+}
+def: InstRW<[SKXWriteResGroup213], (instregex "PCMPESTRM128rr",
+                                              "VPCMPESTRM128rr")>;
+
+def SKXWriteResGroup214 : SchedWriteRes<[]> {
+  let Latency = 20;
+  let NumMicroOps = 0;
+}
+def: InstRW<[SKXWriteResGroup214], (instregex "VGATHERDPSZ128rm(b?)(k?)(z?)",
+  "VGATHERQPSZrm(b?)(k?)(z?)", "VPGATHERDDZ128rm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup215 : SchedWriteRes<[SKXPort0]> {
+  let Latency = 20;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SKXWriteResGroup215], (instregex "DIV_FPrST0", "DIV_FST0r",
+                                              "DIV_FrST0")>;
+
+def SKXWriteResGroup216 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let Latency = 20;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup216], (instregex "DIVPDrm",
+  "VDIVPDZ128rm(b?)(k?)(z?)", "VDIVPDrm", "VDIVSDZrm_Int(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup217 : SchedWriteRes<[SKXPort5,SKXPort23,SKXPort015]> {
+  let Latency = 20;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,3];
+}
+def: InstRW<[SKXWriteResGroup217], (instregex "VDPPSYrmi")>;
+
+def SKXWriteResGroup218 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+  let Latency = 20;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SKXWriteResGroup218], (instregex "VGATHERQPSZ128rm(b?)(k?)(z?)",
+  "VGATHERQPSZ256rm(b?)(k?)(z?)", "VPGATHERQDZ128rm(b?)(k?)(z?)",
+  "VPGATHERQDZ256rm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup219 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
+  let Latency = 20;
+  let NumMicroOps = 8;
+  let ResourceCycles = [1,1,1,1,1,1,2];
+}
+def: InstRW<[SKXWriteResGroup219], (instregex "INSB", "INSL", "INSW")>;
+
+def SKXWriteResGroup220 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort0156]> {
+  let Latency = 20;
+  let NumMicroOps = 10;
+  let ResourceCycles = [1,2,7];
+}
+def: InstRW<[SKXWriteResGroup220], (instregex "MWAITrr")>;
+
+def SKXWriteResGroup221 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort015]> {
+  let Latency = 20;
+  let NumMicroOps = 11;
+  let ResourceCycles = [3,6,2];
+}
+def: InstRW<[SKXWriteResGroup221], (instregex "AESKEYGENASSIST128rr",
+                                              "VAESKEYGENASSIST128rr")>;
+
+def SKXWriteResGroup222 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let Latency = 21;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup222], (instregex "VDIVPDYrm",
+                                              "VDIVPDZ256rm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup223 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let Latency = 22;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup223], (instregex "DIV_F32m", "DIV_F64m")>;
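+
+// Note: the gather groups below model the 128-bit EVEX forms (Group224) and
+// the VEX xmm forms (Group224_2) at 22-cycle latency, and the VEX ymm forms
+// (Group224_3) at 25 cycles, each as 5 micro-ops; SKXWriteResGroup214 above,
+// by contrast, lists no ports and zero micro-ops for a few EVEX gathers.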
+
+def SKXWriteResGroup224 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+  let Latency = 22;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SKXWriteResGroup224], (instregex "VGATHERDPDZ128rm(b?)(k?)(z?)",
+  "VGATHERQPDZ128rm(b?)(k?)(z?)", "VPGATHERDQZ128rm(b?)(k?)(z?)",
+  "VPGATHERQQZ128rm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup224_2 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+  let Latency = 22;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SKXWriteResGroup224_2], (instregex "VGATHERDPSrm", "VGATHERDPDrm",
+  "VGATHERQPDrm", "VGATHERQPSrm", "VPGATHERDDrm", "VPGATHERDQrm",
+  "VPGATHERQDrm", "VPGATHERQQrm")>;
+
+def SKXWriteResGroup224_3 : SchedWriteRes<[SKXPort0, SKXPort23, SKXPort5, SKXPort015]> {
+  let Latency = 25;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SKXWriteResGroup224_3], (instregex "VGATHERDPSYrm", "VGATHERDPDYrm",
+  "VGATHERQPDYrm", "VGATHERQPSYrm", "VPGATHERDDYrm", "VPGATHERDQYrm",
+  "VPGATHERQDYrm", "VPGATHERQQYrm")>;
+
+def SKXWriteResGroup225 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> {
+  let Latency = 22;
+  let NumMicroOps = 14;
+  let ResourceCycles = [5,5,4];
+}
+def: InstRW<[SKXWriteResGroup225], (instregex "VPCONFLICTDZ128rr(b?)(k?)(z?)",
+  "VPCONFLICTQZ256rr(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup226 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let Latency = 23;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup226], (instregex "SQRTSDm", "VSQRTSDm")>;
+
+def SKXWriteResGroup227 : SchedWriteRes<[SKXPort0,SKXPort015]> {
+  let Latency = 23;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SKXWriteResGroup227], (instregex "VDIVPDZrr(b?)(k?)(z?)",
+                                              "VDIVPSZrr(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup228 : SchedWriteRes<[SKXPort0,SKXPort4,SKXPort5,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
+  let Latency = 23;
+  let NumMicroOps = 19;
+  let ResourceCycles = [2,1,4,1,1,4,6];
+}
+def: InstRW<[SKXWriteResGroup228], (instregex "CMPXCHG16B")>;
+
+def SKXWriteResGroup229 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let Latency = 24;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup229], (instregex "SQRTPDm",
+  "VSQRTPDZ128m(b?)(k?)(z?)", "VSQRTPDm", "VSQRTSDZm_Int(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup230 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> {
+  let Latency = 24;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SKXWriteResGroup230], (instregex "VDIVPSZrm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup231 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort0156]> {
+  let Latency = 24;
+  let NumMicroOps = 9;
+  let ResourceCycles = [4,3,1,1];
+}
+def: InstRW<[SKXWriteResGroup231], (instregex "PCMPESTRIrm", "VPCMPESTRIrm")>;
+
+def SKXWriteResGroup232 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let Latency = 25;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup232], (instregex "VSQRTPDYm",
+                                              "VSQRTPDZ256m(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup233 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
+  let Latency = 25;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKXWriteResGroup233], (instregex "DIV_FI16m", "DIV_FI32m")>;
+
+def SKXWriteResGroup234 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+  let Latency = 25;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SKXWriteResGroup234], (instregex "VGATHERDPDZ256rm(b?)(k?)(z?)",
+  "VGATHERQPDZ256rm(b?)(k?)(z?)", "VPGATHERDQZ256rm(b?)(k?)(z?)",
+  "VPGATHERQDZrm(b?)(k?)(z?)", "VPGATHERQQZ256rm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup235 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort015,SKXPort0156]> {
+  let Latency = 25;
+  let NumMicroOps = 10;
+  let ResourceCycles = [4,3,1,1,1];
+}
+def: InstRW<[SKXWriteResGroup235], (instregex "PCMPESTRM128rm",
+                                              "VPCMPESTRM128rm")>;
+
+def SKXWriteResGroup236 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort015]> {
+  let Latency = 25;
+  let NumMicroOps = 11;
+  let ResourceCycles = [3,6,1,1];
+}
+def: InstRW<[SKXWriteResGroup236], (instregex "AESKEYGENASSIST128rm",
+                                              "VAESKEYGENASSIST128rm")>;
+
+def SKXWriteResGroup237 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> {
+  let Latency = 26;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SKXWriteResGroup237], (instregex "VSQRTPSZm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup238 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+  let Latency = 26;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SKXWriteResGroup238], (instregex "VGATHERDPDZrm(b?)(k?)(z?)",
+  "VGATHERQPDZrm(b?)(k?)(z?)", "VPGATHERDQZrm(b?)(k?)(z?)",
+  "VPGATHERQQZrm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup239 : SchedWriteRes<[SKXPort0,SKXPort23]> {
+  let Latency = 27;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SKXWriteResGroup239], (instregex "DIVR_F32m", "DIVR_F64m")>;
+
+def SKXWriteResGroup240 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+  let Latency = 27;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SKXWriteResGroup240], (instregex "VGATHERDPSZ256rm(b?)(k?)(z?)",
+                                              "VPGATHERDDZ256rm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup241 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23,SKXPort0156]> {
+  let Latency = 28;
+  let NumMicroOps = 8;
+  let ResourceCycles = [2,4,1,1];
+}
+def: InstRW<[SKXWriteResGroup241], (instregex "IDIV(16|32|64)m", "IDIV8m")>;
+
+def SKXWriteResGroup242 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> {
+  let Latency = 29;
+  let NumMicroOps = 15;
+  let ResourceCycles = [5,5,1,4];
+}
+def: InstRW<[SKXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup243 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> {
+  let Latency = 30;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SKXWriteResGroup243], (instregex "DIVR_FI16m", "DIVR_FI32m")>;
+
+def SKXWriteResGroup244 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> {
+  let Latency = 30;
+  let NumMicroOps = 4;
+  let ResourceCycles = [2,1,1];
+}
+def: InstRW<[SKXWriteResGroup244], (instregex "VDIVPDZrm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup245 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015,SKXPort0156]> {
+  let Latency = 30;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SKXWriteResGroup245], (instregex "VGATHERDPSZrm(b?)(k?)(z?)",
+                                              "VPGATHERDDZrm(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup246 : SchedWriteRes<[SKXPort0,SKXPort015]> {
+  let Latency = 31;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SKXWriteResGroup246], (instregex "VSQRTPDZr(b?)(k?)(z?)")>;
+
+def SKXWriteResGroup247 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort06,SKXPort0156]> {
+  let Latency = 35;
+  let NumMicroOps = 23;
+  let ResourceCycles = [1,5,3,4,10];
+}
+def: InstRW<[SKXWriteResGroup247], (instregex "IN32ri", "IN32rr", "IN8ri",
+                                              "IN8rr")>;
+
+def SKXWriteResGroup248 : SchedWriteRes<[SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> {
+  let Latency = 35;
+  let NumMicroOps = 23;
+  let ResourceCycles = [1,5,2,1,4,10];
+}
+def: InstRW<[SKXWriteResGroup248], (instregex "OUT32ir", "OUT32rr", "OUT8ir",
+                                              "OUT8rr")>;
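+
+// Note: many of the remaining groups describe heavily microcoded system
+// instructions; e.g. XSAVE below is modelled as 40 micro-ops, and its
+// ResourceCycles [1,11,1,1,26] sum to NumMicroOps across the listed ports.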
InstRW<[SKXWriteResGroup248], (instregex "OUT8rr")>; + +def SKXWriteResGroup249 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { + let Latency = 37; + let NumMicroOps = 21; + let ResourceCycles = [9,7,5]; +} +def: InstRW<[SKXWriteResGroup249], (instregex "VPCONFLICTDZ256rr(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup249], (instregex "VPCONFLICTQZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup250 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort23,SKXPort0156]> { + let Latency = 37; + let NumMicroOps = 31; + let ResourceCycles = [1,8,1,21]; +} +def: InstRW<[SKXWriteResGroup250], (instregex "XRSTOR(64?)")>; + +def SKXWriteResGroup251 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { + let Latency = 38; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SKXWriteResGroup251], (instregex "VSQRTPDZm(b?)(k?)(z?)")>; + +def SKXWriteResGroup252 : SchedWriteRes<[SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort23,SKXPort237,SKXPort15,SKXPort0156]> { + let Latency = 40; + let NumMicroOps = 18; + let ResourceCycles = [1,1,2,3,1,1,1,8]; +} +def: InstRW<[SKXWriteResGroup252], (instregex "VMCLEARm")>; + +def SKXWriteResGroup253 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> { + let Latency = 41; + let NumMicroOps = 39; + let ResourceCycles = [1,10,1,1,26]; +} +def: InstRW<[SKXWriteResGroup253], (instregex "XSAVE64")>; + +def SKXWriteResGroup254 : SchedWriteRes<[SKXPort5,SKXPort0156]> { + let Latency = 42; + let NumMicroOps = 22; + let ResourceCycles = [2,20]; +} +def: InstRW<[SKXWriteResGroup254], (instregex "RDTSCP")>; + +def SKXWriteResGroup255 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort0156]> { + let Latency = 42; + let NumMicroOps = 40; + let ResourceCycles = [1,11,1,1,26]; +} +def: InstRW<[SKXWriteResGroup255], (instregex "XSAVE")>; + +def SKXWriteResGroup256 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { + let Latency = 44; + let NumMicroOps = 22; + let ResourceCycles = [9,7,1,5]; +} +def: InstRW<[SKXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)(k?)(z?)")>; +def: InstRW<[SKXWriteResGroup256], (instregex "VPCONFLICTQZrm(b?)(k?)(z?)")>; + +def SKXWriteResGroup258 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05,SKXPort06,SKXPort0156]> { + let Latency = 62; + let NumMicroOps = 64; + let ResourceCycles = [2,8,5,10,39]; +} +def: InstRW<[SKXWriteResGroup258], (instregex "FLDENVm")>; + +def SKXWriteResGroup259 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 63; + let NumMicroOps = 88; + let ResourceCycles = [4,4,31,1,2,1,45]; +} +def: InstRW<[SKXWriteResGroup259], (instregex "FXRSTOR64")>; + +def SKXWriteResGroup260 : SchedWriteRes<[SKXPort0,SKXPort6,SKXPort23,SKXPort05,SKXPort06,SKXPort15,SKXPort0156]> { + let Latency = 63; + let NumMicroOps = 90; + let ResourceCycles = [4,2,33,1,2,1,47]; +} +def: InstRW<[SKXWriteResGroup260], (instregex "FXRSTOR")>; + +def SKXWriteResGroup261 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort015]> { + let Latency = 67; + let NumMicroOps = 35; + let ResourceCycles = [17,11,7]; +} +def: InstRW<[SKXWriteResGroup261], (instregex "VPCONFLICTDZrr(b?)(k?)(z?)")>; + +def SKXWriteResGroup262 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23,SKXPort015]> { + let Latency = 74; + let NumMicroOps = 36; + let ResourceCycles = [17,11,1,7]; +} +def: InstRW<[SKXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)(k?)(z?)")>;
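A quick orientation on the tables above and below: each SchedWriteRes record names the execution ports an instruction group occupies, together with its latency, micro-op count, and per-port resource cycles, and the InstRW lines bind instruction name patterns to that record. The following is a minimal C++ sketch of how a backend reads this data back — queryLatency is a hypothetical helper name, and SchedModel is assumed to be a TargetSchedModel already initialized for this subtarget:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetSchedule.h"

    // Resolves MI against the scheduling model; a VPCONFLICTDZrm, for
    // example, would pick up the 74-cycle latency encoded by
    // SKXWriteResGroup262 above.
    unsigned queryLatency(const llvm::TargetSchedModel &SchedModel,
                          const llvm::MachineInstr &MI) {
      return SchedModel.computeInstrLatency(&MI);
    }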
+def SKXWriteResGroup263 : SchedWriteRes<[SKXPort5,SKXPort05,SKXPort0156]> { + let Latency = 75; + let NumMicroOps = 15; + let ResourceCycles = [6,3,6]; +} +def: InstRW<[SKXWriteResGroup263], (instregex "FNINIT")>; + +def SKXWriteResGroup264 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort05,SKXPort0156]> { + let Latency = 76; + let NumMicroOps = 32; + let ResourceCycles = [7,2,8,3,1,11]; +} +def: InstRW<[SKXWriteResGroup264], (instregex "DIV(16|32|64)r")>; + +def SKXWriteResGroup265 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort5,SKXPort6,SKXPort06,SKXPort0156]> { + let Latency = 102; + let NumMicroOps = 66; + let ResourceCycles = [4,2,4,8,14,34]; +} +def: InstRW<[SKXWriteResGroup265], (instregex "IDIV(16|32|64)r")>; + +def SKXWriteResGroup266 : SchedWriteRes<[SKXPort0,SKXPort1,SKXPort4,SKXPort5,SKXPort6,SKXPort237,SKXPort06,SKXPort0156]> { + let Latency = 106; + let NumMicroOps = 100; + let ResourceCycles = [9,1,11,16,1,11,21,30]; +} +def: InstRW<[SKXWriteResGroup266], (instregex "FSTENVm")>; + +def SKXWriteResGroup267 : SchedWriteRes<[SKXPort6,SKXPort0156]> { + let Latency = 140; + let NumMicroOps = 4; + let ResourceCycles = [1,3]; +} +def: InstRW<[SKXWriteResGroup267], (instregex "PAUSE")>; +} // SchedModel diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index 45f7aceecd2be..fcf9f4ff6384f 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -663,8 +663,10 @@ def GenericPostRAModel : GenericX86Model { include "X86ScheduleAtom.td" include "X86SchedSandyBridge.td" include "X86SchedHaswell.td" +include "X86SchedBroadwell.td" include "X86ScheduleSLM.td" include "X86ScheduleZnver1.td" include "X86ScheduleBtVer2.td" include "X86SchedSkylakeClient.td" +include "X86SchedSkylakeServer.td" diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index 40e7345cdd274..38657d40c6121 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -370,6 +370,38 @@ def : WriteRes<WriteMicrocoded, [JAny]> { let Latency = 100; } def : WriteRes<WriteFence,  [JSAGU]>; def : WriteRes<WriteNop, []>; +//////////////////////////////////////////////////////////////////////////////// +// SSE4.1 instructions. +//////////////////////////////////////////////////////////////////////////////// + +def WriteDPPS: SchedWriteRes<[JFPU0, JFPU1]> { + let Latency = 11; + let ResourceCycles = [3,3]; + let NumMicroOps = 5; +} +def : InstRW<[WriteDPPS], (instregex "(V)?DPPSrri")>; + +def WriteDPPSLd: SchedWriteRes<[JLAGU, JFPU0, JFPU1]> { + let Latency = 16; + let ResourceCycles = [1,3,3]; + let NumMicroOps = 6; +} +def : InstRW<[WriteDPPSLd], (instregex "(V)?DPPSrmi")>; + +def WriteDPPD: SchedWriteRes<[JFPU0, JFPU1]> { + let Latency = 9; + let ResourceCycles = [3,3]; + let NumMicroOps = 3; +} +def : InstRW<[WriteDPPD], (instregex "(V)?DPPDrri")>; + +def WriteDPPDLd: SchedWriteRes<[JLAGU, JFPU0, JFPU1]> { + let Latency = 14; + let ResourceCycles = [1,3,3]; + let NumMicroOps = 3; +} +def : InstRW<[WriteDPPDLd], (instregex "(V)?DPPDrmi")>; + //////////////////////////////////////////////////////////////////////////////// // SSE4A instructions. //////////////////////////////////////////////////////////////////////////////// @@ -386,10 +418,74 @@ def WriteINSERTQ: SchedWriteRes<[JFPU01]> { } def : InstRW<[WriteINSERTQ], (instregex "INSERTQ")>; +//////////////////////////////////////////////////////////////////////////////// +// F16C instructions.
+//////////////////////////////////////////////////////////////////////////////// + +def WriteCVT3: SchedWriteRes<[JFPU1]> { + let Latency = 3; +} +def : InstRW<[WriteCVT3], (instregex "VCVTPS2PHrr")>; +def : InstRW<[WriteCVT3], (instregex "VCVTPH2PSrr")>; + +def WriteCVT3St: SchedWriteRes<[JFPU1, JLAGU]> { + let Latency = 3; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteCVT3St], (instregex "VCVTPS2PHmr")>; + +def WriteCVT3Ld: SchedWriteRes<[JFPU1, JLAGU]> { + let Latency = 8; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteCVT3Ld], (instregex "VCVTPH2PSrm")>; + +def WriteCVTPS2PHY: SchedWriteRes<[JFPU1, JFPU01]> { + let Latency = 6; + let ResourceCycles = [2,2]; + let NumMicroOps = 3; +} +def : InstRW<[WriteCVTPS2PHY], (instregex "VCVTPS2PHYrr")>; + +def WriteCVTPS2PHYSt: SchedWriteRes<[JFPU1, JFPU01, JLAGU]> { + let Latency = 11; + let ResourceCycles = [2,2,1]; + let NumMicroOps = 3; +} +def : InstRW<[WriteCVTPS2PHYSt], (instregex "VCVTPS2PHYmr")>; + +def WriteCVTPH2PSY: SchedWriteRes<[JFPU1]> { + let Latency = 3; + let ResourceCycles = [2]; + let NumMicroOps = 2; +} +def : InstRW<[WriteCVTPH2PSY], (instregex "VCVTPH2PSYrr")>; + +def WriteCVTPH2PSYLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 8; + let ResourceCycles = [1,2]; + let NumMicroOps = 2; +} +def : InstRW<[WriteCVTPH2PSYLd], (instregex "VCVTPH2PSYrm")>; + //////////////////////////////////////////////////////////////////////////////// // AVX instructions. //////////////////////////////////////////////////////////////////////////////// +def WriteVDPPSY: SchedWriteRes<[JFPU1, JFPU0]> { + let Latency = 12; + let ResourceCycles = [6, 6]; + let NumMicroOps = 10; +} +def : InstRW<[WriteVDPPSY], (instregex "VDPPSYrr")>; + +def WriteVDPPSYLd: SchedWriteRes<[JLAGU, JFPU1, JFPU0]> { + let Latency = 17; + let ResourceCycles = [1, 6, 6]; + let NumMicroOps = 11; +} +def : InstRW<[WriteVDPPSYLd, ReadAfterLd], (instregex "VDPPSYrm")>; + def WriteFAddY: SchedWriteRes<[JFPU0]> { let Latency = 3; let ResourceCycles = [2]; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 13062ca8cfe28..0de5619cff28d 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -345,7 +345,6 @@ void X86Subtarget::initializeEnvironment() { HasSGX = false; HasCLFLUSHOPT = false; HasCLWB = false; - IsBTMemSlow = false; IsPMULLDSlow = false; IsSHLDSlow = false; IsUAMem16Slow = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index a8de6eaf1cc92..a8d7f290688a2 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -193,9 +193,6 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// Processor has Prefetch with intent to Write instruction bool HasPFPREFETCHWT1; - /// True if BT (bit test) of memory instructions are slow. - bool IsBTMemSlow; - /// True if SHLD instructions are slow. 
bool IsSHLDSlow; @@ -489,7 +486,6 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool hasLAHFSAHF() const { return HasLAHFSAHF; } bool hasMWAITX() const { return HasMWAITX; } bool hasCLZERO() const { return HasCLZERO; } - bool isBTMemSlow() const { return IsBTMemSlow; } bool isSHLDSlow() const { return IsSHLDSlow; } bool isPMULLDSlow() const { return IsPMULLDSlow; } bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } @@ -592,13 +588,9 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool isOSWindows() const { return TargetTriple.isOSWindows(); } - bool isTargetWin64() const { - return In64BitMode && TargetTriple.isOSWindows(); - } + bool isTargetWin64() const { return In64BitMode && isOSWindows(); } - bool isTargetWin32() const { - return !In64BitMode && (isTargetCygMing() || isTargetKnownWindowsMSVC()); - } + bool isTargetWin32() const { return !In64BitMode && isOSWindows(); } bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; } bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; } @@ -680,6 +672,8 @@ class X86Subtarget final : public X86GenSubtargetInfo { AntiDepBreakMode getAntiDepBreakMode() const override { return TargetSubtargetInfo::ANTIDEP_CRITICAL; } + + bool enableAdvancedRASplitCost() const override { return true; } }; } // end namespace llvm diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f071d229d8964..6e6c724eb0af9 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -58,7 +58,10 @@ namespace llvm { void initializeWinEHStatePassPass(PassRegistry &); void initializeFixupLEAPassPass(PassRegistry &); +void initializeX86CallFrameOptimizationPass(PassRegistry &); +void initializeX86CmovConverterPassPass(PassRegistry &); void initializeX86ExecutionDepsFixPass(PassRegistry &); +void initializeX86DomainReassignmentPass(PassRegistry &); } // end namespace llvm @@ -73,7 +76,10 @@ extern "C" void LLVMInitializeX86Target() { initializeFixupBWInstPassPass(PR); initializeEvexToVexInstPassPass(PR); initializeFixupLEAPassPass(PR); + initializeX86CallFrameOptimizationPass(PR); + initializeX86CmovConverterPassPass(PR); initializeX86ExecutionDepsFixPass(PR); + initializeX86DomainReassignmentPass(PR); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { @@ -312,6 +318,7 @@ class X86PassConfig : public TargetPassConfig { bool addGlobalInstructionSelect() override; bool addILPOpts() override; bool addPreISel() override; + void addMachineSSAOptimization() override; void addPreRegAlloc() override; void addPostRegAlloc() override; void addPreEmitPass() override; @@ -405,6 +412,10 @@ void X86PassConfig::addPreRegAlloc() { addPass(createX86WinAllocaExpander()); } +void X86PassConfig::addMachineSSAOptimization() { + addPass(createX86DomainReassignmentPass()); + TargetPassConfig::addMachineSSAOptimization(); +} void X86PassConfig::addPostRegAlloc() { addPass(createX86FloatingPointStackifierPass()); diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 05f42deb53c7c..f54728d4482fc 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2617,8 +2617,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, { 3, MVT::v2i8, 10 }, //(load 6i8 and) deinterleave into 3 x 2i8 { 3, MVT::v4i8, 4 }, //(load 12i8 and) deinterleave into 3 x 4i8 { 3, MVT::v8i8, 9 }, //(load 24i8 and) deinterleave into 3 x 8i8 - { 3, MVT::v16i8, 18}, //(load 48i8
and) deinterleave into 3 x 16i8 - { 3, MVT::v32i8, 42 }, //(load 96i8 and) deinterleave into 3 x 32i8 + { 3, MVT::v16i8, 11}, //(load 48i8 and) deinterleave into 3 x 16i8 + { 3, MVT::v32i8, 13}, //(load 96i8 and) deinterleave into 3 x 32i8 { 4, MVT::v2i8, 12 }, //(load 8i8 and) deinterleave into 4 x 2i8 { 4, MVT::v4i8, 4 }, //(load 16i8 and) deinterleave into 4 x 4i8 @@ -2631,14 +2631,14 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, { 3, MVT::v2i8, 7 }, //interleave 3 x 2i8 into 6i8 (and store) { 3, MVT::v4i8, 8 }, //interleave 3 x 4i8 into 12i8 (and store) { 3, MVT::v8i8, 11 }, //interleave 3 x 8i8 into 24i8 (and store) - { 3, MVT::v16i8, 17 }, //interleave 3 x 16i8 into 48i8 (and store) - { 3, MVT::v32i8, 32 }, //interleave 3 x 32i8 into 96i8 (and store) + { 3, MVT::v16i8, 11 }, //interleave 3 x 16i8 into 48i8 (and store) + { 3, MVT::v32i8, 13 }, //interleave 3 x 32i8 into 96i8 (and store) { 4, MVT::v2i8, 12 }, //interleave 4 x 2i8 into 8i8 (and store) { 4, MVT::v4i8, 9 }, //interleave 4 x 4i8 into 16i8 (and store) - { 4, MVT::v8i8, 16 }, //interleave 4 x 8i8 into 32i8 (and store) - { 4, MVT::v16i8, 20 }, //interleave 4 x 16i8 into 64i8 (and store) - { 4, MVT::v32i8, 40 } //interleave 4 x 32i8 into 128i8 (and store) + { 4, MVT::v8i8, 10 }, //interleave 4 x 8i8 into 32i8 (and store) + { 4, MVT::v16i8, 10 }, //interleave 4 x 16i8 into 64i8 (and store) + { 4, MVT::v32i8, 12 } //interleave 4 x 32i8 into 128i8 (and store) }; if (Opcode == Instruction::Load) { @@ -2684,7 +2684,27 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace); + unsigned VF = VecTy->getVectorNumElements() / Factor; + MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); + if (Opcode == Instruction::Load) { + // The tables (AVX512InterleavedLoadTbl and AVX512InterleavedStoreTbl) + // contain the cost of the optimized shuffle sequence that the + // X86InterleavedAccess pass will generate. + // The cost of loads and stores are computed separately from the table. + + // X86InterleavedAccess supports only the following interleaved-access groups. + static const CostTblEntry AVX512InterleavedLoadTbl[] = { + {3, MVT::v16i8, 12}, //(load 48i8 and) deinterleave into 3 x 16i8 + {3, MVT::v32i8, 14}, //(load 96i8 and) deinterleave into 3 x 32i8 + {3, MVT::v64i8, 22}, //(load 192i8 and) deinterleave into 3 x 64i8 + }; + + if (const auto *Entry = + CostTableLookup(AVX512InterleavedLoadTbl, Factor, VT)) + return NumOfMemOps * MemOpCost + Entry->Cost; + //If an entry does not exist, fall back to the default implementation. + // Kind of shuffle depends on number of loaded values. // If we load the entire data in one register, we can use a 1-src shuffle. // Otherwise, we'll merge 2 sources in each operation. @@ -2727,6 +2747,22 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, // Store. assert(Opcode == Instruction::Store && "Expected Store Instruction at this point");
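The lookup above combines two components: the cost of the wide memory operations themselves (NumOfMemOps * MemOpCost, computed by getMemoryOpCost) and the table entry for the shuffle sequence X86InterleavedAccess will emit. A minimal sketch of that arithmetic, with hypothetical operand values:

    // For a stride-3 deinterleaved load of 3 x v16i8 on AVX512, the table
    // entry {3, MVT::v16i8, 12} contributes ShuffleSeqCost = 12 on top of
    // the wide loads. The numbers here are illustrative only; MemOpCost is
    // subtarget-dependent.
    unsigned interleavedCost(unsigned NumOfMemOps, unsigned MemOpCost,
                             unsigned ShuffleSeqCost) {
      return NumOfMemOps * MemOpCost + ShuffleSeqCost;
    }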
+ // X86InterleavedAccess supports only the following interleaved-access groups. + static const CostTblEntry AVX512InterleavedStoreTbl[] = { + {3, MVT::v16i8, 12}, // interleave 3 x 16i8 into 48i8 (and store) + {3, MVT::v32i8, 14}, // interleave 3 x 32i8 into 96i8 (and store) + {3, MVT::v64i8, 26}, // interleave 3 x 64i8 into 192i8 (and store) + + {4, MVT::v8i8, 10}, // interleave 4 x 8i8 into 32i8 (and store) + {4, MVT::v16i8, 11}, // interleave 4 x 16i8 into 64i8 (and store) + {4, MVT::v32i8, 14}, // interleave 4 x 32i8 into 128i8 (and store) + {4, MVT::v64i8, 24} // interleave 4 x 64i8 into 256i8 (and store) + }; + + if (const auto *Entry = + CostTableLookup(AVX512InterleavedStoreTbl, Factor, VT)) + return NumOfMemOps * MemOpCost + Entry->Cost; + //If an entry does not exist, fall back to the default implementation. // There are no strided stores meanwhile. And store can't be folded in // shuffle. diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index d17dfac6a9974..fb8c2a71c9ab2 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -1,4 +1,4 @@ -//===-- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter -----------===// +//===- X86VZeroUpper.cpp - AVX vzeroupper instruction inserter ------------===// // // The LLVM Compiler Infrastructure // @@ -17,14 +17,25 @@ #include "X86.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> + using namespace llvm; #define DEBUG_TYPE "x86-vzeroupper" @@ -35,23 +46,25 @@ namespace { class VZeroUpperInserter : public MachineFunctionPass { public: - VZeroUpperInserter() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &MF) override; + MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); } + StringRef getPassName() const override { return "X86 vzeroupper inserter"; } private: - void processBasicBlock(MachineBasicBlock &MBB); void insertVZeroUpper(MachineBasicBlock::iterator I, MachineBasicBlock &MBB); void addDirtySuccessor(MachineBasicBlock &MBB); - typedef enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY } BlockExitState; + using BlockExitState = enum { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY }; + static const char* getBlockExitStateName(BlockExitState ST); // Core algorithm state: @@ -73,13 +86,15 @@ namespace { // to be guarded until we discover a predecessor that // is DIRTY_OUT.
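The comments above describe a small forward dataflow over three block-exit states: a block either dirties the upper YMM/ZMM state, cleans it, or passes its predecessors' state through. A standalone C++ model of just the propagation rule those comments imply — an illustration of the idea, not the pass's actual code:

    // Names mirror the enum in the pass. A PASS_THROUGH block neither
    // dirties nor cleans the upper state, so its effective exit state is
    // whatever flows in from its predecessors; otherwise its own behavior
    // wins.
    enum ExitState { PASS_THROUGH, EXITS_CLEAN, EXITS_DIRTY };

    ExitState propagate(ExitState Pred, ExitState Block) {
      return Block == PASS_THROUGH ? Pred : Block;
    }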
struct BlockState { - BlockState() : ExitState(PASS_THROUGH), AddedToDirtySuccessors(false) {} - BlockExitState ExitState; - bool AddedToDirtySuccessors; + BlockExitState ExitState = PASS_THROUGH; + bool AddedToDirtySuccessors = false; MachineBasicBlock::iterator FirstUnguardedCall; + + BlockState() = default; }; - typedef SmallVector<BlockState, 8> BlockStateMap; - typedef SmallVector<MachineBasicBlock *, 8> DirtySuccessorsWorkList; + + using BlockStateMap = SmallVector<BlockState, 8>; + using DirtySuccessorsWorkList = SmallVector<MachineBasicBlock *, 8>; BlockStateMap BlockStates; DirtySuccessorsWorkList DirtySuccessors; @@ -90,8 +105,9 @@ namespace { static char ID; }; - char VZeroUpperInserter::ID = 0; -} +} // end anonymous namespace + +char VZeroUpperInserter::ID = 0; FunctionPass *llvm::createX86IssueVZeroUpperPass() { return new VZeroUpperInserter(); @@ -116,9 +132,8 @@ static bool isYmmOrZmmReg(unsigned Reg) { } static bool checkFnHasLiveInYmmOrZmm(MachineRegisterInfo &MRI) { - for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(), - E = MRI.livein_end(); I != E; ++I) - if (isYmmOrZmmReg(I->first)) + for (std::pair<unsigned, unsigned> LI : MRI.liveins()) + if (isYmmOrZmmReg(LI.first)) return true; return false; diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 0c3b343414760..0472a85f50da2 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -401,6 +401,8 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { Twine("__ehhandler$") + GlobalValue::dropLLVMManglingEscape( ParentFunc->getName()), TheModule); + if (auto *C = ParentFunc->getComdat()) + Trampoline->setComdat(C); BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline); IRBuilder<> Builder(EntryBB); Value *LSDA = emitEHLSDA(Builder, ParentFunc); diff --git a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp index eb9b9c3b264d3..3891efae57bb4 100644 --- a/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp +++ b/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp @@ -165,8 +165,9 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) { E.SymbolName = E.Name; // Trim off the trailing decoration. Symbols will always have a // starting prefix here (either _ for cdecl/stdcall, @ for fastcall - // or ? for C++ functions). (Vectorcall functions also will end up having - // a prefix here, even if they shouldn't.) + // or ? for C++ functions). Vectorcall functions won't have any + // fixed prefix, but the function base name will still be at least + // one char.
E.Name = E.Name.substr(0, E.Name.find('@', 1)); // By making sure E.SymbolName != E.Name for decorated symbols, // writeImportLibrary writes these symbols with the type diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 72bae203ee94b..3eff421d53e51 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -1,4 +1,4 @@ -//===-- ArgumentPromotion.cpp - Promote by-reference arguments ------------===// +//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===// // // The LLVM Compiler Infrastructure // @@ -31,30 +31,59 @@ #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <iterator> +#include <map> #include <set> +#include <string> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "argpromotion" @@ -65,7 +94,7 @@ STATISTIC(NumByValArgsPromoted, "Number of byval arguments promoted"); STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated"); /// A vector used to hold the indices of a single GEP instruction -typedef std::vector<uint64_t> IndicesVector; +using IndicesVector = std::vector<uint64_t>; /// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's @@ -75,13 +104,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ByValArgsToTransform, Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>> ReplaceCallSite) { - // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments.
FunctionType *FTy = F->getFunctionType(); std::vector<Type *> Params; - typedef std::set<std::pair<Type *, IndicesVector>> ScalarizeTable; + using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we @@ -89,7 +117,6 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // // Arguments that are directly loaded will have a zero element value here, to // handle cases where there are both a direct load and GEP accesses. - // std::map<Argument *, ScalarizeTable> ScalarizedElements; // OriginalLoads - Keep track of a representative load instruction from the @@ -335,7 +362,6 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. - // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { @@ -537,7 +563,7 @@ static void markIndicesSafe(const IndicesVector &ToMark, /// arguments passed in. static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca, AAResults &AAR, unsigned MaxElements) { - typedef std::set<IndicesVector> GEPIndicesSet; + using GEPIndicesSet = std::set<IndicesVector>; // Quick exit for unused arguments if (Arg->use_empty()) @@ -714,7 +740,6 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca, /// \brief Checks if a type could have padding bytes. static bool isDenselyPacked(Type *type, const DataLayout &DL) { - // There is no size information, so be conservative. if (!type->isSized()) return false; @@ -749,7 +774,6 @@ static bool isDenselyPacked(Type *type, const DataLayout &DL) { /// \brief Checks if the padding bytes of an argument could be accessed. static bool canPaddingBeAccessed(Argument *arg) { - assert(arg->hasByValAttr()); // Track all the pointers to the argument to make sure they are not captured. @@ -788,7 +812,6 @@ static bool canPaddingBeAccessed(Argument *arg) { /// are any promotable arguments and if it is safe to promote the function (for /// example, all callers are direct). If safe to promote some arguments, it /// calls the DoPromotion method. -/// static Function * promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter, unsigned MaxElements, @@ -964,9 +987,17 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C, } namespace { + /// ArgPromotion - The 'by reference' to 'by value' argument promotion pass. -/// struct ArgPromotion : public CallGraphSCCPass { + // Pass identification, replacement for typeid + static char ID; + + explicit ArgPromotion(unsigned MaxElements = 3) + : CallGraphSCCPass(ID), MaxElements(MaxElements) { + initializeArgPromotionPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); @@ -975,21 +1006,20 @@ struct ArgPromotion : public CallGraphSCCPass { } bool runOnSCC(CallGraphSCC &SCC) override; - static char ID; // Pass identification, replacement for typeid - explicit ArgPromotion(unsigned MaxElements = 3) - : CallGraphSCCPass(ID), MaxElements(MaxElements) { - initializeArgPromotionPass(*PassRegistry::getPassRegistry()); - } private: using llvm::Pass::doInitialization; + bool doInitialization(CallGraph &CG) override; + /// The maximum number of elements to expand, or 0 for unlimited. unsigned MaxElements; }; -} + +} // end anonymous namespace
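For intuition about the transformation this pass performs, here is a hedged source-level illustration (ordinary C++, not compiler code) of the "by reference" to "by value" rewrite:

    // Before: the callee only reads through its pointer argument.
    int callee(int *P) { return *P + 1; }

    // After argpromotion (conceptually): the loaded value is passed
    // directly, and every direct call site is rewritten to load the value
    // and pass it by value, exposing it to later scalar optimizations.
    int calleePromoted(int V) { return V + 1; }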
char ArgPromotion::ID = 0; + INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 67f18a307b9b9..397561746f863 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMipo AlwaysInliner.cpp ArgumentPromotion.cpp BarrierNoopPass.cpp + CalledValuePropagation.cpp ConstantMerge.cpp CrossDSOCFI.cpp DeadArgumentElimination.cpp diff --git a/lib/Transforms/IPO/CalledValuePropagation.cpp b/lib/Transforms/IPO/CalledValuePropagation.cpp new file mode 100644 index 0000000000000..c5f6336aa2be3 --- /dev/null +++ b/lib/Transforms/IPO/CalledValuePropagation.cpp @@ -0,0 +1,423 @@ +//===- CalledValuePropagation.cpp - Propagate called values -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a transformation that attaches !callees metadata to +// indirect call sites. For a given call site, the metadata, if present, +// indicates the set of functions the call site could possibly target at +// run-time. This metadata is added to indirect call sites when the set of +// possible targets can be determined by analysis and is known to be small. The +// analysis driving the transformation is similar to constant propagation and +// makes use of the generic sparse propagation solver. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/CalledValuePropagation.h" +#include "llvm/Analysis/SparsePropagation.h" +#include "llvm/Analysis/ValueLatticeUtils.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/Transforms/IPO.h" +using namespace llvm; + +#define DEBUG_TYPE "called-value-propagation" + +/// The maximum number of functions to track per lattice value. Once the number +/// of functions a call site can possibly target exceeds this threshold, its +/// lattice value becomes overdefined. The number of possible lattice values is +/// bounded by Ch(F, M), where F is the number of functions in the module and M +/// is MaxFunctionsPerValue. As such, this value should be kept very small. We +/// likely can't do anything useful for call sites with a large number of +/// possible targets, anyway. +static cl::opt<unsigned> MaxFunctionsPerValue( + "cvp-max-functions-per-value", cl::Hidden, cl::init(4), + cl::desc("The maximum number of functions to track per lattice value")); + +namespace { +/// To enable interprocedural analysis, we assign LLVM values to the following +/// groups. The register group represents SSA registers, the return group +/// represents the return values of functions, and the memory group represents +/// in-memory values. An LLVM Value can technically be in more than one group. +/// It's necessary to distinguish these groups so we can, for example, track a +/// global variable separately from the value stored at its location. +enum class IPOGrouping { Register, Return, Memory }; + +/// Our LatticeKeys are PointerIntPairs composed of LLVM values and groupings. +using CVPLatticeKey = PointerIntPair<Value *, 2, IPOGrouping>;
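A short sketch of the key representation above: PointerIntPair stores the IPOGrouping tag in the unused low alignment bits of the Value pointer, so a CVPLatticeKey costs no more than a bare pointer. The enum is re-declared here only to keep the sketch self-contained; makeRegisterKey is a hypothetical helper:

    #include "llvm/ADT/PointerIntPair.h"
    #include "llvm/IR/Value.h"

    enum class IPOGrouping { Register, Return, Memory };
    using CVPLatticeKey = llvm::PointerIntPair<llvm::Value *, 2, IPOGrouping>;

    // Packs V and its grouping into one pointer-sized key; Key.getPointer()
    // and Key.getInt() recover the two halves.
    CVPLatticeKey makeRegisterKey(llvm::Value *V) {
      return CVPLatticeKey(V, IPOGrouping::Register);
    }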
+/// The lattice value type used by our custom lattice function. It holds the +/// lattice state, and a set of functions. +class CVPLatticeVal { +public: + /// The states of the lattice values. Only the FunctionSet state is + /// interesting. It indicates the set of functions to which an LLVM value may + /// refer. + enum CVPLatticeStateTy { Undefined, FunctionSet, Overdefined, Untracked }; + + /// Comparator for sorting the functions set. We want to keep the order + /// deterministic for testing, etc. + struct Compare { + bool operator()(const Function *LHS, const Function *RHS) const { + return LHS->getName() < RHS->getName(); + } + }; + + CVPLatticeVal() : LatticeState(Undefined) {} + CVPLatticeVal(CVPLatticeStateTy LatticeState) : LatticeState(LatticeState) {} + CVPLatticeVal(std::set<Function *, Compare> &&Functions) + : LatticeState(FunctionSet), Functions(Functions) {} + + /// Get a reference to the functions held by this lattice value. The number + /// of functions will be zero for states other than FunctionSet. + const std::set<Function *, Compare> &getFunctions() const { + return Functions; + } + + /// Returns true if the lattice value is in the FunctionSet state. + bool isFunctionSet() const { return LatticeState == FunctionSet; } + + bool operator==(const CVPLatticeVal &RHS) const { + return LatticeState == RHS.LatticeState && Functions == RHS.Functions; + } + + bool operator!=(const CVPLatticeVal &RHS) const { + return LatticeState != RHS.LatticeState || Functions != RHS.Functions; + } + +private: + /// Holds the state this lattice value is in. + CVPLatticeStateTy LatticeState; + + /// Holds functions indicating the possible targets of call sites. This set + /// is empty for lattice values in the undefined, overdefined, and untracked + /// states. The maximum size of the set is controlled by + /// MaxFunctionsPerValue. Since most LLVM values are expected to be in + /// uninteresting states (i.e., overdefined), CVPLatticeVal objects should be + /// small and efficiently copyable. + std::set<Function *, Compare> Functions; +}; + +/// The custom lattice function used by the generic sparse propagation solver. +/// It handles merging lattice values and computing new lattice values for +/// constants, arguments, values returned from trackable functions, and values +/// located in trackable global variables. It also computes the lattice values +/// that change as a result of executing instructions. +class CVPLatticeFunc + : public AbstractLatticeFunction<CVPLatticeKey, CVPLatticeVal> { +public: + CVPLatticeFunc() + : AbstractLatticeFunction(CVPLatticeVal(CVPLatticeVal::Undefined), + CVPLatticeVal(CVPLatticeVal::Overdefined), + CVPLatticeVal(CVPLatticeVal::Untracked)) {} + + /// Compute and return a CVPLatticeVal for the given CVPLatticeKey. + CVPLatticeVal ComputeLatticeVal(CVPLatticeKey Key) override { + switch (Key.getInt()) { + case IPOGrouping::Register: + if (isa<Instruction>(Key.getPointer())) { + return getUndefVal(); + } else if (auto *A = dyn_cast<Argument>(Key.getPointer())) { + if (canTrackArgumentsInterprocedurally(A->getParent())) + return getUndefVal(); + } else if (auto *C = dyn_cast<Constant>(Key.getPointer())) { + return computeConstant(C); + } + return getOverdefinedVal(); + case IPOGrouping::Memory: + case IPOGrouping::Return: + if (auto *GV = dyn_cast<GlobalVariable>(Key.getPointer())) { + if (canTrackGlobalVariableInterprocedurally(GV)) + return computeConstant(GV->getInitializer()); + } else if (auto *F = cast<Function>(Key.getPointer())) + if (canTrackReturnsInterprocedurally(F)) + return getUndefVal(); + } + return getOverdefinedVal(); + } + + /// Merge the two given lattice values.
The interesting cases are merging two + /// FunctionSet values and a FunctionSet value with an Undefined value. For + /// these cases, we simply union the function sets. If the size of the union + /// is greater than the maximum functions we track, the merged value is + /// overdefined. + CVPLatticeVal MergeValues(CVPLatticeVal X, CVPLatticeVal Y) override { + if (X == getOverdefinedVal() || Y == getOverdefinedVal()) + return getOverdefinedVal(); + if (X == getUndefVal() && Y == getUndefVal()) + return getUndefVal(); + std::set<Function *, CVPLatticeVal::Compare> Union; + std::set_union(X.getFunctions().begin(), X.getFunctions().end(), + Y.getFunctions().begin(), Y.getFunctions().end(), + std::inserter(Union, Union.begin()), + CVPLatticeVal::Compare{}); + if (Union.size() > MaxFunctionsPerValue) + return getOverdefinedVal(); + return CVPLatticeVal(std::move(Union)); + } + + /// Compute the lattice values that change as a result of executing the given + /// instruction. The changed values are stored in \p ChangedValues. We handle + /// just a few kinds of instructions since we're only propagating values that + /// can be called. + void ComputeInstructionState( + Instruction &I, DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) override { + switch (I.getOpcode()) { + case Instruction::Call: + return visitCallSite(cast<CallInst>(&I), ChangedValues, SS); + case Instruction::Invoke: + return visitCallSite(cast<InvokeInst>(&I), ChangedValues, SS); + case Instruction::Load: + return visitLoad(*cast<LoadInst>(&I), ChangedValues, SS); + case Instruction::Ret: + return visitReturn(*cast<ReturnInst>(&I), ChangedValues, SS); + case Instruction::Select: + return visitSelect(*cast<SelectInst>(&I), ChangedValues, SS); + case Instruction::Store: + return visitStore(*cast<StoreInst>(&I), ChangedValues, SS); + default: + return visitInst(I, ChangedValues, SS); + } + } + + /// Print the given CVPLatticeVal to the specified stream. + void PrintLatticeVal(CVPLatticeVal LV, raw_ostream &OS) override { + if (LV == getUndefVal()) + OS << "Undefined "; + else if (LV == getOverdefinedVal()) + OS << "Overdefined"; + else if (LV == getUntrackedVal()) + OS << "Untracked "; + else + OS << "FunctionSet"; + } + + /// Print the given CVPLatticeKey to the specified stream. + void PrintLatticeKey(CVPLatticeKey Key, raw_ostream &OS) override { + if (Key.getInt() == IPOGrouping::Register) + OS << "<reg> "; + else if (Key.getInt() == IPOGrouping::Memory) + OS << "<mem> "; + else if (Key.getInt() == IPOGrouping::Return) + OS << "<ret> "; + if (isa<Function>(Key.getPointer())) + OS << Key.getPointer()->getName(); + else + OS << *Key.getPointer(); + } + + /// We collect a set of indirect calls when visiting call sites. This method + /// returns a reference to that set. + SmallPtrSetImpl<Instruction *> &getIndirectCalls() { return IndirectCalls; } + +private: + /// Holds the indirect calls we encounter during the analysis. We will attach + /// metadata to these calls after the analysis indicating the functions the + /// calls can possibly target. + SmallPtrSet<Instruction *, 32> IndirectCalls; + + /// Compute a new lattice value for the given constant. The constant, after + /// stripping any pointer casts, should be a Function. We ignore null + /// pointers as an optimization, since calling these values is undefined + /// behavior. + CVPLatticeVal computeConstant(Constant *C) { + if (isa<ConstantPointerNull>(C)) + return CVPLatticeVal(CVPLatticeVal::FunctionSet); + if (auto *F = dyn_cast<Function>(C->stripPointerCasts())) + return CVPLatticeVal({F}); + return getOverdefinedVal(); + }
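The MergeValues routine above is the heart of the lattice: union the function sets, and give up once the union exceeds the tracking limit. A standalone C++ mirror of that policy, using strings in place of Function pointers for readability:

    #include <algorithm>
    #include <iterator>
    #include <set>
    #include <string>

    static const unsigned MaxTracked = 4; // mirrors cvp-max-functions-per-value

    // Returns false when the merged set grows past the limit, i.e. when the
    // real pass would fall back to the overdefined state.
    bool mergeSets(const std::set<std::string> &X,
                   const std::set<std::string> &Y,
                   std::set<std::string> &Out) {
      std::set_union(X.begin(), X.end(), Y.begin(), Y.end(),
                     std::inserter(Out, Out.begin()));
      return Out.size() <= MaxTracked;
    }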
+ /// Handle return instructions. The function's return state is the merge of + /// the returned value state and the function's return state. + void visitReturn(ReturnInst &I, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + Function *F = I.getParent()->getParent(); + if (F->getReturnType()->isVoidTy()) + return; + auto RegI = CVPLatticeKey(I.getReturnValue(), IPOGrouping::Register); + auto RetF = CVPLatticeKey(F, IPOGrouping::Return); + ChangedValues[RetF] = + MergeValues(SS.getValueState(RegI), SS.getValueState(RetF)); + } + + /// Handle call sites. The state of a called function's formal arguments is + /// the merge of the argument state with the call site's corresponding actual + /// argument state. The call site state is the merge of the call site state + /// with the returned value state of the called function. + void visitCallSite(CallSite CS, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + Function *F = CS.getCalledFunction(); + Instruction *I = CS.getInstruction(); + auto RegI = CVPLatticeKey(I, IPOGrouping::Register); + + // If this is an indirect call, save it so we can quickly revisit it when + // attaching metadata. + if (!F) + IndirectCalls.insert(I); + + // If we can't track the function's return values, there's nothing to do. + if (!F || !canTrackReturnsInterprocedurally(F)) { + ChangedValues[RegI] = getOverdefinedVal(); + return; + } + + // Inform the solver that the called function is executable, and perform + // the merges for the arguments and return value. + SS.MarkBlockExecutable(&F->front()); + auto RetF = CVPLatticeKey(F, IPOGrouping::Return); + for (Argument &A : F->args()) { + auto RegFormal = CVPLatticeKey(&A, IPOGrouping::Register); + auto RegActual = + CVPLatticeKey(CS.getArgument(A.getArgNo()), IPOGrouping::Register); + ChangedValues[RegFormal] = + MergeValues(SS.getValueState(RegFormal), SS.getValueState(RegActual)); + } + ChangedValues[RegI] = + MergeValues(SS.getValueState(RegI), SS.getValueState(RetF)); + } + + /// Handle select instructions. The select instruction state is the merge of + /// the true and false value states. + void visitSelect(SelectInst &I, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); + auto RegT = CVPLatticeKey(I.getTrueValue(), IPOGrouping::Register); + auto RegF = CVPLatticeKey(I.getFalseValue(), IPOGrouping::Register); + ChangedValues[RegI] = + MergeValues(SS.getValueState(RegT), SS.getValueState(RegF)); + } + + /// Handle load instructions. If the pointer operand of the load is a global + /// variable, we attempt to track the value. The loaded value state is the + /// merge of the loaded value state with the global variable state. + void visitLoad(LoadInst &I, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); + if (auto *GV = dyn_cast<GlobalVariable>(I.getPointerOperand())) { + auto MemGV = CVPLatticeKey(GV, IPOGrouping::Memory); + ChangedValues[RegI] = + MergeValues(SS.getValueState(RegI), SS.getValueState(MemGV)); + } else { + ChangedValues[RegI] = getOverdefinedVal(); + } + }
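For a concrete picture of what the load/store handlers track, here is the source-level shape of the case they cover — a function pointer round-tripping through a tracked global (plain C++ for illustration, not compiler code):

    static void target() {}
    static void (*Callback)() = nullptr; // the "memory" lattice slot

    void install() { Callback = target; } // store: memory state gains {target}
    void fire()    { Callback(); }        // load feeds an indirect call site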
+ /// Handle store instructions. If the pointer operand of the store is a + /// global variable, we attempt to track the value. The global variable state + /// is the merge of the stored value state with the global variable state. + void visitStore(StoreInst &I, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + auto *GV = dyn_cast<GlobalVariable>(I.getPointerOperand()); + if (!GV) + return; + auto RegI = CVPLatticeKey(I.getValueOperand(), IPOGrouping::Register); + auto MemGV = CVPLatticeKey(GV, IPOGrouping::Memory); + ChangedValues[MemGV] = + MergeValues(SS.getValueState(RegI), SS.getValueState(MemGV)); + } + + /// Handle all other instructions. All other instructions are marked + /// overdefined. + void visitInst(Instruction &I, + DenseMap<CVPLatticeKey, CVPLatticeVal> &ChangedValues, + SparseSolver<CVPLatticeKey, CVPLatticeVal> &SS) { + auto RegI = CVPLatticeKey(&I, IPOGrouping::Register); + ChangedValues[RegI] = getOverdefinedVal(); + } +}; +} // namespace + +namespace llvm { +/// A specialization of LatticeKeyInfo for CVPLatticeKeys. The generic solver +/// must translate between LatticeKeys and LLVM Values when adding Values to +/// its work list and inspecting the state of control-flow related values. +template <> struct LatticeKeyInfo<CVPLatticeKey> { + static inline Value *getValueFromLatticeKey(CVPLatticeKey Key) { + return Key.getPointer(); + } + static inline CVPLatticeKey getLatticeKeyFromValue(Value *V) { + return CVPLatticeKey(V, IPOGrouping::Register); + } +}; +} // namespace llvm + +static bool runCVP(Module &M) { + // Our custom lattice function and generic sparse propagation solver. + CVPLatticeFunc Lattice; + SparseSolver<CVPLatticeKey, CVPLatticeVal> Solver(&Lattice); + + // For each function in the module, if we can't track its arguments, let the + // generic solver assume it is executable. + for (Function &F : M) + if (!F.isDeclaration() && !canTrackArgumentsInterprocedurally(&F)) + Solver.MarkBlockExecutable(&F.front()); + + // Solve our custom lattice. In doing so, we will also build a set of + // indirect call sites. + Solver.Solve();
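Once the solver finishes, the code below attaches !callees nodes to the collected indirect calls. A hedged sketch of a consumer of that metadata, assuming the operand layout produced by MDBuilder::createCallees, where each operand of the node is one candidate target function:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Metadata.h"

    // Collects every function named by I's !callees metadata; returns an
    // empty vector when the metadata is absent.
    llvm::SmallVector<llvm::Function *, 4>
    possibleCallees(const llvm::Instruction &I) {
      llvm::SmallVector<llvm::Function *, 4> Targets;
      if (llvm::MDNode *MD = I.getMetadata(llvm::LLVMContext::MD_callees))
        for (unsigned Op = 0, E = MD->getNumOperands(); Op != E; ++Op)
          if (auto *F =
                  llvm::mdconst::dyn_extract<llvm::Function>(MD->getOperand(Op)))
            Targets.push_back(F);
      return Targets;
    }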
+ // Attach metadata to the indirect call sites that were collected indicating + // the set of functions they can possibly target. + bool Changed = false; + MDBuilder MDB(M.getContext()); + for (Instruction *C : Lattice.getIndirectCalls()) { + CallSite CS(C); + auto RegI = CVPLatticeKey(CS.getCalledValue(), IPOGrouping::Register); + CVPLatticeVal LV = Solver.getExistingValueState(RegI); + if (!LV.isFunctionSet() || LV.getFunctions().empty()) + continue; + MDNode *Callees = MDB.createCallees(SmallVector<Function *, 4>( + LV.getFunctions().begin(), LV.getFunctions().end())); + C->setMetadata(LLVMContext::MD_callees, Callees); + Changed = true; + } + + return Changed; +} + +PreservedAnalyses CalledValuePropagationPass::run(Module &M, + ModuleAnalysisManager &) { + runCVP(M); + return PreservedAnalyses::all(); +} + +namespace { +class CalledValuePropagationLegacyPass : public ModulePass { +public: + static char ID; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + CalledValuePropagationLegacyPass() : ModulePass(ID) { + initializeCalledValuePropagationLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + if (skipModule(M)) + return false; + return runCVP(M); + } +}; +} // namespace + +char CalledValuePropagationLegacyPass::ID = 0; +INITIALIZE_PASS(CalledValuePropagationLegacyPass, "called-value-propagation", + "Called Value Propagation", false, false) + +ModulePass *llvm::createCalledValuePropagationPass() { + return new CalledValuePropagationLegacyPass(); +} diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 62b5a9c9ba266..e0b1037053f01 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -19,16 +19,23 @@ #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/IPO.h" +#include <algorithm> +#include <cassert> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "constmerge" @@ -102,8 +109,7 @@ static bool mergeConstants(Module &M) { // constants together may allow us to merge other constants together if the // second level constants have initializers which point to the globals that // were just merged. - while (1) { - + while (true) { // First: Find the canonical constants others will be merged with. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { @@ -225,23 +231,27 @@ PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) { } namespace { + struct ConstantMergeLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid + ConstantMergeLegacyPass() : ModulePass(ID) { initializeConstantMergeLegacyPassPass(*PassRegistry::getPassRegistry()); } // For this pass, process all of the globals in the module, eliminating // duplicate constants.
- bool runOnModule(Module &M) { + bool runOnModule(Module &M) override { if (skipModule(M)) return false; return mergeConstants(M); } }; -} + +} // end anonymous namespace char ConstantMergeLegacyPass::ID = 0; + INITIALIZE_PASS(ConstantMergeLegacyPass, "constmerge", "Merge Duplicate Global Constants", false, false) diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 8e26849ea9e37..5446541550e54 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -1,4 +1,4 @@ -//===-- DeadArgumentElimination.cpp - Eliminate dead arguments ------------===// +//===- DeadArgumentElimination.cpp - Eliminate dead arguments -------------===// // // The LLVM Compiler Infrastructure // @@ -20,24 +20,36 @@ #include "llvm/Transforms/IPO/DeadArgumentElimination.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" -#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" -#include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include <map> -#include <set> +#include <cassert> +#include <cstdint> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "deadargelim" @@ -46,9 +58,10 @@ STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); STATISTIC(NumArgumentsReplacedWithUndef, "Number of unread args replaced with undef"); + namespace { + /// DAE - The dead argument elimination pass. - /// class DAE : public ModulePass { protected: // DAH uses this to specify a different ID. public: static char ID; // Pass identification, replacement for typeid + DAE() : ModulePass(ID) { initializeDAEPass(*PassRegistry::getPassRegistry()); } @@ -71,33 +85,38 @@ namespace { virtual bool ShouldHackArguments() const { return false; } }; -} +} // end anonymous namespace char DAE::ID = 0; + INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false) namespace { + /// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but /// deletes arguments to functions which are external. This is only for use /// by bugpoint. struct DAH : public DAE { static char ID; + DAH() : DAE(ID) {} bool ShouldHackArguments() const override { return true; } }; + +} // end anonymous namespace char DAH::ID = 0; + INITIALIZE_PASS(DAH, "deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)", false, false) /// createDeadArgEliminationPass - This pass removes arguments from functions /// which are not used by the body of the function.
-/// ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); } + ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); } /// DeleteDeadVarargs - If this is an function that takes a ... list, and if @@ -140,7 +159,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // the old function, but doesn't have isVarArg set. FunctionType *FTy = Fn.getFunctionType(); - std::vector<Type*> Params(FTy->param_begin(), FTy->param_end()); + std::vector<Type *> Params(FTy->param_begin(), FTy->param_end()); FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); unsigned NumArgs = Params.size(); @@ -155,7 +174,7 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // Loop over all of the callers of the function, transforming the call sites // to pass in a smaller number of arguments into the new function. // - std::vector<Value*> Args; + std::vector<Value *> Args; for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) { CallSite CS(*I++); if (!CS) @@ -214,7 +233,6 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) { // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. While we're at // it, remove the dead arguments from the DeadArguments list. - // for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++I2) { // Move the name and users over to the new version. @@ -343,7 +361,6 @@ DeadArgumentEliminationPass::MarkIfNotLive(RetOrArg Use, return MaybeLive; } - /// SurveyUse - This looks at a single use of an argument or return value /// and determines if it should be alive or not. Adds this use to MaybeLiveUses /// if it causes the used value to become MaybeLive. @@ -460,7 +477,6 @@ DeadArgumentEliminationPass::SurveyUses(const Value *V, // // We consider arguments of non-internal functions to be intrinsically alive as // well as arguments to functions which have their "address taken". -// void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { // Functions with inalloca parameters are expecting args in a particular // register and memory layout. @@ -478,11 +494,14 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { } unsigned RetCount = NumRetVals(&F); + // Assume all return values are dead - typedef SmallVector<Liveness, 5> RetVals; + using RetVals = SmallVector<Liveness, 5>; + RetVals RetValLiveness(RetCount, MaybeLive); - typedef SmallVector<UseVector, 5> RetUses; + using RetUses = SmallVector<UseVector, 5>; + // These vectors map each return value to the uses that make it MaybeLive, so // we can add those to the Uses map if the return value really turns out to be // MaybeLive. Initialized to a list of RetCount empty lists. @@ -601,15 +620,15 @@ void DeadArgumentEliminationPass::SurveyFunction(const Function &F) { void DeadArgumentEliminationPass::MarkValue(const RetOrArg &RA, Liveness L, const UseVector &MaybeLiveUses) { switch (L) { - case Live: MarkLive(RA); break; + case Live: + MarkLive(RA); + break; case MaybeLive: - { // Note any uses of this value, so this return value can be // marked live whenever one of the uses becomes live. for (const auto &MaybeLiveUse : MaybeLiveUses) Uses.insert(std::make_pair(MaybeLiveUse, RA)); break; - } } } @@ -762,7 +781,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // One return type? Just a simple value then, but only if we didn't use to // return a struct with that simple value before. NRetTy = RetTypes.front(); - else if (RetTypes.size() == 0) + else if (RetTypes.empty()) // No return types? Make it void, but only if we didn't use to return {}. NRetTy = Type::getVoidTy(F->getContext()); }
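A source-level illustration of the return-type shrinking handled above — hedged intuition, not compiler code: if no caller ever reads part of an aggregate return, deadargelim narrows the return type, and when exactly one element stays live the "RetTypes.size() == 1" branch picks that scalar type directly.

    struct Pair { int A; int B; };
    Pair makeBoth(int X) { return {X, X + 1}; } // before: returns {i32, i32}

    // After the rewrite (conceptually), only the live piece remains:
    int makeLive(int X) { return X; }           // after: returns i32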
@@ -808,7 +827,6 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) { // Loop over all of the callers of the function, transforming the call sites // to pass in a smaller number of arguments into the new function. - // std::vector<Value*> Args; while (!F->use_empty()) { CallSite CS(F->user_back()); diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp index ecff88c88dcba..d5fef59286dd5 100644 --- a/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -1,5 +1,4 @@ -//===-- ElimAvailExtern.cpp - DCE unreachable internal functions -//----------------===// +//===- ElimAvailExtern.cpp - DCE unreachable internal functions -----------===// // // The LLVM Compiler Infrastructure // @@ -15,11 +14,15 @@ #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/ADT/Statistic.h" -#include "llvm/IR/Constants.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/GlobalStatus.h" + using namespace llvm; #define DEBUG_TYPE "elim-avail-extern" @@ -69,8 +72,10 @@ EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) { } namespace { + struct EliminateAvailableExternallyLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid + EliminateAvailableExternallyLegacyPass() : ModulePass(ID) { initializeEliminateAvailableExternallyLegacyPassPass( *PassRegistry::getPassRegistry()); @@ -78,16 +83,17 @@ struct EliminateAvailableExternallyLegacyPass : public ModulePass { // run - Do the EliminateAvailableExternally pass on the specified module, // optionally updating the specified callgraph to reflect the changes. - // - bool runOnModule(Module &M) { + bool runOnModule(Module &M) override { if (skipModule(M)) return false; return eliminateAvailableExternally(M); } }; -} + +} // end anonymous namespace char EliminateAvailableExternallyLegacyPass::ID = 0; + INITIALIZE_PASS(EliminateAvailableExternallyLegacyPass, "elim-avail-extern", "Eliminate Available Externally Globals", false, false) diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 92810c7d6cbaf..f9850619f9638 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -6,34 +6,61 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// /// \file /// This file implements interprocedural passes which walk the /// call-graph deducing and/or propagating function attributes.
-/// +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" +#include <cassert> +#include <iterator> +#include <map> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "functionattrs" @@ -57,8 +84,10 @@ static cl::opt<bool> EnableNonnullArgPropagation( "caller functions.")); namespace { -typedef SmallSetVector<Function *, 8> SCCNodeSet; -} + +using SCCNodeSet = SmallSetVector<Function *, 8>; + +} // end anonymous namespace /// Returns the memory access attribute for function F using AAR for AA results, /// where SCCNodes is the current SCC. @@ -237,6 +266,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) { } namespace { + /// For a given pointer Argument, this retains a list of Arguments of functions /// in the same SCC that the pointer data flows into. We use this to build an /// SCC of the arguments. @@ -248,7 +278,7 @@ struct ArgumentGraphNode { class ArgumentGraph { // We store pointers to ArgumentGraphNode objects, so it's important that // that they not move around upon insert. - typedef std::map<Argument *, ArgumentGraphNode> ArgumentMapTy; + using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>; ArgumentMapTy ArgumentMap; @@ -263,7 +293,7 @@ class ArgumentGraph { public: ArgumentGraph() { SyntheticRoot.Definition = nullptr; } - typedef SmallVectorImpl<ArgumentGraphNode *>::iterator iterator; + using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator; iterator begin() { return SyntheticRoot.Uses.begin(); } iterator end() { return SyntheticRoot.Uses.end(); } @@ -281,8 +311,7 @@ class ArgumentGraph { /// consider that a capture, instead adding it to the "Uses" list and /// continuing with the analysis.
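The typedef-to-using churn in these hunks follows the LLVM coding-standard preference of the time: both spellings introduce the same alias, but only the alias-declaration form extends to alias templates. A small self-contained illustration (the StringKeyed alias is hypothetical, not from the patch):

    #include <map>
    #include <string>

    // Equivalent plain aliases:
    typedef std::map<std::string, int> CountMapOld; // old style
    using CountMapNew = std::map<std::string, int>; // new style

    // Only the 'using' form can be parameterized:
    template <typename T> using StringKeyed = std::map<std::string, T>;
    StringKeyed<int> Counts; // same type as CountMapNew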
struct ArgumentUsesTracker : public CaptureTracker { - ArgumentUsesTracker(const SCCNodeSet &SCCNodes) - : Captured(false), SCCNodes(SCCNodes) {} + ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {} void tooManyUses() override { Captured = true; } @@ -331,37 +360,45 @@ struct ArgumentUsesTracker : public CaptureTracker { return false; } - bool Captured; // True only if certainly captured (used outside our SCC). - SmallVector<Argument *, 4> Uses; // Uses within our SCC. + // True only if certainly captured (used outside our SCC). + bool Captured = false; + + // Uses within our SCC. + SmallVector<Argument *, 4> Uses; const SCCNodeSet &SCCNodes; }; -} + +} // end anonymous namespace namespace llvm { + template <> struct GraphTraits<ArgumentGraphNode *> { - typedef ArgumentGraphNode *NodeRef; - typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType; + using NodeRef = ArgumentGraphNode *; + using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator; static NodeRef getEntryNode(NodeRef A) { return A; } static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); } static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); } }; + template <> struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> { static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); } + static ChildIteratorType nodes_begin(ArgumentGraph *AG) { return AG->begin(); } + static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } }; -} + +} // end namespace llvm /// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. static Attribute::AttrKind determinePointerReadAttrs(Argument *A, const SmallPtrSet<Argument *, 8> &SCCNodes) { - SmallVector<Use *, 32> Worklist; SmallSet<Use *, 32> Visited; @@ -502,8 +539,8 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) { continue; // There is nothing to do if an argument is already marked as 'returned'.
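The GraphTraits specializations above are what allow LLVM's generic graph algorithms (scc_iterator in this pass) to treat the argument graph as just another graph. A reduced sketch of the idiom with a hypothetical node type, assuming only the primary template from llvm/ADT/GraphTraits.h:

    #include "llvm/ADT/GraphTraits.h"
    #include "llvm/ADT/SmallVector.h"

    struct Node {
      llvm::SmallVector<Node *, 4> Succs;
    };

    namespace llvm {
    // Teach the graph algorithms how to find a node's successors.
    template <> struct GraphTraits<Node *> {
      using NodeRef = Node *;
      using ChildIteratorType = SmallVectorImpl<Node *>::iterator;
      static NodeRef getEntryNode(Node *N) { return N; }
      static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
      static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
    };
    } // end namespace llvm

With this in place, scc_begin on a Node pointer and the generic depth-first iterators work on such graphs unchanged.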
- if (any_of(F->args(), - [](const Argument &Arg) { return Arg.hasReturnedAttr(); })) + if (llvm::any_of(F->args(), + [](const Argument &Arg) { return Arg.hasReturnedAttr(); })) continue; auto FindRetArg = [&]() -> Value * { @@ -1137,8 +1174,11 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C, } namespace { + struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass { - static char ID; // Pass identification, replacement for typeid + // Pass identification, replacement for typeid + static char ID; + PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) { initializePostOrderFunctionAttrsLegacyPassPass( *PassRegistry::getPassRegistry()); @@ -1153,7 +1193,8 @@ struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass { CallGraphSCCPass::getAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char PostOrderFunctionAttrsLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "functionattrs", @@ -1216,8 +1257,11 @@ bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) { } namespace { + struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass { - static char ID; // Pass identification, replacement for typeid + // Pass identification, replacement for typeid + static char ID; + ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) { initializeReversePostOrderFunctionAttrsLegacyPassPass( *PassRegistry::getPassRegistry()); @@ -1231,9 +1275,11 @@ struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass { AU.addPreserved(); } }; -} + +} // end anonymous namespace char ReversePostOrderFunctionAttrsLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs", "Deduce function attributes in RPO", false, false) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) @@ -1293,7 +1339,7 @@ static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) { } bool Changed = false; - for (auto *F : reverse(Worklist)) + for (auto *F : llvm::reverse(Worklist)) Changed |= addNoRecurseAttrsTopDown(*F); return Changed; diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index 670a84862e0ad..3a1d6de342fe2 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -12,30 +12,51 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/FunctionImport.h" - +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSet.h" -#include "llvm/ADT/Triple.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/AutoUpgrade.h" -#include "llvm/IR/DiagnosticPrinter.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Verifier.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IRReader/IRReader.h" -#include "llvm/Linker/Linker.h" -#include "llvm/Object/IRObjectFile.h" +#include "llvm/Linker/IRMover.h" +#include "llvm/Object/ModuleSymbolTable.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include 
"llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" - -#define DEBUG_TYPE "function-import" +#include +#include +#include +#include +#include +#include +#include using namespace llvm; +#define DEBUG_TYPE "function-import" + STATISTIC(NumImportedFunctions, "Number of functions imported"); STATISTIC(NumImportedModules, "Number of modules imported from"); STATISTIC(NumDeadSymbols, "Number of dead stripped symbols in index"); @@ -91,6 +112,12 @@ static cl::opt EnableImportMetadata( ), cl::Hidden, cl::desc("Enable import metadata like 'thinlto_src_module'")); +/// Summary file to use for function importing when using -function-import from +/// the command line. +static cl::opt + SummaryFile("summary-file", + cl::desc("The summary file to use for function importing.")); + // Load lazily a module from \p FileName in \p Context. static std::unique_ptr loadFile(const std::string &FileName, LLVMContext &Context) { @@ -109,8 +136,6 @@ static std::unique_ptr loadFile(const std::string &FileName, return Result; } -namespace { - /// Given a list of possible callee implementation for a call site, select one /// that fits the \p Threshold. /// @@ -184,9 +209,13 @@ selectCallee(const ModuleSummaryIndex &Index, return cast(It->get()); } +namespace { + using EdgeInfo = std::tuple; +} // anonymous namespace + static ValueInfo updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) { if (!VI.getSummaryList().empty()) @@ -354,8 +383,6 @@ static void ComputeImportForModule( } } -} // anonymous namespace - /// Compute all the import and export for every module using the Index. void llvm::ComputeCrossModuleImport( const ModuleSummaryIndex &Index, @@ -409,7 +436,6 @@ void llvm::ComputeCrossModuleImport( void llvm::ComputeCrossModuleImportForModule( StringRef ModulePath, const ModuleSummaryIndex &Index, FunctionImporter::ImportMapTy &ImportList) { - // Collect the list of functions this module defines. // GUID -> Summary GVSummaryMapTy FunctionSummaryMap; @@ -663,12 +689,11 @@ void llvm::thinLTOInternalizeModule(Module &TheModule, // FIXME: See if we can just internalize directly here via linkage changes // based on the index, rather than invoking internalizeModule. - llvm::internalizeModule(TheModule, MustPreserveGV); + internalizeModule(TheModule, MustPreserveGV); } // Automatically import functions in Module \p DestModule based on the summaries // index. -// Expected FunctionImporter::importFunctions( Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) { DEBUG(dbgs() << "Starting import for Module " @@ -715,10 +740,9 @@ Expected FunctionImporter::importFunctions( // Add 'thinlto_src_module' metadata for statistics and debugging. F.setMetadata( "thinlto_src_module", - llvm::MDNode::get( - DestModule.getContext(), - {llvm::MDString::get(DestModule.getContext(), - SrcModule->getSourceFileName())})); + MDNode::get(DestModule.getContext(), + {MDString::get(DestModule.getContext(), + SrcModule->getSourceFileName())})); } GlobalsToImport.insert(&F); } @@ -779,12 +803,6 @@ Expected FunctionImporter::importFunctions( return ImportedCount; } -/// Summary file to use for function importing when using -function-import from -/// the command line. 
-static cl::opt - SummaryFile("summary-file", - cl::desc("The summary file to use for function importing.")); - static bool doImportingForModule(Module &M) { if (SummaryFile.empty()) report_fatal_error("error: -function-import requires -summary-file\n"); @@ -838,17 +856,18 @@ static bool doImportingForModule(Module &M) { } namespace { + /// Pass that performs cross-module function import provided a summary file. class FunctionImportLegacyPass : public ModulePass { public: /// Pass identification, replacement for typeid static char ID; + explicit FunctionImportLegacyPass() : ModulePass(ID) {} + /// Specify pass name for debug output StringRef getPassName() const override { return "Function Importing"; } - explicit FunctionImportLegacyPass() : ModulePass(ID) {} - bool runOnModule(Module &M) override { if (skipModule(M)) return false; @@ -856,7 +875,8 @@ class FunctionImportLegacyPass : public ModulePass { return doImportingForModule(M); } }; -} // anonymous namespace + +} // end anonymous namespace PreservedAnalyses FunctionImportPass::run(Module &M, ModuleAnalysisManager &AM) { @@ -871,7 +891,9 @@ INITIALIZE_PASS(FunctionImportLegacyPass, "function-import", "Summary Based Function Import", false, false) namespace llvm { + Pass *createFunctionImportPass() { return new FunctionImportLegacyPass(); } -} + +} // end namespace llvm diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index c91e8b454927f..1f354e8e3aa73 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -115,7 +115,7 @@ void GlobalDCEPass::UpdateGVDependencies(GlobalValue &GV) { ComputeDependencies(User, Deps); Deps.erase(&GV); // Remove self-reference. for (GlobalValue *GVU : Deps) { - GVDependencies.insert(std::make_pair(GVU, &GV)); + GVDependencies[GVU].insert(&GV); } } @@ -199,8 +199,8 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) { AliveGlobals.end()}; while (!NewLiveGVs.empty()) { GlobalValue *LGV = NewLiveGVs.pop_back_val(); - for (auto &&GVD : make_range(GVDependencies.equal_range(LGV))) - MarkLive(*GVD.second, &NewLiveGVs); + for (auto *GVD : GVDependencies[LGV]) + MarkLive(*GVD, &NewLiveGVs); } // Now that all globals which are needed are in the AliveGlobals set, we loop diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index e31bbc7fe575f..12090bff381a8 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -20,24 +20,41 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include 
"llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Pass.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -47,7 +64,11 @@ #include "llvm/Transforms/Utils/Evaluator.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Transforms/Utils/Local.h" -#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "globalopt" @@ -141,7 +162,7 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { } V = I->getOperand(0); - } while (1); + } while (true); } /// This GV is a pointer root. Loop over all users of the global and clean up @@ -222,7 +243,7 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, break; I->eraseFromParent(); I = J; - } while (1); + } while (true); I->eraseFromParent(); } } @@ -350,7 +371,6 @@ static bool isSafeSROAElementUse(Value *V) { return true; } - /// U is a direct user of the specified global value. Look at it and its uses /// and decide whether it is safe to SROA this global. static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { @@ -436,7 +456,6 @@ static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV, } } - /// Perform scalar replacement of aggregates on the specified global variable. /// This opens the door for other optimizations by exposing the behavior of the /// program in a more fine-grained way. We have determined that this @@ -451,7 +470,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { Constant *Init = GV->getInitializer(); Type *Ty = Init->getType(); - std::vector NewGlobals; + std::vector NewGlobals; Module::GlobalListType &Globals = GV->getParent()->getGlobalList(); // Get the alignment of the global, either explicit or target-specific. @@ -717,7 +736,6 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { return Changed; } - /// The specified global has only one non-null value stored into it. If there /// are uses of the loaded value that would trap if the loaded value is /// dynamically null, then we know that they cannot be reachable with a null @@ -1073,7 +1091,6 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, return true; } - /// If all users of values loaded from GV are simple enough to perform HeapSRA, /// return true. static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, @@ -1123,9 +1140,9 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, } static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, - DenseMap > &InsertedScalarizedValues, - std::vector > &PHIsToRewrite) { - std::vector &FieldVals = InsertedScalarizedValues[V]; + DenseMap> &InsertedScalarizedValues, + std::vector> &PHIsToRewrite) { + std::vector &FieldVals = InsertedScalarizedValues[V]; if (FieldNo >= FieldVals.size()) FieldVals.resize(FieldNo+1); @@ -1167,8 +1184,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, /// Given a load instruction and a value derived from the load, rewrite the /// derived value to use the HeapSRoA'd load. 
static void RewriteHeapSROALoadUser(Instruction *LoadUser, - DenseMap > &InsertedScalarizedValues, - std::vector > &PHIsToRewrite) { + DenseMap> &InsertedScalarizedValues, + std::vector> &PHIsToRewrite) { // If this is a comparison against null, handle it. if (ICmpInst *SCI = dyn_cast(LoadUser)) { assert(isa(SCI->getOperand(1))); @@ -1215,7 +1232,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, // processed. PHINode *PN = cast(LoadUser); if (!InsertedScalarizedValues.insert(std::make_pair(PN, - std::vector())).second) + std::vector())).second) return; // If this is the first time we've seen this PHI, recursively process all @@ -1230,8 +1247,8 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, /// global. Eliminate all uses of Ptr, making them use FieldGlobals instead. /// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA. static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, - DenseMap > &InsertedScalarizedValues, - std::vector > &PHIsToRewrite) { + DenseMap> &InsertedScalarizedValues, + std::vector > &PHIsToRewrite) { for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) { Instruction *User = cast(*UI++); RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); @@ -1260,8 +1277,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Okay, at this point, there are no users of the malloc. Insert N // new mallocs at the same place as CI, and N globals. - std::vector FieldGlobals; - std::vector FieldMallocs; + std::vector FieldGlobals; + std::vector FieldMallocs; SmallVector OpBundles; CI->getOperandBundlesAsDefs(OpBundles); @@ -1358,10 +1375,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, /// As we process loads, if we can't immediately update all uses of the load, /// keep track of what scalarized loads are inserted for a given load. - DenseMap > InsertedScalarizedValues; + DenseMap> InsertedScalarizedValues; InsertedScalarizedValues[GV] = FieldGlobals; - std::vector > PHIsToRewrite; + std::vector> PHIsToRewrite; // Okay, the malloc site is completely handled. All of the uses of GV are now // loads, and all uses of those loads are simple. Rewrite them to use loads @@ -1407,7 +1424,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, } // Drop all inter-phi links and any loads that made it this far. - for (DenseMap >::iterator + for (DenseMap>::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); I != E; ++I) { if (PHINode *PN = dyn_cast(I->first)) @@ -1417,7 +1434,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, } // Delete all the phis and loads now that inter-references are dead. - for (DenseMap >::iterator + for (DenseMap>::iterator I = InsertedScalarizedValues.begin(), E = InsertedScalarizedValues.end(); I != E; ++I) { if (PHINode *PN = dyn_cast(I->first)) @@ -2275,7 +2292,7 @@ static void setUsedInitializer(GlobalVariable &V, // Type of pointer to the array of pointers. 
PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0); - SmallVector UsedArray; + SmallVector UsedArray; for (GlobalValue *GV : Init) { Constant *Cast = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy); @@ -2288,14 +2305,15 @@ static void setUsedInitializer(GlobalVariable &V, Module *M = V.getParent(); V.removeFromParent(); GlobalVariable *NV = - new GlobalVariable(*M, ATy, false, llvm::GlobalValue::AppendingLinkage, - llvm::ConstantArray::get(ATy, UsedArray), ""); + new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage, + ConstantArray::get(ATy, UsedArray), ""); NV->takeName(&V); NV->setSection("llvm.metadata"); delete &V; } namespace { + /// An easy to access representation of llvm.used and llvm.compiler.used. class LLVMUsed { SmallPtrSet Used; @@ -2308,25 +2326,34 @@ class LLVMUsed { UsedV = collectUsedGlobalVariables(M, Used, false); CompilerUsedV = collectUsedGlobalVariables(M, CompilerUsed, true); } - typedef SmallPtrSet::iterator iterator; - typedef iterator_range used_iterator_range; + + using iterator = SmallPtrSet::iterator; + using used_iterator_range = iterator_range; + iterator usedBegin() { return Used.begin(); } iterator usedEnd() { return Used.end(); } + used_iterator_range used() { return used_iterator_range(usedBegin(), usedEnd()); } + iterator compilerUsedBegin() { return CompilerUsed.begin(); } iterator compilerUsedEnd() { return CompilerUsed.end(); } + used_iterator_range compilerUsed() { return used_iterator_range(compilerUsedBegin(), compilerUsedEnd()); } + bool usedCount(GlobalValue *GV) const { return Used.count(GV); } + bool compilerUsedCount(GlobalValue *GV) const { return CompilerUsed.count(GV); } + bool usedErase(GlobalValue *GV) { return Used.erase(GV); } bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); } bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; } + bool compilerUsedInsert(GlobalValue *GV) { return CompilerUsed.insert(GV).second; } @@ -2338,7 +2365,8 @@ class LLVMUsed { setUsedInitializer(*CompilerUsedV, CompilerUsed); } }; -} + +} // end anonymous namespace static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) { if (GA.use_empty()) // No use at all. 
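The setUsedInitializer hunk above is easier to read with the shape of llvm.used in mind: it must be an appending-linkage array of i8* placed in the llvm.metadata section, so changing its contents means materializing a new global. A simplified sketch of that construction (the function name and small-vector size are illustrative):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static void emitUsedArray(Module &M, ArrayRef<GlobalValue *> Values) {
      PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
      SmallVector<Constant *, 8> Elems;
      for (GlobalValue *GV : Values)
        Elems.push_back(
            ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy));
      ArrayType *ATy = ArrayType::get(Int8PtrTy, Elems.size());
      auto *NV = new GlobalVariable(M, ATy, /*isConstant=*/false,
                                    GlobalValue::AppendingLinkage,
                                    ConstantArray::get(ATy, Elems), "llvm.used");
      NV->setSection("llvm.metadata");
    }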
@@ -2653,8 +2681,10 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) { } namespace { + struct GlobalOptLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid + GlobalOptLegacyPass() : ModulePass(ID) { initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -2676,9 +2706,11 @@ struct GlobalOptLegacyPass : public ModulePass { AU.addRequired(); } }; -} + +} // end anonymous namespace char GlobalOptLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt", "Global Variable Optimizer", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) diff --git a/lib/Transforms/IPO/GlobalSplit.cpp b/lib/Transforms/IPO/GlobalSplit.cpp index e47d881d1127a..792f4b3052a3f 100644 --- a/lib/Transforms/IPO/GlobalSplit.cpp +++ b/lib/Transforms/IPO/GlobalSplit.cpp @@ -15,22 +15,30 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/GlobalSplit.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/IPO.h" - -#include +#include +#include using namespace llvm; -namespace { - -bool splitGlobal(GlobalVariable &GV) { +static bool splitGlobal(GlobalVariable &GV) { // If the address of the global is taken outside of the module, we cannot // apply this transformation. if (!GV.hasLocalLinkage()) @@ -130,7 +138,7 @@ bool splitGlobal(GlobalVariable &GV) { return true; } -bool splitGlobals(Module &M) { +static bool splitGlobals(Module &M) { // First, see if the module uses either of the llvm.type.test or // llvm.type.checked.load intrinsics, which indicates that splitting globals // may be beneficial. 
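The GlobalSplit changes above are stylistic: LLVM's coding standards prefer marking file-local functions static (visible at the definition itself) and keeping anonymous namespaces as small as possible, ideally for type definitions only. In miniature:

    namespace {
    // Types that must not leak out of the translation unit stay here...
    struct Counter {
      int N = 0;
    };
    } // end anonymous namespace

    // ...while file-local free functions are simply marked 'static'.
    static int bump(Counter &C) { return ++C.N; }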
@@ -151,12 +159,16 @@ bool splitGlobals(Module &M) { return Changed; } +namespace { + struct GlobalSplit : public ModulePass { static char ID; + GlobalSplit() : ModulePass(ID) { initializeGlobalSplitPass(*PassRegistry::getPassRegistry()); } - bool runOnModule(Module &M) { + + bool runOnModule(Module &M) override { if (skipModule(M)) return false; @@ -164,11 +176,12 @@ struct GlobalSplit : public ModulePass { } }; -} +} // end anonymous namespace -INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false) char GlobalSplit::ID = 0; +INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false) + ModulePass *llvm::createGlobalSplitPass() { return new GlobalSplit; } diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 5bb305ca84d03..d5d35ee89e0eb 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -25,6 +25,7 @@ using namespace llvm; void llvm::initializeIPO(PassRegistry &Registry) { initializeArgPromotionPass(Registry); + initializeCalledValuePropagationLegacyPassPass(Registry); initializeConstantMergeLegacyPassPass(Registry); initializeCrossDSOCFIPass(Registry); initializeDAEPass(Registry); @@ -67,6 +68,10 @@ void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createArgumentPromotionPass()); } +void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createCalledValuePropagationPass()); +} + void LLVMAddConstantMergePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createConstantMergePass()); } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 680a450c1c9a4..4449c87ddefa4 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -14,29 +14,60 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/Inliner.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" +#include 
"llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "inline" @@ -63,13 +94,16 @@ static cl::opt cl::init(false), cl::Hidden); namespace { + enum class InlinerFunctionImportStatsOpts { No = 0, Basic = 1, Verbose = 2, }; -cl::opt InlinerFunctionImportStats( +} // end anonymous namespace + +static cl::opt InlinerFunctionImportStats( "inliner-function-import-stats", cl::init(InlinerFunctionImportStatsOpts::No), cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic", @@ -77,10 +111,8 @@ cl::opt InlinerFunctionImportStats( clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose", "printing of statistics for each inlined function")), cl::Hidden, cl::desc("Enable inliner stats for imported functions")); -} // namespace -LegacyInlinerBase::LegacyInlinerBase(char &ID) - : CallGraphSCCPass(ID), InsertLifetime(true) {} +LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} @@ -96,7 +128,7 @@ void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const { CallGraphSCCPass::getAnalysisUsage(AU); } -typedef DenseMap> InlinedArrayAllocasTy; +using InlinedArrayAllocasTy = DenseMap>; /// Look at all of the allocas that we inlined through this call site. If we /// have already inlined other allocas through other calls into this function, @@ -161,7 +193,6 @@ static void mergeInlinedArrayAllocas( // function. Also, AllocasForType can be empty of course! bool MergedAwayAlloca = false; for (AllocaInst *AvailableAlloca : AllocasForType) { - unsigned Align1 = AI->getAlignment(), Align2 = AvailableAlloca->getAlignment(); @@ -267,7 +298,6 @@ static bool shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC, int &TotalSecondaryCost, function_ref GetInlineCost) { - // For now we only handle local or inline functions. if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage()) return false; @@ -342,6 +372,7 @@ static Optional shouldInline(CallSite CS, function_ref GetInlineCost, OptimizationRemarkEmitter &ORE) { using namespace ore; + InlineCost IC = GetInlineCost(CS); Instruction *Call = CS.getInstruction(); Function *Callee = CS.getCalledFunction(); @@ -384,11 +415,13 @@ shouldInline(CallSite CS, function_ref GetInlineCost, DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << " Cost = " << IC.getCost() << ", outer Cost = " << TotalSecondaryCost << '\n'); - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts", Call) << "Not inlining. Cost of inlining " << NV("Callee", Callee) << " increases the cost of inlining " << NV("Caller", Caller) - << " in other contexts"); + << " in other contexts"; + }); // IC does not bool() to false, so get an InlineCost that will. // This will not be inspected to make an error message. 
@@ -476,11 +509,14 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, if (Function *Callee = CS.getCalledFunction()) if (Callee->isDeclaration()) { using namespace ore; - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) + + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) << NV("Callee", Callee) << " will not be inlined into " << NV("Caller", CS.getCaller()) << " because its definition is unavailable" - << setIsVerbose()); + << setIsVerbose(); + }); continue; } @@ -569,30 +605,35 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, // Attempt to inline the function. using namespace ore; + if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime, AARGetter, ImportedFunctionsStats)) { - ORE.emit( - OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) - << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", Caller)); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, + Block) + << NV("Callee", Callee) << " will not be inlined into " + << NV("Caller", Caller); + }); continue; } ++NumInlined; - if (OIC->isAlways()) - ORE.emit(OptimizationRemark(DEBUG_TYPE, "AlwaysInline", DLoc, Block) - << NV("Callee", Callee) << " inlined into " - << NV("Caller", Caller) << " with cost=always"); - else - ORE.emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Inlined", DLoc, Block) - << NV("Callee", Callee) << " inlined into " - << NV("Caller", Caller) - << " with cost=" << NV("Cost", OIC->getCost()) - << " (threshold=" << NV("Threshold", OIC->getThreshold()) - << ")"; - }); + ORE.emit([&]() { + bool AlwaysInline = OIC->isAlways(); + StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined"; + OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block); + R << NV("Callee", Callee) << " inlined into "; + R << NV("Caller", Caller); + if (AlwaysInline) + R << " with cost=always"; + else { + R << " with cost=" << NV("Cost", OIC->getCost()); + R << " (threshold=" << NV("Threshold", OIC->getThreshold()); + R << ")"; + } + return R; + }); // If inlining this function gave us any new call sites, throw them // onto our worklist to process. They are useful inline candidates. @@ -612,7 +653,6 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() && // TODO: Can remove if in SCC now. !SCCFunctions.count(Callee) && - // The function may be apparently dead, but if there are indirect // callgraph references to the node, we cannot delete it yet, this // could invalidate the CGSCC iterator. 
@@ -914,26 +954,33 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, BasicBlock *Block = CS.getParent(); using namespace ore; + if (!InlineFunction(CS, IFI)) { - ORE.emit( - OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) - << NV("Callee", &Callee) << " will not be inlined into " - << NV("Caller", &F)); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) + << NV("Callee", &Callee) << " will not be inlined into " + << NV("Caller", &F); + }); continue; } DidInline = true; InlinedCallees.insert(&Callee); - if (OIC->isAlways()) - ORE.emit(OptimizationRemark(DEBUG_TYPE, "AlwaysInline", DLoc, Block) - << NV("Callee", &Callee) << " inlined into " - << NV("Caller", &F) << " with cost=always"); - else - ORE.emit( - OptimizationRemark(DEBUG_TYPE, "Inlined", DLoc, Block) - << NV("Callee", &Callee) << " inlined into " << NV("Caller", &F) - << " with cost=" << NV("Cost", OIC->getCost()) - << " (threshold=" << NV("Threshold", OIC->getThreshold()) << ")"); + ORE.emit([&]() { + bool AlwaysInline = OIC->isAlways(); + StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined"; + OptimizationRemark R(DEBUG_TYPE, RemarkName, DLoc, Block); + R << NV("Callee", &Callee) << " inlined into "; + R << NV("Caller", &F); + if (AlwaysInline) + R << " with cost=always"; + else { + R << " with cost=" << NV("Cost", OIC->getCost()); + R << " (threshold=" << NV("Threshold", OIC->getThreshold()); + R << ")"; + } + return R; + }); // Add any new callsites to defined functions to the worklist. if (!IFI.InlinedCallSites.empty()) { diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp index 027f426649c40..9fa5ed9ab2b8d 100644 --- a/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/lib/Transforms/IPO/LowerTypeTests.cpp @@ -1,4 +1,4 @@ -//===-- LowerTypeTests.cpp - type metadata lowering pass ------------------===// +//===- LowerTypeTests.cpp - type metadata lowering pass -------------------===// // // The LLVM Compiler Infrastructure // @@ -13,32 +13,70 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/LowerTypeTests.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TypeMetadataUtils.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/ModuleSummaryIndexYAML.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include 
"llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/TrailingObjects.h" +#include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace lowertypetests; @@ -206,16 +244,19 @@ struct ByteArrayInfo { /// operation involving a map lookup; this data structure helps to reduce the /// number of times we need to do this lookup. class GlobalTypeMember final : TrailingObjects { + friend TrailingObjects; + GlobalObject *GO; size_t NTypes; + // For functions: true if this is a definition (either in the merged module or // in one of the thinlto modules). bool IsDefinition; + // For functions: true if this function is either defined or used in a thinlto // module and its jumptable entry needs to be exported to thinlto backends. bool IsExported; - friend TrailingObjects; size_t numTrailingObjects(OverloadToken) const { return NTypes; } public: @@ -232,15 +273,19 @@ class GlobalTypeMember final : TrailingObjects { GTM->getTrailingObjects()); return GTM; } + GlobalObject *getGlobal() const { return GO; } + bool isDefinition() const { return IsDefinition; } + bool isExported() const { return IsExported; } + ArrayRef types() const { return makeArrayRef(getTrailingObjects(), NTypes); } @@ -259,6 +304,7 @@ class LowerTypeTestsModule { IntegerType *Int1Ty = Type::getInt1Ty(M.getContext()); IntegerType *Int8Ty = Type::getInt8Ty(M.getContext()); PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext()); + ArrayType *Int8Arr0Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), 0); IntegerType *Int32Ty = Type::getInt32Ty(M.getContext()); PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty); IntegerType *Int64Ty = Type::getInt64Ty(M.getContext()); @@ -353,6 +399,7 @@ class LowerTypeTestsModule { public: LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary, const ModuleSummaryIndex *ImportSummary); + bool lower(); // Lower the module using the action and summary passed as command line @@ -388,11 +435,12 @@ struct LowerTypeTests : public ModulePass { } }; -} // anonymous namespace +} // end anonymous namespace + +char LowerTypeTests::ID = 0; INITIALIZE_PASS(LowerTypeTests, "lowertypetests", "Lower type metadata", false, false) -char LowerTypeTests::ID = 0; ModulePass * llvm::createLowerTypeTestsPass(ModuleSummaryIndex *ExportSummary, @@ -803,10 +851,13 @@ LowerTypeTestsModule::importTypeId(StringRef TypeId) { TIL.TheKind = TTRes.TheKind; auto ImportGlobal = [&](StringRef Name) { - Constant *C = - M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(), Int8Ty); + // Give the global a type of length 0 so that it is not assumed not to alias + // with any other global. + Constant *C = M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(), + Int8Arr0Ty); if (auto *GV = dyn_cast(C)) GV->setVisibility(GlobalValue::HiddenVisibility); + C = ConstantExpr::getBitCast(C, Int8PtrTy); return C; }; @@ -1188,7 +1239,7 @@ void LowerTypeTestsModule::createJumpTable( // Luckily, this function does not get any prologue even without the // attribute. 
if (OS != Triple::Win32) - F->addFnAttr(llvm::Attribute::Naked); + F->addFnAttr(Attribute::Naked); if (JumpTableArch == Triple::arm) F->addFnAttr("target-features", "-thumb-mode"); if (JumpTableArch == Triple::thumb) { @@ -1397,7 +1448,7 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM( void LowerTypeTestsModule::buildBitSetsFromDisjointSet( ArrayRef TypeIds, ArrayRef Globals) { - llvm::DenseMap TypeIdIndices; + DenseMap TypeIdIndices; for (unsigned I = 0; I != TypeIds.size(); ++I) TypeIdIndices[TypeIds[I]] = I; @@ -1551,8 +1602,8 @@ bool LowerTypeTestsModule::lower() { // Equivalence class set containing type identifiers and the globals that // reference them. This is used to partition the set of type identifiers in // the module into disjoint sets. - typedef EquivalenceClasses> - GlobalClassesTy; + using GlobalClassesTy = + EquivalenceClasses>; GlobalClassesTy GlobalClasses; // Verify the type metadata and build a few data structures to let us @@ -1567,7 +1618,7 @@ bool LowerTypeTestsModule::lower() { unsigned Index; std::vector RefGlobals; }; - llvm::DenseMap TypeIdInfo; + DenseMap TypeIdInfo; unsigned I = 0; SmallVector Types; diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index a43b69f341301..76b90391fbb1b 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -89,28 +89,45 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/FunctionComparator.h" +#include +#include +#include +#include +#include #include using namespace llvm; @@ -153,10 +170,12 @@ namespace { class FunctionNode { mutable AssertingVH F; FunctionComparator::FunctionHash Hash; + public: // Note the hash is recalculated potentially multiple times, but it is cheap. FunctionNode(Function *F) : F(F), Hash(FunctionComparator::functionHash(*F)) {} + Function *getFunc() const { return F; } FunctionComparator::FunctionHash getHash() const { return Hash; } @@ -173,12 +192,12 @@ class FunctionNode { /// by considering all pointer types to be equivalent. Once identified, /// MergeFunctions will fold them by replacing a call to one to a call to a /// bitcast of the other. 
-/// class MergeFunctions : public ModulePass { public: static char ID; + MergeFunctions() - : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree() { + : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)) { initializeMergeFunctionsPass(*PassRegistry::getPassRegistry()); } @@ -189,8 +208,10 @@ class MergeFunctions : public ModulePass { // not need to become larger with another pointer. class FunctionNodeCmp { GlobalNumberState* GlobalNumbers; + public: FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {} + bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const { // Order first by hashes, then full function comparison. if (LHS.getHash() != RHS.getHash()) @@ -199,7 +220,7 @@ class MergeFunctions : public ModulePass { return FCmp.compare() == -1; } }; - typedef std::set FnTreeType; + using FnTreeType = std::set; GlobalNumberState GlobalNumbers; @@ -207,9 +228,9 @@ class MergeFunctions : public ModulePass { /// analyzed again. std::vector Deferred; +#ifndef NDEBUG /// Checks the rules of order relation introduced among functions set. /// Returns true, if sanity check has been passed, and false if failed. -#ifndef NDEBUG bool doSanityCheck(std::vector &Worklist); #endif @@ -257,6 +278,7 @@ class MergeFunctions : public ModulePass { /// The set of all distinct functions. Use the insert() and remove() methods /// to modify it. The map allows efficient lookup and deferring of Functions. FnTreeType FnTree; + // Map functions to the iterators of the FunctionNode which contains them // in the FnTree. This must be updated carefully whenever the FnTree is // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid @@ -268,6 +290,7 @@ class MergeFunctions : public ModulePass { } // end anonymous namespace char MergeFunctions::ID = 0; + INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false) ModulePass *llvm::createMergeFunctionsPass() { @@ -475,7 +498,6 @@ static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) { // parameter debug info, from the entry block. void MergeFunctions::eraseInstsUnrelatedToPDI( std::vector &PDIUnrelatedWL) { - DEBUG(dbgs() << " Erasing instructions (in reverse order of appearance in " "entry block) unrelated to parameter debug info from entry " "block: {\n"); @@ -493,7 +515,6 @@ void MergeFunctions::eraseInstsUnrelatedToPDI( // Reduce G to its entry block. void MergeFunctions::eraseTail(Function *G) { - std::vector WorklistBB; for (Function::iterator BBI = std::next(G->begin()), BBE = G->end(); BBI != BBE; ++BBI) { @@ -518,7 +539,6 @@ void MergeFunctions::eraseTail(Function *G) { // PDIUnrelatedWL with such instructions. void MergeFunctions::filterInstsUnrelatedToPDI( BasicBlock *GEntryBlock, std::vector &PDIUnrelatedWL) { - std::set PDIRelated; for (BasicBlock::iterator BI = GEntryBlock->begin(), BIE = GEntryBlock->end(); BI != BIE; ++BI) { @@ -628,9 +648,18 @@ void MergeFunctions::filterInstsUnrelatedToPDI( // call sites to point to F even when within the same translation unit. void MergeFunctions::writeThunk(Function *F, Function *G) { if (!G->isInterposable() && !MergeFunctionsPDI) { - // Redirect direct callers of G to F. (See note on MergeFunctionsPDI - // above). - replaceDirectCallers(G, F); + if (G->hasGlobalUnnamedAddr()) { + // G might have been a key in our GlobalNumberState, and it's illegal + // to replace a key in ValueMap with a non-global. + GlobalNumbers.erase(G); + // If G's address is not significant, replace it entirely. 
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType()); + G->replaceAllUsesWith(BitcastF); + } else { + // Redirect direct callers of G to F. (See note on MergeFunctionsPDI + // above). + replaceDirectCallers(G, F); + } } // If G was internal then we may have replaced all uses of G with F. If so, @@ -641,6 +670,16 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { return; } + // Don't merge tiny functions using a thunk, since it can just end up + // making the function larger. + if (F->size() == 1) { + if (F->front().size() <= 2) { + DEBUG(dbgs() << "writeThunk: " << F->getName() + << " is too small to bother creating a thunk for\n"); + return; + } + } + BasicBlock *GEntryBlock = nullptr; std::vector PDIUnrelatedWL; BasicBlock *BB = nullptr; @@ -667,7 +706,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { SmallVector Args; unsigned i = 0; FunctionType *FFTy = F->getFunctionType(); - for (Argument & AI : H->args()) { + for (Argument &AI : H->args()) { Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i))); ++i; } @@ -773,18 +812,6 @@ bool MergeFunctions::insert(Function *NewFunction) { const FunctionNode &OldF = *Result.first; - // Don't merge tiny functions, since it can just end up making the function - // larger. - // FIXME: Should still merge them if they are unnamed_addr and produce an - // alias. - if (NewFunction->size() == 1) { - if (NewFunction->front().size() <= 2) { - DEBUG(dbgs() << NewFunction->getName() - << " is to small to bother merging\n"); - return false; - } - } - // Impose a total order (by name) on the replacement of functions. This is // important when operating on more than one module independently to prevent // cycles of thunks calling each other when the modules are linked together. 
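MergeFunctions' FnTree, visible throughout these hunks, orders FunctionNode entries by a cheap precomputed hash and falls back to the full FunctionComparator only on hash collisions, so most tree probes never run the expensive comparison. A standalone analogue of that comparator design (Node and its string body are stand-ins for functions):

    #include <cstdint>
    #include <functional>
    #include <set>
    #include <string>

    struct Node {
      std::string Body; // stands in for the function body
      uint64_t Hash;    // cheap, computed once
      explicit Node(std::string B)
          : Body(std::move(B)), Hash(std::hash<std::string>{}(Body)) {}
    };

    struct NodeCmp {
      bool operator()(const Node &L, const Node &R) const {
        if (L.Hash != R.Hash) // resolves almost every comparison
          return L.Hash < R.Hash;
        return L.Body < R.Body; // full compare, rarely reached
      }
    };

    int main() {
      std::set<Node, NodeCmp> Tree;
      Tree.emplace("ret i32 0");
      auto Result = Tree.emplace("ret i32 0"); // duplicate: not inserted
      return Result.second ? 1 : 0;
    }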
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index c7fe4a3dc8d6d..b5267f75e417f 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -13,26 +13,54 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/PartialInlining.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" +#include "llvm/IR/User.h" #include "llvm/Pass.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/CodeExtractor.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include +#include +#include +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "partial-inlining" @@ -44,6 +72,7 @@ STATISTIC(NumPartialInlined, static cl::opt DisablePartialInlining("disable-partial-inlining", cl::init(false), cl::Hidden, cl::desc("Disable partial ininling")); + // This is an option used by testing: static cl::opt SkipCostAnalysis("skip-partial-inlining-cost-analysis", cl::init(false), cl::ZeroOrMore, @@ -76,9 +105,8 @@ static cl::opt ExtraOutliningPenalty( namespace { struct FunctionOutliningInfo { - FunctionOutliningInfo() - : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr), - ReturnBlockPreds() {} + FunctionOutliningInfo() = default; + // Returns the number of blocks to be inlined including all blocks // in Entries and one return block. unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; } @@ -86,10 +114,13 @@ struct FunctionOutliningInfo { // A set of blocks including the function entry that guard // the region to be outlined. SmallVector Entries; + // The return block that is not included in the outlined region. - BasicBlock *ReturnBlock; + BasicBlock *ReturnBlock = nullptr; + // The dominating block of the region to be outlined. 
- BasicBlock *NonReturnBlock; + BasicBlock *NonReturnBlock = nullptr; + // The set of blocks in Entries that that are predecessors to ReturnBlock SmallVector ReturnBlockPreds; }; @@ -101,6 +132,7 @@ struct PartialInlinerImpl { Optional> GBFI, ProfileSummaryInfo *ProfSI) : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {} + bool run(Module &M); Function *unswitchFunction(Function *F); @@ -197,17 +229,18 @@ struct PartialInlinerImpl { // - The second value is the estimated size of the new call sequence in // basic block Cloner.OutliningCallBB; std::tuple computeOutliningCosts(FunctionCloner &Cloner); + // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to // approximate both the size and runtime cost (Note that in the current // inline cost analysis, there is no clear distinction there either). static int computeBBInlineCost(BasicBlock *BB); std::unique_ptr computeOutliningInfo(Function *F); - }; struct PartialInlinerLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid + PartialInlinerLegacyPass() : ModulePass(ID) { initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -217,6 +250,7 @@ struct PartialInlinerLegacyPass : public ModulePass { AU.addRequired(); AU.addRequired(); } + bool runOnModule(Module &M) override { if (skipModule(M)) return false; @@ -240,7 +274,8 @@ struct PartialInlinerLegacyPass : public ModulePass { return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M); } }; -} + +} // end anonymous namespace std::unique_ptr PartialInlinerImpl::computeOutliningInfo(Function *F) { @@ -320,7 +355,6 @@ PartialInlinerImpl::computeOutliningInfo(Function *F) { OutliningInfo->Entries.push_back(CurrEntry); CurrEntry = OtherSucc; - } while (true); if (!CandidateFound) @@ -414,7 +448,6 @@ static bool hasProfileData(Function *F, FunctionOutliningInfo *OI) { BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) { - auto EntryFreq = Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock()); auto OutliningCallFreq = @@ -451,8 +484,8 @@ PartialInlinerImpl::getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) { bool PartialInlinerImpl::shouldPartialInline( CallSite CS, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost, OptimizationRemarkEmitter &ORE) { - using namespace ore; + if (SkipCostAnalysis) return true; @@ -466,26 +499,32 @@ bool PartialInlinerImpl::shouldPartialInline( *GetAssumptionCache, GetBFI, PSI, &ORE); if (IC.isAlways()) { - ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) + ORE.emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) << NV("Callee", Cloner.OrigFunc) - << " should always be fully inlined, not partially"); + << " should always be fully inlined, not partially"; + }); return false; } if (IC.isNever()) { - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) - << " because it should never be inlined (cost=never)"); + << " because it should never be inlined (cost=never)"; + }); return false; } if (!IC) { - ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call) + ORE.emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call) << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " << NV("Caller", Caller) << " 
because too costly to inline (cost=" << NV("Cost", IC.getCost()) << ", threshold=" - << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"; + }); return false; } const DataLayout &DL = Caller->getParent()->getDataLayout(); @@ -496,23 +535,28 @@ bool PartialInlinerImpl::shouldPartialInline( // Weighted saving is smaller than weighted cost, return false if (NormWeightedSavings < WeightedOutliningRcost) { - ORE.emit( - OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", Call) - << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " - << NV("Caller", Caller) << " runtime overhead (overhead=" - << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) - << ", savings=" - << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) << ")" - << " of making the outlined call is too high"); + ORE.emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutliningCallcostTooHigh", + Call) + << NV("Callee", Cloner.OrigFunc) << " not partially inlined into " + << NV("Caller", Caller) << " runtime overhead (overhead=" + << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency()) + << ", savings=" + << NV("Savings", (unsigned)NormWeightedSavings.getFrequency()) + << ")" + << " of making the outlined call is too high"; + }); return false; } - ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) + ORE.emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into " << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) << " (threshold=" - << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); + << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"; + }); return true; } @@ -567,7 +611,6 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { std::tuple PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) { - // Now compute the cost of the call sequence to the outlined function // 'OutlinedFunction' in BB 'OutliningCallBB': int OutliningFuncCallCost = computeBBInlineCost(Cloner.OutliningCallBB); @@ -661,7 +704,6 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(Function *F, } void PartialInlinerImpl::FunctionCloner::NormalizeReturnBlock() { - auto getFirstPHI = [](BasicBlock *BB) { BasicBlock::iterator I = BB->begin(); PHINode *FirstPhi = nullptr; @@ -798,7 +840,6 @@ PartialInlinerImpl::FunctionCloner::~FunctionCloner() { } Function *PartialInlinerImpl::unswitchFunction(Function *F) { - if (F->hasAddressTaken()) return nullptr; @@ -853,13 +894,15 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { DebugLoc DLoc; BasicBlock *Block; std::tie(DLoc, Block) = getOneDebugLoc(Cloner.ClonedFunc); - ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall", + ORE.emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "OutlineRegionTooSmall", DLoc, Block) << ore::NV("Function", Cloner.OrigFunc) << " not partially inlined into callers (Original Size = " << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost) << ", Size of call sequence to outlined function = " - << ore::NV("NewSize", SizeCost) << ")"); + << ore::NV("NewSize", SizeCost) << ")"; + }); return false; } @@ -888,10 +931,12 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { if (!shouldPartialInline(CS, Cloner, WeightedRcost, ORE)) continue; - ORE.emit( - OptimizationRemark(DEBUG_TYPE, 
"PartiallyInlined", CS.getInstruction()) - << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " - << ore::NV("Caller", CS.getCaller())); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", + CS.getInstruction()) + << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " + << ore::NV("Caller", CS.getCaller()); + }); InlineFunctionInfo IFI(nullptr, GetAssumptionCache, PSI); InlineFunction(CS, IFI); @@ -955,6 +1000,7 @@ bool PartialInlinerImpl::run(Module &M) { } char PartialInlinerLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index b38462913c485..35ca107c3259f 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -94,15 +94,6 @@ static cl::opt EnableLoopInterchange( "enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the new, experimental LoopInterchange Pass")); -static cl::opt EnableNonLTOGlobalsModRef( - "enable-non-lto-gmr", cl::init(true), cl::Hidden, - cl::desc( - "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); - -static cl::opt EnableLoopLoadElim( - "enable-loop-load-elim", cl::init(true), cl::Hidden, - cl::desc("Enable the LoopLoadElimination Pass")); - static cl::opt EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, cl::desc("Enable preparation for ThinLTO.")); @@ -160,7 +151,6 @@ PassManagerBuilder::PassManagerBuilder() { SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; - DisableUnitAtATime = false; DisableUnrollLoops = false; SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; @@ -464,23 +454,22 @@ void PassManagerBuilder::populateModulePassManager( if (PrepareForThinLTOUsingPGOSampleProfile) DisableUnrollLoops = true; - if (!DisableUnitAtATime) { - // Infer attributes about declarations if possible. - MPM.add(createInferFunctionAttrsLegacyPass()); + // Infer attributes about declarations if possible. + MPM.add(createInferFunctionAttrsLegacyPass()); - addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); - MPM.add(createIPSCCPPass()); // IP SCCP - MPM.add(createGlobalOptimizerPass()); // Optimize out global vars - // Promote any localized global vars. - MPM.add(createPromoteMemoryToRegisterPass()); + MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createCalledValuePropagationPass()); + MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars. 
+ MPM.add(createPromoteMemoryToRegisterPass()); - MPM.add(createDeadArgEliminationPass()); // Dead argument elimination + MPM.add(createDeadArgEliminationPass()); // Dead argument elimination - addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE - addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE - } + addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE + addExtensionsToPM(EP_Peephole, MPM); + MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE // For SamplePGO in ThinLTO compile phase, we do not want to do indirect // call promotion as it will change the CFG too much to make the 2nd @@ -490,21 +479,21 @@ void PassManagerBuilder::populateModulePassManager( if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) addPGOInstrPasses(MPM); - if (EnableNonLTOGlobalsModRef) - // We add a module alias analysis pass here. In part due to bugs in the - // analysis infrastructure this "works" in that the analysis stays alive - // for the entire SCC pass run below. - MPM.add(createGlobalsAAWrapperPass()); + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsAAWrapperPass()); // Start of CallGraph SCC passes. - if (!DisableUnitAtATime) - MPM.add(createPruneEHPass()); // Remove dead EH info + MPM.add(createPruneEHPass()); // Remove dead EH info + bool RunInliner = false; if (Inliner) { MPM.add(Inliner); Inliner = nullptr; + RunInliner = true; } - if (!DisableUnitAtATime) - MPM.add(createPostOrderFunctionAttrsLegacyPass()); + + MPM.add(createPostOrderFunctionAttrsLegacyPass()); if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args @@ -515,11 +504,11 @@ void PassManagerBuilder::populateModulePassManager( // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (RunPartialInlining) MPM.add(createPartialInliningPass()); - if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO && - !PrepareForThinLTO) + if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve // these so they are eligible for inlining at link-time. Note if they @@ -531,15 +520,23 @@ void PassManagerBuilder::populateModulePassManager( // and saves running remaining passes on the eliminated functions. MPM.add(createEliminateAvailableExternallyPass()); - if (!DisableUnitAtATime) - MPM.add(createReversePostOrderFunctionAttrsPass()); + MPM.add(createReversePostOrderFunctionAttrsPass()); + + // The inliner performs some kind of dead code elimination as it goes, + // but there are cases that are not really caught by it. We might + // at some point consider teaching the inliner about them, but it + // is OK for now to run GlobalOpt + GlobalDCE in tandem as their + // benefits generally outweight the cost, making the whole pipeline + // faster. + if (RunInliner) { + MPM.add(createGlobalOptimizerPass()); + MPM.add(createGlobalDCEPass()); + } // If we are planning to perform ThinLTO later, let's not bloat the code with // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes // during ThinLTO and perform the rest of the optimizations afterward. 
if (PrepareForThinLTO) { - // Reduce the size of the IR as much as possible. - MPM.add(createGlobalOptimizerPass()); // Rename anon globals to be able to export them in the summary. MPM.add(createNameAnonGlobalPass()); return; @@ -560,23 +557,22 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLICMPass()); // Hoist loop invariants } - if (EnableNonLTOGlobalsModRef) - // We add a fresh GlobalsModRef run at this point. This is particularly - // useful as the above will have inlined, DCE'ed, and function-attr - // propagated everything. We should at this point have a reasonably minimal - // and richly annotated call graph. By computing aliasing and mod/ref - // information for all local globals here, the late loop passes and notably - // the vectorizer will be able to use them to help recognize vectorizable - // memory operations. - // - // Note that this relies on a bug in the pass manager which preserves - // a module analysis into a function pass pipeline (and throughout it) so - // long as the first function pass doesn't invalidate the module analysis. - // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for - // this to work. Fortunately, it is trivial to preserve AliasAnalysis - // (doing nothing preserves it as it is required to be conservatively - // correct in the face of IR changes). - MPM.add(createGlobalsAAWrapperPass()); + // We add a fresh GlobalsModRef run at this point. This is particularly + // useful as the above will have inlined, DCE'ed, and function-attr + // propagated everything. We should at this point have a reasonably minimal + // and richly annotated call graph. By computing aliasing and mod/ref + // information for all local globals here, the late loop passes and notably + // the vectorizer will be able to use them to help recognize vectorizable + // memory operations. + // + // Note that this relies on a bug in the pass manager which preserves + // a module analysis into a function pass pipeline (and throughout it) so + // long as the first function pass doesn't invalidate the module analysis. + // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for + // this to work. Fortunately, it is trivial to preserve AliasAnalysis + // (doing nothing preserves it as it is required to be conservatively + // correct in the face of IR changes). + MPM.add(createGlobalsAAWrapperPass()); MPM.add(createFloat2IntPass()); @@ -597,8 +593,7 @@ void PassManagerBuilder::populateModulePassManager( // Eliminate loads by forwarding stores from the previous iteration to loads // of the current iteration. - if (EnableLoopLoadElim) - MPM.add(createLoopLoadEliminationPass()); + MPM.add(createLoopLoadEliminationPass()); // FIXME: Because of #pragma vectorize enable, the passes below are always // inserted in the pipeline, even when the vectorizer doesn't run (ex. when @@ -650,16 +645,14 @@ void PassManagerBuilder::populateModulePassManager( // about pointer alignments. MPM.add(createAlignmentFromAssumptionsPass()); - if (!DisableUnitAtATime) { - // FIXME: We shouldn't bother with this anymore. - MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes + // FIXME: We shouldn't bother with this anymore. + MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes - // GlobalOpt already deletes dead functions and globals, at -O2 try a - // late pass of GlobalDCE. It is capable of deleting dead cycles. - if (OptLevel > 1) { - MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. 
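With DisableUnitAtATime gone, the early IPO block above always runs; the remaining knobs are the builder's public fields. A sketch of how a driver might populate this pipeline (assumed usage, not code from this patch; the three-argument createFunctionInliningPass is the legacy-PM factory of this era):

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

void runO2Pipeline(llvm::Module &M) {
  llvm::PassManagerBuilder PMB;
  PMB.OptLevel = 2; // drives the OptLevel checks seen above
  PMB.Inliner = llvm::createFunctionInliningPass(
      /*OptLevel=*/2, /*SizeLevel=*/0, /*DisableInlineHotCallSite=*/false);
  llvm::legacy::PassManager MPM;
  PMB.populateModulePassManager(MPM); // schedules the sequence edited above
  MPM.run(M);
}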
- MPM.add(createConstantMergePass()); // Merge dup global constants - } + // GlobalOpt already deletes dead functions and globals, at -O2 try a + // late pass of GlobalDCE. It is capable of deleting dead cycles. + if (OptLevel > 1) { + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + MPM.add(createConstantMergePass()); // Merge dup global constants } if (MergeFunctions) @@ -711,6 +704,10 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. PM.add(createIPSCCPPass()); + + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. + PM.add(createCalledValuePropagationPass()); } // Infer attributes about definitions. The readnone attribute in particular is @@ -941,8 +938,7 @@ LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, void LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, LLVMBool Value) { - PassManagerBuilder *Builder = unwrap(PMB); - Builder->DisableUnitAtATime = Value; + // NOTE: The DisableUnitAtATime switch has been removed. } void diff --git a/lib/Transforms/IPO/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp index 5258746dbb9ce..bea55b3f4d0b6 100644 --- a/lib/Transforms/IPO/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -23,42 +23,64 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/SampleProfile.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" -#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Pass.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/SampleProf.h" #include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" -#include "llvm/Support/Format.h" +#include "llvm/Support/GenericDomTree.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/Cloning.h" -#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include using namespace llvm; using namespace sampleprof; @@ -70,34 +92,39 @@ using namespace sampleprof; static cl::opt SampleProfileFile( "sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden); + static cl::opt SampleProfileMaxPropagateIterations( "sample-profile-max-propagate-iterations", cl::init(100), cl::desc("Maximum number of iterations to go through when propagating " "sample block/edge weights through the CFG.")); + static cl::opt SampleProfileRecordCoverage( "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of records in the input profile " "are matched to the IR.")); + static cl::opt SampleProfileSampleCoverage( "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"), cl::desc("Emit a warning if less than N% of samples in the input profile " "are matched to the IR.")); + static cl::opt SampleProfileHotThreshold( "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"), cl::desc("Inlined functions that account for more than N% of all samples " "collected in the parent function, will be inlined again.")); namespace { -typedef DenseMap BlockWeightMap; -typedef DenseMap EquivalenceClassMap; -typedef std::pair Edge; -typedef DenseMap EdgeWeightMap; -typedef DenseMap> - BlockEdgeMap; + +using BlockWeightMap = DenseMap; +using EquivalenceClassMap = DenseMap; +using Edge = std::pair; +using EdgeWeightMap = DenseMap; +using BlockEdgeMap = + DenseMap>; class SampleCoverageTracker { public: - SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {} + SampleCoverageTracker() = default; bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset, uint32_t Discriminator, uint64_t Samples); @@ -106,15 +133,16 @@ class SampleCoverageTracker { unsigned countBodyRecords(const FunctionSamples *FS) const; uint64_t getTotalUsedSamples() const { return TotalUsedSamples; } uint64_t countBodySamples(const FunctionSamples *FS) const; + void clear() { SampleCoverage.clear(); TotalUsedSamples = 0; } private: - typedef std::map BodySampleCoverageMap; - typedef DenseMap - FunctionSamplesCoverageMap; + using BodySampleCoverageMap = std::map; + using FunctionSamplesCoverageMap = + DenseMap; /// Coverage map for sampling records. /// @@ -138,7 +166,7 @@ class SampleCoverageTracker { /// and all the inlined callsites. Strictly, we should have a map of counters /// keyed by FunctionSamples pointers, but these stats are cleared after /// every function, so we just need to keep a single counter. - uint64_t TotalUsedSamples; + uint64_t TotalUsedSamples = 0; }; /// \brief Sample profile pass. 
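The typedef-to-using rewrites in this hunk are behavior-neutral; the alias form reads left-to-right and, unlike typedef, extends to alias templates. Shown on a hypothetical alias rather than the ones above:

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
#include <cstdint>

// Equivalent declarations; the patch standardizes on the second form.
typedef llvm::DenseMap<const llvm::BasicBlock *, uint64_t> WeightMapTypedef;
using WeightMap = llvm::DenseMap<const llvm::BasicBlock *, uint64_t>;

// Alias templates are only expressible with 'using':
template <typename T>
using PerBlockMap = llvm::DenseMap<const llvm::BasicBlock *, T>;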
@@ -149,13 +177,11 @@ class SampleCoverageTracker { class SampleProfileLoader { public: SampleProfileLoader( - StringRef Name, + StringRef Name, bool IsThinLTOPreLink, std::function GetAssumptionCache, std::function GetTargetTransformInfo) - : DT(nullptr), PDT(nullptr), LI(nullptr), GetAC(GetAssumptionCache), - GetTTI(GetTargetTransformInfo), Reader(), Samples(nullptr), - Filename(Name), ProfileIsValid(false), TotalCollectedSamples(0), - ORE(nullptr) {} + : GetAC(GetAssumptionCache), GetTTI(GetTargetTransformInfo), + Filename(Name), IsThinLTOPreLink(IsThinLTOPreLink) {} bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM); @@ -170,8 +196,9 @@ class SampleProfileLoader { ErrorOr getBlockWeight(const BasicBlock *BB); const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; std::vector - findIndirectCallFunctionSamples(const Instruction &I) const; + findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; const FunctionSamples *findFunctionSamples(const Instruction &I) const; + bool inlineCallInstruction(Instruction *I); bool inlineHotFunctions(Function &F, DenseSet &ImportGUIDs); void printEdgeWeight(raw_ostream &OS, Edge E); @@ -243,22 +270,28 @@ class SampleProfileLoader { std::unique_ptr Reader; /// \brief Samples collected for the body of this function. - FunctionSamples *Samples; + FunctionSamples *Samples = nullptr; /// \brief Name of the profile file to load. std::string Filename; /// \brief Flag indicating whether the profile input loaded successfully. - bool ProfileIsValid; + bool ProfileIsValid = false; + + /// \brief Flag indicating if the pass is invoked in ThinLTO compile phase. + /// + /// In this phase, in annotation, we should not promote indirect calls. + /// Instead, we will mark GUIDs that needs to be annotated to the function. + bool IsThinLTOPreLink; /// \brief Total number of samples collected in this profile. /// /// This is the sum of all the samples collected in all the functions executed /// at runtime. - uint64_t TotalCollectedSamples; + uint64_t TotalCollectedSamples = 0; /// \brief Optimization Remark Emitter used to emit diagnostic remarks. 
- OptimizationRemarkEmitter *ORE; + OptimizationRemarkEmitter *ORE = nullptr; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -266,15 +299,15 @@ class SampleProfileLoaderLegacyPass : public ModulePass { // Class identification, replacement for typeinfo static char ID; - SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile) - : ModulePass(ID), SampleLoader(Name, + SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile, + bool IsThinLTOPreLink = false) + : ModulePass(ID), SampleLoader(Name, IsThinLTOPreLink, [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }, [&](Function &F) -> TargetTransformInfo & { return TTIWP->getTTI(F); - }), - ACT(nullptr), TTIWP(nullptr) { + }) { initializeSampleProfileLoaderLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -284,6 +317,7 @@ class SampleProfileLoaderLegacyPass : public ModulePass { bool doInitialization(Module &M) override { return SampleLoader.doInitialization(M); } + StringRef getPassName() const override { return "Sample profile pass"; } bool runOnModule(Module &M) override; @@ -294,10 +328,12 @@ class SampleProfileLoaderLegacyPass : public ModulePass { private: SampleProfileLoader SampleLoader; - AssumptionCacheTracker *ACT; - TargetTransformInfoWrapperPass *TTIWP; + AssumptionCacheTracker *ACT = nullptr; + TargetTransformInfoWrapperPass *TTIWP = nullptr; }; +} // end anonymous namespace + /// Return true if the given callsite is hot wrt to its caller. /// /// Functions that were inlined in the original binary will be represented @@ -312,8 +348,8 @@ class SampleProfileLoaderLegacyPass : public ModulePass { /// /// If that fraction is larger than the default given by /// SampleProfileHotThreshold, the callsite will be inlined again. -bool callsiteIsHot(const FunctionSamples *CallerFS, - const FunctionSamples *CallsiteFS) { +static bool callsiteIsHot(const FunctionSamples *CallerFS, + const FunctionSamples *CallsiteFS) { if (!CallsiteFS) return false; // The callsite was not inlined in the original binary. @@ -329,7 +365,6 @@ bool callsiteIsHot(const FunctionSamples *CallerFS, (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0; return PercentSamples >= SampleProfileHotThreshold; } -} /// Mark as used the sample record for the given function samples at /// (LineOffset, Discriminator). @@ -502,10 +537,12 @@ ErrorOr SampleProfileLoader::getInstWeight(const Instruction &Inst) { if (isa(Inst) || isa(Inst)) return std::error_code(); - // If a call/invoke instruction is inlined in profile, but not inlined here, + // If a direct call/invoke instruction is inlined in profile + // (findCalleeFunctionSamples returns non-empty result), but not inlined here, // it means that the inlined callsite has no sample, thus the call // instruction should have 0 count. if ((isa(Inst) || isa(Inst)) && + !ImmutableCallSite(&Inst).isIndirectCall() && findCalleeFunctionSamples(Inst)) return 0; @@ -517,17 +554,18 @@ ErrorOr SampleProfileLoader::getInstWeight(const Instruction &Inst) { bool FirstMark = CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get()); if (FirstMark) { - if (Discriminator) - ORE->emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AppliedSamples", &Inst) - << "Applied " << ore::NV("NumSamples", *R) - << " samples from profile (offset: " - << ore::NV("LineOffset", LineOffset) << "." 
- << ore::NV("Discriminator", Discriminator) << ")"); - else - ORE->emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AppliedSamples", &Inst) - << "Applied " << ore::NV("NumSamples", *R) - << " samples from profile (offset: " - << ore::NV("LineOffset", LineOffset) << ")"); + ORE->emit([&]() { + OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst); + Remark << "Applied " << ore::NV("NumSamples", *R); + Remark << " samples from profile (offset: "; + Remark << ore::NV("LineOffset", LineOffset); + if (Discriminator) { + Remark << "."; + Remark << ore::NV("Discriminator", Discriminator); + } + Remark << ")"; + return Remark; + }); } DEBUG(dbgs() << " " << DLoc.getLine() << "." << DIL->getBaseDiscriminator() << ":" << Inst @@ -614,10 +652,11 @@ SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const { } /// Returns a vector of FunctionSamples that are the indirect call targets -/// of \p Inst. The vector is sorted by the total number of samples. +/// of \p Inst. The vector is sorted by the total number of samples. Stores +/// the total call count of the indirect call in \p Sum. std::vector SampleProfileLoader::findIndirectCallFunctionSamples( - const Instruction &Inst) const { + const Instruction &Inst, uint64_t &Sum) const { const DILocation *DIL = Inst.getDebugLoc(); std::vector R; @@ -629,16 +668,25 @@ SampleProfileLoader::findIndirectCallFunctionSamples( if (FS == nullptr) return R; + uint32_t LineOffset = getOffset(DIL); + uint32_t Discriminator = DIL->getBaseDiscriminator(); + + auto T = FS->findCallTargetMapAt(LineOffset, Discriminator); + Sum = 0; + if (T) + for (const auto &T_C : T.get()) + Sum += T_C.second; if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt( LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()))) { - if (M->size() == 0) + if (M->empty()) return R; for (const auto &NameFS : *M) { + Sum += NameFS.second.getEntrySamples(); R.push_back(&NameFS.second); } std::sort(R.begin(), R.end(), [](const FunctionSamples *L, const FunctionSamples *R) { - return L->getTotalSamples() > R->getTotalSamples(); + return L->getEntrySamples() > R->getEntrySamples(); }); } return R; @@ -676,6 +724,39 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { return FS; } +bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { + assert(isa(I) || isa(I)); + CallSite CS(I); + Function *CalledFunction = CS.getCalledFunction(); + assert(CalledFunction); + DebugLoc DLoc = I->getDebugLoc(); + BasicBlock *BB = I->getParent(); + InlineParams Params = getInlineParams(); + Params.ComputeFullInlineCost = true; + // Checks if there is anything in the reachable portion of the callee at + // this callsite that makes this inlining potentially illegal. Need to + // set ComputeFullInlineCost, otherwise getInlineCost may return early + // when cost exceeds threshold without checking all IRs in the callee. + // The acutal cost does not matter because we only checks isNever() to + // see if it is legal to inline the callsite. + InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC, + None, nullptr, nullptr); + if (Cost.isNever()) { + ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB) + << "incompatible inlining"); + return false; + } + InlineFunctionInfo IFI(nullptr, &GetAC); + if (InlineFunction(CS, IFI)) { + // The call to InlineFunction erases I, so we can't pass it here. 
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB) + << "inlined hot callee '" << ore::NV("Callee", CalledFunction) + << "' into '" << ore::NV("Caller", BB->getParent()) << "'"); + return true; + } + return false; +} + /// \brief Iteratively inline hot callsites of a function. /// /// Iteratively traverse all callsites of the function \p F, and find if @@ -713,82 +794,59 @@ bool SampleProfileLoader::inlineHotFunctions( } } for (auto I : CIS) { - InlineFunctionInfo IFI(nullptr, &GetAC); Function *CalledFunction = CallSite(I).getCalledFunction(); // Do not inline recursive calls. if (CalledFunction == &F) continue; - Instruction *DI = I; - if (!CalledFunction && !PromotedInsns.count(I) && - CallSite(I).isIndirectCall()) { - for (const auto *FS : findIndirectCallFunctionSamples(*I)) { + if (CallSite(I).isIndirectCall()) { + if (PromotedInsns.count(I)) + continue; + uint64_t Sum; + for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { + if (IsThinLTOPreLink) { + FS->findImportedFunctions(ImportGUIDs, F.getParent(), + Samples->getTotalSamples() * + SampleProfileHotThreshold / 100); + continue; + } auto CalleeFunctionName = FS->getName(); // If it is a recursive call, we do not inline it as it could bloat // the code exponentially. There is way to better handle this, e.g. // clone the caller first, and inline the cloned caller if it is - // recursive. As llvm does not inline recursive calls, we will simply - // ignore it instead of handling it explicitly. + // recursive. As llvm does not inline recursive calls, we will + // simply ignore it instead of handling it explicitly. if (CalleeFunctionName == F.getName()) continue; + const char *Reason = "Callee function not available"; auto R = SymbolMap.find(CalleeFunctionName); - if (R == SymbolMap.end()) - continue; - CalledFunction = R->getValue(); - if (CalledFunction && isLegalToPromote(I, CalledFunction, &Reason)) { - // The indirect target was promoted and inlined in the profile, as a - // result, we do not have profile info for the branch probability. - // We set the probability to 80% taken to indicate that the static - // call is likely taken. - DI = dyn_cast( - promoteIndirectCall(I, CalledFunction, 80, 100, false, ORE) - ->stripPointerCasts()); + if (R != SymbolMap.end() && R->getValue() && + !R->getValue()->isDeclaration() && + R->getValue()->getSubprogram() && + isLegalToPromote(I, R->getValue(), &Reason)) { + uint64_t C = FS->getEntrySamples(); + Instruction *DI = promoteIndirectCall( + I, R->getValue(), C, Sum, false, ORE); + Sum -= C; PromotedInsns.insert(I); + // If profile mismatches, we should not attempt to inline DI. + if ((isa(DI) || isa(DI)) && + inlineCallInstruction(DI)) + LocalChanged = true; } else { - DEBUG(dbgs() << "\nFailed to promote indirect call to " - << CalleeFunctionName << " because " << Reason - << "\n"); - continue; + DEBUG(dbgs() + << "\nFailed to promote indirect call to " + << CalleeFunctionName << " because " << Reason << "\n"); } } - // If there is profile mismatch, we should not attempt to inline DI. - if (!isa(DI) && !isa(DI)) - continue; - } - if (!CalledFunction || !CalledFunction->getSubprogram()) { - // Handles functions that are imported from other modules. 
- for (const FunctionSamples *FS : findIndirectCallFunctionSamples(*I)) - FS->findImportedFunctions( - ImportGUIDs, F.getParent(), - Samples->getTotalSamples() * SampleProfileHotThreshold / 100); - continue; - } - assert(isa(DI) || isa(DI)); - CallSite CS(DI); - DebugLoc DLoc = I->getDebugLoc(); - BasicBlock *BB = I->getParent(); - InlineParams Params = getInlineParams(); - Params.ComputeFullInlineCost = true; - // Checks if there is anything in the reachable portion of the callee at - // this callsite that makes this inlining potentially illegal. Need to - // set ComputeFullInlineCost, otherwise getInlineCost may return early - // when cost exceeds threshold without checking all IRs in the callee. - // The acutal cost does not matter because we only checks isNever() to - // see if it is legal to inline the callsite. - InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC, - None, nullptr, nullptr); - if (Cost.isNever()) { - ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB) - << "incompatible inlining"); - continue; - } - if (InlineFunction(CS, IFI)) { - LocalChanged = true; - // The call to InlineFunction erases DI, so we can't pass it here. - ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB) - << "inlined hot callee '" - << ore::NV("Callee", CalledFunction) << "' into '" - << ore::NV("Caller", &F) << "'"); + } else if (CalledFunction && CalledFunction->getSubprogram() && + !CalledFunction->isDeclaration()) { + if (inlineCallInstruction(I)) + LocalChanged = true; + } else if (IsThinLTOPreLink) { + findCalleeFunctionSamples(*I)->findImportedFunctions( + ImportGUIDs, F.getParent(), + Samples->getTotalSamples() * SampleProfileHotThreshold / 100); } } if (LocalChanged) { @@ -1232,7 +1290,7 @@ void SampleProfileLoader::propagateWeights(Function &F) { if (!FS) continue; auto T = FS->findCallTargetMapAt(LineOffset, Discriminator); - if (!T || T.get().size() == 0) + if (!T || T.get().empty()) continue; SmallVector SortedCallTargets; uint64_t Sum = SortCallTargets(SortedCallTargets, T.get()); @@ -1291,11 +1349,13 @@ void SampleProfileLoader::propagateWeights(Function &F) { // weights, the second pass does not need to set it. if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) { DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n"); - TI->setMetadata(llvm::LLVMContext::MD_prof, + TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); - ORE->emit(OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst) - << "most popular destination for conditional branches at " - << ore::NV("CondBranchesLoc", BranchLoc)); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst) + << "most popular destination for conditional branches at " + << ore::NV("CondBranchesLoc", BranchLoc); + }); } else { DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n"); } @@ -1448,6 +1508,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { } char SampleProfileLoaderLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) @@ -1551,9 +1612,9 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, return FAM.getResult(F); }; - SampleProfileLoader SampleLoader(ProfileFileName.empty() ? SampleProfileFile - : ProfileFileName, - GetAssumptionCache, GetTTI); + SampleProfileLoader SampleLoader( + ProfileFileName.empty() ? 
SampleProfileFile : ProfileFileName, + IsThinLTOPreLink, GetAssumptionCache, GetTTI); SampleLoader.doInitialization(M); diff --git a/lib/Transforms/IPO/WholeProgramDevirt.cpp b/lib/Transforms/IPO/WholeProgramDevirt.cpp index c7308581f3f6b..ec34deb9a08da 100644 --- a/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -51,7 +51,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TypeMetadataUtils.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index bcd60bca177bb..18b246b5d99f3 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -12,12 +12,26 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/KnownBits.h" +#include +#include using namespace llvm; using namespace PatternMatch; @@ -39,10 +53,15 @@ namespace { // is expensive. In order to avoid the cost of the constructor, we should // reuse some instances whenever possible. The pre-created instances // FAddCombine::Add[0-5] embodies this idea. - // - FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} + FAddendCoef() = default; ~FAddendCoef(); + // If possible, don't define operator+/operator- etc because these + // operators inevitably call FAddendCoef's constructor which is not cheap. + void operator=(const FAddendCoef &A); + void operator+=(const FAddendCoef &A); + void operator*=(const FAddendCoef &S); + void set(short C) { assert(!insaneIntVal(C) && "Insane coefficient"); IsFp = false; IntVal = C; @@ -55,12 +74,6 @@ namespace { bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } Value *getValue(Type *) const; - // If possible, don't define operator+/operator- etc because these - // operators inevitably call FAddendCoef's constructor which is not cheap. 
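FAddendCoef avoids a non-trivial default constructor by keeping its APFloat in raw aligned storage (FpValBuf) and constructing it lazily; that is why the relocated comment warns against arithmetic operators that would force construction. The general shape of that trick, as a standalone plain-C++ sketch (hypothetical helper, not an LLVM API):

#include <cassert>
#include <new>
#include <type_traits>
#include <utility>

template <typename T> class LazySlot {
  typename std::aligned_storage<sizeof(T), alignof(T)>::type Buf;
  bool Constructed = false;

public:
  LazySlot() = default; // trivial: no T is built until needed

  template <typename... Args> T &emplace(Args &&... A) {
    assert(!Constructed && "value already constructed");
    T *P = ::new (static_cast<void *>(&Buf)) T(std::forward<Args>(A)...);
    Constructed = true;
    return *P;
  }

  T &get() {
    assert(Constructed && "no value yet");
    return *reinterpret_cast<T *>(&Buf);
  }

  ~LazySlot() {
    if (Constructed)
      get().~T(); // manual destruction mirrors the manual construction
  }
};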
-    void operator=(const FAddendCoef &A);
-    void operator+=(const FAddendCoef &A);
-    void operator*=(const FAddendCoef &S);
-
     bool isOne() const { return isInt() && IntVal == 1; }
     bool isTwo() const { return isInt() && IntVal == 2; }
     bool isMinusOne() const { return isInt() && IntVal == -1; }
@@ -68,10 +81,12 @@ namespace {

   private:
     bool insaneIntVal(int V) { return V > 4 || V < -4; }
+
     APFloat *getFpValPtr()
-      { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); }
+      { return reinterpret_cast<APFloat *>(&FpValBuf.buffer[0]); }
+
     const APFloat *getFpValPtr() const
-      { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); }
+      { return reinterpret_cast<const APFloat *>(&FpValBuf.buffer[0]); }

     const APFloat &getFpVal() const {
       assert(IsFp && BufHasFpVal && "Incorrect state");
@@ -94,17 +109,16 @@ namespace {
     // from a *SIGNED* integer.
     APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);

-  private:
-    bool IsFp;
+    bool IsFp = false;

     // True iff FpValBuf contains an instance of APFloat.
-    bool BufHasFpVal;
+    bool BufHasFpVal = false;

     // The integer coefficient of an individual addend is either 1 or -1,
     // and we try to simplify at most 4 addends from neighboring at most
     // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt
     // is overkill for this purpose.
-    short IntVal;
+    short IntVal = 0;

     AlignedCharArrayUnion<APFloat> FpValBuf;
   };
@@ -112,10 +126,14 @@ namespace {
   /// FAddend is used to represent a floating-point addend. An addend is
   /// represented as <C, V>, where V is a symbolic value, and C is a
   /// constant coefficient. A constant addend is represented as <C, 0>.
-  ///
   class FAddend {
   public:
-    FAddend() : Val(nullptr) {}
+    FAddend() = default;
+
+    void operator+=(const FAddend &T) {
+      assert((Val == T.Val) && "Symbolic-values disagree");
+      Coeff += T.Coeff;
+    }

     Value *getSymVal() const { return Val; }
     const FAddendCoef &getCoef() const { return Coeff; }
@@ -146,16 +164,11 @@ namespace {
     /// split is the addend itself.
     unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const;

-    void operator+=(const FAddend &T) {
-      assert((Val == T.Val) && "Symbolic-values disagree");
-      Coeff += T.Coeff;
-    }
-
   private:
     void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; }

     // This addend has the value of "Coeff * Val".
-    Value *Val;
+    Value *Val = nullptr;
     FAddendCoef Coeff;
   };

@@ -164,11 +177,12 @@ namespace {
   ///
   class FAddCombine {
   public:
-    FAddCombine(InstCombiner::BuilderTy &B) : Builder(B), Instr(nullptr) {}
+    FAddCombine(InstCombiner::BuilderTy &B) : Builder(B) {}
+
     Value *simplify(Instruction *FAdd);

   private:
-    typedef SmallVector<const FAddend *, 4> AddendVect;
+    using AddendVect = SmallVector<const FAddend *, 4>;

     Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota);
@@ -179,6 +193,7 @@ namespace {

     /// Return the number of instructions needed to emit the N-ary addition.
     unsigned calcInstrNumber(const AddendVect& Vect);
+
     Value *createFSub(Value *Opnd0, Value *Opnd1);
     Value *createFAdd(Value *Opnd0, Value *Opnd1);
     Value *createFMul(Value *Opnd0, Value *Opnd1);
@@ -187,9 +202,6 @@ namespace {
     Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
     void createInstPostProc(Instruction *NewInst, bool NoNumber = false);

-    InstCombiner::BuilderTy &Builder;
-    Instruction *Instr;
-
     // Debugging stuff is clustered here.
#ifndef NDEBUG unsigned CreateInstrNum; @@ -199,9 +211,12 @@ namespace { void initCreateInstNum() {} void incCreateInstNum() {} #endif + + InstCombiner::BuilderTy &Builder; + Instruction *Instr = nullptr; }; -} // anonymous namespace +} // end anonymous namespace //===----------------------------------------------------------------------===// // @@ -332,7 +347,6 @@ Value *FAddendCoef::getValue(Type *Ty) const { // 0 +/- 0 <0, NULL> (corner case) // // Legend: A and B are not constant, C is constant -// unsigned FAddend::drillValueDownOneStep (Value *Val, FAddend &Addend0, FAddend &Addend1) { Instruction *I = nullptr; @@ -396,7 +410,6 @@ unsigned FAddend::drillValueDownOneStep // Try to break *this* addend into two addends. e.g. Suppose this addend is // <2.3, V>, and V = X + Y, by calling this function, we obtain two addends, // i.e. <2.3, X> and <2.3, Y>. -// unsigned FAddend::drillAddendDownOneStep (FAddend &Addend0, FAddend &Addend1) const { if (isConstant()) @@ -421,7 +434,6 @@ unsigned FAddend::drillAddendDownOneStep // ------------------------------------------------------- // (x * y) +/- (x * z) x * (y +/- z) // (y / x) +/- (z / x) (y +/- z) / x -// Value *FAddCombine::performFactorization(Instruction *I) { assert((I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub"); @@ -447,7 +459,6 @@ Value *FAddCombine::performFactorization(Instruction *I) { // ---------------------------------------------- // (x*y) +/- (x*z) x y z // (y/x) +/- (z/x) x y z - // Value *Factor = nullptr; Value *AddSub0 = nullptr, *AddSub1 = nullptr; @@ -599,7 +610,6 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { // desirable to reside at the top of the resulting expression tree. Placing // constant close to supper-expr(s) will potentially reveal some optimization // opportunities in super-expr(s). - // const FAddend *ConstAdd = nullptr; // Simplified addends are placed . @@ -608,7 +618,6 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { // The outer loop works on one symbolic-value at a time. Suppose the input // addends are : , , , , , ... // The symbolic-values will be processed in this order: x, y, z. - // for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { const FAddend *ThisAddend = Addends[SymIdx]; @@ -626,7 +635,6 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { // example, if the symbolic value "y" is being processed, the inner loop // will collect two addends "" and "". These two addends will // be later on folded into "". - // for (unsigned SameSymIdx = SymIdx + 1; SameSymIdx < AddendNum; SameSymIdx++) { const FAddend *T = Addends[SameSymIdx]; @@ -681,7 +689,7 @@ Value *FAddCombine::createNaryFAdd assert(!Opnds.empty() && "Expect at least one addend"); // Step 1: Check if the # of instructions needed exceeds the quota. 
- // + unsigned InstrNeeded = calcInstrNumber(Opnds); if (InstrNeeded > InstrQuota) return nullptr; @@ -726,10 +734,10 @@ Value *FAddCombine::createNaryFAdd LastVal = createFNeg(LastVal); } - #ifndef NDEBUG - assert(CreateInstrNum == InstrNeeded && - "Inconsistent in instruction numbers"); - #endif +#ifndef NDEBUG + assert(CreateInstrNum == InstrNeeded && + "Inconsistent in instruction numbers"); +#endif return LastVal; } @@ -950,9 +958,25 @@ static Value *checkForNegativeOperand(BinaryOperator &I, return nullptr; } -static Instruction *foldAddWithConstant(BinaryOperator &Add, - InstCombiner::BuilderTy &Builder) { +Instruction *InstCombiner::foldAddWithConstant(BinaryOperator &Add) { Value *Op0 = Add.getOperand(0), *Op1 = Add.getOperand(1); + Constant *Op1C; + if (!match(Op1, m_Constant(Op1C))) + return nullptr; + + if (Instruction *NV = foldOpWithConstantIntoOperand(Add)) + return NV; + + Value *X; + // zext(bool) + C -> bool ? C + 1 : C + if (match(Op0, m_ZExt(m_Value(X))) && + X->getType()->getScalarSizeInBits() == 1) + return SelectInst::Create(X, AddOne(Op1C), Op1); + + // ~X + C --> (C-1) - X + if (match(Op0, m_Not(m_Value(X)))) + return BinaryOperator::CreateSub(SubOne(Op1C), X); + const APInt *C; if (!match(Op1, m_APInt(C))) return nullptr; @@ -968,21 +992,17 @@ static Instruction *foldAddWithConstant(BinaryOperator &Add, return BinaryOperator::CreateXor(Op0, Op1); } - Value *X; - const APInt *C2; - Type *Ty = Add.getType(); - // Is this add the last step in a convoluted sext? // add(zext(xor i16 X, -32768), -32768) --> sext X + Type *Ty = Add.getType(); + const APInt *C2; if (match(Op0, m_ZExt(m_Xor(m_Value(X), m_APInt(C2)))) && C2->isMinSignedValue() && C2->sext(Ty->getScalarSizeInBits()) == *C) return CastInst::Create(Instruction::SExt, X, Ty); // (add (zext (add nuw X, C2)), C) --> (zext (add nuw X, C2 + C)) - // FIXME: This should check hasOneUse to not increase the instruction count? - if (C->isNegative() && - match(Op0, m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2)))) && - C->sge(-C2->sext(C->getBitWidth()))) { + if (match(Op0, m_OneUse(m_ZExt(m_NUWAdd(m_Value(X), m_APInt(C2))))) && + C->isNegative() && C->sge(-C2->sext(C->getBitWidth()))) { Constant *NewC = ConstantInt::get(X->getType(), *C2 + C->trunc(C2->getBitWidth())); return new ZExtInst(Builder.CreateNUWAdd(X, NewC), Ty); @@ -1013,34 +1033,29 @@ static Instruction *foldAddWithConstant(BinaryOperator &Add, Instruction *InstCombiner::visitAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - if (Value *V = SimplifyVectorOp(I)) return replaceInstUsesWith(I, V); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); - // (A*B)+(A*C) -> A*(B+C) etc + // (A*B)+(A*C) -> A*(B+C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Instruction *X = foldAddWithConstant(I, Builder)) + if (Instruction *X = foldAddWithConstant(I)) return X; // FIXME: This should be moved into the above helper function to allow these - // transforms for splat vectors. + // transforms for general constant or constant splat vectors. + Type *Ty = I.getType(); if (ConstantInt *CI = dyn_cast(RHS)) { - // zext(bool) + C -> bool ? 
C + 1 : C - if (ZExtInst *ZI = dyn_cast(LHS)) - if (ZI->getSrcTy()->isIntegerTy(1)) - return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr; if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { - uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); + unsigned TySizeBits = Ty->getScalarSizeInBits(); const APInt &RHSVal = CI->getValue(); unsigned ExtendAmt = 0; // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. @@ -1059,7 +1074,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } if (ExtendAmt) { - Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); + Constant *ShAmt = ConstantInt::get(Ty, ExtendAmt); Value *NewShl = Builder.CreateShl(XorLHS, ShAmt, "sext"); return BinaryOperator::CreateAShr(NewShl, ShAmt); } @@ -1080,38 +1095,30 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } - if (isa(RHS)) - if (Instruction *NV = foldOpWithConstantIntoOperand(I)) - return NV; - - if (I.getType()->isIntOrIntVectorTy(1)) + if (Ty->isIntOrIntVectorTy(1)) return BinaryOperator::CreateXor(LHS, RHS); // X + X --> X << 1 if (LHS == RHS) { - BinaryOperator *New = - BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); - New->setHasNoSignedWrap(I.hasNoSignedWrap()); - New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); - return New; + auto *Shl = BinaryOperator::CreateShl(LHS, ConstantInt::get(Ty, 1)); + Shl->setHasNoSignedWrap(I.hasNoSignedWrap()); + Shl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + return Shl; } - // -A + B --> B - A - // -A + -B --> -(A + B) - if (Value *LHSV = dyn_castNegVal(LHS)) { - if (!isa(RHS)) - if (Value *RHSV = dyn_castNegVal(RHS)) { - Value *NewAdd = Builder.CreateAdd(LHSV, RHSV, "sum"); - return BinaryOperator::CreateNeg(NewAdd); - } + Value *A, *B; + if (match(LHS, m_Neg(m_Value(A)))) { + // -A + -B --> -(A + B) + if (match(RHS, m_Neg(m_Value(B)))) + return BinaryOperator::CreateNeg(Builder.CreateAdd(A, B)); - return BinaryOperator::CreateSub(RHS, LHSV); + // -A + B --> B - A + return BinaryOperator::CreateSub(RHS, A); } // A + -B --> A - B - if (!isa(RHS)) - if (Value *V = dyn_castNegVal(RHS)) - return BinaryOperator::CreateSub(LHS, V); + if (match(RHS, m_Neg(m_Value(B)))) + return BinaryOperator::CreateSub(LHS, B); if (Value *V = checkForNegativeOperand(I, Builder)) return replaceInstUsesWith(I, V); @@ -1120,12 +1127,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT)) return BinaryOperator::CreateOr(LHS, RHS); - if (Constant *CRHS = dyn_cast(RHS)) { - Value *X; - if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X - return BinaryOperator::CreateSub(SubOne(CRHS), X); - } - // FIXME: We already did a check for ConstantInt RHS above this. // FIXME: Is this pattern covered by another fold? No regression tests fail on // removal. @@ -1187,12 +1188,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (LHSConv->hasOneUse()) { Constant *CI = ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); - if (ConstantExpr::getSExt(CI, I.getType()) == RHSC && + if (ConstantExpr::getSExt(CI, Ty) == RHSC && willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new, smaller add. Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv"); - return new SExtInst(NewAdd, I.getType()); + return new SExtInst(NewAdd, Ty); } } } @@ -1210,7 +1211,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // Insert the new integer add. 
Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); - return new SExtInst(NewAdd, I.getType()); + return new SExtInst(NewAdd, Ty); } } } @@ -1223,12 +1224,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (LHSConv->hasOneUse()) { Constant *CI = ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); - if (ConstantExpr::getZExt(CI, I.getType()) == RHSC && + if (ConstantExpr::getZExt(CI, Ty) == RHSC && willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) { // Insert the new, smaller add. Value *NewAdd = Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv"); - return new ZExtInst(NewAdd, I.getType()); + return new ZExtInst(NewAdd, Ty); } } } @@ -1246,41 +1247,35 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // Insert the new integer add. Value *NewAdd = Builder.CreateNUWAdd( LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv"); - return new ZExtInst(NewAdd, I.getType()); + return new ZExtInst(NewAdd, Ty); } } } // (add (xor A, B) (and A, B)) --> (or A, B) - { - Value *A = nullptr, *B = nullptr; - if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && - match(LHS, m_c_And(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateOr(A, B); - - if (match(LHS, m_Xor(m_Value(A), m_Value(B))) && - match(RHS, m_c_And(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateOr(A, B); - } + if (match(LHS, m_Xor(m_Value(A), m_Value(B))) && + match(RHS, m_c_And(m_Specific(A), m_Specific(B)))) + return BinaryOperator::CreateOr(A, B); + + // (add (and A, B) (xor A, B)) --> (or A, B) + if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && + match(LHS, m_c_And(m_Specific(A), m_Specific(B)))) + return BinaryOperator::CreateOr(A, B); // (add (or A, B) (and A, B)) --> (add A, B) - { - Value *A = nullptr, *B = nullptr; - if (match(RHS, m_Or(m_Value(A), m_Value(B))) && - match(LHS, m_c_And(m_Specific(A), m_Specific(B)))) { - auto *New = BinaryOperator::CreateAdd(A, B); - New->setHasNoSignedWrap(I.hasNoSignedWrap()); - New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); - return New; - } + if (match(LHS, m_Or(m_Value(A), m_Value(B))) && + match(RHS, m_c_And(m_Specific(A), m_Specific(B)))) { + I.setOperand(0, A); + I.setOperand(1, B); + return &I; + } - if (match(LHS, m_Or(m_Value(A), m_Value(B))) && - match(RHS, m_c_And(m_Specific(A), m_Specific(B)))) { - auto *New = BinaryOperator::CreateAdd(A, B); - New->setHasNoSignedWrap(I.hasNoSignedWrap()); - New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); - return New; - } + // (add (and A, B) (or A, B)) --> (add A, B) + if (match(RHS, m_Or(m_Value(A), m_Value(B))) && + match(LHS, m_c_And(m_Specific(A), m_Specific(B)))) { + I.setOperand(0, A); + I.setOperand(1, B); + return &I; } // TODO(jingyue): Consider willNotOverflowSignedAdd and @@ -1402,7 +1397,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { /// Optimize pointer differences into the same array into a size. Consider: /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. 
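The visitAdd rewrite above leans on PatternMatch matchers (m_Neg, m_Not, m_OneUse, m_c_And) instead of hand-rolled dyn_cast chains. One of its folds, restated as a self-contained helper to show the idiom:

#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PatternMatch.h"

// Recognize "(xor A, B) + (and A, B)" and rewrite it to "or A, B",
// mirroring the fold in visitAdd; returns null when the pattern is absent.
static llvm::Instruction *foldXorPlusAnd(llvm::BinaryOperator &I) {
  using namespace llvm::PatternMatch;
  llvm::Value *A, *B;
  if (match(I.getOperand(0), m_Xor(m_Value(A), m_Value(B))) &&
      match(I.getOperand(1), m_c_And(m_Specific(A), m_Specific(B))))
    return llvm::BinaryOperator::CreateOr(A, B);
  return nullptr;
}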
-/// Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, Type *Ty) { // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize @@ -1624,7 +1618,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { Builder.CreateSub(Z, Y, Op1->getName())); // (X - (X & Y)) --> (X & ~Y) - // if (match(Op1, m_c_And(m_Value(Y), m_Specific(Op0)))) return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(Y, Y->getName() + ".not")); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 61f0329f704f5..32dd21f93a368 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -16,16 +16,20 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -40,18 +44,26 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" +#include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include #include #include #include +#include #include using namespace llvm; @@ -515,7 +527,7 @@ static Value *simplifyX86varShift(const IntrinsicInst &II, // If all elements out of range or UNDEF, return vector of zeros/undefs. // ArithmeticShift should only hit this if they are all UNDEF. auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); }; - if (all_of(ShiftAmts, OutOfRange)) { + if (llvm::all_of(ShiftAmts, OutOfRange)) { SmallVector ConstantVec; for (int Idx : ShiftAmts) { if (Idx < 0) { @@ -1584,7 +1596,6 @@ static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) { // IntrinsicInstr with target-generic LLVM IR. const SimplifyAction Action = [II]() -> SimplifyAction { switch (II->getIntrinsicID()) { - // NVVM intrinsics that map directly to LLVM intrinsics. 
case Intrinsic::nvvm_ceil_d: return {Intrinsic::ceil, FTZ_Any}; @@ -2313,11 +2324,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_pmovmskb_128: case Intrinsic::x86_avx_movmsk_pd_256: case Intrinsic::x86_avx_movmsk_ps_256: - case Intrinsic::x86_avx2_pmovmskb: { + case Intrinsic::x86_avx2_pmovmskb: if (Value *V = simplifyX86movmsk(*II)) return replaceInstUsesWith(*II, V); break; - } case Intrinsic::x86_sse_comieq_ss: case Intrinsic::x86_sse_comige_ss: @@ -3371,7 +3381,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return II; break; - } case Intrinsic::amdgcn_fmed3: { // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled @@ -3532,6 +3541,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::amdgcn_wqm_vote: { + // wqm_vote is identity when the argument is constant. + if (!isa<Constant>(II->getArgOperand(0))) + break; + + return replaceInstUsesWith(*II, II->getArgOperand(0)); + } + case Intrinsic::amdgcn_kill: { + const ConstantInt *C = dyn_cast<ConstantInt>(II->getArgOperand(0)); + if (!C || !C->getZExtValue()) + break; + + // amdgcn.kill(i1 1) is a no-op + return eraseInstFromFunction(CI); + } case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. @@ -3712,7 +3736,6 @@ Instruction *InstCombiner::visitFenceInst(FenceInst &FI) { } // InvokeInst simplification -// Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) { return visitCallSite(&II); } @@ -3825,7 +3848,6 @@ static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, // Given a call to llvm.adjust.trampoline, find and return the corresponding // call to llvm.init.trampoline if the call to the trampoline can be optimized // to a direct call to a function. Otherwise return NULL. -// static IntrinsicInst *findInitTrampoline(Value *Callee) { Callee = Callee->stripPointerCasts(); IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee); @@ -3993,7 +4015,6 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Okay, this is a cast from a function to a different type. Unless doing so // would cause a type conversion of one of our arguments, change this call to // be a direct call with arguments casted to the appropriate types.
- // FunctionType *FT = Callee->getFunctionType(); Type *OldRetTy = Caller->getType(); Type *NewRetTy = FT->getReturnType(); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index f7be0f9bc3f33..5e4fd8c265679 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -818,9 +818,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) { // (X&4) == 2 --> false // (X&4) != 2 --> true - Constant *Res = ConstantInt::get(Type::getInt1Ty(CI.getContext()), - isNE); - Res = ConstantExpr::getZExt(Res, CI.getType()); + Constant *Res = ConstantInt::get(CI.getType(), isNE); return replaceInstUsesWith(CI, Res); } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 124499908902b..cb4788576c59a 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -37,77 +37,30 @@ using namespace PatternMatch; STATISTIC(NumSel, "Number of select opts"); -static ConstantInt *extractElement(Constant *V, Constant *Idx) { - return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx)); -} - -static bool hasAddOverflow(ConstantInt *Result, - ConstantInt *In1, ConstantInt *In2, - bool IsSigned) { - if (!IsSigned) - return Result->getValue().ult(In1->getValue()); - - if (In2->isNegative()) - return Result->getValue().sgt(In1->getValue()); - return Result->getValue().slt(In1->getValue()); -} - /// Compute Result = In1+In2, returning true if the result overflowed for this /// type. -static bool addWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, bool IsSigned = false) { - Result = ConstantExpr::getAdd(In1, In2); - - if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); - if (hasAddOverflow(extractElement(Result, Idx), - extractElement(In1, Idx), - extractElement(In2, Idx), - IsSigned)) - return true; - } - return false; - } - - return hasAddOverflow(cast<ConstantInt>(Result), - cast<ConstantInt>(In1), cast<ConstantInt>(In2), - IsSigned); -} - -static bool hasSubOverflow(ConstantInt *Result, - ConstantInt *In1, ConstantInt *In2, - bool IsSigned) { - if (!IsSigned) - return Result->getValue().ugt(In1->getValue()); - - if (In2->isNegative()) - return Result->getValue().slt(In1->getValue()); +static bool addWithOverflow(APInt &Result, const APInt &In1, + const APInt &In2, bool IsSigned = false) { + bool Overflow; + if (IsSigned) + Result = In1.sadd_ov(In2, Overflow); + else + Result = In1.uadd_ov(In2, Overflow); - return Result->getValue().sgt(In1->getValue()); + return Overflow; } /// Compute Result = In1-In2, returning true if the result overflowed for this /// type.
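These hunks drop the hand-rolled, element-by-element overflow detection on `ConstantExpr`s in favor of APInt's checked operations, which return the wrapped result and report overflow through an out-parameter (splat vector constants are then handled by working on the splat value directly). A minimal sketch of that API (assumes LLVM headers/libs):

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  bool Overflow;
  APInt A(8, 100), B(8, 50); // two i8 values

  // Signed add: 100 + 50 doesn't fit in i8, so Overflow is set and the
  // result wraps (getSExtValue() == -106).
  APInt Sum = A.sadd_ov(B, Overflow);
  outs() << "sum=" << Sum.getSExtValue() << " overflow=" << Overflow << "\n";

  // Unsigned subtract: 100 - 50 = 50; no wrap, so Overflow is cleared.
  APInt Diff = A.usub_ov(B, Overflow);
  outs() << "diff=" << Diff.getZExtValue() << " overflow=" << Overflow << "\n";
  return 0;
}
```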
-static bool subWithOverflow(Constant *&Result, Constant *In1, - Constant *In2, bool IsSigned = false) { - Result = ConstantExpr::getSub(In1, In2); - - if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) { - for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) { - Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i); - if (hasSubOverflow(extractElement(Result, Idx), - extractElement(In1, Idx), - extractElement(In2, Idx), - IsSigned)) - return true; - } - return false; - } +static bool subWithOverflow(APInt &Result, const APInt &In1, + const APInt &In2, bool IsSigned = false) { + bool Overflow; + if (IsSigned) + Result = In1.ssub_ov(In2, Overflow); + else + Result = In1.usub_ov(In2, Overflow); - return hasSubOverflow(cast<ConstantInt>(Result), - cast<ConstantInt>(In1), cast<ConstantInt>(In2), - IsSigned); + return Overflow; } /// Given an icmp instruction, return true if any use of this comparison is a @@ -1365,6 +1318,24 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, return ExtractValueInst::Create(Call, 1, "sadd.overflow"); } +// Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0) +Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) { + CmpInst::Predicate Pred = Cmp.getPredicate(); + Value *X = Cmp.getOperand(0); + + if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) { + Value *A, *B; + SelectPatternResult SPR = matchSelectPattern(X, A, B); + if (SPR.Flavor == SPF_SMIN) { + if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT)) + return new ICmpInst(Pred, B, Cmp.getOperand(1)); + if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT)) + return new ICmpInst(Pred, A, Cmp.getOperand(1)); + } + } + return nullptr; +} + // Fold icmp Pred X, C. Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { CmpInst::Predicate Pred = Cmp.getPredicate(); @@ -1396,17 +1367,6 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { return Res; } - // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0) - if (C->isNullValue() && Pred == ICmpInst::ICMP_SGT) { - SelectPatternResult SPR = matchSelectPattern(X, A, B); - if (SPR.Flavor == SPF_SMIN) { - if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT)) - return new ICmpInst(Pred, B, Cmp.getOperand(1)); - if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT)) - return new ICmpInst(Pred, A, Cmp.getOperand(1)); - } - } - // FIXME: Use m_APInt to allow folds for splat constants. ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1)); if (!CI) @@ -1461,10 +1421,10 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { /// Fold icmp (trunc X, Y), C. Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc, - const APInt *C) { + const APInt &C) { ICmpInst::Predicate Pred = Cmp.getPredicate(); Value *X = Trunc->getOperand(0); - if (C->isOneValue() && C->getBitWidth() > 1) { + if (C.isOneValue() && C.getBitWidth() > 1) { // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1 Value *V = nullptr; if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V)))) @@ -1482,7 +1442,7 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, // If all the high bits are known, we can do this xform. if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) { // Pull in the high bits from known-ones set.
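The smin fold that `foldICmpWithZero` hoists into its own routine rests on a simple fact: when A is known positive, smin(A, B) is positive exactly when B is. A standalone brute-force check over i8 (editorial aside, not part of the patch):

```cpp
#include <algorithm>
#include <cstdio>

int main() {
  unsigned Failures = 0;
  for (int A = 1; A <= 127; ++A)      // A plays the known-positive operand
    for (int B = -128; B <= 127; ++B) // B is unconstrained
      if ((std::min(A, B) > 0) != (B > 0))
        ++Failures;
  std::printf("failures: %u\n", Failures); // expect 0
  return Failures != 0;
}
```

Moving the fold after the min/max checks in visitICmpInst (see the hunk further below) is what prevents it from fighting the reverse canonicalization and looping forever.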
- APInt NewRHS = C->zext(SrcBits); + APInt NewRHS = C.zext(SrcBits); NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits); return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS)); } @@ -1494,7 +1454,7 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, /// Fold icmp (xor X, Y), C. Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, BinaryOperator *Xor, - const APInt *C) { + const APInt &C) { Value *X = Xor->getOperand(0); Value *Y = Xor->getOperand(1); const APInt *XorC; @@ -1504,8 +1464,8 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, // If this is a comparison that tests the signbit (X < 0) or (x > -1), // fold the xor. ICmpInst::Predicate Pred = Cmp.getPredicate(); - if ((Pred == ICmpInst::ICMP_SLT && C->isNullValue()) || - (Pred == ICmpInst::ICMP_SGT && C->isAllOnesValue())) { + bool TrueIfSigned = false; + if (isSignBitCheck(Cmp.getPredicate(), C, TrueIfSigned)) { // If the sign bit of the XorCst is not set, there is no change to // the operation, just stop using the Xor. @@ -1515,17 +1475,13 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, return &Cmp; } - // Was the old condition true if the operand is positive? - bool isTrueIfPositive = Pred == ICmpInst::ICMP_SGT; - - // If so, the new one isn't. - isTrueIfPositive ^= true; - - Constant *CmpConstant = cast<Constant>(Cmp.getOperand(1)); - if (isTrueIfPositive) - return new ICmpInst(ICmpInst::ICMP_SGT, X, SubOne(CmpConstant)); + // Emit the opposite comparison. + if (TrueIfSigned) + return new ICmpInst(ICmpInst::ICMP_SGT, X, + ConstantInt::getAllOnesValue(X->getType())); else - return new ICmpInst(ICmpInst::ICMP_SLT, X, AddOne(CmpConstant)); + return new ICmpInst(ICmpInst::ICMP_SLT, X, + ConstantInt::getNullValue(X->getType())); } if (Xor->hasOneUse()) { @@ -1533,7 +1489,7 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, if (!Cmp.isEquality() && XorC->isSignMask()) { Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); - return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), *C ^ *XorC)); + return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC)); } // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask)) @@ -1541,18 +1497,18 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); Pred = Cmp.getSwappedPredicate(Pred); - return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), *C ^ *XorC)); + return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC)); } } // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C) // iff -C is a power of 2 - if (Pred == ICmpInst::ICMP_UGT && *XorC == ~(*C) && (*C + 1).isPowerOf2()) + if (Pred == ICmpInst::ICMP_UGT && *XorC == ~C && (C + 1).isPowerOf2()) return new ICmpInst(ICmpInst::ICMP_ULT, X, Y); // (icmp ult (xor X, C), -C) -> (icmp uge X, C) // iff -C is a power of 2 - if (Pred == ICmpInst::ICMP_ULT && *XorC == -(*C) && C->isPowerOf2()) + if (Pred == ICmpInst::ICMP_ULT && *XorC == -C && C.isPowerOf2()) return new ICmpInst(ICmpInst::ICMP_UGE, X, Y); return nullptr; @@ -1560,7 +1516,7 @@ Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp, /// Fold icmp (and (sh X, Y), C2), C1.
Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, - const APInt *C1, const APInt *C2) { + const APInt &C1, const APInt &C2) { BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0)); if (!Shift || !Shift->isShift()) return nullptr; @@ -1575,32 +1531,35 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, const APInt *C3; if (match(Shift->getOperand(1), m_APInt(C3))) { bool CanFold = false; - if (ShiftOpcode == Instruction::AShr) { - // There may be some constraints that make this possible, but nothing - // simple has been discovered yet. - CanFold = false; - } else if (ShiftOpcode == Instruction::Shl) { + if (ShiftOpcode == Instruction::Shl) { // For a left shift, we can fold if the comparison is not signed. We can // also fold a signed comparison if the mask value and comparison value // are not negative. These constraints may not be obvious, but we can // prove that they are correct using an SMT solver. - if (!Cmp.isSigned() || (!C2->isNegative() && !C1->isNegative())) + if (!Cmp.isSigned() || (!C2.isNegative() && !C1.isNegative())) CanFold = true; - } else if (ShiftOpcode == Instruction::LShr) { + } else { + bool IsAshr = ShiftOpcode == Instruction::AShr; // For a logical right shift, we can fold if the comparison is not signed. // We can also fold a signed comparison if the shifted mask value and the // shifted comparison value are not negative. These constraints may not be // obvious, but we can prove that they are correct using an SMT solver. - if (!Cmp.isSigned() || - (!C2->shl(*C3).isNegative() && !C1->shl(*C3).isNegative())) - CanFold = true; + // For an arithmetic shift right we can do the same, if we ensure + // the And doesn't use any bits being shifted in. Normally these would + // be turned into lshr by SimplifyDemandedBits, but not if there is an + // additional user. + if (!IsAshr || (C2.shl(*C3).lshr(*C3) == C2)) { + if (!Cmp.isSigned() || + (!C2.shl(*C3).isNegative() && !C1.shl(*C3).isNegative())) + CanFold = true; + } } if (CanFold) { - APInt NewCst = IsShl ? C1->lshr(*C3) : C1->shl(*C3); + APInt NewCst = IsShl ? C1.lshr(*C3) : C1.shl(*C3); APInt SameAsC1 = IsShl ? NewCst.shl(*C3) : NewCst.lshr(*C3); // Check to see if we are shifting out any of the bits being compared. - if (SameAsC1 != *C1) { + if (SameAsC1 != C1) { // If we shifted bits out, the fold is not going to work out. As a // special case, check to see if this means that the result is always // true or false now. @@ -1610,7 +1569,7 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType())); } else { Cmp.setOperand(1, ConstantInt::get(And->getType(), NewCst)); - APInt NewAndCst = IsShl ? C2->lshr(*C3) : C2->shl(*C3); + APInt NewAndCst = IsShl ? C2.lshr(*C3) : C2.shl(*C3); And->setOperand(1, ConstantInt::get(And->getType(), NewAndCst)); And->setOperand(0, Shift->getOperand(0)); Worklist.Add(Shift); // Shift is dead. @@ -1622,7 +1581,7 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, // Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is // preferable because it allows the C2 << Y expression to be hoisted out of a // loop if Y is invariant and X is not. - if (Shift->hasOneUse() && C1->isNullValue() && Cmp.isEquality() && + if (Shift->hasOneUse() && C1.isNullValue() && Cmp.isEquality() && !Shift->isArithmeticShift() && !isa<ConstantInt>(Shift->getOperand(0))) { // Compute C2 << Y.
Value *NewShift = @@ -1641,7 +1600,7 @@ /// Fold icmp (and X, C2), C1. Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, BinaryOperator *And, - const APInt *C1) { + const APInt &C1) { const APInt *C2; if (!match(And->getOperand(1), m_APInt(C2))) return nullptr; @@ -1659,28 +1618,28 @@ // when we're checking the sign bit would not work. Value *W; if (match(And->getOperand(0), m_OneUse(m_Trunc(m_Value(W)))) && - (Cmp.isEquality() || (!C1->isNegative() && !C2->isNegative()))) { + (Cmp.isEquality() || (!C1.isNegative() && !C2->isNegative()))) { // TODO: Is this a good transform for vectors? Wider types may reduce // throughput. Should this transform be limited (even for scalars) by using // shouldChangeType()? if (!Cmp.getType()->isVectorTy()) { Type *WideType = W->getType(); unsigned WideScalarBits = WideType->getScalarSizeInBits(); - Constant *ZextC1 = ConstantInt::get(WideType, C1->zext(WideScalarBits)); + Constant *ZextC1 = ConstantInt::get(WideType, C1.zext(WideScalarBits)); Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits)); Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName()); return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1); } } - if (Instruction *I = foldICmpAndShift(Cmp, And, C1, C2)) + if (Instruction *I = foldICmpAndShift(Cmp, And, C1, *C2)) return I; // (icmp pred (and (or (lshr A, B), A), 1), 0) --> // (icmp pred (and A, (or (shl 1, B), 1), 0)) // // iff pred isn't signed - if (!Cmp.isSigned() && C1->isNullValue() && And->getOperand(0)->hasOneUse() && + if (!Cmp.isSigned() && C1.isNullValue() && And->getOperand(0)->hasOneUse() && match(And->getOperand(1), m_One())) { Constant *One = cast<Constant>(And->getOperand(1)); Value *Or = And->getOperand(0); @@ -1720,7 +1679,7 @@ /// Fold icmp (and X, Y), C. Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And, - const APInt *C) { + const APInt &C) { if (Instruction *I = foldICmpAndConstConst(Cmp, And, C)) return I; @@ -1745,7 +1704,7 @@ // X & -C == -C -> X > u ~C // X & -C != -C -> X <= u ~C // iff C is a power of 2 - if (Cmp.getOperand(1) == Y && (-(*C)).isPowerOf2()) { + if (Cmp.getOperand(1) == Y && (-C).isPowerOf2()) { auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE; return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1)))); @@ -1755,7 +1714,7 @@ // (X & C2) != 0 -> (trunc X) < 0 // iff C2 is a power of 2 and it masks the sign bit of a legal integer type. const APInt *C2; - if (And->hasOneUse() && C->isNullValue() && match(Y, m_APInt(C2))) { + if (And->hasOneUse() && C.isNullValue() && match(Y, m_APInt(C2))) { int32_t ExactLogBase2 = C2->exactLogBase2(); if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) { Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1); @@ -1773,9 +1732,9 @@ /// Fold icmp (or X, Y), C.
Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, - const APInt *C) { + const APInt &C) { ICmpInst::Predicate Pred = Cmp.getPredicate(); - if (C->isOneValue()) { + if (C.isOneValue()) { // icmp slt signum(V) 1 --> icmp slt V, 1 Value *V = nullptr; if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V)))) @@ -1787,12 +1746,12 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, // X | C != C --> X >u C // iff C+1 is a power of 2 (C is a bitmask of the low bits) if (Cmp.isEquality() && Cmp.getOperand(1) == Or->getOperand(1) && - (*C + 1).isPowerOf2()) { + (C + 1).isPowerOf2()) { Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1)); } - if (!Cmp.isEquality() || !C->isNullValue() || !Or->hasOneUse()) + if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse()) return nullptr; Value *P, *Q; @@ -1826,7 +1785,7 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, /// Fold icmp (mul X, Y), C. Instruction *InstCombiner::foldICmpMulConstant(ICmpInst &Cmp, BinaryOperator *Mul, - const APInt *C) { + const APInt &C) { const APInt *MulC; if (!match(Mul->getOperand(1), m_APInt(MulC))) return nullptr; @@ -1834,7 +1793,7 @@ Instruction *InstCombiner::foldICmpMulConstant(ICmpInst &Cmp, // If this is a test of the sign bit and the multiply is sign-preserving with // a constant operand, use the multiply LHS operand instead. ICmpInst::Predicate Pred = Cmp.getPredicate(); - if (isSignTest(Pred, *C) && Mul->hasNoSignedWrap()) { + if (isSignTest(Pred, C) && Mul->hasNoSignedWrap()) { if (MulC->isNegative()) Pred = ICmpInst::getSwappedPredicate(Pred); return new ICmpInst(Pred, Mul->getOperand(0), @@ -1846,14 +1805,14 @@ Instruction *InstCombiner::foldICmpMulConstant(ICmpInst &Cmp, /// Fold icmp (shl 1, Y), C. 
static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl, - const APInt *C) { + const APInt &C) { Value *Y; if (!match(Shl, m_Shl(m_One(), m_Value(Y)))) return nullptr; Type *ShiftType = Shl->getType(); - uint32_t TypeBits = C->getBitWidth(); - bool CIsPowerOf2 = C->isPowerOf2(); + unsigned TypeBits = C.getBitWidth(); + bool CIsPowerOf2 = C.isPowerOf2(); ICmpInst::Predicate Pred = Cmp.getPredicate(); if (Cmp.isUnsigned()) { // (1 << Y) pred C -> Y pred Log2(C) @@ -1870,7 +1829,7 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl, // (1 << Y) >= 2147483648 -> Y >= 31 -> Y == 31 // (1 << Y) < 2147483648 -> Y < 31 -> Y != 31 - unsigned CLog2 = C->logBase2(); + unsigned CLog2 = C.logBase2(); if (CLog2 == TypeBits - 1) { if (Pred == ICmpInst::ICMP_UGE) Pred = ICmpInst::ICMP_EQ; @@ -1880,7 +1839,7 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl, return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2)); } else if (Cmp.isSigned()) { Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1); - if (C->isAllOnesValue()) { + if (C.isAllOnesValue()) { // (1 << Y) <= -1 -> Y == 31 if (Pred == ICmpInst::ICMP_SLE) return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne); @@ -1888,7 +1847,7 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl, // (1 << Y) > -1 -> Y != 31 if (Pred == ICmpInst::ICMP_SGT) return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne); - } else if (!(*C)) { + } else if (!C) { // (1 << Y) < 0 -> Y == 31 // (1 << Y) <= 0 -> Y == 31 if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) @@ -1900,7 +1859,7 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl, return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne); } } else if (Cmp.isEquality() && CIsPowerOf2) { - return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, C->logBase2())); + return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, C.logBase2())); } return nullptr; @@ -1909,10 +1868,10 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl, /// Fold icmp (shl X, Y), C. Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, BinaryOperator *Shl, - const APInt *C) { + const APInt &C) { const APInt *ShiftVal; if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal))) - return foldICmpShlConstConst(Cmp, Shl->getOperand(1), *C, *ShiftVal); + return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal); const APInt *ShiftAmt; if (!match(Shl->getOperand(1), m_APInt(ShiftAmt))) @@ -1920,7 +1879,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, // Check that the shift amount is in range. If not, don't perform undefined // shifts. When the shift is visited, it will be simplified. - unsigned TypeBits = C->getBitWidth(); + unsigned TypeBits = C.getBitWidth(); if (ShiftAmt->uge(TypeBits)) return nullptr; @@ -1934,15 +1893,15 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, if (Shl->hasNoSignedWrap()) { if (Pred == ICmpInst::ICMP_SGT) { // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt) - APInt ShiftedC = C->ashr(*ShiftAmt); + APInt ShiftedC = C.ashr(*ShiftAmt); return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); } if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) { // This is the same code as the SGT case, but assert the pre-condition // that is needed for this to work with equality predicates. 
- assert(C->ashr(*ShiftAmt).shl(*ShiftAmt) == *C && + assert(C.ashr(*ShiftAmt).shl(*ShiftAmt) == C && "Compare known true or false was not folded"); - APInt ShiftedC = C->ashr(*ShiftAmt); + APInt ShiftedC = C.ashr(*ShiftAmt); return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); } if (Pred == ICmpInst::ICMP_SLT) { @@ -1950,14 +1909,14 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, // (X << S) <=s C is equiv to X <=s (C >> S) for all C // (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX // (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN - assert(!C->isMinSignedValue() && "Unexpected icmp slt"); - APInt ShiftedC = (*C - 1).ashr(*ShiftAmt) + 1; + assert(!C.isMinSignedValue() && "Unexpected icmp slt"); + APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1; return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); } // If this is a signed comparison to 0 and the shift is sign preserving, // use the shift LHS operand instead; isSignTest may change 'Pred', so only // do that if we're sure to not continue on in this function. - if (isSignTest(Pred, *C)) + if (isSignTest(Pred, C)) return new ICmpInst(Pred, X, Constant::getNullValue(ShType)); } @@ -1967,15 +1926,15 @@ if (Shl->hasNoUnsignedWrap()) { if (Pred == ICmpInst::ICMP_UGT) { // icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt) - APInt ShiftedC = C->lshr(*ShiftAmt); + APInt ShiftedC = C.lshr(*ShiftAmt); return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); } if (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) { // This is the same code as the UGT case, but assert the pre-condition // that is needed for this to work with equality predicates. - assert(C->lshr(*ShiftAmt).shl(*ShiftAmt) == *C && + assert(C.lshr(*ShiftAmt).shl(*ShiftAmt) == C && "Compare known true or false was not folded"); - APInt ShiftedC = C->lshr(*ShiftAmt); + APInt ShiftedC = C.lshr(*ShiftAmt); return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); } if (Pred == ICmpInst::ICMP_ULT) { @@ -1983,8 +1942,8 @@ // (X << S) <=u C is equiv to X <=u (C >> S) for all C // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0 - assert(C->ugt(0) && "ult 0 should have been eliminated"); - APInt ShiftedC = (*C - 1).lshr(*ShiftAmt) + 1; + assert(C.ugt(0) && "ult 0 should have been eliminated"); + APInt ShiftedC = (C - 1).lshr(*ShiftAmt) + 1; return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC)); } } @@ -1995,13 +1954,13 @@ ShType, APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue())); Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); - Constant *LShrC = ConstantInt::get(ShType, C->lshr(*ShiftAmt)); + Constant *LShrC = ConstantInt::get(ShType, C.lshr(*ShiftAmt)); return new ICmpInst(Pred, And, LShrC); } // Otherwise, if this is a comparison of the sign bit, simplify to and/test. bool TrueIfSigned = false; - if (Shl->hasOneUse() && isSignBitCheck(Pred, *C, TrueIfSigned)) { + if (Shl->hasOneUse() && isSignBitCheck(Pred, C, TrueIfSigned)) { // (X << 31) <s 0 --> (X & 1) != 0 Constant *Mask = ConstantInt::get( ShType, @@ -2018,13 +1977,13 @@ // free on the target. It has the additional benefit of comparing to a // smaller constant that may be more target-friendly.
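For the nsw-shl SLT rewrite earlier in this hunk, the claimed equivalence is (X << S) <s C  ⇔  X <s ((C - 1) >>s S) + 1, given that the shift is nsw and C != SMIN. A standalone i8 brute-force check (editorial aside, not part of the patch; it relies on `>>` of a negative int being an arithmetic shift, which mainstream compilers provide and C++20 guarantees):

```cpp
#include <cstdio>

int main() {
  unsigned Failures = 0;
  for (int S = 0; S < 8; ++S)
    for (int X = -128; X <= 127; ++X) {
      int Shifted = X * (1 << S); // same value as X << S, minus the UB
      if (Shifted < -128 || Shifted > 127)
        continue; // the shl would not be nsw for this X; fold doesn't apply
      for (int C = -127; C <= 127; ++C) { // C != SMIN, per the assert
        bool Original = Shifted < C;
        bool Folded = X < ((C - 1) >> S) + 1; // >> acts as ashr here
        if (Original != Folded)
          ++Failures;
      }
    }
  std::printf("failures: %u\n", Failures); // expect 0
  return Failures != 0;
}
```

The nuw/ULT case below is the unsigned mirror image of the same argument.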
unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1); - if (Shl->hasOneUse() && Amt != 0 && C->countTrailingZeros() >= Amt && + if (Shl->hasOneUse() && Amt != 0 && C.countTrailingZeros() >= Amt && DL.isLegalInteger(TypeBits - Amt)) { Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt); if (ShType->isVectorTy()) TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements()); Constant *NewC = - ConstantInt::get(TruncTy, C->ashr(*ShiftAmt).trunc(TypeBits - Amt)); + ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt)); return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC); } @@ -2034,18 +1993,18 @@ /// Fold icmp ({al}shr X, Y), C. Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, BinaryOperator *Shr, - const APInt *C) { + const APInt &C) { // An exact shr only shifts out zero bits, so: // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0 Value *X = Shr->getOperand(0); CmpInst::Predicate Pred = Cmp.getPredicate(); if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && - C->isNullValue()) + C.isNullValue()) return new ICmpInst(Pred, X, Cmp.getOperand(1)); const APInt *ShiftVal; if (Cmp.isEquality() && match(Shr->getOperand(0), m_APInt(ShiftVal))) - return foldICmpShrConstConst(Cmp, Shr->getOperand(1), *C, *ShiftVal); + return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftVal); const APInt *ShiftAmt; if (!match(Shr->getOperand(1), m_APInt(ShiftAmt))) @@ -2053,71 +2012,73 @@ // Check that the shift amount is in range. If not, don't perform undefined // shifts. When the shift is visited it will be simplified. - unsigned TypeBits = C->getBitWidth(); + unsigned TypeBits = C.getBitWidth(); unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits); if (ShAmtVal >= TypeBits || ShAmtVal == 0) return nullptr; bool IsAShr = Shr->getOpcode() == Instruction::AShr; - if (!Cmp.isEquality()) { - // If we have an unsigned comparison and an ashr, we can't simplify this. - // Similarly for signed comparisons with lshr. - if (Cmp.isSigned() != IsAShr) - return nullptr; - - // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv - // by a power of 2. Since we already have logic to simplify these, - // transform to div and then simplify the resultant comparison. - if (IsAShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1)) - return nullptr; - - // Revisit the shift (to delete it). - Worklist.Add(Shr); - - Constant *DivCst = ConstantInt::get( - Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); - - Value *Tmp = IsAShr ? Builder.CreateSDiv(X, DivCst, "", Shr->isExact()) - : Builder.CreateUDiv(X, DivCst, "", Shr->isExact()); - - Cmp.setOperand(0, Tmp); - - // If the builder folded the binop, just return it. - BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp); - if (!TheDiv) - return &Cmp; - - // Otherwise, fold this div/compare. - assert(TheDiv->getOpcode() == Instruction::SDiv || - TheDiv->getOpcode() == Instruction::UDiv); - - Instruction *Res = foldICmpDivConstant(Cmp, TheDiv, C); - assert(Res && "This div/cst should have folded!"); - return Res; + bool IsExact = Shr->isExact(); + Type *ShrTy = Shr->getType(); + // TODO: If we could guarantee that InstSimplify would handle all of the + // constant-value-based preconditions in the folds below, then we could assert + // those conditions rather than checking them. This is difficult because of + // undef/poison (PR34838).
+ if (IsAShr) { + if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) { + // icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC) + // icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC) + APInt ShiftedC = C.shl(ShAmtVal); + if (ShiftedC.ashr(ShAmtVal) == C) + return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); + } + if (Pred == CmpInst::ICMP_SGT) { + // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1 + APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1; + if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() && + (ShiftedC + 1).ashr(ShAmtVal) == (C + 1)) + return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); + } + } else { + if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) { + // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC) + // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC) + APInt ShiftedC = C.shl(ShAmtVal); + if (ShiftedC.lshr(ShAmtVal) == C) + return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); + } + if (Pred == CmpInst::ICMP_UGT) { + // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1 + APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1; + if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1)) + return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC)); + } } + if (!Cmp.isEquality()) + return nullptr; + // Handle equality comparisons of shift-by-constant. // If the comparison constant changes with the shift, the comparison cannot // succeed (bits of the comparison constant cannot match the shifted value). // This should be known by InstSimplify and already be folded to true/false. - assert(((IsAShr && C->shl(ShAmtVal).ashr(ShAmtVal) == *C) || - (!IsAShr && C->shl(ShAmtVal).lshr(ShAmtVal) == *C)) && + assert(((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) || + (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) && "Expected icmp+shr simplify did not occur."); - // Check if the bits shifted out are known to be zero. If so, we can compare - // against the unshifted value: + // If the bits shifted out are known zero, compare the unshifted value: // (X & 4) >> 1 == 2 --> (X & 4) == 4. - Constant *ShiftedCmpRHS = ConstantInt::get(Shr->getType(), *C << ShAmtVal); - if (Shr->hasOneUse()) { - if (Shr->isExact()) - return new ICmpInst(Pred, X, ShiftedCmpRHS); + if (Shr->isExact()) + return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal)); - // Otherwise strength reduce the shift into an 'and'. + if (Shr->hasOneUse()) { + // Canonicalize the shift into an 'and': + // icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt) APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); - Constant *Mask = ConstantInt::get(Shr->getType(), Val); + Constant *Mask = ConstantInt::get(ShrTy, Val); Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask"); - return new ICmpInst(Pred, And, ShiftedCmpRHS); + return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal)); } return nullptr; @@ -2126,7 +2087,7 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, /// Fold icmp (udiv X, Y), C. 
Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp, BinaryOperator *UDiv, - const APInt *C) { + const APInt &C) { const APInt *C2; if (!match(UDiv->getOperand(0), m_APInt(C2))) return nullptr; @@ -2136,17 +2097,17 @@ // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1)) Value *Y = UDiv->getOperand(1); if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) { - assert(!C->isMaxValue() && + assert(!C.isMaxValue() && "icmp ugt X, UINT_MAX should have been simplified already."); return new ICmpInst(ICmpInst::ICMP_ULE, Y, - ConstantInt::get(Y->getType(), C2->udiv(*C + 1))); + ConstantInt::get(Y->getType(), C2->udiv(C + 1))); } // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C) if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) { - assert(*C != 0 && "icmp ult X, 0 should have been simplified already."); + assert(C != 0 && "icmp ult X, 0 should have been simplified already."); return new ICmpInst(ICmpInst::ICMP_UGT, Y, - ConstantInt::get(Y->getType(), C2->udiv(*C))); + ConstantInt::get(Y->getType(), C2->udiv(C))); } return nullptr; @@ -2155,7 +2116,7 @@ /// Fold icmp ({su}div X, Y), C. Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, BinaryOperator *Div, - const APInt *C) { + const APInt &C) { // Fold: icmp pred ([us]div X, C2), C -> range test // Fold this div into the comparison, producing a range check. // Determine, based on the divide type, what the range is being // @@ -2186,28 +2147,22 @@ (DivIsSigned && C2->isAllOnesValue())) return nullptr; - // TODO: We could do all of the computations below using APInt. - Constant *CmpRHS = cast<Constant>(Cmp.getOperand(1)); - Constant *DivRHS = cast<Constant>(Div->getOperand(1)); - - // Compute Prod = CmpRHS * DivRHS. We are essentially solving an equation of - // form X / C2 = C. We solve for X by multiplying C2 (DivRHS) and C (CmpRHS). + // Compute Prod = C * C2. We are essentially solving an equation of + // form X / C2 = C. We solve for X by multiplying C2 and C. // By solving for X, we can turn this into a range check instead of computing // a divide. - Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS); + APInt Prod = C * *C2; // Determine if the product overflows by seeing if the product is not equal to // the divide. Make sure we do the same kind of divide as in the LHS // instruction that we're folding. - bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) - : ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS; + bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C; ICmpInst::Predicate Pred = Cmp.getPredicate(); // If the division is known to be exact, then there is no remainder from the // divide, so the covered range size is unit, otherwise it is the divisor. - Constant *RangeSize = - Div->isExact() ? ConstantInt::get(Div->getType(), 1) : DivRHS; + APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2; // Figure out the interval that is being checked. For example, a comparison // like "X /u 5 == 0" is really checking that X is in the interval [0, 5). @@ -2217,7 +2172,7 @@ // overflow variable is set to 0 if it's corresponding bound variable is valid // -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
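Concretely, the range transform described above turns `X /u 5 == 3` into the range test `15 <=u X <u 20` (Prod = 15, RangeSize = 5). A standalone check over every i8 value (editorial aside, not part of the patch):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  unsigned Failures = 0;
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    bool DivForm = (X / 5 == 3);          // icmp eq (udiv X, 5), 3
    bool RangeForm = (X >= 15 && X < 20); // [Prod, Prod + RangeSize)
    if (DivForm != RangeForm)
      ++Failures;
  }
  std::printf("failures: %u\n", Failures); // expect 0
  return Failures != 0;
}
```

The LoOverflow/HiOverflow bookkeeping that follows exists because computing Prod or Prod + RangeSize can wrap, in which case one end of the interval degenerates into "always" or "never".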
int LoOverflow = 0, HiOverflow = 0; - Constant *LoBound = nullptr, *HiBound = nullptr; + APInt LoBound, HiBound; if (!DivIsSigned) { // udiv // e.g. X/5 op 3 --> [15, 20) @@ -2229,38 +2184,38 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false); } } else if (C2->isStrictlyPositive()) { // Divisor is > 0. - if (C->isNullValue()) { // (X / pos) op 0 + if (C.isNullValue()) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) - LoBound = ConstantExpr::getNeg(SubOne(RangeSize)); + LoBound = -(RangeSize - 1); HiBound = RangeSize; - } else if (C->isStrictlyPositive()) { // (X / pos) op pos + } else if (C.isStrictlyPositive()) { // (X / pos) op pos LoBound = Prod; // e.g. X/5 op 3 --> [15, 20) HiOverflow = LoOverflow = ProdOV; if (!HiOverflow) HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true); } else { // (X / pos) op neg // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14) - HiBound = AddOne(Prod); + HiBound = Prod + 1; LoOverflow = HiOverflow = ProdOV ? -1 : 0; if (!LoOverflow) { - Constant *DivNeg = ConstantExpr::getNeg(RangeSize); + APInt DivNeg = -RangeSize; LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0; } } } else if (C2->isNegative()) { // Divisor is < 0. if (Div->isExact()) - RangeSize = ConstantExpr::getNeg(RangeSize); - if (C->isNullValue()) { // (X / neg) op 0 + RangeSize.negate(); + if (C.isNullValue()) { // (X / neg) op 0 // e.g. X/-5 op 0 --> [-4, 5) - LoBound = AddOne(RangeSize); - HiBound = ConstantExpr::getNeg(RangeSize); - if (HiBound == DivRHS) { // -INTMIN = INTMIN + LoBound = RangeSize + 1; + HiBound = -RangeSize; + if (HiBound == *C2) { // -INTMIN = INTMIN HiOverflow = 1; // [INTMIN+1, overflow) - HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN + HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN } - } else if (C->isStrictlyPositive()) { // (X / neg) op pos + } else if (C.isStrictlyPositive()) { // (X / neg) op pos // e.g. X/-5 op 3 --> [-19, -14) - HiBound = AddOne(Prod); + HiBound = Prod + 1; HiOverflow = LoOverflow = ProdOV ? -1 : 0; if (!LoOverflow) LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0; @@ -2283,25 +2238,27 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, return replaceInstUsesWith(Cmp, Builder.getFalse()); if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, LoBound); + ICmpInst::ICMP_UGE, X, + ConstantInt::get(Div->getType(), LoBound)); if (LoOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, HiBound); + ICmpInst::ICMP_ULT, X, + ConstantInt::get(Div->getType(), HiBound)); return replaceInstUsesWith( - Cmp, insertRangeTest(X, LoBound->getUniqueInteger(), - HiBound->getUniqueInteger(), DivIsSigned, true)); + Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) return replaceInstUsesWith(Cmp, Builder.getTrue()); if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, X, LoBound); + ICmpInst::ICMP_ULT, X, + ConstantInt::get(Div->getType(), LoBound)); if (LoOverflow) return new ICmpInst(DivIsSigned ? 
ICmpInst::ICMP_SGE : - ICmpInst::ICMP_UGE, X, HiBound); + ICmpInst::ICMP_UGE, X, + ConstantInt::get(Div->getType(), HiBound)); return replaceInstUsesWith(Cmp, - insertRangeTest(X, LoBound->getUniqueInteger(), - HiBound->getUniqueInteger(), + insertRangeTest(X, LoBound, HiBound, DivIsSigned, false)); case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: @@ -2309,7 +2266,7 @@ return replaceInstUsesWith(Cmp, Builder.getTrue()); if (LoOverflow == -1) // Low bound is less than input range. return replaceInstUsesWith(Cmp, Builder.getFalse()); - return new ICmpInst(Pred, X, LoBound); + return new ICmpInst(Pred, X, ConstantInt::get(Div->getType(), LoBound)); case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: if (HiOverflow == +1) // High bound greater than input range. @@ -2317,8 +2274,10 @@ if (HiOverflow == -1) // High bound less than input range. return replaceInstUsesWith(Cmp, Builder.getTrue()); if (Pred == ICmpInst::ICMP_UGT) - return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); - return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); + return new ICmpInst(ICmpInst::ICMP_UGE, X, + ConstantInt::get(Div->getType(), HiBound)); + return new ICmpInst(ICmpInst::ICMP_SGE, X, + ConstantInt::get(Div->getType(), HiBound)); } return nullptr; @@ -2327,7 +2286,7 @@ /// Fold icmp (sub X, Y), C. Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, BinaryOperator *Sub, - const APInt *C) { + const APInt &C) { Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1); ICmpInst::Predicate Pred = Cmp.getPredicate(); @@ -2338,19 +2297,19 @@ if (Sub->hasNoSignedWrap()) { // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y) - if (Pred == ICmpInst::ICMP_SGT && C->isAllOnesValue()) + if (Pred == ICmpInst::ICMP_SGT && C.isAllOnesValue()) return new ICmpInst(ICmpInst::ICMP_SGE, X, Y); // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y) - if (Pred == ICmpInst::ICMP_SGT && C->isNullValue()) + if (Pred == ICmpInst::ICMP_SGT && C.isNullValue()) return new ICmpInst(ICmpInst::ICMP_SGT, X, Y); // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y) - if (Pred == ICmpInst::ICMP_SLT && C->isNullValue()) + if (Pred == ICmpInst::ICMP_SLT && C.isNullValue()) return new ICmpInst(ICmpInst::ICMP_SLT, X, Y); // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y) - if (Pred == ICmpInst::ICMP_SLT && C->isOneValue()) + if (Pred == ICmpInst::ICMP_SLT && C.isOneValue()) return new ICmpInst(ICmpInst::ICMP_SLE, X, Y); } @@ -2360,14 +2319,14 @@ // C2 - Y <u C -> (Y | (C - 1)) == C2 // iff (C2 & (C - 1)) == C - 1 and C is a power of 2 - if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && - (*C2 & (*C - 1)) == (*C - 1)) - return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, *C - 1), X); + if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && + (*C2 & (C - 1)) == (C - 1)) + return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, C - 1), X); // C2 - Y >u C -> (Y | C) != C2 // iff C2 & C == C and C + 1 is a power of 2 - if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == *C) - return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, *C), X); + if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C) + return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X); return nullptr; } @@ -2375,7 +2334,7 @@
Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, /// Fold icmp (add X, Y), C. Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, BinaryOperator *Add, - const APInt *C) { + const APInt &C) { Value *Y = Add->getOperand(1); const APInt *C2; if (Cmp.isEquality() || !match(Y, m_APInt(C2))) return nullptr; @@ -2392,7 +2351,7 @@ if (Add->hasNoSignedWrap() && (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) { bool Overflow; - APInt NewC = C->ssub_ov(*C2, Overflow); + APInt NewC = C.ssub_ov(*C2, Overflow); // If there is overflow, the result must be true or false. // TODO: Can we assert there is no overflow because InstSimplify always // handles those cases? @@ -2401,7 +2360,7 @@ return new ICmpInst(Pred, X, ConstantInt::get(Ty, NewC)); } - auto CR = ConstantRange::makeExactICmpRegion(Pred, *C).subtract(*C2); + auto CR = ConstantRange::makeExactICmpRegion(Pred, C).subtract(*C2); const APInt &Upper = CR.getUpper(); const APInt &Lower = CR.getLower(); if (Cmp.isSigned()) { @@ -2422,15 +2381,15 @@ // X+C <u C2 -> (X & -C2) == C // iff C & (C2-1) == 0 // C2 is a power of 2 - if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && (*C2 & (*C - 1)) == 0) - return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -(*C)), + if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && (*C2 & (C - 1)) == 0) + return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -C), ConstantExpr::getNeg(cast<Constant>(Y))); // X+C >u C2 -> (X & ~C2) != C // iff C & C2 == 0 // C2+1 is a power of 2 - if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == 0) - return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~(*C)), + if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == 0) + return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C), ConstantExpr::getNeg(cast<Constant>(Y))); return nullptr; @@ -2517,51 +2476,51 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) { if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) { switch (BO->getOpcode()) { case Instruction::Xor: - if (Instruction *I = foldICmpXorConstant(Cmp, BO, C)) + if (Instruction *I = foldICmpXorConstant(Cmp, BO, *C)) return I; break; case Instruction::And: - if (Instruction *I = foldICmpAndConstant(Cmp, BO, C)) + if (Instruction *I = foldICmpAndConstant(Cmp, BO, *C)) return I; break; case Instruction::Or: - if (Instruction *I = foldICmpOrConstant(Cmp, BO, C)) + if (Instruction *I = foldICmpOrConstant(Cmp, BO, *C)) return I; break; case Instruction::Mul: - if (Instruction *I = foldICmpMulConstant(Cmp, BO, C)) + if (Instruction *I = foldICmpMulConstant(Cmp, BO, *C)) return I; break; case Instruction::Shl: - if (Instruction *I = foldICmpShlConstant(Cmp, BO, C)) + if (Instruction *I = foldICmpShlConstant(Cmp, BO, *C)) return I; break; case Instruction::LShr: case Instruction::AShr: - if (Instruction *I = foldICmpShrConstant(Cmp, BO, C)) + if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C)) return I; break; case Instruction::UDiv: - if (Instruction *I = foldICmpUDivConstant(Cmp, BO, C)) + if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C)) return I; LLVM_FALLTHROUGH; case Instruction::SDiv: - if (Instruction *I = foldICmpDivConstant(Cmp, BO, C)) + if (Instruction *I = foldICmpDivConstant(Cmp, BO, *C)) return I; break; case Instruction::Sub: - if (Instruction *I = foldICmpSubConstant(Cmp, BO, C)) + if (Instruction *I =
foldICmpSubConstant(Cmp, BO, *C)) return I; break; case Instruction::Add: - if (Instruction *I = foldICmpAddConstant(Cmp, BO, C)) + if (Instruction *I = foldICmpAddConstant(Cmp, BO, *C)) return I; break; default: break; } // TODO: These folds could be refactored to be part of the above calls. - if (Instruction *I = foldICmpBinOpEqualityWithConstant(Cmp, BO, C)) + if (Instruction *I = foldICmpBinOpEqualityWithConstant(Cmp, BO, *C)) return I; } @@ -2577,11 +2536,11 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) { } if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) { - if (Instruction *I = foldICmpTruncConstant(Cmp, TI, C)) + if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C)) return I; } - if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, C)) + if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C)) return I; return nullptr; @@ -2591,7 +2550,7 @@ Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) { /// icmp eq/ne BO, C. Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, BinaryOperator *BO, - const APInt *C) { + const APInt &C) { // TODO: Some of these folds could work with arbitrary constants, but this // function is limited to scalar and vector splat constants. if (!Cmp.isEquality()) return nullptr; @@ -2605,7 +2564,7 @@ switch (BO->getOpcode()) { case Instruction::SRem: // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one. - if (C->isNullValue() && BO->hasOneUse()) { + if (C.isNullValue() && BO->hasOneUse()) { const APInt *BOC; if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) { Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName()); @@ -2622,7 +2581,7 @@ Constant *SubC = ConstantExpr::getSub(RHS, cast<Constant>(BOp1)); return new ICmpInst(Pred, BOp0, SubC); } - } else if (C->isNullValue()) { + } else if (C.isNullValue()) { // Replace ((add A, B) != 0) with (A != -B) if A or B is // efficiently invertible, or if the add has just this one use. if (Value *NegVal = dyn_castNegVal(BOp1)) @@ -2643,7 +2602,7 @@ // For the xor case, we can xor two constants together, eliminating // the explicit xor. return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC)); - } else if (C->isNullValue()) { + } else if (C.isNullValue()) { // Replace ((xor A, B) != 0) with (A != B) return new ICmpInst(Pred, BOp0, BOp1); } @@ -2656,7 +2615,7 @@ // Replace ((sub BOC, B) != C) with (B != BOC-C). Constant *SubC = ConstantExpr::getSub(cast<Constant>(BOp0), RHS); return new ICmpInst(Pred, BOp1, SubC); - } else if (C->isNullValue()) { + } else if (C.isNullValue()) { // Replace ((sub A, B) != 0) with (A != B). return new ICmpInst(Pred, BOp0, BOp1); } @@ -2678,7 +2637,7 @@ const APInt *BOC; if (match(BOp1, m_APInt(BOC))) { // If we have ((X & C) == C), turn it into ((X & C) != 0). - if (C == BOC && C->isPowerOf2()) + if (C == *BOC && C.isPowerOf2()) return new ICmpInst(isICMP_NE ?
ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, BO, Constant::getNullValue(RHS->getType())); @@ -2694,7 +2653,7 @@ } // ((X & ~7) == 0) --> X < 8 - if (C->isNullValue() && (~(*BOC) + 1).isPowerOf2()) { + if (C.isNullValue() && (~(*BOC) + 1).isPowerOf2()) { Constant *NegBOC = ConstantExpr::getNeg(cast<Constant>(BOp1)); auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; return new ICmpInst(NewPred, BOp0, NegBOC); @@ -2703,7 +2662,7 @@ break; } case Instruction::Mul: - if (C->isNullValue() && BO->hasNoSignedWrap()) { + if (C.isNullValue() && BO->hasNoSignedWrap()) { const APInt *BOC; if (match(BOp1, m_APInt(BOC)) && !BOC->isNullValue()) { // The trivial case (mul X, 0) is handled by InstSimplify. @@ -2714,7 +2673,7 @@ } break; case Instruction::UDiv: - if (C->isNullValue()) { + if (C.isNullValue()) { // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A) auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT; return new ICmpInst(NewPred, BOp1, BOp0); @@ -2728,7 +2687,7 @@ /// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C. Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, - const APInt *C) { + const APInt &C) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0)); if (!II || !Cmp.isEquality()) return nullptr; @@ -2739,13 +2698,13 @@ case Intrinsic::bswap: Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - Cmp.setOperand(1, ConstantInt::get(Ty, C->byteSwap())); + Cmp.setOperand(1, ConstantInt::get(Ty, C.byteSwap())); return &Cmp; case Intrinsic::ctlz: case Intrinsic::cttz: // ctz(A) == bitwidth(A) -> A == 0 and likewise for != - if (*C == C->getBitWidth()) { + if (C == C.getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); Cmp.setOperand(1, ConstantInt::getNullValue(Ty)); @@ -2756,8 +2715,8 @@ case Intrinsic::ctpop: { // popcount(A) == 0 -> A == 0 and likewise for != // popcount(A) == bitwidth(A) -> A == -1 and likewise for != - bool IsZero = C->isNullValue(); - if (IsZero || *C == C->getBitWidth()) { + bool IsZero = C.isNullValue(); + if (IsZero || C == C.getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); auto *NewOp = @@ -4509,6 +4468,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) return nullptr; + // Do this after checking for min/max to prevent infinite looping. + if (Instruction *Res = foldICmpWithZero(I)) + return Res; + // FIXME: We only do this after checking for min/max to prevent infinite // looping caused by a reverse canonicalization of these patterns for min/max. // FIXME: The organization of folds is a mess. These would naturally go into diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 22edcfa044415..51ba30a986074 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -6,42 +6,59 @@ // License. See LICENSE.TXT for details.
// //===----------------------------------------------------------------------===// +// /// \file /// /// This file provides internal interfaces used to implement the InstCombine. -/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H #define LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Dominators.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Operator.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/Pass.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include "llvm/Transforms/Utils/Local.h" +#include <cassert> +#include <cstdint> #define DEBUG_TYPE "instcombine" namespace llvm { + +class APInt; +class AssumptionCache; class CallSite; class DataLayout; class DominatorTree; -class TargetLibraryInfo; -class MemIntrinsic; -class MemSetInst; +class GEPOperator; +class GlobalVariable; +class LoopInfo; class OptimizationRemarkEmitter; +class TargetLibraryInfo; +class User; /// Assign a complexity or rank value to LLVM Values. This is used to reduce /// the amount of pattern matching needed for compares and commutative @@ -109,6 +126,7 @@ static inline Value *peekThroughBitcast(Value *V, bool OneUseOnly = false) { static inline Constant *AddOne(Constant *C) { return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); } + /// \brief Subtract one from a Constant static inline Constant *SubOne(Constant *C) { return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1)); } @@ -118,7 +136,6 @@ static inline Constant *SubOne(Constant *C) { /// This happens in cases where the ~ can be eliminated. If WillInvertAllUses /// is true, work under the assumption that the caller intends to remove all /// uses of V and only keep uses of ~V. -/// static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) { // ~(~(X)) -> X. if (BinaryOperator::isNot(V)) @@ -161,7 +178,6 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) { return false; } - /// \brief Specific patterns of overflow check idioms that we match. enum OverflowCheckFlavor { OCF_UNSIGNED_ADD, @@ -209,12 +225,13 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner /// \brief An IRBuilder that automatically inserts new instructions into the /// worklist. - typedef IRBuilder<TargetFolder, IRBuilderCallbackInserter> BuilderTy; + using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>; BuilderTy &Builder; private: // Mode in which we are running the combiner. const bool MinimizeSize; + /// Enable combines that trigger rarely but are costly in compiletime.
const bool ExpensiveCombines; @@ -227,11 +244,12 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner const DataLayout &DL; const SimplifyQuery SQ; OptimizationRemarkEmitter &ORE; + // Optional analyses. When non-null, these can both be used to do better // combining and will be updated to reflect any changes. LoopInfo *LI; - bool MadeIRChange; + bool MadeIRChange = false; public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, @@ -241,7 +259,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI), MadeIRChange(false) {} + DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {} /// \brief Run the combiner over the entire worklist until it is empty. /// @@ -277,7 +295,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner Instruction *visitURem(BinaryOperator &I); Instruction *visitSRem(BinaryOperator &I); Instruction *visitFRem(BinaryOperator &I); - bool SimplifyDivRemOfSelect(BinaryOperator &I); + bool simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I); Instruction *commonRemTransforms(BinaryOperator &I); Instruction *commonIRemTransforms(BinaryOperator &I); Instruction *commonDivTransforms(BinaryOperator &I); @@ -413,27 +431,32 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner bool DoTransform = true); Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); + bool willNotOverflowSignedAdd(const Value *LHS, const Value *RHS, const Instruction &CxtI) const { return computeOverflowForSignedAdd(LHS, RHS, &CxtI) == OverflowResult::NeverOverflows; - }; + } + bool willNotOverflowUnsignedAdd(const Value *LHS, const Value *RHS, const Instruction &CxtI) const { return computeOverflowForUnsignedAdd(LHS, RHS, &CxtI) == OverflowResult::NeverOverflows; - }; + } + bool willNotOverflowSignedSub(const Value *LHS, const Value *RHS, const Instruction &CxtI) const; bool willNotOverflowUnsignedSub(const Value *LHS, const Value *RHS, const Instruction &CxtI) const; bool willNotOverflowSignedMul(const Value *LHS, const Value *RHS, const Instruction &CxtI) const; + bool willNotOverflowUnsignedMul(const Value *LHS, const Value *RHS, const Instruction &CxtI) const { return computeOverflowForUnsignedMul(LHS, RHS, &CxtI) == OverflowResult::NeverOverflows; - }; + } + Value *EmitGEPOffset(User *GEP); Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask); @@ -548,6 +571,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner unsigned Depth, const Instruction *CxtI) const { llvm::computeKnownBits(V, Known, DL, Depth, &AC, CxtI, &DT); } + KnownBits computeKnownBits(const Value *V, unsigned Depth, const Instruction *CxtI) const { return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT); @@ -563,20 +587,24 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner const Instruction *CxtI = nullptr) const { return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT); } + unsigned ComputeNumSignBits(const Value *Op, unsigned Depth = 0, const Instruction *CxtI = nullptr) const { return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT); } + OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const Instruction *CxtI) const { return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT); } + OverflowResult computeOverflowForUnsignedAdd(const Value *LHS, const Value *RHS, const Instruction *CxtI) const { return 
llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT); } + OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS, const Instruction *CxtI) const { @@ -626,6 +654,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, unsigned Depth = 0); + /// Helper routine of SimplifyDemandedUseBits. It computes KnownZero/KnownOne /// bits. It also tries to handle simplifications that can be done based on /// DemandedMask, but without modifying the Instruction. @@ -633,6 +662,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner const APInt &DemandedMask, KnownBits &Known, unsigned Depth, Instruction *CxtI); + /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. Value *simplifyShrShlDemandedBits( @@ -663,6 +693,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner /// This is a convenience wrapper function for the above two functions. Instruction *foldOpWithConstantIntoOperand(BinaryOperator &I); + Instruction *foldAddWithConstant(BinaryOperator &Add); + /// \brief Try to rotate an operation below a PHI node, using PHI nodes for /// its operands. Instruction *FoldPHIArgOpIntoPHI(PHINode &PN); @@ -671,9 +703,14 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); Instruction *FoldPHIArgZextsIntoPHI(PHINode &PN); - /// Helper function for FoldPHIArgXIntoPHI() to get debug location for the + /// If an integer typed PHI has only one use which is an IntToPtr operation, + /// replace the PHI with an existing pointer typed PHI if it exists. Otherwise + /// insert a new pointer typed PHI and replace the original one. + Instruction *FoldIntegerTypedPHI(PHINode &PN); + + /// Helper function for FoldPHIArgXIntoPHI() to set debug location for the /// folded operation. 
- DebugLoc PHIArgMergedDebugLoc(PHINode &PN); + void PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN); Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); @@ -694,35 +731,36 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp); Instruction *foldICmpBinOp(ICmpInst &Cmp); Instruction *foldICmpEquality(ICmpInst &Cmp); + Instruction *foldICmpWithZero(ICmpInst &Cmp); Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select, ConstantInt *C); Instruction *foldICmpTruncConstant(ICmpInst &Cmp, TruncInst *Trunc, - const APInt *C); + const APInt &C); Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And, - const APInt *C); + const APInt &C); Instruction *foldICmpXorConstant(ICmpInst &Cmp, BinaryOperator *Xor, - const APInt *C); + const APInt &C); Instruction *foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, - const APInt *C); + const APInt &C); Instruction *foldICmpMulConstant(ICmpInst &Cmp, BinaryOperator *Mul, - const APInt *C); + const APInt &C); Instruction *foldICmpShlConstant(ICmpInst &Cmp, BinaryOperator *Shl, - const APInt *C); + const APInt &C); Instruction *foldICmpShrConstant(ICmpInst &Cmp, BinaryOperator *Shr, - const APInt *C); + const APInt &C); Instruction *foldICmpUDivConstant(ICmpInst &Cmp, BinaryOperator *UDiv, - const APInt *C); + const APInt &C); Instruction *foldICmpDivConstant(ICmpInst &Cmp, BinaryOperator *Div, - const APInt *C); + const APInt &C); Instruction *foldICmpSubConstant(ICmpInst &Cmp, BinaryOperator *Sub, - const APInt *C); + const APInt &C); Instruction *foldICmpAddConstant(ICmpInst &Cmp, BinaryOperator *Add, - const APInt *C); + const APInt &C); Instruction *foldICmpAndConstConst(ICmpInst &Cmp, BinaryOperator *And, - const APInt *C1); + const APInt &C1); Instruction *foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, - const APInt *C1, const APInt *C2); + const APInt &C1, const APInt &C2); Instruction *foldICmpShrConstConst(ICmpInst &I, Value *ShAmt, const APInt &C1, const APInt &C2); Instruction *foldICmpShlConstConst(ICmpInst &I, Value *ShAmt, const APInt &C1, @@ -730,8 +768,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner Instruction *foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, BinaryOperator *BO, - const APInt *C); - Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, const APInt *C); + const APInt &C); + Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, const APInt &C); // Helpers of visitSelectInst(). Instruction *foldSelectExtConst(SelectInst &Sel); @@ -764,8 +802,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap); }; -} // end namespace llvm. +} // end namespace llvm #undef DEBUG_TYPE -#endif +#endif // LLVM_LIB_TRANSFORMS_INSTCOMBINE_INSTCOMBINEINTERNAL_H diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 451036545741a..5d2402361ad37 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -1544,8 +1544,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { SI.getSyncScopeID()); InsertNewInstBefore(NewSI, *BBI); // The debug locations of the original instructions might differ; merge them. 
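// A sketch of the equivalence behind the replacement below (using only the
// APIs visible in this patch):
//   NewSI->applyMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc());
// behaves like
//   NewSI->setDebugLoc(DILocation::getMergedLocation(
//       SI.getDebugLoc(), OtherStore->getDebugLoc()));
// i.e. it merges the two locations and applies the result in one step.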
- NewSI->setDebugLoc(DILocation::getMergedLocation(SI.getDebugLoc(), - OtherStore->getDebugLoc())); + NewSI->applyMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc()); // If the two stores had AA tags, merge them. AAMDNodes AATags; diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 0f762710fdeed..e6b9753826715 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -13,15 +13,36 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/KnownBits.h" +#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" +#include +#include +#include +#include + using namespace llvm; using namespace PatternMatch; #define DEBUG_TYPE "instcombine" - /// The specific integer value is used in a context where it is known to be /// non-zero. If this allows us to simplify the computation, do so and return /// the new operand, otherwise return null. @@ -73,7 +94,6 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, return MadeChange ? V : nullptr; } - /// True if the multiply cannot be expressed in an int this size. static bool MultiplyOverflows(const APInt &C1, const APInt &C2, APInt &Product, bool IsSigned) { @@ -540,7 +560,6 @@ static bool isFMulOrFDivWithConstant(Value *V) { /// This function is to simplify "FMulOrDiv * C" and returns the /// resulting expression. Note that this function could return NULL in /// case the constants cannot be folded into a normal floating-point value. -/// Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, Constant *C, Instruction *InsertBefore) { assert(isFMulOrFDivWithConstant(FMulOrDiv) && "V is invalid"); @@ -747,7 +766,6 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { // latency of the instruction Y is amortized by the expression of X*X, // and therefore Y is in a "less critical" position compared to what it // was before the transformation. - // if (AllowReassociate) { Value *Opnd0_0, *Opnd0_1; if (Opnd0->hasOneUse() && @@ -778,24 +796,23 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { return Changed ? &I : nullptr; } -/// Try to fold a divide or remainder of a select instruction. -bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { - SelectInst *SI = cast<SelectInst>(I.getOperand(1)); - - // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y - int NonNullOperand = -1; - if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1))) - if (ST->isNullValue()) - NonNullOperand = 2; - // div/rem X, (Cond ? Y : 0) -> div/rem X, Y - if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2))) - if (ST->isNullValue()) - NonNullOperand = 1; - - if (NonNullOperand == -1) +/// Fold a divide or remainder with a select instruction divisor when one of the +/// select operands is zero.
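/// For example (an illustrative sketch; the value names are made up):
///   %d = select i1 %c, i32 0, i32 %y
///   %r = udiv i32 %x, %d
/// ==>
///   %r = udiv i32 %x, %y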
In that case, we can use the other select operand +/// because div/rem by zero is undefined. +bool InstCombiner::simplifyDivRemOfSelectWithZeroOp(BinaryOperator &I) { + SelectInst *SI = dyn_cast(I.getOperand(1)); + if (!SI) return false; - Value *SelectCond = SI->getOperand(0); + int NonNullOperand; + if (match(SI->getTrueValue(), m_Zero())) + // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y + NonNullOperand = 2; + else if (match(SI->getFalseValue(), m_Zero())) + // div/rem X, (Cond ? Y : 0) -> div/rem X, Y + NonNullOperand = 1; + else + return false; // Change the div/rem to use 'Y' instead of the select. I.setOperand(1, SI->getOperand(NonNullOperand)); @@ -808,12 +825,13 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { // If the select and condition only have a single use, don't bother with this, // early exit. + Value *SelectCond = SI->getCondition(); if (SI->use_empty() && SelectCond->hasOneUse()) return true; // Scan the current block backward, looking for other uses of SI. BasicBlock::iterator BBI = I.getIterator(), BBFront = I.getParent()->begin(); - + Type *CondTy = SelectCond->getType(); while (BBI != BBFront) { --BBI; // If we found a call to a function, we can't assume it will return, so @@ -828,7 +846,8 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { *I = SI->getOperand(NonNullOperand); Worklist.Add(&*BBI); } else if (*I == SelectCond) { - *I = Builder.getInt1(NonNullOperand == 1); + *I = NonNullOperand == 1 ? ConstantInt::getTrue(CondTy) + : ConstantInt::getFalse(CondTy); Worklist.Add(&*BBI); } } @@ -847,7 +866,6 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { return true; } - /// This function implements the transforms common to both integer division /// instructions (udiv and sdiv). It is called by the visitors to those integer /// division instructions. @@ -863,7 +881,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { // Handle cases involving: [su]div X, (select Cond, Y, Z) // This does not apply for fdiv. - if (isa(Op1) && SimplifyDivRemOfSelect(I)) + if (simplifyDivRemOfSelectWithZeroOp(I)) return &I; if (Instruction *LHS = dyn_cast(Op0)) { @@ -973,25 +991,29 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return nullptr; } +static const unsigned MaxDepth = 6; + namespace { -const unsigned MaxDepth = 6; -typedef Instruction *(*FoldUDivOperandCb)(Value *Op0, Value *Op1, - const BinaryOperator &I, - InstCombiner &IC); + +using FoldUDivOperandCb = Instruction *(*)(Value *Op0, Value *Op1, + const BinaryOperator &I, + InstCombiner &IC); /// \brief Used to maintain state for visitUDivOperand(). struct UDivFoldAction { - FoldUDivOperandCb FoldAction; ///< Informs visitUDiv() how to fold this - ///< operand. This can be zero if this action - ///< joins two actions together. + /// Informs visitUDiv() how to fold this operand. This can be zero if this + /// action joins two actions together. + FoldUDivOperandCb FoldAction; + + /// Which operand to fold. + Value *OperandToFold; - Value *OperandToFold; ///< Which operand to fold. union { - Instruction *FoldResult; ///< The instruction returned when FoldAction is - ///< invoked. + /// The instruction returned when FoldAction is invoked. + Instruction *FoldResult; - size_t SelectLHSIdx; ///< Stores the LHS action index if this action - ///< joins two actions together. + /// Stores the LHS action index if this action joins two actions together. 
+ size_t SelectLHSIdx; }; UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand) @@ -999,7 +1021,8 @@ struct UDivFoldAction { UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS) : FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {} }; -} + +} // end anonymous namespace // X udiv 2^C -> X >> C static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1, @@ -1279,8 +1302,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { /// 1) 1/C is exact, or /// 2) reciprocal is allowed. /// If the conversion was successful, the simplified expression "X * 1/C" is -/// returned; otherwise, NULL is returned. -/// +/// returned; otherwise, nullptr is returned. static Instruction *CvtFDivConstToReciprocal(Value *Dividend, Constant *Divisor, bool AllowReciprocal) { if (!isa(Divisor)) // TODO: handle vectors. @@ -1341,7 +1363,6 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Res = BinaryOperator::CreateFMul(X, C); } else if (match(Op0, m_FDiv(m_Value(X), m_Constant(C1)))) { // (X/C1)/C2 => X /(C2*C1) [=> X * 1/(C2*C1) if reciprocal is allowed] - // Constant *C = ConstantExpr::getFMul(C1, C2); if (isNormalFp(C)) { Res = CvtFDivConstToReciprocal(X, C, AllowReciprocal); @@ -1399,7 +1420,6 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) { // (X/Y) / Z => X / (Y*Z) - // if (!isa(Y) || !isa(Op1)) { NewInst = Builder.CreateFMul(Y, Op1); if (Instruction *RI = dyn_cast(NewInst)) { @@ -1411,7 +1431,6 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { } } else if (Op1->hasOneUse() && match(Op1, m_FDiv(m_Value(X), m_Value(Y)))) { // Z / (X/Y) => Z*Y / X - // if (!isa(Y) || !isa(Op0)) { NewInst = Builder.CreateFMul(Op0, Y); if (Instruction *RI = dyn_cast(NewInst)) { @@ -1458,7 +1477,7 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { } // Handle cases involving: rem X, (select Cond, Y, Z) - if (isa(Op1) && SimplifyDivRemOfSelect(I)) + if (simplifyDivRemOfSelectWithZeroOp(I)) return &I; if (isa(Op1)) { @@ -1467,7 +1486,6 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; } else if (auto *PN = dyn_cast(Op0I)) { - using namespace llvm::PatternMatch; const APInt *Op1Int; if (match(Op1, m_APInt(Op1Int)) && !Op1Int->isMinValue() && (I.getOpcode() == Instruction::URem || @@ -1613,7 +1631,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { return replaceInstUsesWith(I, V); // Handle cases involving: rem X, (select Cond, Y, Z) - if (isa(Op1) && SimplifyDivRemOfSelect(I)) + if (simplifyDivRemOfSelectWithZeroOp(I)) return &I; return nullptr; diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 0011412c2bf47..45d448075d687 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -27,16 +27,249 @@ using namespace llvm::PatternMatch; /// The PHI arguments will be folded into a single operation with a PHI node /// as input. The debug location of the single operation will be the merged /// locations of the original PHI node arguments. 
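// A concrete sketch of that shape (hypothetical names):
//   pred1: %a1 = add i32 %v1, 1
//   pred2: %a2 = add i32 %v2, 1
//   %p = phi i32 [ %a1, %pred1 ], [ %a2, %pred2 ]
// ==>
//   %p.in = phi i32 [ %v1, %pred1 ], [ %v2, %pred2 ]
//   %p = add i32 %p.in, 1      ; carries the merged debug location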
-DebugLoc InstCombiner::PHIArgMergedDebugLoc(PHINode &PN) { +void InstCombiner::PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN) { auto *FirstInst = cast<Instruction>(PN.getIncomingValue(0)); - const DILocation *Loc = FirstInst->getDebugLoc(); + Inst->setDebugLoc(FirstInst->getDebugLoc()); + // We do not expect a CallInst here; otherwise, N-way merging of DebugLoc + // will be inefficient. + assert(!isa<CallInst>(Inst)); for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) { auto *I = cast<Instruction>(PN.getIncomingValue(i)); - Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc()); + Inst->applyMergedLocation(Inst->getDebugLoc(), I->getDebugLoc()); + } +} + +// Replace Integer typed PHI PN if the PHI's value is used as a pointer value. +// If there is an existing pointer typed PHI that produces the same value as PN, +// replace PN and the IntToPtr operation with it. Otherwise, synthesize a new +// PHI node: +// +// Case-1: +// bb1: +// int_init = PtrToInt(ptr_init) +// br label %bb2 +// bb2: +// int_val = PHI([int_init, %bb1], [int_val_inc, %bb2] +// ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2] +// ptr_val2 = IntToPtr(int_val) +// ... +// use(ptr_val2) +// ptr_val_inc = ... +// inc_val_inc = PtrToInt(ptr_val_inc) +// +// ==> +// bb1: +// br label %bb2 +// bb2: +// ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2] +// ... +// use(ptr_val) +// ptr_val_inc = ... +// +// Case-2: +// bb1: +// int_ptr = BitCast(ptr_ptr) +// int_init = Load(int_ptr) +// br label %bb2 +// bb2: +// int_val = PHI([int_init, %bb1], [int_val_inc, %bb2] +// ptr_val2 = IntToPtr(int_val) +// ... +// use(ptr_val2) +// ptr_val_inc = ... +// inc_val_inc = PtrToInt(ptr_val_inc) +// ==> +// bb1: +// ptr_init = Load(ptr_ptr) +// br label %bb2 +// bb2: +// ptr_val = PHI([ptr_init, %bb1], [ptr_val_inc, %bb2] +// ... +// use(ptr_val) +// ptr_val_inc = ... +// ... +// +Instruction *InstCombiner::FoldIntegerTypedPHI(PHINode &PN) { + if (!PN.getType()->isIntegerTy()) + return nullptr; + if (!PN.hasOneUse()) + return nullptr; + + auto *IntToPtr = dyn_cast<IntToPtrInst>(PN.user_back()); + if (!IntToPtr) + return nullptr; + + // Check if the pointer is actually used as a pointer: + auto HasPointerUse = [](Instruction *IIP) { + for (User *U : IIP->users()) { + Value *Ptr = nullptr; + if (LoadInst *LoadI = dyn_cast<LoadInst>(U)) { + Ptr = LoadI->getPointerOperand(); + } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + Ptr = SI->getPointerOperand(); + } else if (GetElementPtrInst *GI = dyn_cast<GetElementPtrInst>(U)) { + Ptr = GI->getPointerOperand(); + } + + if (Ptr && Ptr == IIP) + return true; + } + return false; + }; + + if (!HasPointerUse(IntToPtr)) + return nullptr; + + if (DL.getPointerSizeInBits(IntToPtr->getAddressSpace()) != + DL.getTypeSizeInBits(IntToPtr->getOperand(0)->getType())) + return nullptr; + + SmallVector<Value *, 4> AvailablePtrVals; + for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) { + Value *Arg = PN.getIncomingValue(i); + + // First look backward: + if (auto *PI = dyn_cast<PtrToIntInst>(Arg)) { + AvailablePtrVals.emplace_back(PI->getOperand(0)); + continue; + } + + // Next look forward: + Value *ArgIntToPtr = nullptr; + for (User *U : Arg->users()) { + if (isa<IntToPtrInst>(U) && U->getType() == IntToPtr->getType() && + (DT.dominates(cast<Instruction>(U), PN.getIncomingBlock(i)) || + cast<Instruction>(U)->getParent() == PN.getIncomingBlock(i))) { + ArgIntToPtr = U; + break; + } + } + + if (ArgIntToPtr) { + AvailablePtrVals.emplace_back(ArgIntToPtr); + continue; + } + + // If Arg is defined by a PHI, allow it. This will also create + // more opportunities iteratively.
+ if (isa<PHINode>(Arg)) { + AvailablePtrVals.emplace_back(Arg); + continue; + } + + // For a single use integer load: + auto *LoadI = dyn_cast<LoadInst>(Arg); + if (!LoadI) + return nullptr; + + if (!LoadI->hasOneUse()) + return nullptr; + + // Push the integer typed Load instruction into the available + // value set, and fix it up later when the pointer typed PHI + // is synthesized. + AvailablePtrVals.emplace_back(LoadI); + } + + // Now search for a matching PHI + auto *BB = PN.getParent(); + assert(AvailablePtrVals.size() == PN.getNumIncomingValues() && + "Not enough available ptr typed incoming values"); + PHINode *MatchingPtrPHI = nullptr; + for (auto II = BB->begin(), EI = BasicBlock::iterator(BB->getFirstNonPHI()); + II != EI; II++) { + PHINode *PtrPHI = dyn_cast<PHINode>(II); + if (!PtrPHI || PtrPHI == &PN || PtrPHI->getType() != IntToPtr->getType()) + continue; + MatchingPtrPHI = PtrPHI; + for (unsigned i = 0; i != PtrPHI->getNumIncomingValues(); ++i) { + if (AvailablePtrVals[i] != + PtrPHI->getIncomingValueForBlock(PN.getIncomingBlock(i))) { + MatchingPtrPHI = nullptr; + break; + } + } + + if (MatchingPtrPHI) + break; + } + + if (MatchingPtrPHI) { + assert(MatchingPtrPHI->getType() == IntToPtr->getType() && + "Phi's Type does not match with IntToPtr"); + // The PtrToCast + IntToPtr will be simplified later + return CastInst::CreateBitOrPointerCast(MatchingPtrPHI, + IntToPtr->getOperand(0)->getType()); } - return Loc; + // If it requires a conversion for every PHI operand, do not do it. + if (std::all_of(AvailablePtrVals.begin(), AvailablePtrVals.end(), + [&](Value *V) { + return (V->getType() != IntToPtr->getType()) || + isa<PHINode>(V); + })) + return nullptr; + + // If any operand that requires casting is a terminator + // instruction, do not do it. + if (std::any_of(AvailablePtrVals.begin(), AvailablePtrVals.end(), + [&](Value *V) { + return (V->getType() != IntToPtr->getType()) && + isa<TerminatorInst>(V); + })) + return nullptr; + + PHINode *NewPtrPHI = PHINode::Create( + IntToPtr->getType(), PN.getNumIncomingValues(), PN.getName() + ".ptr"); + + InsertNewInstBefore(NewPtrPHI, PN); + SmallDenseMap<Value *, Instruction *> Casts; + for (unsigned i = 0; i != PN.getNumIncomingValues(); ++i) { + auto *IncomingBB = PN.getIncomingBlock(i); + auto *IncomingVal = AvailablePtrVals[i]; + + if (IncomingVal->getType() == IntToPtr->getType()) { + NewPtrPHI->addIncoming(IncomingVal, IncomingBB); + continue; + } + +#ifndef NDEBUG + LoadInst *LoadI = dyn_cast<LoadInst>(IncomingVal); + assert((isa<PHINode>(IncomingVal) || + IncomingVal->getType()->isPointerTy() || + (LoadI && LoadI->hasOneUse())) && + "Cannot replace LoadInst with multiple uses"); +#endif + // Need to insert a BitCast.
+ // For an integer Load instruction with a single use, the load + IntToPtr + // cast will be simplified into a pointer load: + // %v = load i64, i64* %a.ip, align 8 + // %v.cast = inttoptr i64 %v to float ** + // ==> + // %v.ptrp = bitcast i64 * %a.ip to float ** + // %v.cast = load float *, float ** %v.ptrp, align 8 + Instruction *&CI = Casts[IncomingVal]; + if (!CI) { + CI = CastInst::CreateBitOrPointerCast(IncomingVal, IntToPtr->getType(), + IncomingVal->getName() + ".ptr"); + if (auto *IncomingI = dyn_cast(IncomingVal)) { + BasicBlock::iterator InsertPos(IncomingI); + InsertPos++; + if (isa(IncomingI)) + InsertPos = IncomingI->getParent()->getFirstInsertionPt(); + InsertNewInstBefore(CI, *InsertPos); + } else { + auto *InsertBB = &IncomingBB->getParent()->getEntryBlock(); + InsertNewInstBefore(CI, *InsertBB->getFirstInsertionPt()); + } + } + NewPtrPHI->addIncoming(CI, IncomingBB); + } + + // The PtrToCast + IntToPtr will be simplified later + return CastInst::CreateBitOrPointerCast(NewPtrPHI, + IntToPtr->getOperand(0)->getType()); } /// If we have something like phi [add (a,b), add(a,c)] and if a/b/c and the @@ -117,7 +350,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { if (CmpInst *CIOp = dyn_cast(FirstInst)) { CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), LHSVal, RHSVal); - NewCI->setDebugLoc(PHIArgMergedDebugLoc(PN)); + PHIArgMergedDebugLoc(NewCI, PN); return NewCI; } @@ -130,7 +363,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) NewBinOp->andIRFlags(PN.getIncomingValue(i)); - NewBinOp->setDebugLoc(PHIArgMergedDebugLoc(PN)); + PHIArgMergedDebugLoc(NewBinOp, PN); return NewBinOp; } @@ -239,7 +472,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base, makeArrayRef(FixedOperands).slice(1)); if (AllInBounds) NewGEP->setIsInBounds(); - NewGEP->setDebugLoc(PHIArgMergedDebugLoc(PN)); + PHIArgMergedDebugLoc(NewGEP, PN); return NewGEP; } @@ -399,7 +632,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { for (Value *IncValue : PN.incoming_values()) cast(IncValue)->setVolatile(false); - NewLI->setDebugLoc(PHIArgMergedDebugLoc(PN)); + PHIArgMergedDebugLoc(NewLI, PN); return NewLI; } @@ -565,7 +798,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { if (CastInst *FirstCI = dyn_cast(FirstInst)) { CastInst *NewCI = CastInst::Create(FirstCI->getOpcode(), PhiVal, PN.getType()); - NewCI->setDebugLoc(PHIArgMergedDebugLoc(PN)); + PHIArgMergedDebugLoc(NewCI, PN); return NewCI; } @@ -576,14 +809,14 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) BinOp->andIRFlags(PN.getIncomingValue(i)); - BinOp->setDebugLoc(PHIArgMergedDebugLoc(PN)); + PHIArgMergedDebugLoc(BinOp, PN); return BinOp; } CmpInst *CIOp = cast(FirstInst); CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(), PhiVal, ConstantOp); - NewCI->setDebugLoc(PHIArgMergedDebugLoc(PN)); + PHIArgMergedDebugLoc(NewCI, PN); return NewCI; } @@ -902,6 +1135,9 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { // this PHI only has a single use (a PHI), and if that PHI only has one use (a // PHI)... break the cycle. 
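// The cycle in question looks like this (an illustrative sketch):
//   bb1: %a = phi i32 [ 0, %entry ], [ %b, %bb2 ]
//   bb2: %b = phi i32 [ %a, %bb1 ]
// If %a's only user is %b and vice versa, the pair computes nothing
// observable and can be removed.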
if (PN.hasOneUse()) { + if (Instruction *Result = FoldIntegerTypedPHI(PN)) + return Result; + Instruction *PHIUser = cast<Instruction>(PN.user_back()); if (PHINode *PU = dyn_cast<PHINode>(PHIUser)) { SmallPtrSet<PHINode *, 16> PotentiallyDeadPHIs; diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index c21a6d1bdaf7c..876b8ce6ae4a7 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -12,13 +12,36 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CmpInstAnalysis.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" +#include +#include + using namespace llvm; using namespace PatternMatch; @@ -185,7 +208,6 @@ static Value *foldSelectICmpAnd(Type *SelType, const ICmpInst *IC, /// Assuming that the specified instruction is an operand to the select, return /// a bitmask indicating which operands of this instruction are foldable if they /// equal the other incoming value of the select. -/// static unsigned getSelectFoldableOperands(BinaryOperator *I) { switch (I->getOpcode()) { case Instruction::Add: @@ -263,7 +285,6 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI, if (TI->getOpcode() != Instruction::BitCast && (!TI->hasOneUse() || !FI->hasOneUse())) return nullptr; - } else if (!TI->hasOneUse() || !FI->hasOneUse()) { // TODO: The one-use restrictions for a scalar select could be eased if // the fold of a select in visitLoadInst() was enhanced to match a pattern @@ -840,7 +861,6 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, /// Z = select X, Y, 0 /// /// because Y is not live in BB1/BB2. -/// static bool canSelectOperandBeMappingIntoPredBlock(const Value *V, const SelectInst &SI) { // If the value is a non-instruction value like a constant or argument, it @@ -1209,7 +1229,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // may have an undef operand. This is a workaround for PR31652 caused by // a discrepancy about branch on undef between LoopUnswitch and GVN.
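// The guarded shape is, roughly (an illustrative sketch):
//   %s = select i1 %c, i32 undef, i32 %x
//   %e = icmp eq i32 %s, %x
// When such an equality user exists, the select is handled conservatively
// rather than simplified through the undef arm.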
if (isa(TrueVal) || isa(FalseVal)) { - if (any_of(SI.users(), [&](User *U) { + if (llvm::any_of(SI.users(), [&](User *U) { ICmpInst *CI = dyn_cast(U); if (CI && CI->isEquality()) return true; diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index d760101281aaf..a454653a3a135 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -13,10 +13,33 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" +#include +#include +#include +#include + using namespace llvm; using namespace PatternMatch; @@ -90,7 +113,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // Verify that this PHI user has one use, which is the PHI itself, // and that it is a binary operation which is cheap to scalarize. - // otherwise return NULL. + // otherwise return nullptr. if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || !(isa(PHIUser)) || !cheapToScalarize(PHIUser, true)) return nullptr; @@ -421,7 +444,7 @@ static void replaceExtractElements(InsertElementInst *InsElt, /// /// Note: we intentionally don't try to fold earlier shuffles since they have /// often been chosen carefully to be efficiently implementable on the target. -typedef std::pair ShuffleOps; +using ShuffleOps = std::pair; static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl &Mask, @@ -986,15 +1009,13 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask) { // Mask.size() does not need to be equal to the number of vector elements. assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); - if (isa(V)) { - return UndefValue::get(VectorType::get(V->getType()->getScalarType(), - Mask.size())); - } - if (isa(V)) { - return ConstantAggregateZero::get( - VectorType::get(V->getType()->getScalarType(), - Mask.size())); - } + Type *EltTy = V->getType()->getScalarType(); + if (isa(V)) + return UndefValue::get(VectorType::get(EltTy, Mask.size())); + + if (isa(V)) + return ConstantAggregateZero::get(VectorType::get(EltTy, Mask.size())); + if (Constant *C = dyn_cast(V)) { SmallVector MaskValues; for (int i = 0, e = Mask.size(); i != e; ++i) { @@ -1423,7 +1444,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { eltMask = Mask[i]-LHSWidth; // If LHS's width is changed, shift the mask value accordingly. - // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any + // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. 
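// A worked instance (hypothetical widths): with LHSWidth == 4, a mask element
// of 6 names lane 2 of the RHS vector (6 - 4 == 2). When the RHS is folded
// away, that entry is rewritten to index the surviving operand; when
// newRHS == nullptr the lanes already index LHSOp0 and are left alone.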
// If newRHS == newLHS, we want to remap any references from newRHS to // newLHS so that we can properly identify splats that may occur due to diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 2271e219d0fbf..dad066a6fb4e8 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -34,10 +34,14 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" -#include "llvm-c/Initialization.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" @@ -48,27 +52,56 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/CBindingWrapping.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/InstCombine/InstCombineWorklist.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include -#include +#include +#include +#include +#include +#include + using namespace llvm; using namespace llvm::PatternMatch; @@ -396,7 +429,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { // No further simplifications. return Changed; - } while (1); + } while (true); } /// Return whether "X LOp (Y ROp Z)" is always equal to @@ -1174,7 +1207,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Parent - initially null, but after drilling down notes where Op came from. // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the // 0'th operand of Val. - std::pair Parent; + std::pair Parent; // Set if the transform requires a descaling at deeper levels that doesn't // overflow. 
@@ -1184,7 +1217,6 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { int32_t logScale = Scale.exactLogBase2(); for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down - if (ConstantInt *CI = dyn_cast(Op)) { // If Op is a constant divisible by Scale then descale to the quotient. APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth. @@ -1199,7 +1231,6 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { } if (BinaryOperator *BO = dyn_cast(Op)) { - if (BO->getOpcode() == Instruction::Mul) { // Multiplication. NoSignedWrap = BO->hasNoSignedWrap(); @@ -1374,7 +1405,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // Move up one level in the expression. assert(Ancestor->hasOneUse() && "Drilled down when more than one use!"); Ancestor = Ancestor->user_back(); - } while (1); + } while (true); } /// \brief Creates node of binary operation with the same attributes as the @@ -1621,7 +1652,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction, combine the indices of the two // getelementptr instructions into a single instruction. - // if (GEPOperator *Src = dyn_cast(PtrOp)) { if (!shouldMergeGEPs(*cast(&GEP), *Src)) return nullptr; @@ -1646,7 +1676,6 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (EndsWithSequential) { // Replace: gep (gep %P, long B), long A, ... // With: T = long A+B; gep %P, T, ... - // Value *SO1 = Src->getOperand(Src->getNumOperands()-1); Value *GO1 = GEP.getOperand(1); @@ -2226,7 +2255,6 @@ tryToMoveFreeBeforeNullTest(CallInst &FI) { return &FI; } - Instruction *InstCombiner::visitFree(CallInst &FI) { Value *Op = FI.getArgOperand(0); @@ -3060,7 +3088,6 @@ bool InstCombiner::run() { /// them to the worklist (this significantly speeds up instcombine on code where /// many instructions are dead or constant). Additionally, if we find a branch /// whose condition is a known constant, we only visit the reachable successors. -/// static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, SmallPtrSetImpl &Visited, InstCombineWorklist &ICWorklist, @@ -3209,8 +3236,6 @@ static bool combineInstructionsOverFunction( F.getContext(), TargetFolder(DL), IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) { Worklist.Add(I); - - using namespace llvm::PatternMatch; if (match(I, m_Intrinsic())) AC.registerAssumption(cast(I)); })); @@ -3223,7 +3248,7 @@ static bool combineInstructionsOverFunction( // Iterate while there is work to do. 
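// For reference, the builder constructed above wires instruction insertion
// into the worklist; a condensed sketch of the pattern (types from this
// patch, with the assumption-registration callback elided):
//   InstCombineWorklist Worklist;
//   IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
//       F.getContext(), TargetFolder(DL),
//       IRBuilderCallbackInserter([&](Instruction *I) { Worklist.Add(I); }));
// Every instruction the combiner creates is thereby queued for revisiting,
// which is what drives the fixpoint loop below.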
int Iteration = 0; - for (;;) { + while (true) { ++Iteration; DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on " << F.getName() << "\n"); @@ -3297,6 +3322,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { } char InstructionCombiningPass::ID = 0; + INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 1f222da18e806..81ad5b477e006 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1,4 +1,4 @@ -//===-- AddressSanitizer.cpp - memory error detector ------------*- C++ -*-===// +//===- AddressSanitizer.cpp - memory error detector -----------------------===// // // The LLVM Compiler Infrastructure // @@ -16,52 +16,74 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/BinaryFormat/MachO.h" #include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" -#include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" -#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include +#include +#include +#include #include #include +#include #include #include -#include +#include using namespace llvm; @@ -70,7 +92,8 @@ using namespace llvm; static const uint64_t kDefaultShadowScale = 3; static const uint64_t 
kDefaultShadowOffset32 = 1ULL << 29; static const uint64_t kDefaultShadowOffset64 = 1ULL << 44; -static const uint64_t kDynamicShadowSentinel = ~(uint64_t)0; +static const uint64_t kDynamicShadowSentinel = + std::numeric_limits<uint64_t>::max(); static const uint64_t kIOSShadowOffset32 = 1ULL << 30; static const uint64_t kIOSSimShadowOffset32 = 1ULL << 30; static const uint64_t kIOSSimShadowOffset64 = kDefaultShadowOffset64; @@ -86,6 +109,7 @@ static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46; static const uint64_t kNetBSD_ShadowOffset64 = 1ULL << 46; static const uint64_t kPS4CPU_ShadowOffset64 = 1ULL << 40; static const uint64_t kWindowsShadowOffset32 = 3ULL << 28; + // The shadow memory space is dynamically allocated. static const uint64_t kWindowsShadowOffset64 = kDynamicShadowSentinel; @@ -149,9 +173,11 @@ static const size_t kNumberOfAccessSizes = 5; static const unsigned kAllocaRzSize = 32; // Command-line flags. + static cl::opt<bool> ClEnableKasan( "asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"), cl::Hidden, cl::init(false)); + static cl::opt<bool> ClRecover( "asan-recover", cl::desc("Enable recovery mode (continue-after-error)."), @@ -161,17 +187,21 @@ static cl::opt<bool> ClRecover( static cl::opt<bool> ClInstrumentReads("asan-instrument-reads", cl::desc("instrument read instructions"), cl::Hidden, cl::init(true)); + static cl::opt<bool> ClInstrumentWrites( "asan-instrument-writes", cl::desc("instrument write instructions"), cl::Hidden, cl::init(true)); + static cl::opt<bool> ClInstrumentAtomics( "asan-instrument-atomics", cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden, cl::init(true)); + static cl::opt<bool> ClAlwaysSlowPath( "asan-always-slow-path", cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden, cl::init(false)); + static cl::opt<bool> ClForceDynamicShadow( "asan-force-dynamic-shadow", cl::desc("Load shadow address into a local variable for each function"), @@ -185,6 +215,7 @@ static cl::opt<int> ClMaxInsnsToInstrumentPerBB( "asan-max-ins-per-bb", cl::init(10000), cl::desc("maximal number of instructions to instrument in any given BB"), cl::Hidden); + // This flag may need to be replaced with -f[no]asan-stack. static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"), cl::Hidden, cl::init(true)); @@ -193,32 +224,40 @@ static cl::opt ClMaxInlinePoisoningSize( cl::desc( "Inline shadow poisoning for blocks up to the given size in bytes."), cl::Hidden, cl::init(64)); + static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", cl::desc("Check stack-use-after-return"), cl::Hidden, cl::init(true)); + static cl::opt<bool> ClRedzoneByvalArgs("asan-redzone-byval-args", cl::desc("Create redzones for byval " "arguments (extra copy " "required)"), cl::Hidden, cl::init(true)); + static cl::opt<bool> ClUseAfterScope("asan-use-after-scope", cl::desc("Check stack-use-after-scope"), cl::Hidden, cl::init(false)); + // This flag may need to be replaced with -f[no]asan-globals.
static cl::opt ClGlobals("asan-globals", cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); + static cl::opt ClInitializers("asan-initialization-order", cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(true)); + static cl::opt ClInvalidPointerPairs( "asan-detect-invalid-pointer-pair", cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden, cl::init(false)); + static cl::opt ClRealignStack( "asan-realign-stack", cl::desc("Realign stack to the value of this flag (power of two)"), cl::Hidden, cl::init(32)); + static cl::opt ClInstrumentationWithCallsThreshold( "asan-instrumentation-with-call-threshold", cl::desc( @@ -226,14 +265,17 @@ static cl::opt ClInstrumentationWithCallsThreshold( "this number of memory accesses, use callbacks instead of " "inline checks (-1 means never use callbacks)."), cl::Hidden, cl::init(7000)); + static cl::opt ClMemoryAccessCallbackPrefix( "asan-memory-access-callback-prefix", cl::desc("Prefix for memory access callbacks"), cl::Hidden, cl::init("__asan_")); + static cl::opt ClInstrumentDynamicAllocas("asan-instrument-dynamic-allocas", cl::desc("instrument dynamic allocas"), cl::Hidden, cl::init(true)); + static cl::opt ClSkipPromotableAllocas( "asan-skip-promotable-allocas", cl::desc("Do not instrument promotable allocas"), cl::Hidden, @@ -242,9 +284,11 @@ static cl::opt ClSkipPromotableAllocas( // These flags allow to change the shadow mapping. // The shadow mapping looks like // Shadow = (Mem >> scale) + offset + static cl::opt ClMappingScale("asan-mapping-scale", cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0)); + static cl::opt ClMappingOffset( "asan-mapping-offset", cl::desc("offset of asan shadow mapping [EXPERIMENTAL]"), cl::Hidden, @@ -252,14 +296,18 @@ static cl::opt ClMappingOffset( // Optimization flags. Not user visible, used mostly for testing // and benchmarking the tool. + static cl::opt ClOpt("asan-opt", cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true)); + static cl::opt ClOptSameTemp( "asan-opt-same-temp", cl::desc("Instrument the same temp just once"), cl::Hidden, cl::init(true)); + static cl::opt ClOptGlobals("asan-opt-globals", cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true)); + static cl::opt ClOptStack( "asan-opt-stack", cl::desc("Don't instrument scalar stack variables"), cl::Hidden, cl::init(false)); @@ -294,14 +342,19 @@ static cl::opt cl::Hidden, cl::init(true)); // Debug flags. + static cl::opt ClDebug("asan-debug", cl::desc("debug"), cl::Hidden, cl::init(0)); + static cl::opt ClDebugStack("asan-debug-stack", cl::desc("debug stack"), cl::Hidden, cl::init(0)); + static cl::opt ClDebugFunc("asan-debug-func", cl::Hidden, cl::desc("Debug func")); + static cl::opt ClDebugMin("asan-debug-min", cl::desc("Debug min inst"), cl::Hidden, cl::init(-1)); + static cl::opt ClDebugMax("asan-debug-max", cl::desc("Debug max inst"), cl::Hidden, cl::init(-1)); @@ -313,13 +366,14 @@ STATISTIC(NumOptimizedAccessesToStackVar, "Number of optimized accesses to stack vars"); namespace { + /// Frontend-provided metadata for source location. struct LocationMetadata { StringRef Filename; - int LineNo; - int ColumnNo; + int LineNo = 0; + int ColumnNo = 0; - LocationMetadata() : Filename(), LineNo(0), ColumnNo(0) {} + LocationMetadata() = default; bool empty() const { return Filename.empty(); } @@ -336,16 +390,17 @@ struct LocationMetadata { /// Frontend-provided metadata for global variables. 
class GlobalsMetadata { - public: +public: struct Entry { - Entry() : SourceLoc(), Name(), IsDynInit(false), IsBlacklisted(false) {} LocationMetadata SourceLoc; StringRef Name; - bool IsDynInit; - bool IsBlacklisted; + bool IsDynInit = false; + bool IsBlacklisted = false; + + Entry() = default; }; - GlobalsMetadata() : inited_(false) {} + GlobalsMetadata() = default; void reset() { inited_ = false; @@ -385,8 +440,8 @@ class GlobalsMetadata { return (Pos != Entries.end()) ? Pos->second : Entry(); } - private: - bool inited_; +private: + bool inited_ = false; DenseMap<GlobalVariable *, Entry> Entries; }; @@ -398,6 +453,8 @@ struct ShadowMapping { bool OrShadowOffset; }; +} // end anonymous namespace + static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, bool IsKasan) { bool IsAndroid = TargetTriple.isAndroid(); @@ -406,16 +463,16 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, bool IsNetBSD = TargetTriple.isOSNetBSD(); bool IsPS4CPU = TargetTriple.isPS4CPU(); bool IsLinux = TargetTriple.isOSLinux(); - bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 || - TargetTriple.getArch() == llvm::Triple::ppc64le; - bool IsSystemZ = TargetTriple.getArch() == llvm::Triple::systemz; - bool IsX86 = TargetTriple.getArch() == llvm::Triple::x86; - bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; - bool IsMIPS32 = TargetTriple.getArch() == llvm::Triple::mips || - TargetTriple.getArch() == llvm::Triple::mipsel; - bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 || - TargetTriple.getArch() == llvm::Triple::mips64el; - bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64; + bool IsPPC64 = TargetTriple.getArch() == Triple::ppc64 || + TargetTriple.getArch() == Triple::ppc64le; + bool IsSystemZ = TargetTriple.getArch() == Triple::systemz; + bool IsX86 = TargetTriple.getArch() == Triple::x86; + bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64; + bool IsMIPS32 = TargetTriple.getArch() == Triple::mips || + TargetTriple.getArch() == Triple::mipsel; + bool IsMIPS64 = TargetTriple.getArch() == Triple::mips64 || - TargetTriple.getArch() == Triple::mips64el; + TargetTriple.getArch() == Triple::mips64el; + bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; bool IsWindows = TargetTriple.isOSWindows(); bool IsFuchsia = TargetTriple.isOSFuchsia(); @@ -503,23 +560,30 @@ static size_t RedzoneSizeForScale(int MappingScale) { return std::max(32U, 1U << MappingScale); } +namespace { + /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public FunctionPass { + // Pass identification, replacement for typeid + static char ID; + explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false) : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan), Recover(Recover || ClRecover), - UseAfterScope(UseAfterScope || ClUseAfterScope), - LocalDynamicShadow(nullptr) { + UseAfterScope(UseAfterScope || ClUseAfterScope) { initializeAddressSanitizerPass(*PassRegistry::getPassRegistry()); } + StringRef getPassName() const override { return "AddressSanitizerFunctionPass"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } + uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const { uint64_t ArraySize = 1; if (AI.isArrayAllocation()) { @@ -532,6 +596,7 @@ struct AddressSanitizer : public FunctionPass { AI.getModule()->getDataLayout().getTypeAllocSize(Ty); return SizeInBytes * ArraySize; } + /// Check if we want (and can) handle this alloca.
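// (A worked example for getAllocaSizeInBytes above, with made-up operands:
// "alloca [4 x i32], i32 8" has a 16-byte element type and an array size of
// 8, so it reports 16 * 8 == 128 bytes.)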
bool isInterestingAlloca(const AllocaInst &AI); @@ -542,6 +607,7 @@ struct AddressSanitizer : public FunctionPass { Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite, uint64_t *TypeSize, unsigned *Alignment, Value **MaybeMask = nullptr); + void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I, bool UseCalls, const DataLayout &DL); void instrumentPointerComparisonOrSubtraction(Instruction *I); @@ -566,11 +632,12 @@ struct AddressSanitizer : public FunctionPass { void markEscapedLocalAllocas(Function &F); bool doInitialization(Module &M) override; bool doFinalization(Module &M) override; - static char ID; // Pass identification, replacement for typeid DominatorTree &getDominatorTree() const { return *DT; } - private: +private: + friend struct FunctionStackPoisoner; + void initializeCallbacks(Module &M); bool LooksLikeCodeInBug11395(Instruction *I); @@ -581,11 +648,13 @@ struct AddressSanitizer : public FunctionPass { /// Helper to cleanup per-function state. struct FunctionStateRAII { AddressSanitizer *Pass; + FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) { assert(Pass->ProcessedAllocas.empty() && "last pass forgot to clear cache"); assert(!Pass->LocalDynamicShadow); } + ~FunctionStateRAII() { Pass->LocalDynamicShadow = nullptr; Pass->ProcessedAllocas.clear(); @@ -603,23 +672,27 @@ struct AddressSanitizer : public FunctionPass { DominatorTree *DT; Function *AsanHandleNoReturnFunc; Function *AsanPtrCmpFunction, *AsanPtrSubFunction; - // This array is indexed by AccessIsWrite, Experiment and log2(AccessSize). + + // These arrays is indexed by AccessIsWrite, Experiment and log2(AccessSize). Function *AsanErrorCallback[2][2][kNumberOfAccessSizes]; Function *AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes]; - // This array is indexed by AccessIsWrite and Experiment. + + // These arrays is indexed by AccessIsWrite and Experiment. Function *AsanErrorCallbackSized[2][2]; Function *AsanMemoryAccessCallbackSized[2][2]; + Function *AsanMemmove, *AsanMemcpy, *AsanMemset; InlineAsm *EmptyAsm; - Value *LocalDynamicShadow; + Value *LocalDynamicShadow = nullptr; GlobalsMetadata GlobalsMD; DenseMap ProcessedAllocas; - - friend struct FunctionStackPoisoner; }; class AddressSanitizerModule : public ModulePass { public: + // Pass identification, replacement for typeid + static char ID; + explicit AddressSanitizerModule(bool CompileKernel = false, bool Recover = false, bool UseGlobalsGC = true) @@ -634,8 +707,8 @@ class AddressSanitizerModule : public ModulePass { // ClWithComdat and ClUseGlobalsGC unless the frontend says it's ok to // do globals-gc. UseCtorComdat(UseGlobalsGC && ClWithComdat) {} + bool runOnModule(Module &M) override; - static char ID; // Pass identification, replacement for typeid StringRef getPassName() const override { return "AddressSanitizerModule"; } private: @@ -739,7 +812,7 @@ struct FunctionStackPoisoner : public InstVisitor { IntrinsicInst *LocalEscapeCall = nullptr; // Maps Value to an AllocaInst from which the Value is originated. 
   // Maps Value to an AllocaInst from which the Value is originated.
-  typedef DenseMap<Value *, AllocaInst *> AllocaForValueMapTy;
+  using AllocaForValueMapTy = DenseMap<Value *, AllocaInst *>;
   AllocaForValueMapTy AllocaForValue;
 
   bool HasNonEmptyInlineAsm = false;
@@ -942,9 +1015,10 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
                                  Instruction *ThenTerm, Value *ValueIfFalse);
 };
 
-} // anonymous namespace
+} // end anonymous namespace
 
 char AddressSanitizer::ID = 0;
+
 INITIALIZE_PASS_BEGIN(
     AddressSanitizer, "asan",
     "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
@@ -955,6 +1029,7 @@ INITIALIZE_PASS_END(
     AddressSanitizer, "asan",
     "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
     false)
+
 FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel,
                                                        bool Recover,
                                                        bool UseAfterScope) {
@@ -963,11 +1038,13 @@ FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel,
 }
 
 char AddressSanitizerModule::ID = 0;
+
 INITIALIZE_PASS(
     AddressSanitizerModule, "asan-module",
     "AddressSanitizer: detects use-after-free and out-of-bounds bugs."
     "ModulePass",
     false, false)
+
 ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel,
                                                    bool Recover,
                                                    bool UseGlobalsGC) {
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index a193efe902cf5..8eb9ad409b886 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -13,18 +13,31 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Instrumentation.h"
+#include <cstdint>
+#include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "bounds-checking"
@@ -36,9 +49,10 @@ STATISTIC(ChecksAdded, "Bounds checks added");
 STATISTIC(ChecksSkipped, "Bounds checks skipped");
 STATISTIC(ChecksUnable, "Bounds checks unable to add");
 
-typedef IRBuilder<TargetFolder> BuilderTy;
+using BuilderTy = IRBuilder<TargetFolder>;
 
 namespace {
+
 struct BoundsChecking : public FunctionPass {
   static char ID;
@@ -60,16 +74,16 @@ namespace {
     BasicBlock *TrapBB;
 
     BasicBlock *getTrapBB();
-    void emitBranchToTrap(Value *Cmp = nullptr);
     bool instrument(Value *Ptr, Value *Val, const DataLayout &DL);
   };
-}
+
+} // end anonymous namespace
 
 char BoundsChecking::ID = 0;
+
 INITIALIZE_PASS(BoundsChecking, "bounds-checking", "Run-time bounds checking",
                 false, false)
-
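The BoundsChecking hunks below fold emitBranchToTrap into instrument(). The underlying recipe for guarding an instruction with a runtime check is worth spelling out: split the block at the guarded instruction, drop the unconditional branch that splitBasicBlock creates, and wire in a conditional branch to the trap block. A condensed, hedged sketch of that recipe (Check is assumed to be the i1 "out of bounds" condition and TrapBB to exist already; this is not the pass's exact code):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Branch to TrapBB when Check is true at run time; otherwise fall
    // through to SplitPt as before.
    static void insertTrapGuard(Instruction *SplitPt, Value *Check,
                                BasicBlock *TrapBB) {
      BasicBlock *OldBB = SplitPt->getParent();
      BasicBlock *Cont = OldBB->splitBasicBlock(SplitPt->getIterator());

      // splitBasicBlock ends OldBB with an unconditional branch to Cont;
      // replace it with the conditional branch into the trap block.
      OldBB->getTerminator()->eraseFromParent();
      BranchInst::Create(/*IfTrue=*/TrapBB, /*IfFalse=*/Cont, Check, OldBB);
    }
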
 /// getTrapBB - create a basic block that traps. All overflowing conditions
 /// branch to this block. There's only one trap block per function.
 BasicBlock *BoundsChecking::getTrapBB() {
@@ -81,7 +95,7 @@ BasicBlock *BoundsChecking::getTrapBB() {
   TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
   Builder->SetInsertPoint(TrapBB);
 
-  llvm::Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
+  Value *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
   CallInst *TrapCall = Builder->CreateCall(F, {});
   TrapCall->setDoesNotReturn();
   TrapCall->setDoesNotThrow();
@@ -91,33 +105,6 @@ BasicBlock *BoundsChecking::getTrapBB() {
   return TrapBB;
 }
 
-
-/// emitBranchToTrap - emit a branch instruction to a trap block.
-/// If Cmp is non-null, perform a jump only if its value evaluates to true.
-void BoundsChecking::emitBranchToTrap(Value *Cmp) {
-  // check if the comparison is always false
-  ConstantInt *C = dyn_cast_or_null<ConstantInt>(Cmp);
-  if (C) {
-    ++ChecksSkipped;
-    if (!C->getZExtValue())
-      return;
-    else
-      Cmp = nullptr; // unconditional branch
-  }
-  ++ChecksAdded;
-
-  BasicBlock::iterator Inst = Builder->GetInsertPoint();
-  BasicBlock *OldBB = Inst->getParent();
-  BasicBlock *Cont = OldBB->splitBasicBlock(Inst);
-  OldBB->getTerminator()->eraseFromParent();
-
-  if (Cmp)
-    BranchInst::Create(getTrapBB(), Cont, Cmp, OldBB);
-  else
-    BranchInst::Create(getTrapBB(), OldBB);
-}
-
-
 /// instrument - adds run-time bounds checks to memory accessing instructions.
 /// Ptr is the pointer that will be read/written, and InstVal is either the
 /// result from the load or the value being stored. It is used to determine the
@@ -158,8 +145,32 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal,
     Value *Cmp1 = Builder->CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0));
     Or = Builder->CreateOr(Cmp1, Or);
   }
-  emitBranchToTrap(Or);
 
+  // Check whether the "out of bounds" condition folded to a constant.
+  ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or);
+  if (C) {
+    ++ChecksSkipped;
+    // Constant false: the access is always in bounds, so no check is needed.
+    if (!C->getZExtValue())
+      return true;
+  }
+  ++ChecksAdded;
+
+  BasicBlock::iterator SplitI = Builder->GetInsertPoint();
+  BasicBlock *OldBB = SplitI->getParent();
+  BasicBlock *Cont = OldBB->splitBasicBlock(SplitI);
+  OldBB->getTerminator()->eraseFromParent();
+
+  if (C) {
+    // Constant true: the check always fails, so branch unconditionally to
+    // the trap block.
+    // FIXME: We should really handle this differently to avoid splitting
+    // the block in the first place.
+    BranchInst::Create(getTrapBB(), OldBB);
+    return true;
+  }
+
+  // Create the conditional branch.
+  BranchInst::Create(getTrapBB(), Cont, Or, OldBB);
   return true;
 }
 
@@ -176,7 +187,7 @@ bool BoundsChecking::runOnFunction(Function &F) {
   // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
   // touching instructions
-  std::vector<Instruction*> WorkList;
+  std::vector<Instruction *> WorkList;
   for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
     Instruction *I = &*i;
     if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<AtomicCmpXchgInst>(I) ||
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index d74ccdfccb826..09bcbb282653c 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -1,4 +1,4 @@
-//===-- DataFlowSanitizer.cpp - dynamic data flow analysis ----------------===//
+//===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -6,6 +6,7 @@
 // License. See LICENSE.TXT for details.
// //===----------------------------------------------------------------------===// +// /// \file /// This file is a part of DataFlowSanitizer, a generalised dynamic data flow /// analysis. @@ -43,32 +44,63 @@ /// /// For more information, please refer to the design document: /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html +// +//===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SpecialCaseList.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include +#include +#include +#include #include +#include #include +#include #include +#include using namespace llvm; @@ -129,10 +161,7 @@ static cl::opt ClDebugNonzeroLabels( "load or return with a nonzero label"), cl::Hidden); - -namespace { - -StringRef GetGlobalTypeString(const GlobalValue &G) { +static StringRef GetGlobalTypeString(const GlobalValue &G) { // Types of GlobalVariables are always pointer types. Type *GType = G.getValueType(); // For now we support blacklisting struct types only. 
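As the \file comment and the linked design document describe, one of DataFlowSanitizer's instrumented ABIs (IA_Args) rewrites every function type so that each formal parameter is followed by a 16-bit shadow label, variadic functions receive a pointer to the shadows of their variadic arguments, and a non-void return value is paired with a returned shadow. A rough sketch of that signature rewrite, mirroring getArgsFunctionType further below (the parameter names and the vararg shadow-pointer type are simplified assumptions):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/DerivedTypes.h"

    using namespace llvm;

    // Given i32 f(i8*, double), produce {i32, i16} f(i8*, double, i16, i16)
    // where i16 is the DFSan label type.
    static FunctionType *withShadowArgs(FunctionType *T, Type *ShadowTy,
                                        Type *ShadowPtrTy) {
      SmallVector<Type *, 8> ArgTypes(T->param_begin(), T->param_end());
      ArgTypes.append(T->getNumParams(), ShadowTy); // one label per parameter
      if (T->isVarArg())
        ArgTypes.push_back(ShadowPtrTy);            // labels of the vararg tail
      Type *RetTy = T->getReturnType();
      if (!RetTy->isVoidTy())
        RetTy = StructType::get(RetTy, ShadowTy);   // value plus its label
      return FunctionType::get(RetTy, ArgTypes, T->isVarArg());
    }
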
@@ -143,11 +172,13 @@ StringRef GetGlobalTypeString(const GlobalValue &G) { return ""; } +namespace { + class DFSanABIList { std::unique_ptr SCL; public: - DFSanABIList() {} + DFSanABIList() = default; void set(std::unique_ptr List) { SCL = std::move(List); } @@ -256,7 +287,7 @@ class DataFlowSanitizer : public ModulePass { DFSanABIList ABIList; DenseMap UnwrappedFnMap; AttrBuilder ReadOnlyNoneAttrs; - bool DFSanRuntimeShadowMask; + bool DFSanRuntimeShadowMask = false; Value *getShadowAddress(Value *Addr, Instruction *Pos); bool isInstrumented(const Function *F); @@ -272,11 +303,13 @@ class DataFlowSanitizer : public ModulePass { FunctionType *NewFT); Constant *getOrBuildTrampolineFunction(FunctionType *FT, StringRef FName); - public: +public: + static char ID; + DataFlowSanitizer( const std::vector &ABIListFiles = std::vector(), void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr); - static char ID; + bool doInitialization(Module &M) override; bool runOnModule(Module &M) override; }; @@ -287,12 +320,12 @@ struct DFSanFunction { DominatorTree DT; DataFlowSanitizer::InstrumentedABI IA; bool IsNativeABI; - Value *ArgTLSPtr; - Value *RetvalTLSPtr; - AllocaInst *LabelReturnAlloca; + Value *ArgTLSPtr = nullptr; + Value *RetvalTLSPtr = nullptr; + AllocaInst *LabelReturnAlloca = nullptr; DenseMap ValShadowMap; DenseMap AllocaShadowMap; - std::vector > PHIFixups; + std::vector> PHIFixups; DenseSet SkipInsts; std::vector NonZeroChecks; bool AvoidNewBlocks; @@ -306,14 +339,13 @@ struct DFSanFunction { DenseMap> ShadowElements; DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI) - : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), - IsNativeABI(IsNativeABI), ArgTLSPtr(nullptr), RetvalTLSPtr(nullptr), - LabelReturnAlloca(nullptr) { + : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) { DT.recalculate(*F); // FIXME: Need to track down the register allocator issue which causes poor // performance in pathological cases with large numbers of basic blocks. 
AvoidNewBlocks = F->size() > 1000; } + Value *getArgTLSPtr(); Value *getArgTLS(unsigned Index, Instruction *Pos); Value *getRetvalTLS(); @@ -328,8 +360,9 @@ struct DFSanFunction { }; class DFSanVisitor : public InstVisitor { - public: +public: DFSanFunction &DFSF; + DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {} const DataLayout &getDataLayout() const { @@ -337,7 +370,6 @@ class DFSanVisitor : public InstVisitor { } void visitOperandShadowInst(Instruction &I); - void visitBinaryOperator(BinaryOperator &BO); void visitCastInst(CastInst &CI); void visitCmpInst(CmpInst &CI); @@ -358,9 +390,10 @@ class DFSanVisitor : public InstVisitor { void visitMemTransferInst(MemTransferInst &I); }; -} +} // end anonymous namespace char DataFlowSanitizer::ID; + INITIALIZE_PASS(DataFlowSanitizer, "dfsan", "DataFlowSanitizer: dynamic data flow analysis.", false, false) @@ -374,8 +407,7 @@ llvm::createDataFlowSanitizerPass(const std::vector &ABIListFiles, DataFlowSanitizer::DataFlowSanitizer( const std::vector &ABIListFiles, void *(*getArgTLS)(), void *(*getRetValTLS)()) - : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS), - DFSanRuntimeShadowMask(false) { + : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) { std::vector AllABIListFiles(std::move(ABIListFiles)); AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(), ClABIListFiles.end()); @@ -383,7 +415,7 @@ DataFlowSanitizer::DataFlowSanitizer( } FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { - llvm::SmallVector ArgTypes(T->param_begin(), T->param_end()); + SmallVector ArgTypes(T->param_begin(), T->param_end()); ArgTypes.append(T->getNumParams(), ShadowTy); if (T->isVarArg()) ArgTypes.push_back(ShadowPtrTy); @@ -395,7 +427,7 @@ FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) { assert(!T->isVarArg()); - llvm::SmallVector ArgTypes; + SmallVector ArgTypes; ArgTypes.push_back(T->getPointerTo()); ArgTypes.append(T->param_begin(), T->param_end()); ArgTypes.append(T->getNumParams(), ShadowTy); @@ -406,7 +438,7 @@ FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) { } FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { - llvm::SmallVector ArgTypes; + SmallVector ArgTypes; for (FunctionType::param_iterator i = T->param_begin(), e = T->param_end(); i != e; ++i) { FunctionType *FT; @@ -429,12 +461,12 @@ FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { } bool DataFlowSanitizer::doInitialization(Module &M) { - llvm::Triple TargetTriple(M.getTargetTriple()); - bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; - bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 || - TargetTriple.getArch() == llvm::Triple::mips64el; - bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64 || - TargetTriple.getArch() == llvm::Triple::aarch64_be; + Triple TargetTriple(M.getTargetTriple()); + bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64; + bool IsMIPS64 = TargetTriple.getArch() == Triple::mips64 || + TargetTriple.getArch() == Triple::mips64el; + bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64 || + TargetTriple.getArch() == Triple::aarch64_be; const DataLayout &DL = M.getDataLayout(); @@ -655,7 +687,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { DFSanVarargWrapperFnTy); std::vector FnsToInstrument; - llvm::SmallPtrSet FnsWithNativeABI; + SmallPtrSet FnsWithNativeABI; for 
(Function &i : M) { if (!i.isIntrinsic() && &i != DFSanUnionFn && @@ -798,11 +830,11 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // DFSanVisitor may create new basic blocks, which confuses df_iterator. // Build a copy of the list before iterating over it. - llvm::SmallVector BBList(depth_first(&i->getEntryBlock())); + SmallVector BBList(depth_first(&i->getEntryBlock())); for (BasicBlock *i : BBList) { Instruction *Inst = &i->front(); - while (1) { + while (true) { // DFSanVisitor may split the current basic block, changing the current // instruction's next pointer and moving the next instruction to the // tail block from which we should continue. @@ -822,7 +854,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // until we have visited every block. Therefore, the code that handles phi // nodes adds them to the PHIFixups list so that they can be properly // handled here. - for (std::vector >::iterator + for (std::vector>::iterator i = DFSF.PHIFixups.begin(), e = DFSF.PHIFixups.end(); i != e; ++i) { @@ -1046,8 +1078,7 @@ void DFSanVisitor::visitOperandShadowInst(Instruction &I) { Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, Instruction *Pos) { if (AllocaInst *AI = dyn_cast(Addr)) { - llvm::DenseMap::iterator i = - AllocaShadowMap.find(AI); + const auto i = AllocaShadowMap.find(AI); if (i != AllocaShadowMap.end()) { IRBuilder<> IRB(Pos); return IRB.CreateLoad(i->second); @@ -1188,8 +1219,7 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) { void DFSanFunction::storeShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *Shadow, Instruction *Pos) { if (AllocaInst *AI = dyn_cast(Addr)) { - llvm::DenseMap::iterator i = - AllocaShadowMap.find(AI); + const auto i = AllocaShadowMap.find(AI); if (i != AllocaShadowMap.end()) { IRBuilder<> IRB(Pos); IRB.CreateStore(Shadow, i->second); @@ -1410,24 +1440,21 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (i != DFSF.DFS.UnwrappedFnMap.end()) { Function *F = i->second; switch (DFSF.DFS.getWrapperKind(F)) { - case DataFlowSanitizer::WK_Warning: { + case DataFlowSanitizer::WK_Warning: CS.setCalledFunction(F); IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn, IRB.CreateGlobalStringPtr(F->getName())); DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); return; - } - case DataFlowSanitizer::WK_Discard: { + case DataFlowSanitizer::WK_Discard: CS.setCalledFunction(F); DFSF.setShadow(CS.getInstruction(), DFSF.DFS.ZeroShadow); return; - } - case DataFlowSanitizer::WK_Functional: { + case DataFlowSanitizer::WK_Functional: CS.setCalledFunction(F); visitOperandShadowInst(*CS.getInstruction()); return; - } - case DataFlowSanitizer::WK_Custom: { + case DataFlowSanitizer::WK_Custom: // Don't try to handle invokes of custom functions, it's too complicated. // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_ // wrapper. 
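The BBList copy in the hunk above encodes a rule that recurs in every pass that mutates the CFG mid-walk: df_iterator traverses the graph lazily, so splitting or creating a block during the walk invalidates it. Materializing the traversal into a container first, as done here, makes the iteration safe. The same idea in isolation (the instrumentation body is elided):

    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Function.h"

    using namespace llvm;

    static void instrumentBlocks(Function &F) {
      // Snapshot the depth-first order up front; the loop body may split
      // blocks, which would invalidate a live df_iterator.
      SmallVector<BasicBlock *, 16> BBList(depth_first(&F.getEntryBlock()));
      for (BasicBlock *BB : BBList) {
        // ... instrument *BB; blocks created here are not revisited ...
        (void)BB;
      }
    }
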
@@ -1527,7 +1554,6 @@ void DFSanVisitor::visitCallSite(CallSite CS) { } break; } - } } FunctionType *FT = cast( diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 3154c1939ea7c..67ca8172b0d56 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/UniqueVector.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/IRBuilder.h" @@ -502,6 +503,13 @@ static bool functionHasLines(Function &F) { return false; } +static bool isUsingFuncletBasedEH(Function &F) { + if (!F.hasPersonalityFn()) return false; + + EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); + return isFuncletEHPersonality(Personality); +} + static bool shouldKeepInEntry(BasicBlock::iterator It) { if (isa(*It)) return true; if (isa(*It)) return true; @@ -542,6 +550,8 @@ void GCOVProfiler::emitProfileNotes() { DISubprogram *SP = F.getSubprogram(); if (!SP) continue; if (!functionHasLines(F)) continue; + // TODO: Functions using funclet-based EH are currently not supported. + if (isUsingFuncletBasedEH(F)) continue; // gcov expects every function to start with an entry block that has a // single successor, so split the entry block to make sure of that. @@ -619,7 +629,10 @@ bool GCOVProfiler::emitProfileArcs() { DISubprogram *SP = F.getSubprogram(); if (!SP) continue; if (!functionHasLines(F)) continue; + // TODO: Functions using funclet-based EH are currently not supported. + if (isUsingFuncletBasedEH(F)) continue; if (!Result) Result = true; + unsigned Edges = 0; for (auto &BB : F) { TerminatorInst *TI = BB.getTerminator(); diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index f323e0814950c..8b9bbb4995589 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -1,4 +1,4 @@ -//===-- IndirectCallPromotion.cpp - Optimizations based on value profiling ===// +//===- IndirectCallPromotion.cpp - Optimizations based on value profiling -===// // // The LLVM Compiler Infrastructure // @@ -14,15 +14,15 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IndirectCallPromotionAnalysis.h" #include "llvm/Analysis/IndirectCallSiteVisitor.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" @@ -36,20 +36,22 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" -#include "llvm/PassRegistry.h" -#include "llvm/PassSupport.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" #include "llvm/Support/Debug.h" -#include 
"llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/PGOInstrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include #include +#include +#include +#include #include using namespace llvm; @@ -112,6 +114,7 @@ static cl::opt cl::desc("Dump IR after transformation happens")); namespace { + class PGOIndirectCallPromotionLegacyPass : public ModulePass { public: static char ID; @@ -139,9 +142,11 @@ class PGOIndirectCallPromotionLegacyPass : public ModulePass { // the promoted direct call. bool SamplePGO; }; + } // end anonymous namespace char PGOIndirectCallPromotionLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(PGOIndirectCallPromotionLegacyPass, "pgo-icall-prom", "Use PGO instrumentation profile to promote indirect " "calls to direct calls.", @@ -158,6 +163,7 @@ ModulePass *llvm::createPGOIndirectCallPromotionLegacyPass(bool InLTO, } namespace { + // The class for main data structure to promote indirect calls to conditional // direct calls. class ICallPromotionFunc { @@ -177,6 +183,7 @@ class ICallPromotionFunc { struct PromotionCandidate { Function *TargetFunction; uint64_t Count; + PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {} }; @@ -195,17 +202,16 @@ class ICallPromotionFunc { const std::vector &Candidates, uint64_t &TotalCount); - // Noncopyable - ICallPromotionFunc(const ICallPromotionFunc &other) = delete; - ICallPromotionFunc &operator=(const ICallPromotionFunc &other) = delete; - public: ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab, bool SamplePGO, OptimizationRemarkEmitter &ORE) : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {} + ICallPromotionFunc(const ICallPromotionFunc &) = delete; + ICallPromotionFunc &operator=(const ICallPromotionFunc &) = delete; bool processFunction(ProfileSummaryInfo *PSI); }; + } // end anonymous namespace bool llvm::isLegalToPromote(Instruction *Inst, Function *F, @@ -277,38 +283,49 @@ ICallPromotionFunc::getPromotionCandidatesForCallSite( if (ICPInvokeOnly && dyn_cast(Inst)) { DEBUG(dbgs() << " Not promote: User options.\n"); - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) - << " Not promote: User options"); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) + << " Not promote: User options"; + }); break; } if (ICPCallOnly && dyn_cast(Inst)) { DEBUG(dbgs() << " Not promote: User option.\n"); - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) - << " Not promote: User options"); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UserOptions", Inst) + << " Not promote: User options"; + }); break; } if (ICPCutOff != 0 && NumOfPGOICallPromotion >= ICPCutOff) { DEBUG(dbgs() << " Not promote: Cutoff reached.\n"); - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", Inst) - << " Not promote: Cutoff reached"); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "CutOffReached", Inst) + << " Not promote: Cutoff reached"; + }); break; } Function *TargetFunction = Symtab->getFunction(Target); if (TargetFunction == nullptr) { DEBUG(dbgs() << " Not promote: Cannot find the target\n"); - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", Inst) - << "Cannot promote indirect call: target not found"); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", Inst) + << "Cannot promote indirect call: 
target not found"; + }); break; } const char *Reason = nullptr; if (!isLegalToPromote(Inst, TargetFunction, &Reason)) { using namespace ore; - ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", Inst) + + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", Inst) << "Cannot promote indirect call to " << NV("TargetFunction", TargetFunction) << " with count of " - << NV("Count", Count) << ": " << Reason); + << NV("Count", Count) << ": " << Reason; + }); break; } @@ -461,11 +478,13 @@ static Instruction *insertCallRetCast(const Instruction *Inst, // MergeBB is the bottom BB of the if-then-else-diamond after the // transformation. For invoke instruction, the edges from DirectCallBB and // IndirectCallBB to MergeBB are removed before this call (during -// createIfThenElse). +// createIfThenElse). Stores the pointer to the Instruction that cast +// the direct call in \p CastInst. static Instruction *createDirectCallInst(const Instruction *Inst, Function *DirectCallee, BasicBlock *DirectCallBB, - BasicBlock *MergeBB) { + BasicBlock *MergeBB, + Instruction *&CastInst) { Instruction *NewInst = Inst->clone(); if (CallInst *CI = dyn_cast(NewInst)) { CI->setCalledFunction(DirectCallee); @@ -499,7 +518,8 @@ static Instruction *createDirectCallInst(const Instruction *Inst, } } - return insertCallRetCast(Inst, NewInst, DirectCallee); + CastInst = insertCallRetCast(Inst, NewInst, DirectCallee); + return NewInst; } // Create a PHI to unify the return values of calls. @@ -559,15 +579,17 @@ Instruction *llvm::promoteIndirectCall(Instruction *Inst, createIfThenElse(Inst, DirectCallee, Count, TotalCount, &DirectCallBB, &IndirectCallBB, &MergeBB); + // If the return type of the NewInst is not the same as the Inst, a CastInst + // is needed for type casting. Otherwise CastInst is the same as NewInst. + Instruction *CastInst = nullptr; Instruction *NewInst = - createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB); + createDirectCallInst(Inst, DirectCallee, DirectCallBB, MergeBB, CastInst); if (AttachProfToDirectCall) { SmallVector Weights; Weights.push_back(Count); MDBuilder MDB(NewInst->getContext()); - if (Instruction *DI = dyn_cast(NewInst->stripPointerCasts())) - DI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); + NewInst->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); } // Move Inst from MergeBB to IndirectCallBB. @@ -589,20 +611,23 @@ Instruction *llvm::promoteIndirectCall(Instruction *Inst, // We don't need to update the operand from NormalDest for DirectCallBB. // Pass nullptr here. 
fixupPHINodeForNormalDest(Inst, II->getNormalDest(), MergeBB, - IndirectCallBB, NewInst); + IndirectCallBB, CastInst); } - insertCallRetPHI(Inst, NewInst, DirectCallee); + insertCallRetPHI(Inst, CastInst, DirectCallee); DEBUG(dbgs() << "\n== Basic Blocks After ==\n"); DEBUG(dbgs() << *BB << *DirectCallBB << *IndirectCallBB << *MergeBB << "\n"); using namespace ore; + if (ORE) - ORE->emit(OptimizationRemark(DEBUG_TYPE, "Promoted", Inst) - << "Promote indirect call to " << NV("DirectCallee", DirectCallee) - << " with count " << NV("Count", Count) << " out of " - << NV("TotalCount", TotalCount)); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Promoted", Inst) + << "Promote indirect call to " << NV("DirectCallee", DirectCallee) + << " with count " << NV("Count", Count) << " out of " + << NV("TotalCount", TotalCount); + }); return NewInst; } @@ -683,7 +708,7 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, AM->getResult(M).getManager(); ORE = &FAM.getResult(F); } else { - OwnedORE = make_unique(&F); + OwnedORE = llvm::make_unique(&F); ORE = OwnedORE.get(); } diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 3b33ced2640ca..0e6f6bacf44b3 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1,4 +1,4 @@ -//===-- MemorySanitizer.cpp - detector of uninitialized reads -------------===// +//===- MemorySanitizer.cpp - detector of uninitialized reads --------------===// // // The LLVM Compiler Infrastructure // @@ -6,6 +6,7 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file is a part of MemorySanitizer, a detector of uninitialized /// reads. @@ -88,32 +89,64 @@ /// implementation ignores the load aspect of CAS/RMW, always returning a clean /// value. It implements the store part as a simple atomic store by storing a /// clean shadow. 
- +// //===----------------------------------------------------------------------===// +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueMap.h" +#include "llvm/Pass.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include +#include +#include +#include +#include +#include +#include using namespace llvm; @@ -137,18 +170,23 @@ static const size_t kNumberOfAccessSizes = 4; static cl::opt ClTrackOrigins("msan-track-origins", cl::desc("Track origins (allocation sites) of poisoned memory"), cl::Hidden, cl::init(0)); + static cl::opt ClKeepGoing("msan-keep-going", cl::desc("keep going after reporting a UMR"), cl::Hidden, cl::init(false)); + static cl::opt ClPoisonStack("msan-poison-stack", cl::desc("poison uninitialized stack variables"), cl::Hidden, cl::init(true)); + static cl::opt ClPoisonStackWithCall("msan-poison-stack-with-call", cl::desc("poison uninitialized stack variables with a call"), cl::Hidden, cl::init(false)); + static cl::opt ClPoisonStackPattern("msan-poison-stack-pattern", cl::desc("poison uninitialized stack variables with the given pattern"), cl::Hidden, cl::init(0xff)); + static cl::opt ClPoisonUndef("msan-poison-undef", cl::desc("poison undef temps"), cl::Hidden, cl::init(true)); @@ -217,6 +255,8 @@ struct PlatformMemoryMapParams { const MemoryMapParams *bits64; }; +} // end anonymous namespace + // i386 Linux static const MemoryMapParams Linux_I386_MemoryMapParams = { 0x000080000000, // AndMask @@ -305,27 +345,39 @@ static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = { &FreeBSD_X86_64_MemoryMapParams, }; +namespace { + /// \brief An instrumentation pass implementing detection of uninitialized /// reads. /// /// MemorySanitizer: instrument the code in module to find /// uninitialized reads. class MemorySanitizer : public FunctionPass { - public: +public: + // Pass identification, replacement for typeid. 
+ static char ID; + MemorySanitizer(int TrackOrigins = 0, bool Recover = false) : FunctionPass(ID), TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)), - Recover(Recover || ClKeepGoing), - WarningFn(nullptr) {} + Recover(Recover || ClKeepGoing) {} + StringRef getPassName() const override { return "MemorySanitizer"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); } + bool runOnFunction(Function &F) override; bool doInitialization(Module &M) override; - static char ID; // Pass identification, replacement for typeid. - private: +private: + friend struct MemorySanitizerVisitor; + friend struct VarArgAMD64Helper; + friend struct VarArgMIPS64Helper; + friend struct VarArgAArch64Helper; + friend struct VarArgPowerPC64Helper; + void initializeCallbacks(Module &M); /// \brief Track origins (allocation points) of uninitialized values. @@ -335,26 +387,34 @@ class MemorySanitizer : public FunctionPass { LLVMContext *C; Type *IntptrTy; Type *OriginTy; + /// \brief Thread-local shadow storage for function parameters. GlobalVariable *ParamTLS; + /// \brief Thread-local origin storage for function parameters. GlobalVariable *ParamOriginTLS; + /// \brief Thread-local shadow storage for function return value. GlobalVariable *RetvalTLS; + /// \brief Thread-local origin storage for function return value. GlobalVariable *RetvalOriginTLS; + /// \brief Thread-local shadow storage for in-register va_arg function /// parameters (x86_64-specific). GlobalVariable *VAArgTLS; + /// \brief Thread-local shadow storage for va_arg overflow area /// (x86_64-specific). GlobalVariable *VAArgOverflowSizeTLS; + /// \brief Thread-local space used to pass origin value to the UMR reporting /// function. GlobalVariable *OriginTLS; /// \brief The run-time callback to print a warning. - Value *WarningFn; + Value *WarningFn = nullptr; + // These arrays are indexed by log2(AccessSize). Value *MaybeWarningFn[kNumberOfAccessSizes]; Value *MaybeStoreOriginFn[kNumberOfAccessSizes]; @@ -362,11 +422,14 @@ class MemorySanitizer : public FunctionPass { /// \brief Run-time helper that generates a new origin value for a stack /// allocation. Value *MsanSetAllocaOrigin4Fn; + /// \brief Run-time helper that poisons stack on function entry. Value *MsanPoisonStackFn; + /// \brief Run-time helper that records a store (or any event) of an /// uninitialized value and returns an updated origin id encoding this info. Value *MsanChainOriginFn; + /// \brief MSan runtime replacements for memmove, memcpy and memset. Value *MemmoveFn, *MemcpyFn, *MemsetFn; @@ -374,21 +437,20 @@ class MemorySanitizer : public FunctionPass { const MemoryMapParams *MapParams; MDNode *ColdCallWeights; + /// \brief Branch weights for origin store. MDNode *OriginStoreWeights; + /// \brief An empty volatile inline asm that prevents callback merge. InlineAsm *EmptyAsm; - Function *MsanCtorFunction; - friend struct MemorySanitizerVisitor; - friend struct VarArgAMD64Helper; - friend struct VarArgMIPS64Helper; - friend struct VarArgAArch64Helper; - friend struct VarArgPowerPC64Helper; + Function *MsanCtorFunction; }; -} // anonymous namespace + +} // end anonymous namespace char MemorySanitizer::ID = 0; + INITIALIZE_PASS_BEGIN( MemorySanitizer, "msan", "MemorySanitizer: detects uninitialized reads.", false, false) @@ -586,6 +648,8 @@ namespace { /// the function, and should avoid creating new basic blocks. A new /// instance of this class is created for each instrumented function. 
struct VarArgHelper { + virtual ~VarArgHelper() = default; + /// \brief Visit a CallSite. virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0; @@ -600,21 +664,22 @@ struct VarArgHelper { /// This method is called after visiting all interesting (see above) /// instructions in a function. virtual void finalizeInstrumentation() = 0; - - virtual ~VarArgHelper() {} }; struct MemorySanitizerVisitor; -VarArgHelper* -CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, - MemorySanitizerVisitor &Visitor); +} // end anonymous namespace + +static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + MemorySanitizerVisitor &Visitor); -unsigned TypeSizeToSizeIndex(unsigned TypeSize) { +static unsigned TypeSizeToSizeIndex(unsigned TypeSize) { if (TypeSize <= 8) return 0; return Log2_32_Ceil((TypeSize + 7) / 8); } +namespace { + /// This class does all the work for a given function. Store and Load /// instructions store and load corresponding shadow and origin /// values. Most instructions propagate shadow from arguments to their @@ -641,8 +706,9 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *Shadow; Value *Origin; Instruction *OrigIns; + ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I) - : Shadow(S), Origin(O), OrigIns(I) { } + : Shadow(S), Origin(O), OrigIns(I) {} }; SmallVector InstrumentationList; SmallVector StoreList; @@ -856,7 +922,6 @@ struct MemorySanitizerVisitor : public InstVisitor { for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); - // Finalize PHI nodes. for (PHINode *PN : ShadowPHINodes) { PHINode *PNS = cast(getShadow(PN)); @@ -1489,14 +1554,14 @@ struct MemorySanitizerVisitor : public InstVisitor { /// arguments are initialized. template class Combiner { - Value *Shadow; - Value *Origin; + Value *Shadow = nullptr; + Value *Origin = nullptr; IRBuilder<> &IRB; MemorySanitizerVisitor *MSV; public: - Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) : - Shadow(nullptr), Origin(nullptr), IRB(IRB), MSV(MSV) {} + Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) + : IRB(IRB), MSV(MSV) {} /// \brief Add a pair of shadow and origin values to the mix. Combiner &Add(Value *OpShadow, Value *OpOrigin) { @@ -1550,8 +1615,8 @@ struct MemorySanitizerVisitor : public InstVisitor { } }; - typedef Combiner ShadowAndOriginCombiner; - typedef Combiner OriginCombiner; + using ShadowAndOriginCombiner = Combiner; + using OriginCombiner = Combiner; /// \brief Propagate origin for arbitrary operation. void setOriginForNaryOp(Instruction &I) { @@ -2204,28 +2269,28 @@ struct MemorySanitizerVisitor : public InstVisitor { // intrinsic. 
Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) { switch (id) { - case llvm::Intrinsic::x86_sse2_packsswb_128: - case llvm::Intrinsic::x86_sse2_packuswb_128: - return llvm::Intrinsic::x86_sse2_packsswb_128; + case Intrinsic::x86_sse2_packsswb_128: + case Intrinsic::x86_sse2_packuswb_128: + return Intrinsic::x86_sse2_packsswb_128; - case llvm::Intrinsic::x86_sse2_packssdw_128: - case llvm::Intrinsic::x86_sse41_packusdw: - return llvm::Intrinsic::x86_sse2_packssdw_128; + case Intrinsic::x86_sse2_packssdw_128: + case Intrinsic::x86_sse41_packusdw: + return Intrinsic::x86_sse2_packssdw_128; - case llvm::Intrinsic::x86_avx2_packsswb: - case llvm::Intrinsic::x86_avx2_packuswb: - return llvm::Intrinsic::x86_avx2_packsswb; + case Intrinsic::x86_avx2_packsswb: + case Intrinsic::x86_avx2_packuswb: + return Intrinsic::x86_avx2_packsswb; - case llvm::Intrinsic::x86_avx2_packssdw: - case llvm::Intrinsic::x86_avx2_packusdw: - return llvm::Intrinsic::x86_avx2_packssdw; + case Intrinsic::x86_avx2_packssdw: + case Intrinsic::x86_avx2_packusdw: + return Intrinsic::x86_avx2_packssdw; - case llvm::Intrinsic::x86_mmx_packsswb: - case llvm::Intrinsic::x86_mmx_packuswb: - return llvm::Intrinsic::x86_mmx_packsswb; + case Intrinsic::x86_mmx_packsswb: + case Intrinsic::x86_mmx_packuswb: + return Intrinsic::x86_mmx_packsswb; - case llvm::Intrinsic::x86_mmx_packssdw: - return llvm::Intrinsic::x86_mmx_packssdw; + case Intrinsic::x86_mmx_packssdw: + return Intrinsic::x86_mmx_packssdw; default: llvm_unreachable("unexpected intrinsic id"); } @@ -2255,9 +2320,9 @@ struct MemorySanitizerVisitor : public InstVisitor { S2 = IRB.CreateBitCast(S2, T); } Value *S1_ext = IRB.CreateSExt( - IRB.CreateICmpNE(S1, llvm::Constant::getNullValue(T)), T); + IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T); Value *S2_ext = IRB.CreateSExt( - IRB.CreateICmpNE(S2, llvm::Constant::getNullValue(T)), T); + IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T); if (isX86_MMX) { Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C); S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy); @@ -2366,213 +2431,213 @@ struct MemorySanitizerVisitor : public InstVisitor { void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { - case llvm::Intrinsic::bswap: + case Intrinsic::bswap: handleBswap(I); break; - case llvm::Intrinsic::x86_sse_stmxcsr: + case Intrinsic::x86_sse_stmxcsr: handleStmxcsr(I); break; - case llvm::Intrinsic::x86_sse_ldmxcsr: + case Intrinsic::x86_sse_ldmxcsr: handleLdmxcsr(I); break; - case llvm::Intrinsic::x86_avx512_vcvtsd2usi64: - case llvm::Intrinsic::x86_avx512_vcvtsd2usi32: - case llvm::Intrinsic::x86_avx512_vcvtss2usi64: - case llvm::Intrinsic::x86_avx512_vcvtss2usi32: - case llvm::Intrinsic::x86_avx512_cvttss2usi64: - case llvm::Intrinsic::x86_avx512_cvttss2usi: - case llvm::Intrinsic::x86_avx512_cvttsd2usi64: - case llvm::Intrinsic::x86_avx512_cvttsd2usi: - case llvm::Intrinsic::x86_avx512_cvtusi2sd: - case llvm::Intrinsic::x86_avx512_cvtusi2ss: - case llvm::Intrinsic::x86_avx512_cvtusi642sd: - case llvm::Intrinsic::x86_avx512_cvtusi642ss: - case llvm::Intrinsic::x86_sse2_cvtsd2si64: - case llvm::Intrinsic::x86_sse2_cvtsd2si: - case llvm::Intrinsic::x86_sse2_cvtsd2ss: - case llvm::Intrinsic::x86_sse2_cvtsi2sd: - case llvm::Intrinsic::x86_sse2_cvtsi642sd: - case llvm::Intrinsic::x86_sse2_cvtss2sd: - case llvm::Intrinsic::x86_sse2_cvttsd2si64: - case llvm::Intrinsic::x86_sse2_cvttsd2si: - case llvm::Intrinsic::x86_sse_cvtsi2ss: - case llvm::Intrinsic::x86_sse_cvtsi642ss: - case llvm::Intrinsic::x86_sse_cvtss2si64: - case 
llvm::Intrinsic::x86_sse_cvtss2si: - case llvm::Intrinsic::x86_sse_cvttss2si64: - case llvm::Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_avx512_vcvtsd2usi64: + case Intrinsic::x86_avx512_vcvtsd2usi32: + case Intrinsic::x86_avx512_vcvtss2usi64: + case Intrinsic::x86_avx512_vcvtss2usi32: + case Intrinsic::x86_avx512_cvttss2usi64: + case Intrinsic::x86_avx512_cvttss2usi: + case Intrinsic::x86_avx512_cvttsd2usi64: + case Intrinsic::x86_avx512_cvttsd2usi: + case Intrinsic::x86_avx512_cvtusi2sd: + case Intrinsic::x86_avx512_cvtusi2ss: + case Intrinsic::x86_avx512_cvtusi642sd: + case Intrinsic::x86_avx512_cvtusi642ss: + case Intrinsic::x86_sse2_cvtsd2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2ss: + case Intrinsic::x86_sse2_cvtsi2sd: + case Intrinsic::x86_sse2_cvtsi642sd: + case Intrinsic::x86_sse2_cvtss2sd: + case Intrinsic::x86_sse2_cvttsd2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse_cvtsi2ss: + case Intrinsic::x86_sse_cvtsi642ss: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse_cvttss2si: handleVectorConvertIntrinsic(I, 1); break; - case llvm::Intrinsic::x86_sse_cvtps2pi: - case llvm::Intrinsic::x86_sse_cvttps2pi: + case Intrinsic::x86_sse_cvtps2pi: + case Intrinsic::x86_sse_cvttps2pi: handleVectorConvertIntrinsic(I, 2); break; - case llvm::Intrinsic::x86_avx512_psll_w_512: - case llvm::Intrinsic::x86_avx512_psll_d_512: - case llvm::Intrinsic::x86_avx512_psll_q_512: - case llvm::Intrinsic::x86_avx512_pslli_w_512: - case llvm::Intrinsic::x86_avx512_pslli_d_512: - case llvm::Intrinsic::x86_avx512_pslli_q_512: - case llvm::Intrinsic::x86_avx512_psrl_w_512: - case llvm::Intrinsic::x86_avx512_psrl_d_512: - case llvm::Intrinsic::x86_avx512_psrl_q_512: - case llvm::Intrinsic::x86_avx512_psra_w_512: - case llvm::Intrinsic::x86_avx512_psra_d_512: - case llvm::Intrinsic::x86_avx512_psra_q_512: - case llvm::Intrinsic::x86_avx512_psrli_w_512: - case llvm::Intrinsic::x86_avx512_psrli_d_512: - case llvm::Intrinsic::x86_avx512_psrli_q_512: - case llvm::Intrinsic::x86_avx512_psrai_w_512: - case llvm::Intrinsic::x86_avx512_psrai_d_512: - case llvm::Intrinsic::x86_avx512_psrai_q_512: - case llvm::Intrinsic::x86_avx512_psra_q_256: - case llvm::Intrinsic::x86_avx512_psra_q_128: - case llvm::Intrinsic::x86_avx512_psrai_q_256: - case llvm::Intrinsic::x86_avx512_psrai_q_128: - case llvm::Intrinsic::x86_avx2_psll_w: - case llvm::Intrinsic::x86_avx2_psll_d: - case llvm::Intrinsic::x86_avx2_psll_q: - case llvm::Intrinsic::x86_avx2_pslli_w: - case llvm::Intrinsic::x86_avx2_pslli_d: - case llvm::Intrinsic::x86_avx2_pslli_q: - case llvm::Intrinsic::x86_avx2_psrl_w: - case llvm::Intrinsic::x86_avx2_psrl_d: - case llvm::Intrinsic::x86_avx2_psrl_q: - case llvm::Intrinsic::x86_avx2_psra_w: - case llvm::Intrinsic::x86_avx2_psra_d: - case llvm::Intrinsic::x86_avx2_psrli_w: - case llvm::Intrinsic::x86_avx2_psrli_d: - case llvm::Intrinsic::x86_avx2_psrli_q: - case llvm::Intrinsic::x86_avx2_psrai_w: - case llvm::Intrinsic::x86_avx2_psrai_d: - case llvm::Intrinsic::x86_sse2_psll_w: - case llvm::Intrinsic::x86_sse2_psll_d: - case llvm::Intrinsic::x86_sse2_psll_q: - case llvm::Intrinsic::x86_sse2_pslli_w: - case llvm::Intrinsic::x86_sse2_pslli_d: - case llvm::Intrinsic::x86_sse2_pslli_q: - case llvm::Intrinsic::x86_sse2_psrl_w: - case llvm::Intrinsic::x86_sse2_psrl_d: - case llvm::Intrinsic::x86_sse2_psrl_q: - case llvm::Intrinsic::x86_sse2_psra_w: - case 
llvm::Intrinsic::x86_sse2_psra_d: - case llvm::Intrinsic::x86_sse2_psrli_w: - case llvm::Intrinsic::x86_sse2_psrli_d: - case llvm::Intrinsic::x86_sse2_psrli_q: - case llvm::Intrinsic::x86_sse2_psrai_w: - case llvm::Intrinsic::x86_sse2_psrai_d: - case llvm::Intrinsic::x86_mmx_psll_w: - case llvm::Intrinsic::x86_mmx_psll_d: - case llvm::Intrinsic::x86_mmx_psll_q: - case llvm::Intrinsic::x86_mmx_pslli_w: - case llvm::Intrinsic::x86_mmx_pslli_d: - case llvm::Intrinsic::x86_mmx_pslli_q: - case llvm::Intrinsic::x86_mmx_psrl_w: - case llvm::Intrinsic::x86_mmx_psrl_d: - case llvm::Intrinsic::x86_mmx_psrl_q: - case llvm::Intrinsic::x86_mmx_psra_w: - case llvm::Intrinsic::x86_mmx_psra_d: - case llvm::Intrinsic::x86_mmx_psrli_w: - case llvm::Intrinsic::x86_mmx_psrli_d: - case llvm::Intrinsic::x86_mmx_psrli_q: - case llvm::Intrinsic::x86_mmx_psrai_w: - case llvm::Intrinsic::x86_mmx_psrai_d: + case Intrinsic::x86_avx512_psll_w_512: + case Intrinsic::x86_avx512_psll_d_512: + case Intrinsic::x86_avx512_psll_q_512: + case Intrinsic::x86_avx512_pslli_w_512: + case Intrinsic::x86_avx512_pslli_d_512: + case Intrinsic::x86_avx512_pslli_q_512: + case Intrinsic::x86_avx512_psrl_w_512: + case Intrinsic::x86_avx512_psrl_d_512: + case Intrinsic::x86_avx512_psrl_q_512: + case Intrinsic::x86_avx512_psra_w_512: + case Intrinsic::x86_avx512_psra_d_512: + case Intrinsic::x86_avx512_psra_q_512: + case Intrinsic::x86_avx512_psrli_w_512: + case Intrinsic::x86_avx512_psrli_d_512: + case Intrinsic::x86_avx512_psrli_q_512: + case Intrinsic::x86_avx512_psrai_w_512: + case Intrinsic::x86_avx512_psrai_d_512: + case Intrinsic::x86_avx512_psrai_q_512: + case Intrinsic::x86_avx512_psra_q_256: + case Intrinsic::x86_avx512_psra_q_128: + case Intrinsic::x86_avx512_psrai_q_256: + case Intrinsic::x86_avx512_psrai_q_128: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psra_d: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_mmx_psll_w: + case Intrinsic::x86_mmx_psll_d: + case Intrinsic::x86_mmx_psll_q: + case Intrinsic::x86_mmx_pslli_w: + case Intrinsic::x86_mmx_pslli_d: + case Intrinsic::x86_mmx_pslli_q: + case Intrinsic::x86_mmx_psrl_w: + case Intrinsic::x86_mmx_psrl_d: + case Intrinsic::x86_mmx_psrl_q: + case Intrinsic::x86_mmx_psra_w: + case Intrinsic::x86_mmx_psra_d: + case Intrinsic::x86_mmx_psrli_w: + case Intrinsic::x86_mmx_psrli_d: + case Intrinsic::x86_mmx_psrli_q: + case Intrinsic::x86_mmx_psrai_w: + case Intrinsic::x86_mmx_psrai_d: handleVectorShiftIntrinsic(I, /* Variable */ false); break; - 
case llvm::Intrinsic::x86_avx2_psllv_d: - case llvm::Intrinsic::x86_avx2_psllv_d_256: - case llvm::Intrinsic::x86_avx512_psllv_d_512: - case llvm::Intrinsic::x86_avx2_psllv_q: - case llvm::Intrinsic::x86_avx2_psllv_q_256: - case llvm::Intrinsic::x86_avx512_psllv_q_512: - case llvm::Intrinsic::x86_avx2_psrlv_d: - case llvm::Intrinsic::x86_avx2_psrlv_d_256: - case llvm::Intrinsic::x86_avx512_psrlv_d_512: - case llvm::Intrinsic::x86_avx2_psrlv_q: - case llvm::Intrinsic::x86_avx2_psrlv_q_256: - case llvm::Intrinsic::x86_avx512_psrlv_q_512: - case llvm::Intrinsic::x86_avx2_psrav_d: - case llvm::Intrinsic::x86_avx2_psrav_d_256: - case llvm::Intrinsic::x86_avx512_psrav_d_512: - case llvm::Intrinsic::x86_avx512_psrav_q_128: - case llvm::Intrinsic::x86_avx512_psrav_q_256: - case llvm::Intrinsic::x86_avx512_psrav_q_512: + case Intrinsic::x86_avx2_psllv_d: + case Intrinsic::x86_avx2_psllv_d_256: + case Intrinsic::x86_avx512_psllv_d_512: + case Intrinsic::x86_avx2_psllv_q: + case Intrinsic::x86_avx2_psllv_q_256: + case Intrinsic::x86_avx512_psllv_q_512: + case Intrinsic::x86_avx2_psrlv_d: + case Intrinsic::x86_avx2_psrlv_d_256: + case Intrinsic::x86_avx512_psrlv_d_512: + case Intrinsic::x86_avx2_psrlv_q: + case Intrinsic::x86_avx2_psrlv_q_256: + case Intrinsic::x86_avx512_psrlv_q_512: + case Intrinsic::x86_avx2_psrav_d: + case Intrinsic::x86_avx2_psrav_d_256: + case Intrinsic::x86_avx512_psrav_d_512: + case Intrinsic::x86_avx512_psrav_q_128: + case Intrinsic::x86_avx512_psrav_q_256: + case Intrinsic::x86_avx512_psrav_q_512: handleVectorShiftIntrinsic(I, /* Variable */ true); break; - case llvm::Intrinsic::x86_sse2_packsswb_128: - case llvm::Intrinsic::x86_sse2_packssdw_128: - case llvm::Intrinsic::x86_sse2_packuswb_128: - case llvm::Intrinsic::x86_sse41_packusdw: - case llvm::Intrinsic::x86_avx2_packsswb: - case llvm::Intrinsic::x86_avx2_packssdw: - case llvm::Intrinsic::x86_avx2_packuswb: - case llvm::Intrinsic::x86_avx2_packusdw: + case Intrinsic::x86_sse2_packsswb_128: + case Intrinsic::x86_sse2_packssdw_128: + case Intrinsic::x86_sse2_packuswb_128: + case Intrinsic::x86_sse41_packusdw: + case Intrinsic::x86_avx2_packsswb: + case Intrinsic::x86_avx2_packssdw: + case Intrinsic::x86_avx2_packuswb: + case Intrinsic::x86_avx2_packusdw: handleVectorPackIntrinsic(I); break; - case llvm::Intrinsic::x86_mmx_packsswb: - case llvm::Intrinsic::x86_mmx_packuswb: + case Intrinsic::x86_mmx_packsswb: + case Intrinsic::x86_mmx_packuswb: handleVectorPackIntrinsic(I, 16); break; - case llvm::Intrinsic::x86_mmx_packssdw: + case Intrinsic::x86_mmx_packssdw: handleVectorPackIntrinsic(I, 32); break; - case llvm::Intrinsic::x86_mmx_psad_bw: - case llvm::Intrinsic::x86_sse2_psad_bw: - case llvm::Intrinsic::x86_avx2_psad_bw: + case Intrinsic::x86_mmx_psad_bw: + case Intrinsic::x86_sse2_psad_bw: + case Intrinsic::x86_avx2_psad_bw: handleVectorSadIntrinsic(I); break; - case llvm::Intrinsic::x86_sse2_pmadd_wd: - case llvm::Intrinsic::x86_avx2_pmadd_wd: - case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw_128: - case llvm::Intrinsic::x86_avx2_pmadd_ub_sw: + case Intrinsic::x86_sse2_pmadd_wd: + case Intrinsic::x86_avx2_pmadd_wd: + case Intrinsic::x86_ssse3_pmadd_ub_sw_128: + case Intrinsic::x86_avx2_pmadd_ub_sw: handleVectorPmaddIntrinsic(I); break; - case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw: + case Intrinsic::x86_ssse3_pmadd_ub_sw: handleVectorPmaddIntrinsic(I, 8); break; - case llvm::Intrinsic::x86_mmx_pmadd_wd: + case Intrinsic::x86_mmx_pmadd_wd: handleVectorPmaddIntrinsic(I, 16); break; - case llvm::Intrinsic::x86_sse_cmp_ss: - 
case llvm::Intrinsic::x86_sse2_cmp_sd: - case llvm::Intrinsic::x86_sse_comieq_ss: - case llvm::Intrinsic::x86_sse_comilt_ss: - case llvm::Intrinsic::x86_sse_comile_ss: - case llvm::Intrinsic::x86_sse_comigt_ss: - case llvm::Intrinsic::x86_sse_comige_ss: - case llvm::Intrinsic::x86_sse_comineq_ss: - case llvm::Intrinsic::x86_sse_ucomieq_ss: - case llvm::Intrinsic::x86_sse_ucomilt_ss: - case llvm::Intrinsic::x86_sse_ucomile_ss: - case llvm::Intrinsic::x86_sse_ucomigt_ss: - case llvm::Intrinsic::x86_sse_ucomige_ss: - case llvm::Intrinsic::x86_sse_ucomineq_ss: - case llvm::Intrinsic::x86_sse2_comieq_sd: - case llvm::Intrinsic::x86_sse2_comilt_sd: - case llvm::Intrinsic::x86_sse2_comile_sd: - case llvm::Intrinsic::x86_sse2_comigt_sd: - case llvm::Intrinsic::x86_sse2_comige_sd: - case llvm::Intrinsic::x86_sse2_comineq_sd: - case llvm::Intrinsic::x86_sse2_ucomieq_sd: - case llvm::Intrinsic::x86_sse2_ucomilt_sd: - case llvm::Intrinsic::x86_sse2_ucomile_sd: - case llvm::Intrinsic::x86_sse2_ucomigt_sd: - case llvm::Intrinsic::x86_sse2_ucomige_sd: - case llvm::Intrinsic::x86_sse2_ucomineq_sd: + case Intrinsic::x86_sse_cmp_ss: + case Intrinsic::x86_sse2_cmp_sd: + case Intrinsic::x86_sse_comieq_ss: + case Intrinsic::x86_sse_comilt_ss: + case Intrinsic::x86_sse_comile_ss: + case Intrinsic::x86_sse_comigt_ss: + case Intrinsic::x86_sse_comige_ss: + case Intrinsic::x86_sse_comineq_ss: + case Intrinsic::x86_sse_ucomieq_ss: + case Intrinsic::x86_sse_ucomilt_ss: + case Intrinsic::x86_sse_ucomile_ss: + case Intrinsic::x86_sse_ucomigt_ss: + case Intrinsic::x86_sse_ucomige_ss: + case Intrinsic::x86_sse_ucomineq_ss: + case Intrinsic::x86_sse2_comieq_sd: + case Intrinsic::x86_sse2_comilt_sd: + case Intrinsic::x86_sse2_comile_sd: + case Intrinsic::x86_sse2_comigt_sd: + case Intrinsic::x86_sse2_comige_sd: + case Intrinsic::x86_sse2_comineq_sd: + case Intrinsic::x86_sse2_ucomieq_sd: + case Intrinsic::x86_sse2_ucomilt_sd: + case Intrinsic::x86_sse2_ucomile_sd: + case Intrinsic::x86_sse2_ucomigt_sd: + case Intrinsic::x86_sse2_ucomige_sd: + case Intrinsic::x86_sse2_ucomineq_sd: handleVectorCompareScalarIntrinsic(I); break; - case llvm::Intrinsic::x86_sse_cmp_ps: - case llvm::Intrinsic::x86_sse2_cmp_pd: + case Intrinsic::x86_sse_cmp_ps: + case Intrinsic::x86_sse2_cmp_pd: // FIXME: For x86_avx_cmp_pd_256 and x86_avx_cmp_ps_256 this function // generates reasonably looking IR that fails in the backend with "Do not // know how to split the result of this operator!". @@ -2939,18 +3004,16 @@ struct VarArgAMD64Helper : public VarArgHelper { Function &F; MemorySanitizer &MS; MemorySanitizerVisitor &MSV; - Value *VAArgTLSCopy; - Value *VAArgOverflowSize; + Value *VAArgTLSCopy = nullptr; + Value *VAArgOverflowSize = nullptr; SmallVector VAStartInstrumentationList; - VarArgAMD64Helper(Function &F, MemorySanitizer &MS, - MemorySanitizerVisitor &MSV) - : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr), - VAArgOverflowSize(nullptr) {} - enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory }; + VarArgAMD64Helper(Function &F, MemorySanitizer &MS, + MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} + ArgKind classifyArgument(Value* arg) { // A very rough approximation of X86_64 argument classification rules. 
Type *T = arg->getType(); @@ -3119,15 +3182,13 @@ struct VarArgMIPS64Helper : public VarArgHelper { Function &F; MemorySanitizer &MS; MemorySanitizerVisitor &MSV; - Value *VAArgTLSCopy; - Value *VAArgSize; + Value *VAArgTLSCopy = nullptr; + Value *VAArgSize = nullptr; SmallVector VAStartInstrumentationList; VarArgMIPS64Helper(Function &F, MemorySanitizer &MS, - MemorySanitizerVisitor &MSV) - : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr), - VAArgSize(nullptr) {} + MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override { unsigned VAArgOffset = 0; @@ -3135,11 +3196,11 @@ struct VarArgMIPS64Helper : public VarArgHelper { for (CallSite::arg_iterator ArgIt = CS.arg_begin() + CS.getFunctionType()->getNumParams(), End = CS.arg_end(); ArgIt != End; ++ArgIt) { - llvm::Triple TargetTriple(F.getParent()->getTargetTriple()); + Triple TargetTriple(F.getParent()->getTargetTriple()); Value *A = *ArgIt; Value *Base; uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); - if (TargetTriple.getArch() == llvm::Triple::mips64) { + if (TargetTriple.getArch() == Triple::mips64) { // Adjusting the shadow for argument with size < 8 to match the placement // of bits in big endian system if (ArgSize < 8) @@ -3217,7 +3278,6 @@ struct VarArgMIPS64Helper : public VarArgHelper { } }; - /// \brief AArch64-specific implementation of VarArgHelper. struct VarArgAArch64Helper : public VarArgHelper { static const unsigned kAArch64GrArgSize = 64; @@ -3234,18 +3294,16 @@ struct VarArgAArch64Helper : public VarArgHelper { Function &F; MemorySanitizer &MS; MemorySanitizerVisitor &MSV; - Value *VAArgTLSCopy; - Value *VAArgOverflowSize; + Value *VAArgTLSCopy = nullptr; + Value *VAArgOverflowSize = nullptr; SmallVector VAStartInstrumentationList; - VarArgAArch64Helper(Function &F, MemorySanitizer &MS, - MemorySanitizerVisitor &MSV) - : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr), - VAArgOverflowSize(nullptr) {} - enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory }; + VarArgAArch64Helper(Function &F, MemorySanitizer &MS, + MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} + ArgKind classifyArgument(Value* arg) { Type *T = arg->getType(); if (T->isFPOrFPVectorTy()) @@ -3468,15 +3526,13 @@ struct VarArgPowerPC64Helper : public VarArgHelper { Function &F; MemorySanitizer &MS; MemorySanitizerVisitor &MSV; - Value *VAArgTLSCopy; - Value *VAArgSize; + Value *VAArgTLSCopy = nullptr; + Value *VAArgSize = nullptr; SmallVector VAStartInstrumentationList; VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS, - MemorySanitizerVisitor &MSV) - : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr), - VAArgSize(nullptr) {} + MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override { // For PowerPC, we need to deal with alignment of stack arguments - @@ -3486,12 +3542,12 @@ struct VarArgPowerPC64Helper : public VarArgHelper { // compute current offset from stack pointer (which is always properly // aligned), and offset for the first vararg, then subtract them. unsigned VAArgBase; - llvm::Triple TargetTriple(F.getParent()->getTargetTriple()); + Triple TargetTriple(F.getParent()->getTargetTriple()); // Parameter save area starts at 48 bytes from frame pointer for ABIv1, // and 32 bytes for ABIv2. This is usually determined by target // endianness, but in theory could be overriden by function attribute. // For simplicity, we ignore it here (it'd only matter for QPX vectors). 
- if (TargetTriple.getArch() == llvm::Triple::ppc64) + if (TargetTriple.getArch() == Triple::ppc64) VAArgBase = 48; else VAArgBase = 32; @@ -3634,27 +3690,27 @@ struct VarArgNoOpHelper : public VarArgHelper { void finalizeInstrumentation() override {} }; -VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, - MemorySanitizerVisitor &Visitor) { +} // end anonymous namespace + +static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + MemorySanitizerVisitor &Visitor) { // VarArg handling is only implemented on AMD64. False positives are possible // on other platforms. - llvm::Triple TargetTriple(Func.getParent()->getTargetTriple()); - if (TargetTriple.getArch() == llvm::Triple::x86_64) + Triple TargetTriple(Func.getParent()->getTargetTriple()); + if (TargetTriple.getArch() == Triple::x86_64) return new VarArgAMD64Helper(Func, Msan, Visitor); - else if (TargetTriple.getArch() == llvm::Triple::mips64 || - TargetTriple.getArch() == llvm::Triple::mips64el) + else if (TargetTriple.getArch() == Triple::mips64 || + TargetTriple.getArch() == Triple::mips64el) return new VarArgMIPS64Helper(Func, Msan, Visitor); - else if (TargetTriple.getArch() == llvm::Triple::aarch64) + else if (TargetTriple.getArch() == Triple::aarch64) return new VarArgAArch64Helper(Func, Msan, Visitor); - else if (TargetTriple.getArch() == llvm::Triple::ppc64 || - TargetTriple.getArch() == llvm::Triple::ppc64le) + else if (TargetTriple.getArch() == Triple::ppc64 || + TargetTriple.getArch() == Triple::ppc64le) return new VarArgPowerPC64Helper(Func, Msan, Visitor); else return new VarArgNoOpHelper(Func, Msan, Visitor); } -} // anonymous namespace - bool MemorySanitizer::runOnFunction(Function &F) { if (&F == MsanCtorFunction) return false; diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index d14ab9db7ecdc..11a43e803a99e 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -1,4 +1,4 @@ -//===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===// +//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===// // // The LLVM Compiler Infrastructure // @@ -50,37 +50,69 @@ #include "llvm/Transforms/PGOInstrumentation.h" #include "CFGMST.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/IndirectCallSiteVisitor.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstVisitor.h" +#include 
"llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/ProfileSummary.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/InstrProfReader.h" -#include "llvm/ProfileData/ProfileCommon.h" #include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/JamCRC.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include +#include +#include +#include +#include #include #include #include @@ -202,11 +234,9 @@ extern cl::opt PGOViewCounts; // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= extern cl::opt ViewBlockFreqFuncName; -namespace { - // Return a string describing the branch condition that can be // used in static branch probability heuristics: -std::string getBranchCondString(Instruction *TI) { +static std::string getBranchCondString(Instruction *TI) { BranchInst *BI = dyn_cast(TI); if (!BI || !BI->isConditional()) return std::string(); @@ -237,6 +267,8 @@ std::string getBranchCondString(Instruction *TI) { return result; } +namespace { + /// The select instruction visitor plays three roles specified /// by the mode. In \c VM_counting mode, it simply counts the number of /// select instructions. In \c VM_instrument mode, it inserts code to count @@ -263,6 +295,7 @@ struct SelectInstVisitor : public InstVisitor { Mode = VM_counting; visit(Func); } + // Visit the IR stream and instrument all select instructions. \p // Ind is a pointer to the counter index variable; \p TotalNC // is the total number of counters; \p FNV is the pointer to the @@ -287,8 +320,10 @@ struct SelectInstVisitor : public InstVisitor { void instrumentOneSelectInst(SelectInst &SI); void annotateOneSelectInst(SelectInst &SI); + // Visit \p SI instruction and perform tasks according to visit mode. void visitSelectInst(SelectInst &SI); + // Return the number of select instructions. This needs be called after // countSelects(). unsigned getNumOfSelectInsts() const { return NSIs; } @@ -332,8 +367,10 @@ struct MemIntrinsicVisitor : public InstVisitor { // Visit the IR stream and annotate all mem intrinsic call instructions. void instrumentOneMemIntrinsic(MemIntrinsic &MI); + // Visit \p MI instruction and perform tasks according to visit mode. 
void visitMemIntrinsic(MemIntrinsic &SI); + unsigned getNumOfMemIntrinsics() const { return NMemIs; } }; @@ -375,6 +412,7 @@ class PGOInstrumentationUseLegacyPass : public ModulePass { std::string ProfileFileName; bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); } @@ -383,6 +421,7 @@ class PGOInstrumentationUseLegacyPass : public ModulePass { } // end anonymous namespace char PGOInstrumentationGenLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) @@ -395,6 +434,7 @@ ModulePass *llvm::createPGOInstrumentationGenLegacyPass() { } char PGOInstrumentationUseLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use", "Read PGO instrumentation profile.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) @@ -407,6 +447,7 @@ ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) { } namespace { + /// \brief An MST based instrumentation for PGO /// /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO @@ -417,12 +458,13 @@ struct PGOEdge { const BasicBlock *SrcBB; const BasicBlock *DestBB; uint64_t Weight; - bool InMST; - bool Removed; - bool IsCritical; + bool InMST = false; + bool Removed = false; + bool IsCritical = false; + PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) - : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false), - IsCritical(false) {} + : SrcBB(Src), DestBB(Dest), Weight(W) {} + // Return the information string of an edge. const std::string infoString() const { return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + @@ -434,9 +476,9 @@ struct PGOEdge { struct BBInfo { BBInfo *Group; uint32_t Index; - uint32_t Rank; + uint32_t Rank = 0; - BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {} + BBInfo(unsigned IX) : Group(this), Index(IX) {} // Return the information string of this object. const std::string infoString() const { @@ -448,19 +490,22 @@ struct BBInfo { template class FuncPGOInstrumentation { private: Function &F; - void computeCFGHash(); - void renameComdatFunction(); + // A map that stores the Comdat group in function F. std::unordered_multimap &ComdatMembers; + void computeCFGHash(); + void renameComdatFunction(); + public: std::vector> ValueSites; SelectInstVisitor SIVisitor; MemIntrinsicVisitor MIVisitor; std::string FuncName; GlobalVariable *FuncNameVar; + // CFG hash value for this function. - uint64_t FunctionHash; + uint64_t FunctionHash = 0; // The Minimum Spanning Tree of function CFG. CFGMST MST; @@ -487,8 +532,7 @@ template class FuncPGOInstrumentation { bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr) : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1), - SIVisitor(Func), MIVisitor(Func), FunctionHash(0), MST(F, BPI, BFI) { - + SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) { // This should be done before CFG hash computation. 
SIVisitor.countSelects(Func); MIVisitor.countMemIntrinsics(Func); @@ -499,7 +543,7 @@ template class FuncPGOInstrumentation { FuncName = getPGOFuncName(F); computeCFGHash(); - if (ComdatMembers.size()) + if (!ComdatMembers.empty()) renameComdatFunction(); DEBUG(dumpInfo("after CFGMST")); @@ -527,6 +571,8 @@ template class FuncPGOInstrumentation { } }; +} // end anonymous namespace + // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index // value of each BB in the CFG. The higher 32 bits record the number of edges. template @@ -686,7 +732,7 @@ static void instrumentOneFunc( "Cannot get the Instrumentation point"); Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), - {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), + {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), Builder.getInt32(I++)}); } @@ -710,7 +756,7 @@ static void instrumentOneFunc( "Cannot get the Instrumentation point"); Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), - {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), + {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), Builder.getInt64(FuncInfo.FunctionHash), Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()), Builder.getInt32(IPVK_IndirectCallTarget), @@ -723,12 +769,15 @@ static void instrumentOneFunc( F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash); } +namespace { + // This class represents a CFG edge in profile use compilation. struct PGOUseEdge : public PGOEdge { - bool CountValid; - uint64_t CountValue; + bool CountValid = false; + uint64_t CountValue = 0; + PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) - : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {} + : PGOEdge(Src, Dest, W) {} // Set edge count value void setEdgeCount(uint64_t Value) { @@ -745,22 +794,21 @@ struct PGOUseEdge : public PGOEdge { } }; -typedef SmallVector DirectEdges; +using DirectEdges = SmallVector; // This class stores the auxiliary information for each BB. struct UseBBInfo : public BBInfo { - uint64_t CountValue; + uint64_t CountValue = 0; bool CountValid; - int32_t UnknownCountInEdge; - int32_t UnknownCountOutEdge; + int32_t UnknownCountInEdge = 0; + int32_t UnknownCountOutEdge = 0; DirectEdges InEdges; DirectEdges OutEdges; - UseBBInfo(unsigned IX) - : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0), - UnknownCountOutEdge(0) {} + + UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {} + UseBBInfo(unsigned IX, uint64_t C) - : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0), - UnknownCountOutEdge(0) {} + : BBInfo(IX), CountValue(C), CountValid(true) {} // Set the profile count value for this BB. void setBBInfoCount(uint64_t Value) { @@ -776,6 +824,8 @@ struct UseBBInfo : public BBInfo { } }; +} // end anonymous namespace + // Sum up the count values for all the edges. 
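The PGOUseEdge and UseBBInfo hunks above show the two C++11 cleanups this patch applies mechanically across every file: in-class default member initializers replacing repetitive constructor init lists, and using aliases replacing typedef. A minimal before/after sketch of the pattern — the SmallVector element type and inline size are assumptions here, since the diff's template arguments were lost in extraction:

#include "llvm/ADT/SmallVector.h"
#include <cstdint>

struct EdgeOld {                          // pre-patch shape
  bool CountValid;
  uint64_t CountValue;
  EdgeOld() : CountValid(false), CountValue(0) {} // ctor repeats the defaults
};
typedef llvm::SmallVector<EdgeOld *, 2> DirectEdgesOld;

struct EdgeNew {                          // post-patch shape
  bool CountValid = false;                // defaults live with the members
  uint64_t CountValue = 0;
  EdgeNew() = default;                    // extra ctors state only what differs
};
using DirectEdgesNew = llvm::SmallVector<EdgeNew *, 2>;

Both spellings compile to identical code; keeping each default next to its declaration is why so many constructors in this patch collapse to : F(F), MS(MS), MSV(MSV) {} or = default.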
static uint64_t sumEdgeCount(const ArrayRef Edges) { uint64_t Total = 0; @@ -787,6 +837,8 @@ static uint64_t sumEdgeCount(const ArrayRef Edges) { return Total; } +namespace { + class PGOUseFunc { public: PGOUseFunc(Function &Func, Module *Modu, @@ -794,7 +846,7 @@ class PGOUseFunc { BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr) : F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI), - CountPosition(0), ProfileCountSize(0), FreqAttr(FFA_Normal) {} + FreqAttr(FFA_Normal) {} // Read counts for the instrumented BB from profile. bool readCounters(IndexedInstrProfReader *PGOReader); @@ -819,6 +871,7 @@ class PGOUseFunc { // Return the function hash. uint64_t getFuncHash() const { return FuncInfo.FunctionHash; } + // Return the profile record for this function; InstrProfRecord &getProfileRecord() { return ProfileRecord; } @@ -841,6 +894,7 @@ class PGOUseFunc { private: Function &F; Module *M; + // This member stores the shared information with class PGOGenFunc. FuncPGOInstrumentation FuncInfo; @@ -849,10 +903,10 @@ class PGOUseFunc { uint64_t ProgramMaxCount; // Position of counter that remains to be read. - uint32_t CountPosition; + uint32_t CountPosition = 0; // Total size of the profile count for this function. - uint32_t ProfileCountSize; + uint32_t ProfileCountSize = 0; // ProfileRecord for this function. InstrProfRecord ProfileRecord; @@ -887,11 +941,12 @@ class PGOUseFunc { } }; +} // end anonymous namespace + // Visit all the edges and assign the count value for the instrumented // edges and the BB. void PGOUseFunc::setInstrumentedCounts( const std::vector &CountFromProfile) { - assert(FuncInfo.getNumCounters() == CountFromProfile.size()); // Use a worklist as we will update the vector during the iteration. std::vector WorkList; @@ -1136,7 +1191,7 @@ void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) { auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty); Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step), - {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step}); ++(*CurCtrIdx); @@ -1191,7 +1246,7 @@ void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) { assert(!dyn_cast(Length)); Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile), - {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty), Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)}); ++CurCtrId; @@ -1257,7 +1312,6 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) { ValueSiteIndex++; } } -} // end anonymous namespace // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime // aware this is an ir_level profile so it can set the version flag. @@ -1327,7 +1381,6 @@ bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) { PreservedAnalyses PGOInstrumentationGen::run(Module &M, ModuleAnalysisManager &AM) { - auto &FAM = AM.getResult(M).getManager(); auto LookupBPI = [&FAM](Function &F) { return &FAM.getResult(F); @@ -1428,12 +1481,12 @@ static bool annotateAllFunctions( // can affect the BranchProbabilityInfo of any callers, resulting in an // inconsistent MST between prof-gen and prof-use. 
for (auto &F : HotFunctions) { - F->addFnAttr(llvm::Attribute::InlineHint); + F->addFnAttr(Attribute::InlineHint); DEBUG(dbgs() << "Set inline attribute to function: " << F->getName() << "\n"); } for (auto &F : ColdFunctions) { - F->addFnAttr(llvm::Attribute::Cold); + F->addFnAttr(Attribute::Cold); DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n"); } return true; @@ -1477,9 +1530,19 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) { return annotateAllFunctions(M, ProfileFileName, LookupBPI, LookupBFI); } -namespace llvm { -void setProfMetadata(Module *M, Instruction *TI, ArrayRef EdgeCounts, - uint64_t MaxCount) { +static std::string getSimpleNodeName(const BasicBlock *Node) { + if (!Node->getName().empty()) + return Node->getName(); + + std::string SimpleNodeName; + raw_string_ostream OS(SimpleNodeName); + Node->printAsOperand(OS, false); + return OS.str(); +} + +void llvm::setProfMetadata(Module *M, Instruction *TI, + ArrayRef EdgeCounts, + uint64_t MaxCount) { MDBuilder MDB(M->getContext()); assert(MaxCount > 0 && "Bad max count"); uint64_t Scale = calculateCountScale(MaxCount); @@ -1490,7 +1553,7 @@ void setProfMetadata(Module *M, Instruction *TI, ArrayRef EdgeCounts, DEBUG(dbgs() << "Weight is: "; for (const auto &W : Weights) { dbgs() << W << " "; } dbgs() << "\n";); - TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); + TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); if (EmitBranchProbability) { std::string BrCondStr = getBranchCondString(TI); if (BrCondStr.empty()) @@ -1510,41 +1573,39 @@ void setProfMetadata(Module *M, Instruction *TI, ArrayRef EdgeCounts, OS.flush(); Function *F = TI->getParent()->getParent(); OptimizationRemarkEmitter ORE(F); - ORE.emit(OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) - << BrCondStr << " is true with probability : " << BranchProbStr); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI) + << BrCondStr << " is true with probability : " << BranchProbStr; + }); } } +namespace llvm { + template <> struct GraphTraits { - typedef const BasicBlock *NodeRef; - typedef succ_const_iterator ChildIteratorType; - typedef pointer_iterator nodes_iterator; + using NodeRef = const BasicBlock *; + using ChildIteratorType = succ_const_iterator; + using nodes_iterator = pointer_iterator; static NodeRef getEntryNode(const PGOUseFunc *G) { return &G->getFunc().front(); } + static ChildIteratorType child_begin(const NodeRef N) { return succ_begin(N); } + static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); } + static nodes_iterator nodes_begin(const PGOUseFunc *G) { return nodes_iterator(G->getFunc().begin()); } + static nodes_iterator nodes_end(const PGOUseFunc *G) { return nodes_iterator(G->getFunc().end()); } }; -static std::string getSimpleNodeName(const BasicBlock *Node) { - if (!Node->getName().empty()) - return Node->getName(); - - std::string SimpleNodeName; - raw_string_ostream OS(SimpleNodeName); - Node->printAsOperand(OS, false); - return OS.str(); -} - template <> struct DOTGraphTraits : DefaultDOTGraphTraits { explicit DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} @@ -1584,4 +1645,5 @@ template <> struct DOTGraphTraits : DefaultDOTGraphTraits { return Result; } }; -} // namespace llvm + +} // end namespace llvm diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index b288c1f39ba32..95eb3680403a8 100644 --- 
a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -21,7 +21,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" @@ -382,14 +382,14 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) { DEBUG(dbgs() << *DefaultBB << "\n"); DEBUG(dbgs() << *MergeBB << "\n"); - { + ORE.emit([&]() { using namespace ore; - ORE.emit(OptimizationRemark(DEBUG_TYPE, "memopt-opt", MI) + return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MI) << "optimized " << NV("Intrinsic", StringRef(getMIName(MI))) << " with count " << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount) << " for " << NV("Versions", Version) - << " versions"); - } + << " versions"; + }); return true; } diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 8c0a90843ef4e..6692d950da2ad 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -808,9 +808,14 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { // If Arg is a PHI, and one or more incoming values to the // PHI are null, and the call is control-equivalent to the PHI, and there - // are no relevant side effects between the PHI and the call, the call - // could be pushed up to just those paths with non-null incoming values. - // For now, don't bother splitting critical edges for this. + // are no relevant side effects between the PHI and the call, and the call + // is not a release that doesn't have the clang.imprecise_release tag, the + // call could be pushed up to just those paths with non-null incoming + // values. For now, don't bother splitting critical edges for this. + if (Class == ARCInstKind::Release && + !Inst->getMetadata(MDKindCache.get(ARCMDKindID::ImpreciseRelease))) + continue; + SmallVector, 4> Worklist; Worklist.push_back(std::make_pair(Inst, Arg)); do { diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp index d13e941044f14..c512ff584a176 100644 --- a/lib/Transforms/ObjCARC/PtrState.cpp +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -250,10 +250,14 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, // If this is an invoke instruction, we're scanning it as part of // one of its successor blocks, since we can't insert code after it // in its own block, and we don't want to split critical edges. - if (isa(Inst)) - InsertReverseInsertPt(&*BB->getFirstInsertionPt()); - else - InsertReverseInsertPt(&*++Inst->getIterator()); + BasicBlock::iterator InsertAfter; + if (isa(Inst)) { + const auto IP = BB->getFirstInsertionPt(); + InsertAfter = IP == BB->end() ? std::prev(BB->end()) : IP; + } else { + InsertAfter = std::next(Inst->getIterator()); + } + InsertReverseInsertPt(&*InsertAfter); }; // Check for possible direct uses. 
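The PtrState.cpp hunk above guards against a subtle crash rather than just restyling: when an invoke is scanned from one of its successor blocks, getFirstInsertionPt() on that successor can return end() (for example, when the first non-PHI instruction is an EH-pad terminator), and the old code dereferenced it unconditionally. A reduced sketch of the guarded logic — the helper name and standalone form are illustrative, not the patch's code:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include <iterator>
using namespace llvm;

// Pick the position used as a reverse-insertion point for Inst in BB,
// falling back to the last real position when the block reports end().
static BasicBlock::iterator pickInsertAfter(Instruction *Inst, BasicBlock *BB) {
  if (isa<InvokeInst>(Inst)) {
    BasicBlock::iterator IP = BB->getFirstInsertionPt();
    return IP == BB->end() ? std::prev(BB->end()) : IP;
  }
  // Ordinary instructions can simply be followed directly.
  return std::next(Inst->getIterator());
}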
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index c47e904692dcf..f04d0f05ffc7e 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -15,8 +15,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/ADCE.h" - +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -27,14 +28,29 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "adce" @@ -53,10 +69,12 @@ static cl::opt RemoveLoops("adce-remove-loops", cl::init(false), cl::Hidden); namespace { + /// Information about Instructions struct InstInfoType { /// True if the associated instruction is live. bool Live = false; + /// Quick access to information for block containing associated Instruction. struct BlockInfoType *Block = nullptr; }; @@ -65,10 +83,13 @@ struct InstInfoType { struct BlockInfoType { /// True when this block contains live instructions. bool Live = false; + /// True when this block ends in an unconditional branch. bool UnconditionalBranch = false; + /// True when this block is known to have live PHI nodes. bool HasLivePhiNodes = false; + /// Control dependence sources need to be live for this block. bool CFLive = false; @@ -76,8 +97,6 @@ struct BlockInfoType { /// holds the value &InstInfo[Terminator] InstInfoType *TerminatorLiveInfo = nullptr; - bool terminatorIsLive() const { return TerminatorLiveInfo->Live; } - /// Corresponding BasicBlock. BasicBlock *BB = nullptr; @@ -86,6 +105,8 @@ struct BlockInfoType { /// Post-order numbering of reverse control flow graph. unsigned PostOrder; + + bool terminatorIsLive() const { return TerminatorLiveInfo->Live; } }; class AggressiveDeadCodeElimination { @@ -107,6 +128,7 @@ class AggressiveDeadCodeElimination { /// Instructions known to be live where we need to mark /// reaching definitions as live. SmallVector Worklist; + /// Debug info scopes around a live instruction. SmallPtrSet AliveScopes; @@ -121,15 +143,19 @@ class AggressiveDeadCodeElimination { /// Set up auxiliary data structures for Instructions and BasicBlocks and /// initialize the Worklist to the set of must-be-live Instructions. void initialize(); + /// Return true for operations which are always treated as live. bool isAlwaysLive(Instruction &I); + /// Return true for instrumentation instructions for value profiling. bool isInstrumentsConstant(Instruction &I); /// Propagate liveness to reaching definitions. void markLiveInstructions(); + /// Mark an instruction as live. void markLive(Instruction *I); + /// Mark a block as live.
void markLive(BlockInfoType &BB); void markLive(BasicBlock *BB) { markLive(BlockInfo[BB]); } @@ -162,12 +188,14 @@ class AggressiveDeadCodeElimination { void makeUnconditional(BasicBlock *BB, BasicBlock *Target); public: - AggressiveDeadCodeElimination(Function &F, DominatorTree &DT, - PostDominatorTree &PDT) - : F(F), DT(DT), PDT(PDT) {} - bool performDeadCodeElimination(); + AggressiveDeadCodeElimination(Function &F, DominatorTree &DT, + PostDominatorTree &PDT) + : F(F), DT(DT), PDT(PDT) {} + + bool performDeadCodeElimination(); }; -} + +} // end anonymous namespace bool AggressiveDeadCodeElimination::performDeadCodeElimination() { initialize(); @@ -181,7 +209,6 @@ static bool isUnconditionalBranch(TerminatorInst *Term) { } void AggressiveDeadCodeElimination::initialize() { - auto NumBlocks = F.size(); // We will have an entry in the map for each block so we grow the @@ -223,7 +250,8 @@ void AggressiveDeadCodeElimination::initialize() { // to recording which nodes have been visited we also record whether // a node is currently on the "stack" of active ancestors of the current // node. - typedef DenseMap StatusMap ; + using StatusMap = DenseMap; + class DFState : public StatusMap { public: std::pair insert(BasicBlock *BB) { @@ -320,7 +348,6 @@ bool AggressiveDeadCodeElimination::isInstrumentsConstant(Instruction &I) { } void AggressiveDeadCodeElimination::markLiveInstructions() { - // Propagate liveness backwards to operands. do { // Worklist holds newly discovered live instructions @@ -345,7 +372,6 @@ void AggressiveDeadCodeElimination::markLiveInstructions() { } void AggressiveDeadCodeElimination::markLive(Instruction *I) { - auto &Info = InstInfo[I]; if (Info.Live) return; @@ -432,7 +458,6 @@ void AggressiveDeadCodeElimination::markPhiLive(PHINode *PN) { } void AggressiveDeadCodeElimination::markLiveBranchesFromControlDependences() { - if (BlocksWithDeadTerminators.empty()) return; @@ -471,7 +496,6 @@ void AggressiveDeadCodeElimination::markLiveBranchesFromControlDependences() { // //===----------------------------------------------------------------------===// bool AggressiveDeadCodeElimination::removeDeadInstructions() { - // Updates control and dataflow around dead blocks updateDeadRegions(); @@ -529,7 +553,6 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() { // A dead region is the set of dead blocks with a common live post-dominator. 
void AggressiveDeadCodeElimination::updateDeadRegions() { - DEBUG({ dbgs() << "final dead terminator blocks: " << '\n'; for (auto *BB : BlocksWithDeadTerminators) @@ -597,7 +620,6 @@ void AggressiveDeadCodeElimination::updateDeadRegions() { // reverse top-sort order void AggressiveDeadCodeElimination::computeReversePostOrder() { - // This provides a post-order numbering of the reverse control flow graph. // Note that it is incomplete in the presence of infinite loops but we don't // need to number blocks which don't reach the end of the function since @@ -660,8 +682,10 @@ PreservedAnalyses ADCEPass::run(Function &F, FunctionAnalysisManager &FAM) { } namespace { + struct ADCELegacyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid + ADCELegacyPass() : FunctionPass(ID) { initializeADCELegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -689,9 +713,11 @@ struct ADCELegacyPass : public FunctionPass { AU.addPreserved(); } }; -} + +} // end anonymous namespace char ADCELegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 28157783daa7a..ef784fc788217 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -12,22 +12,39 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" +#include +#include + using namespace llvm; #define DEBUG_TYPE "correlated-value-propagation" @@ -45,9 +62,11 @@ STATISTIC(NumSRems, "Number of srem converted to urem"); static cl::opt DontProcessAdds("cvp-dont-process-adds", cl::init(true)); namespace { + class CorrelatedValuePropagation : public FunctionPass { public: static char ID; + CorrelatedValuePropagation(): FunctionPass(ID) { initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry()); } @@ -59,9 +78,11 @@ namespace { AU.addPreserved(); } }; -} + +} // end anonymous namespace char CorrelatedValuePropagation::ID = 0; + INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation", "Value Propagation", false, false) INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass) @@ -335,18 +356,6 @@ static bool processCallSite(CallSite CS,
LazyValueInfo *LVI) { return true; } -// Helper function to rewrite srem and sdiv. As a policy choice, we choose not -// to waste compile time on anything where the operands are local defs. While -// LVI can sometimes reason about such cases, it's not its primary purpose. -static bool hasLocalDefs(BinaryOperator *SDI) { - for (Value *O : SDI->operands()) { - auto *I = dyn_cast(O); - if (I && I->getParent() == SDI->getParent()) - return true; - } - return false; -} - static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) { Constant *Zero = ConstantInt::get(SDI->getType(), 0); for (Value *O : SDI->operands()) { @@ -358,7 +367,7 @@ static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) { } static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) { - if (SDI->getType()->isVectorTy() || hasLocalDefs(SDI) || + if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI)) return false; @@ -376,7 +385,7 @@ static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) { /// conditions, this can sometimes prove conditions instcombine can't by /// exploiting range information. static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) { - if (SDI->getType()->isVectorTy() || hasLocalDefs(SDI) || + if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI)) return false; @@ -391,7 +400,7 @@ static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) { } static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { - if (SDI->getType()->isVectorTy() || hasLocalDefs(SDI)) + if (SDI->getType()->isVectorTy()) return false; Constant *Zero = ConstantInt::get(SDI->getType(), 0); @@ -410,12 +419,12 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { } static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) { - typedef OverflowingBinaryOperator OBO; + using OBO = OverflowingBinaryOperator; if (DontProcessAdds) return false; - if (AddOp->getType()->isVectorTy() || hasLocalDefs(AddOp)) + if (AddOp->getType()->isVectorTy()) return false; bool NSW = AddOp->hasNoSignedWrap(); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 8086a4496e584..877050ec17718 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -16,32 +16,55 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/DeadStoreElimination.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include 
"llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" +#include +#include +#include +#include +#include #include +#include + using namespace llvm; #define DEBUG_TYPE "dse" @@ -62,12 +85,11 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging", cl::init(true), cl::Hidden, cl::desc("Enable partial store merging in DSE")); - //===----------------------------------------------------------------------===// // Helper functions //===----------------------------------------------------------------------===// -typedef std::map OverlapIntervalsTy; -typedef DenseMap InstOverlapIntervalsTy; +using OverlapIntervalsTy = std::map; +using InstOverlapIntervalsTy = DenseMap; /// Delete this instruction. Before we do, go through and zero out all the /// operands of this instruction. If any of them become dead, delete them and @@ -216,7 +238,6 @@ static bool isRemovable(Instruction *I) { case Intrinsic::init_trampoline: // Always safe to remove init_trampoline. return true; - case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: @@ -231,7 +252,6 @@ static bool isRemovable(Instruction *I) { return false; } - /// Returns true if the end of this instruction can be safely shortened in /// length. static bool isShortenableAtTheEnd(Instruction *I) { @@ -294,6 +314,7 @@ static uint64_t getPointerSize(const Value *V, const DataLayout &DL, } namespace { + enum OverwriteResult { OW_Begin, OW_Complete, @@ -301,7 +322,8 @@ enum OverwriteResult { OW_PartialEarlierWithFullLater, OW_Unknown }; -} + +} // end anonymous namespace /// Return 'OW_Complete' if a store to the 'Later' location completely /// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the @@ -868,7 +890,7 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset, if (!IsOverwriteEnd) LaterOffset = int64_t(LaterOffset + LaterSize); - if (!(llvm::isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) && + if (!(isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) && !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0)) return false; @@ -1286,9 +1308,12 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) { } namespace { + /// A legacy pass for the legacy pass manager that wraps \c DSEPass. 
class DSELegacyPass : public FunctionPass { public: + static char ID; // Pass identification, replacement for typeid + DSELegacyPass() : FunctionPass(ID) { initializeDSELegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -1317,12 +1342,12 @@ class DSELegacyPass : public FunctionPass { AU.addPreserved(); AU.addPreserved(); } - - static char ID; // Pass identification, replacement for typeid }; + } // end anonymous namespace char DSELegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index c5c9b2c185d63..6d1362a6a28e6 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -13,9 +13,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -24,18 +27,36 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" +#include #include +#include +#include + using namespace llvm; using namespace llvm::PatternMatch; @@ -53,6 +74,7 @@ STATISTIC(NumDSE, "Number of trivial dead stores removed"); //===----------------------------------------------------------------------===// namespace { + /// \brief Struct representing the available values in the scoped hash table. struct SimpleValue { Instruction *Inst; @@ -77,20 +99,25 @@ struct SimpleValue { isa(Inst) || isa(Inst); } }; -} + +} // end anonymous namespace namespace llvm { + template <> struct DenseMapInfo { static inline SimpleValue getEmptyKey() { return DenseMapInfo::getEmptyKey(); } + static inline SimpleValue getTombstoneKey() { return DenseMapInfo::getTombstoneKey(); } + static unsigned getHashValue(SimpleValue Val); static bool isEqual(SimpleValue LHS, SimpleValue RHS); }; -} + +} // end namespace llvm unsigned DenseMapInfo::getHashValue(SimpleValue Val) { Instruction *Inst = Val.Inst; @@ -181,6 +208,7 @@ bool DenseMapInfo::isEqual(SimpleValue LHS, SimpleValue RHS) { //===----------------------------------------------------------------------===// namespace { + /// \brief Struct representing the available call values in the scoped hash /// table. 
struct CallValue { @@ -206,20 +234,25 @@ struct CallValue { return true; } }; -} + +} // end anonymous namespace namespace llvm { + template <> struct DenseMapInfo { static inline CallValue getEmptyKey() { return DenseMapInfo::getEmptyKey(); } + static inline CallValue getTombstoneKey() { return DenseMapInfo::getTombstoneKey(); } + static unsigned getHashValue(CallValue Val); static bool isEqual(CallValue LHS, CallValue RHS); }; -} + +} // end namespace llvm unsigned DenseMapInfo::getHashValue(CallValue Val) { Instruction *Inst = Val.Inst; @@ -241,6 +274,7 @@ bool DenseMapInfo::isEqual(CallValue LHS, CallValue RHS) { //===----------------------------------------------------------------------===// namespace { + /// \brief A simple and fast domtree-based CSE pass. /// /// This pass does a simple depth-first walk over the dominator tree, @@ -257,10 +291,13 @@ class EarlyCSE { const SimplifyQuery SQ; MemorySSA *MSSA; std::unique_ptr MSSAUpdater; - typedef RecyclingAllocator< - BumpPtrAllocator, ScopedHashTableVal> AllocatorTy; - typedef ScopedHashTable, - AllocatorTy> ScopedHTType; + + using AllocatorTy = + RecyclingAllocator>; + using ScopedHTType = + ScopedHashTable, + AllocatorTy>; /// \brief A scoped hash table of the current values of all of our simple /// scalar expressions. @@ -285,44 +322,45 @@ class EarlyCSE { /// present the table; it is the responsibility of the consumer to inspect /// the atomicity/volatility if needed. struct LoadValue { - Instruction *DefInst; - unsigned Generation; - int MatchingId; - bool IsAtomic; - bool IsInvariant; - LoadValue() - : DefInst(nullptr), Generation(0), MatchingId(-1), IsAtomic(false), - IsInvariant(false) {} + Instruction *DefInst = nullptr; + unsigned Generation = 0; + int MatchingId = -1; + bool IsAtomic = false; + bool IsInvariant = false; + + LoadValue() = default; LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId, bool IsAtomic, bool IsInvariant) : DefInst(Inst), Generation(Generation), MatchingId(MatchingId), IsAtomic(IsAtomic), IsInvariant(IsInvariant) {} }; - typedef RecyclingAllocator> - LoadMapAllocator; - typedef ScopedHashTable, - LoadMapAllocator> LoadHTType; + + using LoadMapAllocator = + RecyclingAllocator>; + using LoadHTType = + ScopedHashTable, + LoadMapAllocator>; + LoadHTType AvailableLoads; /// \brief A scoped hash table of the current values of read-only call /// values. /// /// It uses the same generation count as loads. - typedef ScopedHashTable> - CallHTType; + using CallHTType = + ScopedHashTable>; CallHTType AvailableCalls; /// \brief This is the current generation of the memory value. - unsigned CurrentGeneration; + unsigned CurrentGeneration = 0; /// \brief Set up the EarlyCSE runner for a particular function. 
EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA) : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA), - MSSAUpdater(make_unique(MSSA)), CurrentGeneration(0) { - } + MSSAUpdater(llvm::make_unique(MSSA)) {} bool run(); @@ -336,11 +374,10 @@ class EarlyCSE { CallHTType &AvailableCalls) : Scope(AvailableValues), LoadScope(AvailableLoads), CallScope(AvailableCalls) {} - - private: NodeScope(const NodeScope &) = delete; - void operator=(const NodeScope &) = delete; + NodeScope &operator=(const NodeScope &) = delete; + private: ScopedHTType::ScopeTy Scope; LoadHTType::ScopeTy LoadScope; CallHTType::ScopeTy CallScope; @@ -356,8 +393,10 @@ class EarlyCSE { CallHTType &AvailableCalls, unsigned cg, DomTreeNode *n, DomTreeNode::iterator child, DomTreeNode::iterator end) : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child), - EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls), - Processed(false) {} + EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls) + {} + StackNode(const StackNode &) = delete; + StackNode &operator=(const StackNode &) = delete; // Accessors. unsigned currentGeneration() { return CurrentGeneration; } @@ -365,27 +404,25 @@ class EarlyCSE { void childGeneration(unsigned generation) { ChildGeneration = generation; } DomTreeNode *node() { return Node; } DomTreeNode::iterator childIter() { return ChildIter; } + DomTreeNode *nextChild() { DomTreeNode *child = *ChildIter; ++ChildIter; return child; } + DomTreeNode::iterator end() { return EndIter; } bool isProcessed() { return Processed; } void process() { Processed = true; } private: - StackNode(const StackNode &) = delete; - void operator=(const StackNode &) = delete; - - // Members. unsigned CurrentGeneration; unsigned ChildGeneration; DomTreeNode *Node; DomTreeNode::iterator ChildIter; DomTreeNode::iterator EndIter; NodeScope Scopes; - bool Processed; + bool Processed = false; }; /// \brief Wrapper class to handle memory instructions, including loads, @@ -393,24 +430,28 @@ class EarlyCSE { class ParseMemoryInst { public: ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI) - : IsTargetMemInst(false), Inst(Inst) { + : Inst(Inst) { if (IntrinsicInst *II = dyn_cast(Inst)) if (TTI.getTgtMemIntrinsic(II, Info)) IsTargetMemInst = true; } + bool isLoad() const { if (IsTargetMemInst) return Info.ReadMem; return isa(Inst); } + bool isStore() const { if (IsTargetMemInst) return Info.WriteMem; return isa(Inst); } + bool isAtomic() const { if (IsTargetMemInst) return Info.Ordering != AtomicOrdering::NotAtomic; return Inst->isAtomic(); } + bool isUnordered() const { if (IsTargetMemInst) return Info.isUnordered(); @@ -447,6 +488,7 @@ class EarlyCSE { return (getPointerOperand() == Inst.getPointerOperand() && getMatchingId() == Inst.getMatchingId()); } + bool isValid() const { return getPointerOperand() != nullptr; } // For regular (non-intrinsic) loads/stores, this is set to -1. 
For @@ -457,6 +499,7 @@ class EarlyCSE { if (IsTargetMemInst) return Info.MatchingId; return -1; } + Value *getPointerOperand() const { if (IsTargetMemInst) return Info.PtrVal; if (LoadInst *LI = dyn_cast(Inst)) { @@ -466,17 +509,19 @@ class EarlyCSE { } return nullptr; } + bool mayReadFromMemory() const { if (IsTargetMemInst) return Info.ReadMem; return Inst->mayReadFromMemory(); } + bool mayWriteToMemory() const { if (IsTargetMemInst) return Info.WriteMem; return Inst->mayWriteToMemory(); } private: - bool IsTargetMemInst; + bool IsTargetMemInst = false; MemIntrinsicInfo Info; Instruction *Inst; }; @@ -524,8 +569,8 @@ class EarlyCSE { for (MemoryPhi *MP : PhisToCheck) { MemoryAccess *FirstIn = MP->getIncomingValue(0); - if (all_of(MP->incoming_values(), - [=](Use &In) { return In == FirstIn; })) + if (llvm::all_of(MP->incoming_values(), + [=](Use &In) { return In == FirstIn; })) WorkQueue.push_back(MP); } PhisToCheck.clear(); @@ -533,7 +578,8 @@ class EarlyCSE { } } }; -} + +} // end anonymous namespace /// Determine if the memory referenced by LaterInst is from the same heap /// version as EarlierInst. @@ -1014,6 +1060,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F, } namespace { + /// \brief A simple and fast domtree-based CSE pass. /// /// This pass does a simple depth-first walk over the dominator tree, @@ -1062,7 +1109,8 @@ class EarlyCSELegacyCommonPass : public FunctionPass { AU.setPreservesCFG(); } }; -} + +} // end anonymous namespace using EarlyCSELegacyPass = EarlyCSELegacyCommonPass; diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 593aad74bd142..9d2c7b977f2e6 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -35,7 +35,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" @@ -1245,8 +1245,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); - ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI) - << "load eliminated by PRE"); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI) + << "load eliminated by PRE"; + }); ++NumPRELoad; return true; } @@ -1255,10 +1257,12 @@ static void reportLoadElim(LoadInst *LI, Value *AvailableValue, OptimizationRemarkEmitter *ORE) { using namespace ore; - ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadElim", LI) - << "load of type " << NV("Type", LI->getType()) << " eliminated" - << setExtraArgs() << " in favor of " - << NV("InfavorOfValue", AvailableValue)); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "LoadElim", LI) + << "load of type " << NV("Type", LI->getType()) << " eliminated" + << setExtraArgs() << " in favor of " + << NV("InfavorOfValue", AvailableValue); + }); } /// Attempt to eliminate a load whose dependencies are @@ -1362,6 +1366,11 @@ bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { } markInstructionForDeletion(IntrinsicI); return false; + } else if (isa(V)) { + // If it's not false, and constant, it must evaluate to true. This means our + // assume is assume(true), and thus, pointless, and we don't want to do + // anything more here. 
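A note on the remark hunks above (GVN's LoadPRE and reportLoadElim, and the same change in the PGO files): ORE.emit is switched from taking a fully built OptimizationRemark to taking a callable that builds one. The remark — including its streamed strings and ore::NV name/value pairs — is then only constructed when remark emission is actually enabled, so hot paths pay nothing otherwise. Reduced to its shape, assuming as in the hunk that ORE (an OptimizationRemarkEmitter *) and a LoadInst *LI are in scope:

// Eager (pre-patch): the remark and its strings are built unconditionally.
ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI)
          << "load eliminated by PRE");

// Lazy (post-patch): the lambda runs only if remarks are enabled.
ORE->emit([&]() {
  return OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI)
         << "load eliminated by PRE";
});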
+ return false; } Constant *True = ConstantInt::getTrue(V->getContext()); diff --git a/lib/Transforms/Scalar/GVNHoist.cpp b/lib/Transforms/Scalar/GVNHoist.cpp index 77fd432d762f2..c13768d38f6e8 100644 --- a/lib/Transforms/Scalar/GVNHoist.cpp +++ b/lib/Transforms/Scalar/GVNHoist.cpp @@ -35,20 +35,50 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils/Local.h" - -#include +#include +#include +#include +#include +#include +#include using namespace llvm; @@ -67,6 +97,7 @@ static cl::opt MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1), cl::desc("Max number of instructions to hoist " "(default unlimited = -1)")); + static cl::opt MaxNumberOfBBSInPath( "gvn-hoist-max-bbs", cl::Hidden, cl::init(4), cl::desc("Max number of basic blocks on the path between " @@ -84,16 +115,20 @@ static cl::opt namespace llvm { -typedef DenseMap BBSideEffectsSet; -typedef SmallVector SmallVecInsn; -typedef SmallVectorImpl SmallVecImplInsn; +using BBSideEffectsSet = DenseMap; +using SmallVecInsn = SmallVector; +using SmallVecImplInsn = SmallVectorImpl; + // Each element of a hoisting list contains the basic block where to hoist and // a list of instructions to be hoisted. -typedef std::pair HoistingPointInfo; -typedef SmallVector HoistingPointList; +using HoistingPointInfo = std::pair; + +using HoistingPointList = SmallVector; + // A map from a pair of VNs to all the instructions with those VNs. -typedef std::pair VNType; -typedef DenseMap> VNtoInsns; +using VNType = std::pair; + +using VNtoInsns = DenseMap>; // CHI keeps information about values flowing out of a basic block. It is // similar to PHI but in the inverse graph, and used for outgoing values on each @@ -107,19 +142,22 @@ typedef DenseMap> VNtoInsns; // instruction as well as the edge where the value is flowing to. struct CHIArg { VNType VN; + // Edge destination (shows the direction of flow), may not be where the I is. BasicBlock *Dest; + // The instruction (VN) which uses the values flowing out of CHI. 
Instruction *I; + bool operator==(const CHIArg &A) { return VN == A.VN; } bool operator!=(const CHIArg &A) { return !(*this == A); } }; -typedef SmallVectorImpl::iterator CHIIt; -typedef iterator_range CHIArgs; -typedef DenseMap> OutValuesType; -typedef DenseMap, 2>> - InValuesType; +using CHIIt = SmallVectorImpl::iterator; +using CHIArgs = iterator_range; +using OutValuesType = DenseMap>; +using InValuesType = + DenseMap, 2>>; // An invalid value number Used when inserting a single value number into // VNtoInsns. @@ -199,9 +237,7 @@ class CallInfo { } const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; } - const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; } - const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; } }; @@ -222,8 +258,7 @@ class GVNHoist { GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA, MemoryDependenceResults *MD, MemorySSA *MSSA) : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA), - MSSAUpdater(make_unique(MSSA)), - HoistingGeps(false) {} + MSSAUpdater(llvm::make_unique(MSSA)) {} bool run(Function &F) { NumFuncArgs = F.arg_size(); @@ -243,7 +278,7 @@ class GVNHoist { int ChainLength = 0; // FIXME: use lazy evaluation of VN to avoid the fix-point computation. - while (1) { + while (true) { if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength) return Res; @@ -302,10 +337,9 @@ class GVNHoist { DenseMap DFSNumber; BBSideEffectsSet BBSideEffects; DenseSet HoistBarrier; - SmallVector IDFBlocks; unsigned NumFuncArgs; - const bool HoistingGeps; + const bool HoistingGeps = false; enum InsKind { Unknown, Scalar, Load, Store }; @@ -338,7 +372,7 @@ class GVNHoist { return false; } - /* Return true when I1 appears before I2 in the instructions of BB. */ + // Return true when I1 appears before I2 in the instructions of BB. bool firstInBB(const Instruction *I1, const Instruction *I2) { assert(I1->getParent() == I2->getParent()); unsigned I1DFS = DFSNumber.lookup(I1); @@ -483,7 +517,6 @@ class GVNHoist { // to NewPt. bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt, MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) { - // In place hoisting is safe. if (NewPt == OldPt) return true; @@ -551,7 +584,7 @@ class GVNHoist { for (auto CHI : C) { BasicBlock *Dest = CHI.Dest; // Find if all the edges have values flowing out of BB. - bool Found = any_of(TI->successors(), [Dest](const BasicBlock *BB) { + bool Found = llvm::any_of(TI->successors(), [Dest](const BasicBlock *BB) { return BB == Dest; }); if (!Found) return false; @@ -579,7 +612,8 @@ class GVNHoist { } } - typedef DenseMap> RenameStackType; + using RenameStackType = DenseMap>; + // Push all the VNs corresponding to BB into RenameStack. void fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs, RenameStackType &RenameStack) { @@ -822,7 +856,6 @@ class GVNHoist { Instruction *ClonedGep = Gep->clone(); for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) if (Instruction *Op = dyn_cast(Gep->getOperand(i))) { - // Check whether the operand is already available. if (DT->dominates(Op->getParent(), HoistPt)) continue; @@ -912,7 +945,7 @@ class GVNHoist { for (MemoryPhi *Phi : UsePhis) { auto In = Phi->incoming_values(); - if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) { + if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) { Phi->replaceAllUsesWith(NewMemAcc); MSSAUpdater->removeMemoryAccess(Phi); } @@ -1007,7 +1040,6 @@ class GVNHoist { // The order in which hoistings are done may influence the availability // of operands. 
if (!allOperandsAvailable(Repl, DestBB)) { - // When HoistingGeps there is nothing more we can do to make the // operands available: just continue. if (HoistingGeps) @@ -1028,7 +1060,6 @@ class GVNHoist { NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess); - if (isa(Repl)) ++NL; else if (isa(Repl)) @@ -1141,7 +1172,8 @@ class GVNHoistLegacyPass : public FunctionPass { AU.addPreserved(); } }; -} // namespace llvm + +} // end namespace llvm PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) { DominatorTree &DT = AM.getResult(F); @@ -1161,6 +1193,7 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) { } char GVNHoistLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist", "Early GVN Hoisting of Expressions", false, false) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) diff --git a/lib/Transforms/Scalar/GVNSink.cpp b/lib/Transforms/Scalar/GVNSink.cpp index 0128380718456..814a62cd7d65a 100644 --- a/lib/Transforms/Scalar/GVNSink.cpp +++ b/lib/Transforms/Scalar/GVNSink.cpp @@ -1,4 +1,4 @@ -//===- GVNSink.cpp - sink expressions into successors -------------------===// +//===- GVNSink.cpp - sink expressions into successors ---------------------===// // // The LLVM Compiler Infrastructure // @@ -31,33 +31,54 @@ /// replace %a1 with %c1, will it contribute in an equivalent way to all /// successive instructions?". The PostValueTable class in GVN provides this /// mapping. -/// +// //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/MemorySSA.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/Verifier.h" -#include "llvm/Support/MathExtras.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ArrayRecycler.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/GVNExpression.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include +#include +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "gvn-sink" @@ -72,8 +93,8 @@ LLVM_DUMP_METHOD void Expression::dump() const { dbgs() << "\n"; } -} -} +} // end namespace GVNExpression +} // end namespace llvm namespace { @@ -97,7 +118,7 @@ static bool isMemoryInst(const Instruction *I) { /// list returned by operator*. 
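Editor's note: for readers skimming the GVNSink hunks that follow, the transformation is easiest to picture at source level. A hypothetical before/after, not taken from the patch; the %a1/%c1 names echo the file's own doc comment:

```cpp
// Before: both predecessors compute a value-equivalent add.
int before(bool c, int x) {
  int r;
  if (c)
    r = x + 1; // %a1 = add %x, 1 in the then-block
  else
    r = x + 1; // %c1 = add %x, 1 in the else-block
  return r;
}

// After GVN-style sinking: the two adds are value-numbered as equal and a
// single add is emitted in the common successor.
int after(bool /*c*/, int x) {
  return x + 1;
}
```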
class LockstepReverseIterator { ArrayRef Blocks; - SmallPtrSet ActiveBlocks; + SmallSetVector ActiveBlocks; SmallVector Insts; bool Fail; @@ -115,7 +136,7 @@ class LockstepReverseIterator { for (BasicBlock *BB : Blocks) { if (BB->size() <= 1) { // Block wasn't big enough - only contained a terminator. - ActiveBlocks.erase(BB); + ActiveBlocks.remove(BB); continue; } Insts.push_back(BB->getTerminator()->getPrevNode()); @@ -126,13 +147,20 @@ class LockstepReverseIterator { bool isValid() const { return !Fail; } ArrayRef operator*() const { return Insts; } - SmallPtrSet &getActiveBlocks() { return ActiveBlocks; } - void restrictToBlocks(SmallPtrSetImpl &Blocks) { + // Note: This needs to return a SmallSetVector as the elements of + // ActiveBlocks will be later copied to Blocks using std::copy. The + // resultant order of elements in Blocks needs to be deterministic. + // Using SmallPtrSet instead causes non-deterministic order while + // copying. And we cannot simply sort Blocks as they need to match the + // corresponding Values. + SmallSetVector &getActiveBlocks() { return ActiveBlocks; } + + void restrictToBlocks(SmallSetVector &Blocks) { for (auto II = Insts.begin(); II != Insts.end();) { if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) == Blocks.end()) { - ActiveBlocks.erase((*II)->getParent()); + ActiveBlocks.remove((*II)->getParent()); II = Insts.erase(II); } else { ++II; @@ -146,7 +174,7 @@ class LockstepReverseIterator { SmallVector NewInsts; for (auto *Inst : Insts) { if (Inst == &Inst->getParent()->front()) - ActiveBlocks.erase(Inst->getParent()); + ActiveBlocks.remove(Inst->getParent()); else NewInsts.push_back(Inst->getPrevNode()); } @@ -180,14 +208,14 @@ struct SinkingInstructionCandidate { NumExtraPHIs) // PHIs are expensive, so make sure they're worth it. - SplitEdgeCost; } + bool operator>(const SinkingInstructionCandidate &Other) const { return Cost > Other.Cost; } }; #ifndef NDEBUG -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, - const SinkingInstructionCandidate &C) { +raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) { OS << ""; return OS; @@ -204,7 +232,8 @@ class ModelledPHI { SmallVector Blocks; public: - ModelledPHI() {} + ModelledPHI() = default; + ModelledPHI(const PHINode *PN) { // BasicBlock comes first so we sort by basic block pointer order, then by value pointer order. SmallVector, 4> Ops; @@ -216,6 +245,7 @@ class ModelledPHI { Values.push_back(P.second); } } + /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI /// without the same ID. /// \note This is specifically for DenseMapInfo - do not use this! @@ -242,7 +272,7 @@ class ModelledPHI { /// Restrict the PHI's contents down to only \c NewBlocks. /// \c NewBlocks must be a subset of \c this->Blocks. 
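Editor's note: the SmallPtrSet to SmallSetVector switch above is about output stability. A pointer-keyed hash set iterates in address order, which varies run to run, while a set-vector iterates in insertion order. A small standard-C++ model of the distinction; SimpleSetVector is a stand-in for llvm::SmallSetVector, not its implementation:

```cpp
#include <algorithm>
#include <unordered_set>
#include <vector>

// A hash set of pointers iterates in an order derived from pointer values,
// so copying it out with std::copy is nondeterministic across runs. Pairing
// the membership set with a vector that records arrival order fixes that.
template <typename T> class SimpleSetVector {
  std::unordered_set<T> Seen;
  std::vector<T> Order;

public:
  bool insert(const T &V) {
    if (!Seen.insert(V).second)
      return false;
    Order.push_back(V);
    return true;
  }
  bool remove(const T &V) { // linear in size, like SmallSetVector::remove
    if (!Seen.erase(V))
      return false;
    Order.erase(std::find(Order.begin(), Order.end(), V));
    return true;
  }
  auto begin() const { return Order.begin(); } // deterministic order
  auto end() const { return Order.end(); }
};
```

The insertion-ordered iteration is exactly the property the new comment's std::copy into Blocks relies on, at the cost of a linear-time remove.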
- void restrictToBlocks(const SmallPtrSetImpl &NewBlocks) { + void restrictToBlocks(const SmallSetVector &NewBlocks) { auto BI = Blocks.begin(); auto VI = Values.begin(); while (BI != Blocks.end()) { @@ -262,19 +292,23 @@ class ModelledPHI { ArrayRef getValues() const { return Values; } bool areAllIncomingValuesSame() const { - return all_of(Values, [&](Value *V) { return V == Values[0]; }); + return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; }); } + bool areAllIncomingValuesSameType() const { - return all_of( + return llvm::all_of( Values, [&](Value *V) { return V->getType() == Values[0]->getType(); }); } + bool areAnyIncomingValuesConstant() const { - return any_of(Values, [&](Value *V) { return isa(V); }); + return llvm::any_of(Values, [&](Value *V) { return isa(V); }); } + // Hash functor unsigned hash() const { return (unsigned)hash_combine_range(Values.begin(), Values.end()); } + bool operator==(const ModelledPHI &Other) const { return Values == Other.Values && Blocks == Other.Blocks; } @@ -285,17 +319,20 @@ template struct DenseMapInfo { static ModelledPHI Dummy = ModelledPHI::createDummy(0); return Dummy; } + static inline ModelledPHI &getTombstoneKey() { static ModelledPHI Dummy = ModelledPHI::createDummy(1); return Dummy; } + static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); } + static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) { return LHS == RHS; } }; -typedef DenseSet> ModelledPHISet; +using ModelledPHISet = DenseSet>; //===----------------------------------------------------------------------===// // ValueTable @@ -326,10 +363,11 @@ class InstructionUseExpr : public GVNExpression::BasicExpression { op_push_back(U.getUser()); std::sort(op_begin(), op_end()); } + void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; } void setVolatile(bool V) { Volatile = V; } - virtual hash_code getHashValue() const { + hash_code getHashValue() const override { return hash_combine(GVNExpression::BasicExpression::getHashValue(), MemoryUseOrder, Volatile); } @@ -349,7 +387,7 @@ class ValueTable { DenseMap HashNumbering; BumpPtrAllocator Allocator; ArrayRecycler Recycler; - uint32_t nextValueNumber; + uint32_t nextValueNumber = 1; /// Create an expression for I based on its opcode and its uses. If I /// touches or reads memory, the expression is also based upon its memory @@ -379,6 +417,8 @@ class ValueTable { } public: + ValueTable() = default; + /// Returns the value number for the specified value, assigning /// it a new number if it did not have one before. uint32_t lookupOrAdd(Value *V) { @@ -484,8 +524,6 @@ class ValueTable { nextValueNumber = 1; } - ValueTable() : nextValueNumber(1) {} - /// \c Inst uses or touches memory. Return an ID describing the memory state /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2), /// the exact same memory operations happen after I1 and I2. 
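Editor's note: the DenseMapInfo&lt;ModelledPHI&gt; hunk follows the standard recipe for making a type usable as a DenseMap/DenseSet key: reserve two sentinel bit patterns, provide a hash, provide equality. A minimal sketch for a hypothetical two-field key; it assumes LLVM's ADT headers and that no real key ever uses the sentinel values:

```cpp
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include <climits>

struct PairKey {
  int A, B;
  bool operator==(const PairKey &O) const { return A == O.A && B == O.B; }
};

namespace llvm {
template <> struct DenseMapInfo<PairKey> {
  // One reserved pattern marks empty buckets, the other marks deleted
  // (tombstone) buckets; DenseMap needs both to probe correctly.
  static inline PairKey getEmptyKey() { return {INT_MIN, INT_MIN}; }
  static inline PairKey getTombstoneKey() { return {INT_MIN + 1, INT_MIN}; }
  static unsigned getHashValue(const PairKey &K) {
    return static_cast<unsigned>(hash_combine(K.A, K.B));
  }
  static bool isEqual(const PairKey &L, const PairKey &R) { return L == R; }
};
} // end namespace llvm

// With the specialization in scope: llvm::DenseSet<PairKey> S; S.insert({1, 2});
```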
@@ -520,7 +558,8 @@ class ValueTable { class GVNSink { public: - GVNSink() : VN() {} + GVNSink() = default; + bool run(Function &F) { DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n"); @@ -577,8 +616,9 @@ class GVNSink { void foldPointlessPHINodes(BasicBlock *BB) { auto I = BB->begin(); while (PHINode *PN = dyn_cast(I++)) { - if (!all_of(PN->incoming_values(), - [&](const Value *V) { return V == PN->getIncomingValue(0); })) + if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) { + return V == PN->getIncomingValue(0); + })) continue; if (PN->getIncomingValue(0) != PN) PN->replaceAllUsesWith(PN->getIncomingValue(0)); @@ -625,7 +665,7 @@ Optional GVNSink::analyzeInstructionForSinking( SmallVector NewInsts; for (auto *I : Insts) { if (VN.lookup(I) != VNumToSink) - ActivePreds.erase(I->getParent()); + ActivePreds.remove(I->getParent()); else NewInsts.push_back(I); } @@ -795,7 +835,7 @@ void GVNSink::sinkLastInstruction(ArrayRef Blocks, SmallVector NewOperands; for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { - bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { + bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) { return I->getOperand(O) != I0->getOperand(O); }); if (!NeedPHI) { @@ -861,7 +901,8 @@ class GVNSinkLegacyPass : public FunctionPass { AU.addPreserved(); } }; -} // namespace + +} // end anonymous namespace PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { GVNSink G; @@ -874,6 +915,7 @@ PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { } char GVNSinkLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink", "Early GVN sinking of Expressions", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 10782963177c6..9ce42a0682568 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -25,27 +25,54 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/IndVarSimplify.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" 
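Editor's note: ValueTable::lookupOrAdd above is the classic memoized numbering scheme: the first time a value (or its canonicalized expression) is seen it receives the next free number, and equivalent expressions map to the same number. Stripped of the LLVM machinery, with hypothetical names:

```cpp
#include <cstdint>
#include <map>
#include <string>

// Miniature value table: keys stand in for canonicalized expressions such as
// "add %x, 1". Numbers are dense and start at 1 so that 0 can serve as an
// "invalid value number" sentinel, matching the nextValueNumber = 1 default.
class MiniValueTable {
  std::map<std::string, uint32_t> Table;
  uint32_t NextValueNumber = 1;

public:
  uint32_t lookupOrAdd(const std::string &Expr) {
    auto It = Table.find(Expr);
    if (It != Table.end())
      return It->second; // seen before: same expression, same number
    return Table[Expr] = NextValueNumber++;
  }
  void clear() {
    Table.clear();
    NextValueNumber = 1;
  }
};
```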
#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" @@ -53,6 +80,10 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "indvars" @@ -91,6 +122,7 @@ DisableLFTR("disable-lftr", cl::Hidden, cl::init(false), cl::desc("Disable Linear Function Test Replace optimization")); namespace { + struct RewritePhi; class IndVarSimplify { @@ -131,7 +163,8 @@ class IndVarSimplify { bool run(Loop *L); }; -} + +} // end anonymous namespace /// Return true if the SCEV expansion generated by the rewriter can replace the /// original value. SCEV guarantees that it produces the same value, but the way @@ -251,7 +284,6 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { /// is converted into /// for(int i = 0; i < 10000; ++i) /// bar((double)i); -/// void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); unsigned BackEdge = IncomingEdge^1; @@ -305,7 +337,6 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { L->contains(TheBr->getSuccessor(1)))) return; - // If it isn't a comparison with an integer-as-fp (the exit value), we can't // transform it. ConstantFP *ExitValueVal = dyn_cast(Compare->getOperand(1)); @@ -373,7 +404,6 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) return; - } else { // If we have a negative stride, we require the init to be greater than the // exit value. @@ -452,7 +482,6 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { // First step. Check to see if there are any floating-point recurrences. // If there are, change them into integer recurrences, permitting analysis by // the SCEV routines. - // BasicBlock *Header = L->getHeader(); SmallVector PHIs; @@ -472,18 +501,26 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { } namespace { + // Collect information about PHI nodes which can be transformed in // rewriteLoopExitValues. struct RewritePhi { PHINode *PN; - unsigned Ith; // Ith incoming value. - Value *Val; // Exit value after expansion. - bool HighCost; // High Cost when expansion. + + // Ith incoming value. + unsigned Ith; + + // Exit value after expansion. + Value *Val; + + // High Cost when expansion. + bool HighCost; RewritePhi(PHINode *P, unsigned I, Value *V, bool H) : PN(P), Ith(I), Val(V), HighCost(H) {} }; -} + +} // end anonymous namespace Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, Instruction *InsertPt, @@ -747,7 +784,6 @@ void IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) { /// aggressively. bool IndVarSimplify::canLoopBeDeleted( Loop *L, SmallVector &RewritePhiSet) { - BasicBlock *Preheader = L->getLoopPreheader(); // If there is no preheader, the loop will not be deleted. 
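Editor's note: the handleFloatingPointIV doc comment is easiest to read with the concrete rewrite alongside it. At source level the pass performs, roughly, the hypothetical rewrite below; it only fires when the initial value, step, and exit value are exactly representable as integers and the integer induction cannot overflow:

```cpp
void bar(double);

// Before: floating-point induction variable; every increment is an FP add
// and the loop test is an FP compare.
void beforeLoop() {
  for (double i = 0.0; i < 10000.0; ++i)
    bar(i);
}

// After: integer induction variable; the FP value is rebuilt by a cast only
// where it is actually used. Legal here because 0, 1, and 10000 are exactly
// representable in both domains.
void afterLoop() {
  for (int i = 0; i < 10000; ++i)
    bar((double)i);
}
```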
if (!Preheader) @@ -790,7 +826,9 @@ bool IndVarSimplify::canLoopBeDeleted( } for (auto *BB : L->blocks()) - if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); })) + if (llvm::any_of(*BB, [](Instruction &I) { + return I.mayHaveSideEffects(); + })) return false; return true; @@ -801,15 +839,21 @@ bool IndVarSimplify::canLoopBeDeleted( //===----------------------------------------------------------------------===// namespace { + // Collect information about induction variables that are used by sign/zero // extend operations. This information is recorded by CollectExtend and provides // the input to WidenIV. struct WideIVInfo { PHINode *NarrowIV = nullptr; - Type *WidestNativeType = nullptr; // Widest integer type created [sz]ext - bool IsSigned = false; // Was a sext user seen before a zext? + + // Widest integer type created [sz]ext + Type *WidestNativeType = nullptr; + + // Was a sext user seen before a zext? + bool IsSigned = false; }; -} + +} // end anonymous namespace /// Update information about the induction variable that is extended by this /// sign or zero extend operation. This is used to determine the final width of @@ -885,7 +929,6 @@ struct NarrowIVDefUse { /// creating any new induction variables. To do this, it creates a new phi of /// the wider type and redirects all users, either removing extends or inserting /// truncs whenever we stop propagating the type. -/// class WidenIV { // Parameters PHINode *OrigPhi; @@ -902,22 +945,24 @@ class WidenIV { bool HasGuards; // Result - PHINode *WidePhi; - Instruction *WideInc; - const SCEV *WideIncExpr; + PHINode *WidePhi = nullptr; + Instruction *WideInc = nullptr; + const SCEV *WideIncExpr = nullptr; SmallVectorImpl &DeadInsts; SmallPtrSet Widened; SmallVector NarrowIVUsers; enum ExtendKind { ZeroExtended, SignExtended, Unknown }; + // A map tracking the kind of extension used to widen each narrow IV // and narrow IV user. // Key: pointer to a narrow IV or IV user. // Value: the kind of extension used to widen this Instruction. DenseMap, ExtendKind> ExtendKindMap; - typedef std::pair, AssertingVH> DefUserPair; + using DefUserPair = std::pair, AssertingVH>; + // A map with control-dependent ranges for post increment IV uses. The key is // a pair of IV def and a use of this def denoting the context. The value is // a ConstantRange representing possible values of the def at the given @@ -935,6 +980,7 @@ class WidenIV { void calculatePostIncRanges(PHINode *OrigPhi); void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser); + void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) { DefUserPair Key(Def, UseI); auto It = PostIncRangeInfos.find(Key); @@ -950,8 +996,7 @@ class WidenIV { bool HasGuards) : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), - HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr), - WideIncExpr(nullptr), DeadInsts(DI) { + HasGuards(HasGuards), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); ExtendKindMap[OrigPhi] = WI.IsSigned ? 
SignExtended : ZeroExtended; } @@ -969,7 +1014,7 @@ class WidenIV { ExtendKind getExtendKind(Instruction *I); - typedef std::pair WidenedRecTy; + using WidenedRecTy = std::pair; WidenedRecTy getWideRecurrence(NarrowIVDefUse DU); @@ -984,7 +1029,8 @@ class WidenIV { void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); }; -} // anonymous namespace + +} // end anonymous namespace /// Perform a quick domtree based check for loop invariance assuming that V is /// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this @@ -1182,7 +1228,6 @@ const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, /// operands is an AddRec for this loop, return the AddRec and the kind of /// extension used. WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) { - // Handle the common case of add const unsigned OpCode = DU.NarrowUse->getOpcode(); // Only Add/Sub/Mul instructions supported yet. @@ -1310,7 +1355,7 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); unsigned CastWidth = SE->getTypeSizeInBits(Op->getType()); unsigned IVWidth = SE->getTypeSizeInBits(WideType); - assert (CastWidth <= IVWidth && "Unexpected width while widening compare."); + assert(CastWidth <= IVWidth && "Unexpected width while widening compare."); // Widen the compare instruction. IRBuilder<> Builder( @@ -1461,7 +1506,6 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { } /// Add eligible users of NarrowDef to NarrowIVUsers. -/// void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); bool NonNegativeDef = @@ -1494,7 +1538,6 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { /// /// It would be simpler to delete uses as they are processed, but we must avoid /// invalidating SCEV expressions. -/// PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { // Is this phi an induction variable? const SCEVAddRecExpr *AddRec = dyn_cast(SE->getSCEV(OrigPhi)); @@ -1696,12 +1739,12 @@ void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) { // Live IV Reduction - Minimize IVs live across the loop. //===----------------------------------------------------------------------===// - //===----------------------------------------------------------------------===// // Simplification of IV users based on SCEV evaluation. //===----------------------------------------------------------------------===// namespace { + class IndVarSimplifyVisitor : public IVVisitor { ScalarEvolution *SE; const TargetTransformInfo *TTI; @@ -1721,14 +1764,14 @@ class IndVarSimplifyVisitor : public IVVisitor { // Implement the interface used by simplifyUsersOfIV. void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } }; -} + +} // end anonymous namespace /// Iteratively perform simplification on a worklist of IV users. Each /// successive simplification may push more users which may themselves be /// candidates for simplification. /// /// Sign/Zero extend elimination is interleaved with IV simplification. -/// void IndVarSimplify::simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI) { @@ -1759,7 +1802,8 @@ void IndVarSimplify::simplifyAndExtend(Loop *L, // Information about sign/zero extensions of CurrIV. 
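Editor's note: the WidenIV hunks show the modernization pattern this commit applies across all of these files: constructor-initializer boilerplate is replaced with C++11 in-class default member initializers, so every constructor inherits the defaults. A generic sketch:

```cpp
// Before: each field must be repeated in every constructor's init list, and
// a newly added constructor can silently forget one.
struct WidenStateOld {
  int *WidePhi;
  int *WideInc;
  WidenStateOld() : WidePhi(nullptr), WideInc(nullptr) {}
};

// After: defaults live next to the declarations; constructors only mention
// what differs from the default.
struct WidenStateNew {
  int *WidePhi = nullptr;
  int *WideInc = nullptr;
  WidenStateNew() = default;
};
```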
IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); - Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor); + Changed |= + simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor); if (Visitor.WI.WidestNativeType) { WideIVs.push_back(Visitor.WI); @@ -2501,8 +2545,10 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, } namespace { + struct IndVarSimplifyLegacyPass : public LoopPass { static char ID; // Pass identification, replacement for typeid + IndVarSimplifyLegacyPass() : LoopPass(ID) { initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -2529,9 +2575,11 @@ struct IndVarSimplifyLegacyPass : public LoopPass { getLoopAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char IndVarSimplifyLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index ce318f7d1de9c..42c74c3a3ccb5 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -1,4 +1,4 @@ -//===-- InductiveRangeCheckElimination.cpp - ------------------------------===// +//===- InductiveRangeCheckElimination.cpp - -------------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,6 +6,7 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// // The InductiveRangeCheckElimination pass splits a loop's iteration space into // three disjoint ranges. It does that in a way such that the loop running in // the middle loop provably does not need range checks. 
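Editor's note: simplifyAndExtend, like pushNarrowIVUsers later in the file, is a worklist algorithm: simplifying one IV user can expose further candidates, which are pushed and revisited until a fixed point. The generic shape, with hypothetical names:

```cpp
#include <functional>
#include <set>
#include <vector>

// Generic worklist driver: Process may discover new work items; the Visited
// set guarantees termination by handling each item at most once.
template <typename T>
void runWorklist(std::vector<T> Work,
                 const std::function<std::vector<T>(const T &)> &Process) {
  std::set<T> Visited;
  while (!Work.empty()) {
    T Item = Work.back();
    Work.pop_back();
    if (!Visited.insert(Item).second)
      continue; // already simplified
    for (T &Next : Process(Item))
      Work.push_back(Next);
  }
}
```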
As an example, it will @@ -39,30 +40,61 @@ // throw_out_of_bounds(); // } // } +// //===----------------------------------------------------------------------===// +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include +#include +#include +#include +#include +#include using namespace llvm; +using namespace llvm::PatternMatch; static cl::opt LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, cl::init(64)); @@ -79,6 +111,9 @@ static cl::opt MaxExitProbReciprocal("irce-max-exit-prob-reciprocal", static cl::opt SkipProfitabilityChecks("irce-skip-profitability-checks", cl::Hidden, cl::init(false)); +static cl::opt AllowUnsignedLatchCondition("irce-allow-unsigned-latch", + cl::Hidden, cl::init(true)); + static const char *ClonedLoopTag = "irce.loop.clone"; #define DEBUG_TYPE "irce" @@ -119,10 +154,11 @@ class InductiveRangeCheck { Value *Length = nullptr; Use *CheckUse = nullptr; RangeCheckKind Kind = RANGE_CHECK_UNKNOWN; + bool IsSigned = true; static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI, ScalarEvolution &SE, Value *&Index, - Value *&Length); + Value *&Length, bool &IsSigned); static void extractRangeChecksFromCond(Loop *L, ScalarEvolution &SE, Use &ConditionUse, @@ -133,6 +169,7 @@ class InductiveRangeCheck { const SCEV *getOffset() const { return Offset; } const SCEV *getScale() const { return Scale; } Value *getLength() const { return Length; } + bool isSigned() const { return IsSigned; } void print(raw_ostream &OS) const { OS << "InductiveRangeCheck:\n"; @@ -173,6 +210,14 @@ class InductiveRangeCheck { Type *getType() const { return Begin->getType(); } const SCEV *getBegin() const { return Begin; } const SCEV *getEnd() const { return End; } + bool isEmpty(ScalarEvolution &SE, bool IsSigned) const { + if (Begin == End) + return true; + if (IsSigned) + return SE.isKnownPredicate(ICmpInst::ICMP_SGE, Begin, End); + else + return SE.isKnownPredicate(ICmpInst::ICMP_UGE, Begin, End); + } }; /// This is the value the condition of the branch needs to evaluate to for the @@ -199,6 
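Editor's note: the checks parseRangeCheckICmp looks for come from source in the shape of the hypothetical snippet below, matching the example in the file header: the lower bound is a signed compare against 0 and the upper bound compares the index against a loop-invariant, non-negative length.

```cpp
void throw_out_of_bounds(); // conceptually throws; control does not continue

// parseRangeCheckICmp classifies the icmp feeding the guard branch as
// RANGE_CHECK_LOWER (i >= 0), RANGE_CHECK_UPPER (i < len), or the
// combination when the two compares are and-ed together.
void copyGuarded(int *a, int *b, int n, int len) {
  for (int i = 0; i < n; ++i) {
    if (i < 0 || i >= len) // the range check IRCE wants to eliminate
      throw_out_of_bounds();
    a[i] = b[i];
  }
}
```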
+244,7 @@ class InductiveRangeCheck { class InductiveRangeCheckElimination : public LoopPass { public: static char ID; + InductiveRangeCheckElimination() : LoopPass(ID) { initializeInductiveRangeCheckEliminationPass( *PassRegistry::getPassRegistry()); @@ -212,8 +258,9 @@ class InductiveRangeCheckElimination : public LoopPass { bool runOnLoop(Loop *L, LPPassManager &LPM) override; }; +} // end anonymous namespace + char InductiveRangeCheckElimination::ID = 0; -} INITIALIZE_PASS_BEGIN(InductiveRangeCheckElimination, "irce", "Inductive range check elimination", false, false) @@ -247,12 +294,10 @@ StringRef InductiveRangeCheck::rangeCheckKindToStr( /// range checked, and set `Length` to the upper limit `Index` is being range /// checked with if (and only if) the range check type is stronger or equal to /// RANGE_CHECK_UPPER. -/// InductiveRangeCheck::RangeCheckKind InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, ScalarEvolution &SE, Value *&Index, - Value *&Length) { - + Value *&Length, bool &IsSigned) { auto IsNonNegativeAndNotLoopVarying = [&SE, L](Value *V) { const SCEV *S = SE.getSCEV(V); if (isa(S)) @@ -262,8 +307,6 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, SE.isKnownNonNegative(S); }; - using namespace llvm::PatternMatch; - ICmpInst::Predicate Pred = ICI->getPredicate(); Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); @@ -276,6 +319,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ICmpInst::ICMP_SGE: + IsSigned = true; if (match(RHS, m_ConstantInt<0>())) { Index = LHS; return RANGE_CHECK_LOWER; @@ -286,6 +330,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ICmpInst::ICMP_SGT: + IsSigned = true; if (match(RHS, m_ConstantInt<-1>())) { Index = LHS; return RANGE_CHECK_LOWER; @@ -302,6 +347,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, std::swap(LHS, RHS); LLVM_FALLTHROUGH; case ICmpInst::ICMP_UGT: + IsSigned = false; if (IsNonNegativeAndNotLoopVarying(LHS)) { Index = RHS; Length = LHS; @@ -317,8 +363,6 @@ void InductiveRangeCheck::extractRangeChecksFromCond( Loop *L, ScalarEvolution &SE, Use &ConditionUse, SmallVectorImpl &Checks, SmallPtrSetImpl &Visited) { - using namespace llvm::PatternMatch; - Value *Condition = ConditionUse.get(); if (!Visited.insert(Condition).second) return; @@ -336,8 +380,8 @@ void InductiveRangeCheck::extractRangeChecksFromCond( const auto &RChkA = SubChecks[0]; const auto &RChkB = SubChecks[1]; if ((RChkA.Length == RChkB.Length || !RChkA.Length || !RChkB.Length) && - RChkA.Offset == RChkB.Offset && RChkA.Scale == RChkB.Scale) { - + RChkA.Offset == RChkB.Offset && RChkA.Scale == RChkB.Scale && + RChkA.IsSigned == RChkB.IsSigned) { // If RChkA.Kind == RChkB.Kind then we just found two identical checks. // But if one of them is a RANGE_CHECK_LOWER and the other is a // RANGE_CHECK_UPPER (only possibility if they're different) then @@ -346,6 +390,7 @@ void InductiveRangeCheck::extractRangeChecksFromCond( (InductiveRangeCheck::RangeCheckKind)(RChkA.Kind | RChkB.Kind); SubChecks[0].Length = RChkA.Length ? RChkA.Length : RChkB.Length; SubChecks[0].CheckUse = &ConditionUse; + SubChecks[0].IsSigned = RChkA.IsSigned; // We updated one of the checks in place, now erase the other. 
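Editor's note: the parseRangeCheckICmp hunks lean on LLVM's PatternMatch DSL (match, m_ICmp, m_ConstantInt&lt;0&gt;()). As a reminder of how that API reads, a hedged fragment written from memory of the same headers; it mirrors, but is not copied from, the patch:

```cpp
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Recognize "x >= 0" style lower-bound checks: returns true and binds Index
// when Cond is an icmp sge against the constant 0. m_ICmp binds the
// predicate and both operands exactly as written, with no commutation.
static bool isSignedLowerBoundCheck(Value *Cond, Value *&Index) {
  ICmpInst::Predicate Pred;
  Value *LHS, *RHS;
  if (!match(Cond, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
    return false;
  if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_ConstantInt<0>())) {
    Index = LHS;
    return true;
  }
  return false;
}
```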
SubChecks.pop_back(); @@ -361,7 +406,8 @@ void InductiveRangeCheck::extractRangeChecksFromCond( return; Value *Length = nullptr, *Index; - auto RCKind = parseRangeCheckICmp(L, ICI, SE, Index, Length); + bool IsSigned; + auto RCKind = parseRangeCheckICmp(L, ICI, SE, Index, Length, IsSigned); if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN) return; @@ -378,13 +424,13 @@ void InductiveRangeCheck::extractRangeChecksFromCond( IRC.Scale = IndexAddRec->getStepRecurrence(SE); IRC.CheckUse = &ConditionUse; IRC.Kind = RCKind; + IRC.IsSigned = IsSigned; Checks.push_back(IRC); } void InductiveRangeCheck::extractRangeChecksFromBranch( BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo &BPI, SmallVectorImpl &Checks) { - if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch()) return; @@ -435,16 +481,16 @@ namespace { // kinds of loops we can deal with -- ones that have a single latch that is also // an exiting block *and* have a canonical induction variable. struct LoopStructure { - const char *Tag; + const char *Tag = ""; - BasicBlock *Header; - BasicBlock *Latch; + BasicBlock *Header = nullptr; + BasicBlock *Latch = nullptr; // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th // successor is `LatchExit', the exit block of the loop. - BranchInst *LatchBr; - BasicBlock *LatchExit; - unsigned LatchBrExitIdx; + BranchInst *LatchBr = nullptr; + BasicBlock *LatchExit = nullptr; + unsigned LatchBrExitIdx = std::numeric_limits::max(); // The loop represented by this instance of LoopStructure is semantically // equivalent to: @@ -455,18 +501,14 @@ struct LoopStructure { // for (intN_ty iv = IndVarStart; predicate(iv, LoopExitAt); iv = IndVarBase) // ... body ... - Value *IndVarBase; - Value *IndVarStart; - Value *IndVarStep; - Value *LoopExitAt; - bool IndVarIncreasing; - bool IsSignedPredicate; + Value *IndVarBase = nullptr; + Value *IndVarStart = nullptr; + Value *IndVarStep = nullptr; + Value *LoopExitAt = nullptr; + bool IndVarIncreasing = false; + bool IsSignedPredicate = true; - LoopStructure() - : Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr), - LatchExit(nullptr), LatchBrExitIdx(-1), IndVarBase(nullptr), - IndVarStart(nullptr), IndVarStep(nullptr), LoopExitAt(nullptr), - IndVarIncreasing(false), IsSignedPredicate(true) {} + LoopStructure() = default; template LoopStructure map(M Map) const { LoopStructure Result; @@ -499,7 +541,6 @@ struct LoopStructure { /// loops to run any remaining iterations. The pre loop runs any iterations in /// which the induction variable is < Begin, and the post loop runs any /// iterations in which the induction variable is >= End. -/// class LoopConstrainer { // The representation of a clone of the original loop we started out with. struct ClonedLoop { @@ -516,13 +557,12 @@ class LoopConstrainer { // Result of rewriting the range of a loop. See changeIterationSpaceEnd for // more details on what these fields mean. struct RewrittenRangeInfo { - BasicBlock *PseudoExit; - BasicBlock *ExitSelector; + BasicBlock *PseudoExit = nullptr; + BasicBlock *ExitSelector = nullptr; std::vector PHIValuesAtPseudoExit; - PHINode *IndVarEnd; + PHINode *IndVarEnd = nullptr; - RewrittenRangeInfo() - : PseudoExit(nullptr), ExitSelector(nullptr), IndVarEnd(nullptr) {} + RewrittenRangeInfo() = default; }; // Calculated subranges we restrict the iteration space of the main loop to. 
@@ -546,14 +586,12 @@ class LoopConstrainer { // Compute a safe set of limits for the main loop to run in -- effectively the // intersection of `Range' and the iteration space of the original loop. // Return None if unable to compute the set of subranges. - // Optional calculateSubRanges(bool IsSignedPredicate) const; // Clone `OriginalLoop' and return the result in CLResult. The IR after // running `cloneLoop' is well formed except for the PHI nodes in CLResult -- // the PHI nodes say that there is an incoming edge from `OriginalPreheader` // but there is no such edge. - // void cloneLoop(ClonedLoop &CLResult, const char *Tag) const; // Create the appropriate loop structure needed to describe a cloned copy of @@ -582,7 +620,6 @@ class LoopConstrainer { // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate // preheader because it is made to branch to the loop header only // conditionally. - // RewrittenRangeInfo changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader, Value *ExitLoopAt, @@ -590,7 +627,6 @@ class LoopConstrainer { // The loop denoted by `LS' has `OldPreheader' as its preheader. This // function creates a new preheader for `LS' and returns it. - // BasicBlock *createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader, const char *Tag) const; @@ -618,12 +654,13 @@ class LoopConstrainer { // Information about the original loop we started out with. Loop &OriginalLoop; - const SCEV *LatchTakenCount; - BasicBlock *OriginalPreheader; + + const SCEV *LatchTakenCount = nullptr; + BasicBlock *OriginalPreheader = nullptr; // The preheader of the main loop. This may or may not be different from // `OriginalPreheader'. - BasicBlock *MainLoopPreheader; + BasicBlock *MainLoopPreheader = nullptr; // The range we need to run the main loop in. InductiveRangeCheck::Range Range; @@ -637,15 +674,14 @@ class LoopConstrainer { const LoopStructure &LS, ScalarEvolution &SE, DominatorTree &DT, InductiveRangeCheck::Range R) : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), - SE(SE), DT(DT), LPM(LPM), LI(LI), OriginalLoop(L), - LatchTakenCount(nullptr), OriginalPreheader(nullptr), - MainLoopPreheader(nullptr), Range(R), MainLoopStructure(LS) {} + SE(SE), DT(DT), LPM(LPM), LI(LI), OriginalLoop(L), Range(R), + MainLoopStructure(LS) {} // Entry point for the algorithm. Returns true on success. bool run(); }; -} +} // end anonymous namespace void LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block, BasicBlock *ReplaceBy) { @@ -889,6 +925,12 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, IsSignedPredicate = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT; + + if (!IsSignedPredicate && !AllowUnsignedLatchCondition) { + FailureReason = "unsigned latch conditions are explicitly prohibited"; + return None; + } + // The predicate that we need to check that the induction variable lies // within bounds. ICmpInst::Predicate BoundPred = @@ -964,6 +1006,12 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, IsSignedPredicate = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT; + + if (!IsSignedPredicate && !AllowUnsignedLatchCondition) { + FailureReason = "unsigned latch conditions are explicitly prohibited"; + return None; + } + // The predicate that we need to check that the induction variable lies // within bounds. 
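Editor's note: the LoopConstrainer machinery above carries out the plan from the file header; at source level the transform amounts to the hypothetical, much simplified rewrite below, where only the cold pre/post loops keep the range check:

```cpp
void body(long);
void deopt(); // stands in for throw_out_of_bounds()

// Original shape: for (i = begin; i < n; ++i)
//                   { if (i < 0 || i >= len) { deopt(); return; } body(i); }
void constrained(long begin, long n, long len) {
  long i = begin;
  // Pre-loop: iterations below the safe range [0, len); checks remain.
  for (; i < n && i < 0; ++i) {
    if (i < 0 || i >= len) { deopt(); return; }
    body(i);
  }
  // Main loop: i in [0, min(n, len)), so both checks provably pass.
  long safe_end = n < len ? n : len;
  for (; i < safe_end; ++i)
    body(i);
  // Post-loop: remaining iterations at or above len; checks remain.
  for (; i < n; ++i) {
    if (i < 0 || i >= len) { deopt(); return; }
    body(i);
  }
}
```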
ICmpInst::Predicate BoundPred = @@ -1077,7 +1125,6 @@ LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const { // that case, `Clamp` will always return `Smallest` and // [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`) // will be an empty range. Returning an empty range is always safe. - // Smallest = SE.getAddExpr(End, One); Greatest = SE.getAddExpr(Start, One); @@ -1167,7 +1214,6 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result, LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt, BasicBlock *ContinuationBlock) const { - // We start with a loop with a single latch: // // +--------------------+ @@ -1238,7 +1284,6 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( // | original exit <----+ // | | // +--------------------+ - // RewrittenRangeInfo RRI; @@ -1341,7 +1386,6 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( void LoopConstrainer::rewriteIncomingValuesForPHIs( LoopStructure &LS, BasicBlock *ContinuationBlock, const LoopConstrainer::RewrittenRangeInfo &RRI) const { - unsigned PHIIndex = 0; for (Instruction &I : *LS.Header) { auto *PN = dyn_cast(&I); @@ -1359,7 +1403,6 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs( BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader, const char *Tag) const { - BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header); BranchInst::Create(LS.Header, Preheader); @@ -1630,12 +1673,18 @@ InductiveRangeCheck::computeSafeIterationSpace( } static Optional -IntersectRange(ScalarEvolution &SE, - const Optional &R1, - const InductiveRangeCheck::Range &R2) { +IntersectSignedRange(ScalarEvolution &SE, + const Optional &R1, + const InductiveRangeCheck::Range &R2) { + if (R2.isEmpty(SE, /* IsSigned */ true)) + return None; if (!R1.hasValue()) return R2; auto &R1Value = R1.getValue(); + // We never return empty ranges from this function, and R1 is supposed to be + // a result of intersection. Thus, R1 is never empty. + assert(!R1Value.isEmpty(SE, /* IsSigned */ true) && + "We should never have empty R1!"); // TODO: we could widen the smaller range and have this work; but for now we // bail out to keep things simple. @@ -1645,7 +1694,40 @@ IntersectRange(ScalarEvolution &SE, const SCEV *NewBegin = SE.getSMaxExpr(R1Value.getBegin(), R2.getBegin()); const SCEV *NewEnd = SE.getSMinExpr(R1Value.getEnd(), R2.getEnd()); - return InductiveRangeCheck::Range(NewBegin, NewEnd); + // If the resulting range is empty, just return None. + auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd); + if (Ret.isEmpty(SE, /* IsSigned */ true)) + return None; + return Ret; +} + +static Optional +IntersectUnsignedRange(ScalarEvolution &SE, + const Optional &R1, + const InductiveRangeCheck::Range &R2) { + if (R2.isEmpty(SE, /* IsSigned */ false)) + return None; + if (!R1.hasValue()) + return R2; + auto &R1Value = R1.getValue(); + // We never return empty ranges from this function, and R1 is supposed to be + // a result of intersection. Thus, R1 is never empty. + assert(!R1Value.isEmpty(SE, /* IsSigned */ false) && + "We should never have empty R1!"); + + // TODO: we could widen the smaller range and have this work; but for now we + // bail out to keep things simple. 
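Editor's note: IntersectSignedRange reduces to smax/smin on the bounds plus an emptiness test, with None as the absorbing "empty" result. The arithmetic skeleton in plain C++17, where Range models the half-open [Begin, End) interval used above:

```cpp
#include <algorithm>
#include <cstdint>
#include <optional>

struct Range {
  int64_t Begin, End; // half-open: [Begin, End)
  bool isEmptySigned() const { return Begin >= End; }
};

// Mirrors IntersectSignedRange: smax of the begins, smin of the ends, and
// emptiness canonicalized to nullopt so callers can stop combining early.
std::optional<Range> intersectSigned(const std::optional<Range> &R1,
                                     const Range &R2) {
  if (R2.isEmptySigned())
    return std::nullopt;
  if (!R1)
    return R2;
  Range Res{std::max(R1->Begin, R2.Begin), std::min(R1->End, R2.End)};
  if (Res.isEmptySigned())
    return std::nullopt;
  return Res;
}
```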
+ if (R1Value.getType() != R2.getType()) + return None; + + const SCEV *NewBegin = SE.getUMaxExpr(R1Value.getBegin(), R2.getBegin()); + const SCEV *NewEnd = SE.getUMinExpr(R1Value.getEnd(), R2.getEnd()); + + // If the resulting range is empty, just return None. + auto Ret = InductiveRangeCheck::Range(NewBegin, NewEnd); + if (Ret.isEmpty(SE, /* IsSigned */ false)) + return None; + return Ret; } bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -1706,14 +1788,44 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { Instruction *ExprInsertPt = Preheader->getTerminator(); SmallVector RangeChecksToEliminate; + auto RangeIsNonNegative = [&](InductiveRangeCheck::Range &R) { + return SE.isKnownNonNegative(R.getBegin()) && + SE.isKnownNonNegative(R.getEnd()); + }; + // Basing on the type of latch predicate, we interpret the IV iteration range + // as signed or unsigned range. We use different min/max functions (signed or + // unsigned) when intersecting this range with safe iteration ranges implied + // by range checks. + auto IntersectRange = + LS.IsSignedPredicate ? IntersectSignedRange : IntersectUnsignedRange; IRBuilder<> B(ExprInsertPt); for (InductiveRangeCheck &IRC : RangeChecks) { auto Result = IRC.computeSafeIterationSpace(SE, IndVar); if (Result.hasValue()) { + // Intersecting a signed and an unsigned ranges may produce incorrect + // results because we can use neither signed nor unsigned min/max for + // reliably correct intersection if a range contains negative values + // which are either actually negative or big positive. Intersection is + // safe in two following cases: + // 1. Both ranges are signed/unsigned, then we use signed/unsigned min/max + // respectively for their intersection; + // 2. IRC safe iteration space only contains values from [0, SINT_MAX]. + // The interpretation of these values is unambiguous. + // We take the type of IV iteration range as a reference (we will + // intersect it with the resulting range of all IRC's later in + // calculateSubRanges). Only ranges of IRC of the same type are considered + // for removal unless we prove that its range doesn't contain ambiguous + // values. + if (IRC.isSigned() != LS.IsSignedPredicate && + !RangeIsNonNegative(Result.getValue())) + continue; auto MaybeSafeIterRange = IntersectRange(SE, SafeIterRange, Result.getValue()); if (MaybeSafeIterRange.hasValue()) { + assert( + !MaybeSafeIterRange.getValue().isEmpty(SE, LS.IsSignedPredicate) && + "We should never return empty ranges!"); RangeChecksToEliminate.push_back(IRC); SafeIterRange = MaybeSafeIterRange.getValue(); } diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 58b14bc8d0718..ca6e437b770c8 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -148,10 +148,9 @@ class InferAddressSpaces : public FunctionPass { // Changes the flat address expressions in function F to point to specific // address spaces if InferredAddrSpace says so. Postorder is the postorder of // all flat expressions in the use-def graph of function F. 
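Editor's note: the guard added in runOnLoop exists because the same bit pattern orders differently under signed and unsigned comparison, so min/max of mismatched signedness can produce bounds that are simply wrong. A one-file demonstration:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  int64_t A = -1, B = 5;
  assert(A < B);                     // signed: -1 < 5
  assert(uint64_t(A) > uint64_t(B)); // unsigned: 0xFFFF...F > 5
  // Hence smin(A, B) = -1 while umin(A, B) = 5. Intersecting a signed range
  // with an unsigned one is only safe when both bounds are provably
  // non-negative, which is what the RangeIsNonNegative check establishes.
  return 0;
}
```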
- bool - rewriteWithNewAddressSpaces(ArrayRef Postorder, - const ValueToAddrSpaceMapTy &InferredAddrSpace, - Function *F) const; + bool rewriteWithNewAddressSpaces( + const TargetTransformInfo &TTI, ArrayRef Postorder, + const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const; void appendsFlatAddressExpressionToPostorderStack( Value *V, std::vector> &PostorderStack, @@ -602,7 +601,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) { // Changes the address spaces of the flat address expressions who are inferred // to point to a specific address space. - return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F); + return rewriteWithNewAddressSpaces(TTI, Postorder, InferredAddrSpace, &F); } // Constants need to be tracked through RAUW to handle cases with nested @@ -710,23 +709,32 @@ Optional InferAddressSpaces::updateAddressSpace( /// \p returns true if \p U is the pointer operand of a memory instruction with /// a single pointer operand that can have its address space changed by simply -/// mutating the use to a new value. -static bool isSimplePointerUseValidToReplace(Use &U) { +/// mutating the use to a new value. If the memory instruction is volatile, +/// return true only if the target allows the memory instruction to be volatile +/// in the new address space. +static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI, + Use &U, unsigned AddrSpace) { User *Inst = U.getUser(); unsigned OpNo = U.getOperandNo(); + bool VolatileIsAllowed = false; + if (auto *I = dyn_cast(Inst)) + VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace); if (auto *LI = dyn_cast(Inst)) - return OpNo == LoadInst::getPointerOperandIndex() && !LI->isVolatile(); + return OpNo == LoadInst::getPointerOperandIndex() && + (VolatileIsAllowed || !LI->isVolatile()); if (auto *SI = dyn_cast(Inst)) - return OpNo == StoreInst::getPointerOperandIndex() && !SI->isVolatile(); + return OpNo == StoreInst::getPointerOperandIndex() && + (VolatileIsAllowed || !SI->isVolatile()); if (auto *RMW = dyn_cast(Inst)) - return OpNo == AtomicRMWInst::getPointerOperandIndex() && !RMW->isVolatile(); + return OpNo == AtomicRMWInst::getPointerOperandIndex() && + (VolatileIsAllowed || !RMW->isVolatile()); if (auto *CmpX = dyn_cast(Inst)) { return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() && - !CmpX->isVolatile(); + (VolatileIsAllowed || !CmpX->isVolatile()); } return false; @@ -820,7 +828,7 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I, } bool InferAddressSpaces::rewriteWithNewAddressSpaces( - ArrayRef Postorder, + const TargetTransformInfo &TTI, ArrayRef Postorder, const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { // For each address expression to be modified, creates a clone of it with its // pointer operands converted to the new address space. Since the pointer @@ -880,7 +888,8 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces( // to the next instruction. I = skipToNextUser(I, E); - if (isSimplePointerUseValidToReplace(U)) { + if (isSimplePointerUseValidToReplace( + TTI, U, V->getType()->getPointerAddressSpace())) { // If V is used as the pointer operand of a compatible memory operation, // sets the pointer operand to NewV. This replacement does not change // the element type, so the resultant load/store is still valid. 
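Editor's note: the isSimplePointerUseValidToReplace change makes "is this use rewritable?" target-dependent: a volatile access may only be rewritten into a new address space when the target reports a volatile variant there. TTI.hasVolatileVariant is the real hook; the predicate below is a simplified restatement, not the patch's code:

```cpp
// Simplified gate for rewriting one pointer use into a new address space:
// the use must be the instruction's single pointer operand, and volatility
// must either be absent or supported by the target in the destination space.
bool mayRewritePointerUse(bool IsPointerOperand, bool IsVolatile,
                          bool TargetHasVolatileVariant) {
  if (!IsPointerOperand)
    return false;
  return !IsVolatile || TargetHasVolatileVariant;
}
```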
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 33afc207a959a..ade4fbbcb6f2f 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -648,8 +648,6 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( return true; } - PredValueInfoTy LHSVals, RHSVals; - // Handle some boolean conditions. if (I->getType()->getPrimitiveSizeInBits() == 1) { assert(Preference == WantInteger && "One-bit non-integer type?"); @@ -657,6 +655,8 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors( // X & false -> false if (I->getOpcode() == Instruction::Or || I->getOpcode() == Instruction::And) { + PredValueInfoTy LHSVals, RHSVals; + ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals, WantInteger, CxtI); ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals, diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index f45d362e077d3..55755c394a07a 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -42,7 +42,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -577,10 +577,13 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { + // SafetyInfo is nullptr if we are checking for sinking from preheader to + // loop body. + const bool SinkingToLoopBody = !SafetyInfo; // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast(&I)) { if (!LI->isUnordered()) - return false; // Don't hoist volatile/atomic loads! + return false; // Don't sink/hoist volatile or ordered atomic loads! // Loads from constant memory are always safe to move, even if they end up // in the same alias set as something that ends up being modified. @@ -589,6 +592,9 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, if (LI->getMetadata(LLVMContext::MD_invariant_load)) return true; + if (LI->isAtomic() && SinkingToLoopBody) + return false; // Don't sink unordered atomic loads to loop body. + // This checks for an invariant.start dominating the load. if (isLoadInvariantInLoop(LI, DT, CurLoop)) return true; @@ -606,10 +612,12 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. 
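Editor's note: the canSinkOrHoistInst changes encode an asymmetry that is easy to miss: volatile and ordered atomic loads are never moved, while an unordered atomic load may be hoisted out of the loop but not sunk from the preheader into the loop body, since that would turn one execution into many. A compact restatement of the rule; the flags are hypothetical stand-ins for the checks in the patch:

```cpp
// "Unordered" is LLVM's weakest atomic ordering. SinkingToLoopBody
// corresponds to canSinkOrHoistInst being called with SafetyInfo == nullptr.
bool canMoveLoad(bool IsVolatileOrOrderedAtomic, bool IsUnorderedAtomic,
                 bool SinkingToLoopBody) {
  if (IsVolatileOrOrderedAtomic)
    return false; // never reorder volatile or ordered atomic loads
  if (IsUnorderedAtomic && SinkingToLoopBody)
    return false; // one preheader execution must not become many
  return true;    // remaining legality checks (aliasing etc.) still apply
}
```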
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) - ORE->emit(OptimizationRemarkMissed( - DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI) - << "failed to move load with loop-invariant address " - "because the loop may invalidate its value"); + ORE->emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI) + << "failed to move load with loop-invariant address " + "because the loop may invalidate its value"; + }); return !Invalidated; } else if (CallInst *CI = dyn_cast(&I)) { @@ -664,9 +672,9 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, !isa(I)) return false; - // SafetyInfo is nullptr if we are checking for sinking from preheader to - // loop body. It will be always safe as there is no speculative execution. - if (!SafetyInfo) + // If we are checking for sinking from preheader to loop body it will be + // always safe as there is no speculative execution. + if (SinkingToLoopBody) return true; // TODO: Plumb the context instruction through to make hoisting and sinking @@ -808,8 +816,10 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); - ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) - << "sinking " << ore::NV("Inst", &I)); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) + << "sinking " << ore::NV("Inst", &I); + }); bool Changed = false; if (isa(I)) ++NumMovedLoads; @@ -881,8 +891,10 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, auto *Preheader = CurLoop->getLoopPreheader(); DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I << "\n"); - ORE->emit(OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) - << "hoisting " << ore::NV("Inst", &I)); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " + << ore::NV("Inst", &I); + }); // Metadata can be dependent on conditions we are hoisting above. // Conservatively strip all metadata on the instruction unless we were @@ -932,10 +944,12 @@ static bool isSafeToExecuteUnconditionally(Instruction &Inst, if (!GuaranteedToExecute) { auto *LI = dyn_cast(&Inst); if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) - ORE->emit(OptimizationRemarkMissed( - DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) - << "failed to hoist load with loop-invariant address " - "because load is conditionally executed"); + ORE->emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) + << "failed to hoist load with loop-invariant address " + "because load is conditionally executed"; + }); } return GuaranteedToExecute; @@ -1251,9 +1265,11 @@ bool llvm::promoteLoopAccessesToScalars( // Otherwise, this is safe to promote, lets do it! 
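Editor's note: the ORE->emit changes here and in the following files all wrap remark construction in a lambda, so the streaming and string formatting run only when remarks are actually enabled. The pattern in miniature, with a hypothetical emitter standing in for OptimizationRemarkEmitter:

```cpp
#include <iostream>
#include <string>

// The callable is invoked, and the message built, only when output is
// enabled, so the hot path pays one branch instead of string formatting.
struct LazyEmitter {
  bool Enabled = false;
  template <typename MakeMsg> void emit(MakeMsg &&Make) {
    if (Enabled)
      std::cout << Make() << '\n';
  }
};

void example(LazyEmitter &ORE, const std::string &InstName) {
  ORE.emit([&] { return "sinking " + InstName; }); // built only if enabled
}
```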
DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr << '\n'); - ORE->emit( - OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0]) - << "Moving accesses to memory location out of the loop"); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", + LoopUses[0]) + << "Moving accesses to memory location out of the loop"; + }); ++NumPromoted; // Grab a debug location for the inserted loads/stores; given that the diff --git a/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 42dc38c37e0e4..3b5b9c99a3c07 100644 --- a/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -20,7 +20,7 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -327,8 +327,10 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { ++NumPrefetches; DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV << "\n"); - ORE->emit(OptimizationRemark(DEBUG_TYPE, "Prefetched", MemI) - << "prefetched memory access"); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Prefetched", MemI) + << "prefetched memory access"; + }); MadeChange = true; } diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 12e7b96256cea..82604a8842bf7 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -30,21 +30,6 @@ using namespace llvm; STATISTIC(NumDeleted, "Number of loops deleted"); -/// This function deletes dead loops. The caller of this function needs to -/// guarantee that the loop is infact dead. Here we handle two kinds of dead -/// loop. The first kind (\p isLoopDead) is where only invariant values from -/// within the loop are used outside of it. The second kind (\p -/// isLoopNeverExecuted) is where the loop is provably never executed. We can -/// always remove never executed loops since they will not cause any difference -/// to program behaviour. -/// -/// This also updates the relevant analysis information in \p DT, \p SE, and \p -/// LI. It also updates the loop PM if an updater struct is provided. -// TODO: This function will be used by loop-simplifyCFG as well. 
So, move this -// to LoopUtils.cpp -static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, - LoopInfo &LI); - enum class LoopDeletionResult { Unmodified, Modified, @@ -183,7 +168,7 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, P->setIncomingValue(i, UndefValue::get(P->getType())); BI++; } - deleteDeadLoop(L, DT, SE, LI); + deleteDeadLoop(L, &DT, &SE, &LI); ++NumDeleted; return LoopDeletionResult::Deleted; } @@ -219,129 +204,12 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, } DEBUG(dbgs() << "Loop is invariant, delete it!"); - deleteDeadLoop(L, DT, SE, LI); + deleteDeadLoop(L, &DT, &SE, &LI); ++NumDeleted; return LoopDeletionResult::Deleted; } -static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, - LoopInfo &LI) { - assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); - auto *Preheader = L->getLoopPreheader(); - assert(Preheader && "Preheader should exist!"); - - // Now that we know the removal is safe, remove the loop by changing the - // branch from the preheader to go to the single exit block. - // - // Because we're deleting a large chunk of code at once, the sequence in which - // we remove things is very important to avoid invalidation issues. - - // Tell ScalarEvolution that the loop is deleted. Do this before - // deleting the loop so that ScalarEvolution can look at the loop - // to determine what it needs to clean up. - SE.forgetLoop(L); - - auto *ExitBlock = L->getUniqueExitBlock(); - assert(ExitBlock && "Should have a unique exit block!"); - assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); - - auto *OldBr = dyn_cast(Preheader->getTerminator()); - assert(OldBr && "Preheader must end with a branch"); - assert(OldBr->isUnconditional() && "Preheader must have a single successor"); - // Connect the preheader to the exit block. Keep the old edge to the header - // around to perform the dominator tree update in two separate steps - // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge - // preheader -> header. - // - // - // 0. Preheader 1. Preheader 2. Preheader - // | | | | - // V | V | - // Header <--\ | Header <--\ | Header <--\ - // | | | | | | | | | | | - // | V | | | V | | | V | - // | Body --/ | | Body --/ | | Body --/ - // V V V V V - // Exit Exit Exit - // - // By doing this is two separate steps we can perform the dominator tree - // update without using the batch update API. - // - // Even when the loop is never executed, we cannot remove the edge from the - // source block to the exit block. Consider the case where the unexecuted loop - // branches back to an outer loop. If we deleted the loop and removed the edge - // coming to this inner loop, this will break the outer loop structure (by - // deleting the backedge of the outer loop). If the outer loop is indeed a - // non-loop, it will be deleted in a future iteration of loop deletion pass. - IRBuilder<> Builder(OldBr); - Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); - // Remove the old branch. The conditional branch becomes a new terminator. - OldBr->eraseFromParent(); - - // Update the dominator tree by informing it about the new edge from the - // preheader to the exit. - DT.insertEdge(Preheader, ExitBlock); - - // Rewrite phis in the exit block to get their inputs from the Preheader - // instead of the exiting block. 
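Editor's note: the deleteDeadLoop body being removed here (per the patch, the shared copy now lives in LoopUtils) performs the CFG surgery in two steps precisely so each DominatorTree change is a single-edge insertEdge/deleteEdge. Condensed from the code quoted above and continued just below; the names (OldBr, L, ExitBlock, Preheader, DT) are the function's locals, so this is an excerpt, not a standalone program:

```cpp
// Step 1: keep the old preheader->header edge alive while adding
// preheader->exit, by temporarily branching on a constant-false condition.
IRBuilder<> Builder(OldBr);
Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
OldBr->eraseFromParent();
DT.insertEdge(Preheader, ExitBlock); // DT update #1: one inserted edge

// ... rewrite the exit block's PHIs while both edges still exist ...

// Step 2: drop the now-dead edge into the loop.
Builder.SetInsertPoint(Preheader->getTerminator());
Builder.CreateBr(ExitBlock);
Preheader->getTerminator()->eraseFromParent();
DT.deleteEdge(Preheader, L->getHeader()); // DT update #2: one deleted edge
```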
- BasicBlock::iterator BI = ExitBlock->begin(); - while (PHINode *P = dyn_cast(BI)) { - // Set the zero'th element of Phi to be from the preheader and remove all - // other incoming values. Given the loop has dedicated exits, all other - // incoming values must be from the exiting blocks. - int PredIndex = 0; - P->setIncomingBlock(PredIndex, Preheader); - // Removes all incoming values from all other exiting blocks (including - // duplicate values from an exiting block). - // Nuke all entries except the zero'th entry which is the preheader entry. - // NOTE! We need to remove Incoming Values in the reverse order as done - // below, to keep the indices valid for deletion (removeIncomingValues - // updates getNumIncomingValues and shifts all values down into the operand - // being deleted). - for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i) - P->removeIncomingValue(e-i, false); - - assert((P->getNumIncomingValues() == 1 && - P->getIncomingBlock(PredIndex) == Preheader) && - "Should have exactly one value and that's from the preheader!"); - ++BI; - } - - // Disconnect the loop body by branching directly to its exit. - Builder.SetInsertPoint(Preheader->getTerminator()); - Builder.CreateBr(ExitBlock); - // Remove the old branch. - Preheader->getTerminator()->eraseFromParent(); - - // Inform the dominator tree about the removed edge. - DT.deleteEdge(Preheader, L->getHeader()); - - // Remove the block from the reference counting scheme, so that we can - // delete it freely later. - for (auto *Block : L->blocks()) - Block->dropAllReferences(); - - // Erase the instructions and the blocks without having to worry - // about ordering because we already dropped the references. - // NOTE: This iteration is safe because erasing the block does not remove its - // entry from the loop's block list. We do that in the next section. - for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); - LI != LE; ++LI) - (*LI)->eraseFromParent(); - - // Finally, the blocks from loopinfo. This has to happen late because - // otherwise our loop iterators won't work. - - SmallPtrSet blocks; - blocks.insert(L->block_begin(), L->block_end()); - for (BasicBlock *BB : blocks) - LI.removeBlock(BB); - - // The last step is to update LoopInfo now that we've eliminated this loop. 
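// [Editor's sketch, not part of the patch] The reverse-order removal in the
// deleted body above is easy to get wrong, so here is the same idiom in
// isolation: removeIncomingValue(i) shifts every later entry down by one, so
// walking from the back keeps the remaining indices valid. The helper name is
// hypothetical; the PHINode API is LLVM's.

#include "llvm/IR/Instructions.h"

static void keepOnlyPreheaderEntry(llvm::PHINode *P) {
  // Entry 0 has already been rewired to the preheader; drop everything else,
  // highest index first so earlier indices never move.
  for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i)
    P->removeIncomingValue(e - i, /*DeletePHIIfEmpty=*/false);
}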
-  // The last step is to update LoopInfo now that we've eliminated this loop.
-  LI.erase(L);
-}
-
 PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
                                         LoopStandardAnalysisResults &AR,
                                         LPMUpdater &Updater) {
diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp
index 3624bba103450..5bd8508519390 100644
--- a/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -23,32 +23,61 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar/LoopDistribute.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <cassert>
+#include <functional>
 #include <list>
+#include <tuple>
+#include <utility>
+
+using namespace llvm;
 
 #define LDIST_NAME "loop-distribute"
 #define DEBUG_TYPE LDIST_NAME
 
-using namespace llvm;
-
 static cl::opt<bool>
     LDistVerify("loop-distribute-verify", cl::Hidden,
                 cl::desc("Turn on DominatorTree and LoopInfo verification "
@@ -81,14 +110,15 @@ static cl::opt<bool> EnableLoopDistribute(
 STATISTIC(NumLoopsDistributed, "Number of loops distributed");
 
 namespace {
+
 /// \brief Maintains the set of instructions of the loop for a partition before
 /// cloning. After cloning, it hosts the new loop.
 class InstPartition {
-  typedef SmallPtrSet<Instruction *, 8> InstructionSet;
+  using InstructionSet = SmallPtrSet<Instruction *, 8>;
 
 public:
   InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
-      : DepCycle(DepCycle), OrigLoop(L), ClonedLoop(nullptr) {
+      : DepCycle(DepCycle), OrigLoop(L) {
     Set.insert(I);
   }
 
@@ -220,7 +250,7 @@ class InstPartition {
 
   /// \brief The cloned loop. If this partition is mapped to the original loop,
   /// this is null.
-  Loop *ClonedLoop;
+  Loop *ClonedLoop = nullptr;
 
   /// \brief The blocks of ClonedLoop including the preheader. If this
   /// partition is mapped to the original loop, this is empty.
@@ -235,7 +265,7 @@ class InstPartition {
 /// \brief Holds the set of Partitions. It populates them, merges them and then
 /// clones the loops.
 class InstPartitionContainer {
-  typedef DenseMap<Instruction *, int> InstToPartitionIdT;
+  using InstToPartitionIdT = DenseMap<Instruction *, int>;
 
 public:
   InstPartitionContainer(Loop *L, LoopInfo *LI, DominatorTree *DT)
@@ -308,8 +338,8 @@ class InstPartitionContainer {
   ///
   /// Return if any partitions were merged.
   bool mergeToAvoidDuplicatedLoads() {
-    typedef DenseMap<Instruction *, InstPartition *> LoadToPartitionT;
-    typedef EquivalenceClasses<InstPartition *> ToBeMergedT;
+    using LoadToPartitionT = DenseMap<Instruction *, InstPartition *>;
+    using ToBeMergedT = EquivalenceClasses<InstPartition *>;
 
     LoadToPartitionT LoadToPartition;
     ToBeMergedT ToBeMerged;
@@ -511,7 +541,7 @@ class InstPartitionContainer {
   }
 
 private:
-  typedef std::list<InstPartition> PartitionContainerT;
+  using PartitionContainerT = std::list<InstPartition>;
 
   /// \brief List of partitions.
   PartitionContainerT PartitionContainer;
@@ -552,17 +582,17 @@
 /// By traversing the memory instructions in program order and accumulating this
 /// number, we know whether any unsafe dependence crosses over a program point.
 class MemoryInstructionDependences {
-  typedef MemoryDepChecker::Dependence Dependence;
+  using Dependence = MemoryDepChecker::Dependence;
 
 public:
   struct Entry {
     Instruction *Inst;
-    unsigned NumUnsafeDependencesStartOrEnd;
+    unsigned NumUnsafeDependencesStartOrEnd = 0;
 
-    Entry(Instruction *Inst) : Inst(Inst), NumUnsafeDependencesStartOrEnd(0) {}
+    Entry(Instruction *Inst) : Inst(Inst) {}
   };
 
-  typedef SmallVector<Entry, 8> AccessesType;
+  using AccessesType = SmallVector<Entry, 8>;
 
   AccessesType::const_iterator begin() const { return Accesses.begin(); }
   AccessesType::const_iterator end() const { return Accesses.end(); }
@@ -594,7 +624,7 @@ class LoopDistributeForLoop {
 public:
   LoopDistributeForLoop(Loop *L, Function *F, LoopInfo *LI, DominatorTree *DT,
                         ScalarEvolution *SE, OptimizationRemarkEmitter *ORE)
-      : L(L), F(F), LI(LI), LAI(nullptr), DT(DT), SE(SE), ORE(ORE) {
+      : L(L), F(F), LI(LI), DT(DT), SE(SE), ORE(ORE) {
     setForced();
   }
 
@@ -755,9 +785,11 @@ class LoopDistributeForLoop {
     ++NumLoopsDistributed;
     // Report the success.
-    ORE->emit(OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
-                                 L->getHeader())
-              << "distributed loop");
+    ORE->emit([&]() {
+      return OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
+                                L->getHeader())
+             << "distributed loop";
+    });
     return true;
   }
 
@@ -769,11 +801,13 @@ class LoopDistributeForLoop {
     DEBUG(dbgs() << "Skipping; " << Message << "\n");
 
     // With Rpass-missed report that distribution failed.
-    ORE->emit(
-        OptimizationRemarkMissed(LDIST_NAME, "NotDistributed", L->getStartLoc(),
-                                 L->getHeader())
-        << "loop not distributed: use -Rpass-analysis=loop-distribute for more "
-           "info");
+    ORE->emit([&]() {
+      return OptimizationRemarkMissed(LDIST_NAME, "NotDistributed",
                                      L->getStartLoc(), L->getHeader())
+             << "loop not distributed: use -Rpass-analysis=loop-distribute for "
+                "more "
+                "info";
+    });
 
     // With Rpass-analysis report why. This is on by default if distribution
     // was requested explicitly.
@@ -857,7 +891,7 @@ class LoopDistributeForLoop {
 
   // Analyses used.
   LoopInfo *LI;
-  const LoopAccessInfo *LAI;
+  const LoopAccessInfo *LAI = nullptr;
   DominatorTree *DT;
   ScalarEvolution *SE;
   OptimizationRemarkEmitter *ORE;
@@ -871,6 +905,8 @@ class LoopDistributeForLoop {
   Optional<bool> IsForced;
 };
 
+} // end anonymous namespace
+
 /// Shared implementation between new and old PMs.
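// [Editor's note, not part of the patch] The ORE->emit rewrites that recur
// throughout this patch all follow one shape: the remark and its streamed
// operands are built inside a lambda, so when no remark output is requested
// the emitter never constructs them. A minimal self-contained sketch (the
// pass name, remark name, and message below are hypothetical; the
// emit-with-callback overload is the one these files use):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/DiagnosticInfo.h"

static void reportExample(llvm::OptimizationRemarkEmitter &ORE, llvm::Loop &L) {
  ORE.emit([&]() {
    // Only evaluated when -Rpass/-fsave-optimization-record is active.
    return llvm::OptimizationRemark("editor-example", "Example",
                                    L.getStartLoc(), L.getHeader())
           << "remark text built only when remarks are enabled";
  });
}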
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, @@ -901,9 +937,13 @@ static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT, return Changed; } +namespace { + /// \brief The pass class. class LoopDistributeLegacy : public FunctionPass { public: + static char ID; + LoopDistributeLegacy() : FunctionPass(ID) { // The default is set by the caller. initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry()); @@ -934,10 +974,9 @@ class LoopDistributeLegacy : public FunctionPass { AU.addRequired(); AU.addPreserved(); } - - static char ID; }; -} // anonymous namespace + +} // end anonymous namespace PreservedAnalyses LoopDistributePass::run(Function &F, FunctionAnalysisManager &AM) { @@ -971,6 +1010,7 @@ PreservedAnalyses LoopDistributePass::run(Function &F, } char LoopDistributeLegacy::ID; + static const char ldist_name[] = "Loop Distribution"; INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, @@ -982,6 +1022,4 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false) -namespace llvm { -FunctionPass *createLoopDistributePass() { return new LoopDistributeLegacy(); } -} +FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 9051b7ceb3a7e..413fb75d1725d 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1,4 +1,4 @@ -//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===// +//===- LoopIdiomRecognize.cpp - Loop idiom recognition --------------------===// // // The LLVM Compiler Infrastructure // @@ -38,32 +38,64 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include 
"llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "loop-idiom" @@ -80,7 +112,7 @@ static cl::opt UseLIRCodeSizeHeurs( namespace { class LoopIdiomRecognize { - Loop *CurLoop; + Loop *CurLoop = nullptr; AliasAnalysis *AA; DominatorTree *DT; LoopInfo *LI; @@ -96,20 +128,21 @@ class LoopIdiomRecognize { TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, const DataLayout *DL) - : CurLoop(nullptr), AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), - DL(DL) {} + : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {} bool runOnLoop(Loop *L); private: - typedef SmallVector StoreList; - typedef MapVector StoreListMap; + using StoreList = SmallVector; + using StoreListMap = MapVector; + StoreListMap StoreRefsForMemset; StoreListMap StoreRefsForMemsetPattern; StoreList StoreRefsForMemcpy; bool HasMemset; bool HasMemsetPattern; bool HasMemcpy; + /// Return code for isLegalStore() enum LegalStoreKind { None = 0, @@ -164,6 +197,7 @@ class LoopIdiomRecognize { class LoopIdiomRecognizeLegacyPass : public LoopPass { public: static char ID; + explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) { initializeLoopIdiomRecognizeLegacyPassPass( *PassRegistry::getPassRegistry()); @@ -190,14 +224,16 @@ class LoopIdiomRecognizeLegacyPass : public LoopPass { /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG. - /// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); getLoopAnalysisUsage(AU); } }; -} // End anonymous namespace. + +} // end anonymous namespace + +char LoopIdiomRecognizeLegacyPass::ID = 0; PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, @@ -211,7 +247,6 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, return getLoopPassPreservedAnalyses(); } -char LoopIdiomRecognizeLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", "Recognize loop idioms", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) @@ -354,7 +389,6 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { LoopIdiomRecognize::LegalStoreKind LoopIdiomRecognize::isLegalStore(StoreInst *SI) { - // Don't touch volatile stores. 
if (SI->isVolatile()) return LegalStoreKind::None; @@ -1488,7 +1522,7 @@ static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val, /// PhiX = PHI [InitX, DefX] /// CntInst = CntPhi + 1 /// DefX = PhiX >> 1 -// LOOP_BODY +/// LOOP_BODY /// Br: loop if (DefX != 0) /// Use(CntPhi) or Use(CntInst) /// diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index af095560cc025..40d468a084d49 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -12,22 +12,33 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopInstSimplify.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/Debug.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/User.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include +#include + using namespace llvm; #define DEBUG_TYPE "loop-instsimplify" @@ -45,7 +56,7 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI, // The bit we are stealing from the pointer represents whether this basic // block is the header of a subloop, in which case we only process its phis. 
-  typedef PointerIntPair<BasicBlock *, 1, bool> WorklistItem;
+  using WorklistItem = PointerIntPair<BasicBlock *, 1, bool>;
 
   SmallVector<WorklistItem, 16> VisitStack;
   SmallPtrSet<BasicBlock *, 32> Visited;
@@ -151,9 +162,11 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI,
 }
 
 namespace {
+
 class LoopInstSimplifyLegacyPass : public LoopPass {
 public:
   static char ID; // Pass ID, replacement for typeid
+
   LoopInstSimplifyLegacyPass() : LoopPass(ID) {
     initializeLoopInstSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
   }
@@ -181,7 +194,8 @@ class LoopInstSimplifyLegacyPass : public LoopPass {
     getLoopAnalysisUsage(AU);
   }
 };
-}
+
+} // end anonymous namespace
 
 PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                             LoopStandardAnalysisResults &AR,
@@ -195,6 +209,7 @@ PreservedAnalyses LoopInstSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
 }
 
 char LoopInstSimplifyLegacyPass::ID = 0;
+
 INITIALIZE_PASS_BEGIN(LoopInstSimplifyLegacyPass, "loop-instsimplify",
                       "Simplify instructions in loops", false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp
index 1559e80f06f66..4f8dafef230a0 100644
--- a/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1,4 +1,4 @@
-//===- LoopInterchange.cpp - Loop interchange pass------------------------===//
+//===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -13,33 +13,38 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
+#include <cassert>
+#include <utility>
+#include <vector>
 
 using namespace llvm;
 
@@ -51,10 +56,12 @@ static cl::opt<int> LoopInterchangeCostThreshold(
 
 namespace {
 
-typedef SmallVector<Loop *, 8> LoopVector;
+using LoopVector = SmallVector<Loop *, 8>;
 
 // TODO: Check if we can use a sparse matrix here.
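// [Editor's note, not part of the patch] A large share of this patch is the
// mechanical typedef -> alias-declaration conversion seen above. The two
// spellings are equivalent, but the alias form reads left to right and,
// unlike typedef, can be templated. Names below are hypothetical,
// illustrating the pattern:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"

typedef llvm::SmallVector<llvm::Loop *, 8> LoopVectorOld;  // old style
using LoopVectorNew = llvm::SmallVector<llvm::Loop *, 8>;  // new style

template <typename T>                    // alias template: no typedef analog
using LoopMapSketch = llvm::DenseMap<llvm::Loop *, T>;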
-typedef std::vector> CharMatrix; +using CharMatrix = std::vector>; + +} // end anonymous namespace // Maximum number of dependencies that can be handled in the dependency matrix. static const unsigned MaxMemInstrCount = 100; @@ -62,10 +69,8 @@ static const unsigned MaxMemInstrCount = 100; // Maximum loop depth supported. static const unsigned MaxLoopNestDepth = 10; -struct LoopInterchange; - #ifdef DUMP_DEP_MATRICIES -void printDepMatrix(CharMatrix &DepMatrix) { +static void printDepMatrix(CharMatrix &DepMatrix) { for (auto &Row : DepMatrix) { for (auto D : Row) DEBUG(dbgs() << D << " "); @@ -76,7 +81,8 @@ void printDepMatrix(CharMatrix &DepMatrix) { static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, Loop *L, DependenceInfo *DI) { - typedef SmallVector ValueVector; + using ValueVector = SmallVector; + ValueVector MemInstr; // For each block. @@ -168,7 +174,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, } // We don't have a DepMatrix to check legality return false. - if (DepMatrix.size() == 0) + if (DepMatrix.empty()) return false; return true; } @@ -213,7 +219,6 @@ static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row, static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, unsigned OuterLoopId, char InnerDep, char OuterDep) { - if (isOuterMostDepPositive(DepMatrix, Row, OuterLoopId)) return false; @@ -252,7 +257,6 @@ static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row, static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, unsigned InnerLoopId, unsigned OuterLoopId) { - unsigned NumRows = DepMatrix.size(); // For each row check if it is valid to interchange. for (unsigned Row = 0; Row < NumRows; ++Row) { @@ -267,7 +271,6 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix, } static void populateWorklist(Loop &L, SmallVector &V) { - DEBUG(dbgs() << "Calling populateWorklist on Func: " << L.getHeader()->getParent()->getName() << " Loop: %" << L.getHeader()->getName() << '\n'); @@ -317,6 +320,8 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { return nullptr; } +namespace { + /// LoopInterchangeLegality checks if it is legal to interchange the loop. class LoopInterchangeLegality { public: @@ -324,11 +329,12 @@ class LoopInterchangeLegality { LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA, OptimizationRemarkEmitter *ORE) : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), - PreserveLCSSA(PreserveLCSSA), ORE(ORE), InnerLoopHasReduction(false) {} + PreserveLCSSA(PreserveLCSSA), ORE(ORE) {} /// Check if the loops can be interchanged. bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix); + /// Check if the loop structure is understood. We do not handle triangular /// loops for now. bool isLoopStructureUnderstood(PHINode *InnerInductionVar); @@ -345,6 +351,7 @@ class LoopInterchangeLegality { bool findInductionAndReductions(Loop *L, SmallVector &Inductions, SmallVector &Reductions); + Loop *OuterLoop; Loop *InnerLoop; @@ -352,10 +359,11 @@ class LoopInterchangeLegality { LoopInfo *LI; DominatorTree *DT; bool PreserveLCSSA; + /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; - bool InnerLoopHasReduction; + bool InnerLoopHasReduction = false; }; /// LoopInterchangeProfitability checks if it is profitable to interchange the @@ -378,6 +386,7 @@ class LoopInterchangeProfitability { /// Scev analysis. ScalarEvolution *SE; + /// Interface to emit optimization remarks. 
OptimizationRemarkEmitter *ORE; }; @@ -412,6 +421,7 @@ class LoopInterchangeTransform { /// Scev analysis. ScalarEvolution *SE; + LoopInfo *LI; DominatorTree *DT; BasicBlock *LoopExit; @@ -421,16 +431,16 @@ class LoopInterchangeTransform { // Main LoopInterchange Pass. struct LoopInterchange : public FunctionPass { static char ID; - ScalarEvolution *SE; - LoopInfo *LI; - DependenceInfo *DI; - DominatorTree *DT; + ScalarEvolution *SE = nullptr; + LoopInfo *LI = nullptr; + DependenceInfo *DI = nullptr; + DominatorTree *DT = nullptr; bool PreserveLCSSA; + /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; - LoopInterchange() - : FunctionPass(ID), SE(nullptr), LI(nullptr), DI(nullptr), DT(nullptr) { + LoopInterchange() : FunctionPass(ID) { initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); } @@ -498,7 +508,6 @@ struct LoopInterchange : public FunctionPass { } bool processLoopList(LoopVector LoopList, Function &F) { - bool Changed = false; unsigned LoopNestDepth = LoopList.size(); if (LoopNestDepth < 2) { @@ -577,7 +586,6 @@ struct LoopInterchange : public FunctionPass { bool processLoop(LoopVector LoopList, unsigned InnerLoopId, unsigned OuterLoopId, BasicBlock *LoopNestExit, std::vector> &DependencyMatrix) { - DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"); Loop *InnerLoop = LoopList[InnerLoopId]; @@ -596,10 +604,12 @@ struct LoopInterchange : public FunctionPass { return false; } - ORE->emit(OptimizationRemark(DEBUG_TYPE, "Interchanged", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Loop interchanged with enclosing loop."); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Interchanged", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Loop interchanged with enclosing loop."; + }); LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit, LIL.hasInnerLoopReduction()); @@ -609,9 +619,10 @@ struct LoopInterchange : public FunctionPass { } }; -} // end of namespace +} // end anonymous namespace + bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) { - return none_of(Ins->users(), [=](User *U) -> bool { + return llvm::none_of(Ins->users(), [=](User *U) -> bool { auto *UserIns = dyn_cast(U); RecurrenceDescriptor RD; return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD); @@ -677,10 +688,8 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { return true; } - bool LoopInterchangeLegality::isLoopStructureUnderstood( PHINode *InnerInduction) { - unsigned Num = InnerInduction->getNumOperands(); BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); for (unsigned i = 0; i < Num; ++i) { @@ -759,7 +768,6 @@ static BasicBlock *getLoopLatchExitBlock(BasicBlock *LatchBlock, // This function indicates the current limitations in the transform as a result // of which we do not proceed. 
bool LoopInterchangeLegality::currentLimitations() { - BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch(); @@ -772,12 +780,13 @@ bool LoopInterchangeLegality::currentLimitations() { if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) { DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes " << "are supported currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "UnsupportedPHIInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with induction or reduction PHI nodes can be" - " interchange currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Only inner loops with induction or reduction PHI nodes can be" + " interchange currently."; + }); return true; } @@ -785,12 +794,13 @@ bool LoopInterchangeLegality::currentLimitations() { if (Inductions.size() != 1) { DEBUG(dbgs() << "We currently only support loops with 1 induction variable." << "Failed to interchange due to current limitation\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "MultiInductionInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with 1 induction variable can be " - "interchanged currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Only inner loops with 1 induction variable can be " + "interchanged currently."; + }); return true; } if (Reductions.size() > 0) @@ -801,12 +811,13 @@ bool LoopInterchangeLegality::currentLimitations() { if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) { DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes " << "are supported currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "UnsupportedPHIOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Only outer loops with induction or reduction PHI nodes can be" - " interchanged currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Only outer loops with induction or reduction PHI nodes can be" + " interchanged currently."; + }); return true; } @@ -815,35 +826,38 @@ bool LoopInterchangeLegality::currentLimitations() { if (!Reductions.empty()) { DEBUG(dbgs() << "Outer loops with reductions are not supported " << "currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "ReductionsOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Outer loops with reductions cannot be interchangeed " - "currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "ReductionsOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Outer loops with reductions cannot be interchangeed " + "currently."; + }); return true; } // TODO: Currently we handle only loops with 1 induction variable. 
if (Inductions.size() != 1) { DEBUG(dbgs() << "Loops with more than 1 induction variables are not " << "supported currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "MultiIndutionOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Only outer loops with 1 induction variable can be " - "interchanged currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Only outer loops with 1 induction variable can be " + "interchanged currently."; + }); return true; } // TODO: Triangular loops are not handled for now. if (!isLoopStructureUnderstood(InnerInductionVar)) { DEBUG(dbgs() << "Loop structure not understood by pass\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "UnsupportedStructureInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Inner loop structure not understood currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Inner loop structure not understood currently."; + }); return true; } @@ -852,24 +866,26 @@ bool LoopInterchangeLegality::currentLimitations() { getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader); if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) { DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "NoLCSSAPHIOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Only outer loops with LCSSA PHIs can be interchange " - "currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuter", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Only outer loops with LCSSA PHIs can be interchange " + "currently."; + }); return true; } LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader); if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) { DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "NoLCSSAPHIOuterInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with LCSSA PHIs can be interchange " - "currently."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Only inner loops with LCSSA PHIs can be interchange " + "currently."; + }); return true; } @@ -894,11 +910,12 @@ bool LoopInterchangeLegality::currentLimitations() { if (!InnerIndexVarInc) { DEBUG(dbgs() << "Did not find an instruction to increment the induction " << "variable.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "NoIncrementInInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "The inner loop does not increment the induction variable."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoIncrementInInner", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "The inner loop does not increment the induction variable."; + }); return true; } @@ -907,7 +924,7 @@ bool LoopInterchangeLegality::currentLimitations() { // instruction. 
bool FoundInduction = false; - for (const Instruction &I : reverse(*InnerLoopLatch)) { + for (const Instruction &I : llvm::reverse(*InnerLoopLatch)) { if (isa(I) || isa(I) || isa(I) || isa(I)) continue; @@ -917,12 +934,13 @@ bool LoopInterchangeLegality::currentLimitations() { if (!I.isIdenticalTo(InnerIndexVarInc)) { DEBUG(dbgs() << "Found unsupported instructions between induction " << "variable increment and branch.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "UnsupportedInsBetweenInduction", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Found unsupported instruction between induction variable " - "increment and branch."); + ORE->emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "UnsupportedInsBetweenInduction", + InnerLoop->getStartLoc(), InnerLoop->getHeader()) + << "Found unsupported instruction between induction variable " + "increment and branch."; + }); return true; } @@ -933,11 +951,12 @@ bool LoopInterchangeLegality::currentLimitations() { // current limitation. if (!FoundInduction) { DEBUG(dbgs() << "Did not find the induction variable.\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "NoIndutionVariable", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Did not find the induction variable."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoIndutionVariable", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Did not find the induction variable."; + }); return true; } return false; @@ -946,16 +965,16 @@ bool LoopInterchangeLegality::currentLimitations() { bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { - if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) { DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << " due to dependence\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "Dependence", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Cannot interchange loops due to dependences."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Cannot interchange loops due to dependences."; + }); return false; } @@ -1003,12 +1022,13 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, // Check if the loops are tightly nested. if (!tightlyNested(OuterLoop, InnerLoop)) { DEBUG(dbgs() << "Loops not tightly nested\n"); - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "NotTightlyNested", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Cannot interchange loops because they are not tightly " - "nested."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Cannot interchange loops because they are not tightly " + "nested."; + }); return false; } @@ -1086,7 +1106,6 @@ static bool isProfitableForVectorization(unsigned InnerLoopId, bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { - // TODO: Add better profitability checks. 
// e.g // 1) Construct dependency matrix and move the one with no loop carried dep @@ -1105,14 +1124,15 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, if (isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix)) return true; - ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, - "InterchangeNotProfitable", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Interchanging loops is too costly (cost=" - << ore::NV("Cost", Cost) << ", threshold=" - << ore::NV("Threshold", LoopInterchangeCostThreshold) << - ") and it does not improve parallelism."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable", + InnerLoop->getStartLoc(), + InnerLoop->getHeader()) + << "Interchanging loops is too costly (cost=" + << ore::NV("Cost", Cost) << ", threshold=" + << ore::NV("Threshold", LoopInterchangeCostThreshold) + << ") and it does not improve parallelism."; + }); return false; } @@ -1151,7 +1171,7 @@ bool LoopInterchangeTransform::transform() { bool Transformed = false; Instruction *InnerIndexVar; - if (InnerLoop->getSubLoops().size() == 0) { + if (InnerLoop->getSubLoops().empty()) { BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); DEBUG(dbgs() << "Calling Split Inner Loop\n"); PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); @@ -1165,7 +1185,11 @@ bool LoopInterchangeTransform::transform() { else InnerIndexVar = dyn_cast(InductionPHI->getIncomingValue(0)); - // + // Ensure that InductionPHI is the first Phi node as required by + // splitInnerLoopHeader + if (&InductionPHI->getParent()->front() != InductionPHI) + InductionPHI->moveBefore(&InductionPHI->getParent()->front()); + // Split at the place were the induction variable is // incremented/decremented. // TODO: This splitting logic may not work always. Fix this. @@ -1194,13 +1218,12 @@ void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { } void LoopInterchangeTransform::splitInnerLoopHeader() { - // Split the inner loop header out. Here make sure that the reduction PHI's // stay in the innerloop body. BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); if (InnerLoopHasReduction) { - // FIXME: Check if the induction PHI will always be the first PHI. + // Note: The induction PHI must be the first PHI for this to work BasicBlock *New = InnerLoopHeader->splitBasicBlock( ++(InnerLoopHeader->begin()), InnerLoopHeader->getName() + ".split"); if (LI) @@ -1250,7 +1273,6 @@ void LoopInterchangeTransform::updateIncomingBlock(BasicBlock *CurrBlock, } bool LoopInterchangeTransform::adjustLoopBranches() { - DEBUG(dbgs() << "adjustLoopBranches called\n"); // Adjust the loop preheader BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); @@ -1358,8 +1380,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() { return true; } -void LoopInterchangeTransform::adjustLoopPreheaders() { +void LoopInterchangeTransform::adjustLoopPreheaders() { // We have interchanged the preheaders so we need to interchange the data in // the preheader as well. // This is because the content of inner preheader was previously executed @@ -1379,7 +1401,6 @@ void LoopInterchangeTransform::adjustLoopPreheaders() { } bool LoopInterchangeTransform::adjustLoopLinks() { - // Adjust all branches in the inner and outer loop. 
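// [Editor's note, not part of the patch] Several edits above qualify
// previously unqualified calls (llvm::none_of, llvm::reverse). The likely
// motivation: with argument-dependent lookup, an unqualified call on an
// llvm:: type can become ambiguous once a same-named helper exists in another
// visible namespace, so spelling out the namespace pins the intended
// overload. A sketch of the llvm::reverse adapter (helper name hypothetical):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

static int lastNonZero(const llvm::SmallVectorImpl<int> &V) {
  for (int X : llvm::reverse(V)) // iterates back-to-front, no rbegin()/rend()
    if (X != 0)
      return X;
  return 0;
}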
bool Changed = adjustLoopBranches(); if (Changed) @@ -1388,6 +1409,7 @@ bool LoopInterchangeTransform::adjustLoopLinks() { } char LoopInterchange::ID = 0; + INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", "Interchanges loops for cache reuse", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp index 20b37c4b70e6d..7c89a2ef5e8ad 100644 --- a/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -28,22 +28,29 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/LoopVersioning.h" #include @@ -53,11 +60,11 @@ #include #include +using namespace llvm; + #define LLE_OPTION "loop-load-elim" #define DEBUG_TYPE LLE_OPTION -using namespace llvm; - static cl::opt CheckPerElim( "runtime-check-per-loop-load-elim", cl::Hidden, cl::desc("Max number of memchecks allowed per eliminated load on average"), @@ -127,10 +134,12 @@ struct StoreToLoadForwardingCandidate { #endif }; +} // end anonymous namespace + /// \brief Check if the store dominates all latches, so as long as there is no /// intervening store this value will be loaded in the next iteration. -bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, - DominatorTree *DT) { +static bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, + DominatorTree *DT) { SmallVector Latches; L->getLoopLatches(Latches); return llvm::all_of(Latches, [&](const BasicBlock *Latch) { @@ -143,6 +152,8 @@ static bool isLoadConditional(LoadInst *Load, Loop *L) { return Load->getParent() != L->getHeader(); } +namespace { + /// \brief The per-loop class that does most of the work. class LoadEliminationForLoop { public: @@ -241,8 +252,8 @@ class LoadEliminationForLoop { std::forward_list &Candidates) { // If Store is nullptr it means that we have multiple stores forwarding to // this store. - typedef DenseMap - LoadToSingleCandT; + using LoadToSingleCandT = + DenseMap; LoadToSingleCandT LoadToSingleCand; for (const auto &Cand : Candidates) { @@ -393,7 +404,6 @@ class LoadEliminationForLoop { void propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand, SCEVExpander &SEE) { - // // loop: // %x = load %gep_i // = ... %x @@ -431,6 +441,7 @@ class LoadEliminationForLoop { bool processLoop() { DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName() << "\" checking " << *L << "\n"); + // Look for store-to-load forwarding cases across the // backedge. 
E.g.: // @@ -558,6 +569,8 @@ class LoadEliminationForLoop { PredicatedScalarEvolution PSE; }; +} // end anonymous namespace + static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, function_ref GetLAI) { @@ -584,10 +597,14 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, return Changed; } +namespace { + /// \brief The pass. Most of the work is delegated to the per-loop /// LoadEliminationForLoop class. class LoopLoadElimination : public FunctionPass { public: + static char ID; + LoopLoadElimination() : FunctionPass(ID) { initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry()); } @@ -616,13 +633,12 @@ class LoopLoadElimination : public FunctionPass { AU.addPreserved(); AU.addPreserved(); } - - static char ID; }; } // end anonymous namespace char LoopLoadElimination::ID; + static const char LLE_name[] = "Loop Load Elimination"; INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false) @@ -633,9 +649,7 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false) -namespace llvm { - -FunctionPass *createLoopLoadEliminationPass() { +FunctionPass *llvm::createLoopLoadEliminationPass() { return new LoopLoadElimination(); } @@ -662,5 +676,3 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F, PreservedAnalyses PA; return PA; } - -} // end namespace llvm diff --git a/lib/Transforms/Scalar/LoopPredication.cpp b/lib/Transforms/Scalar/LoopPredication.cpp index 84577dd182a42..393c6049af1fe 100644 --- a/lib/Transforms/Scalar/LoopPredication.cpp +++ b/lib/Transforms/Scalar/LoopPredication.cpp @@ -100,26 +100,25 @@ // implies M. // // For now the transformation is limited to the following case: -// * The loop has a single latch with either ult or slt icmp condition. +// * The loop has a single latch with the condition of the form: +// ++i latchLimit, where is u<, u<=, s<, or s<=. // * The step of the IV used in the latch condition is 1. // * The IV of the latch condition is the same as the post increment IV of the // guard condition. -// * The guard condition is ult. +// * The guard condition is +// i u< guardLimit. // -// In this case the latch is of the from: -// ++i u< latchLimit or ++i s< latchLimit -// and the guard is of the form: -// i u< guardLimit -// -// For the unsigned latch comparison case M is: +// For the ult latch comparison case M is: // forall X . X u< guardLimit && (X + 1) u< latchLimit => // (X + 1) u< guardLimit // // This is true if latchLimit u<= guardLimit since then // (X + 1) u< latchLimit u<= guardLimit == (X + 1) u< guardLimit. // -// So the widened condition is: +// So for ult condition the widened condition is: // i.start u< guardLimit && latchLimit u<= guardLimit +// Similarly for ule condition the widened condition is: +// i.start u< guardLimit && latchLimit u< guardLimit // // For the signed latch comparison case M is: // forall X . 
X u< guardLimit && (X + 1) s< latchLimit =>
@@ -147,6 +146,8 @@
 //
 // So the widened condition is:
 //   i.start u< guardLimit && latchLimit s<= guardLimit
+// Similarly for sle condition the widened condition is:
+//   i.start u< guardLimit && latchLimit s< guardLimit
 //
 //===----------------------------------------------------------------------===//
 
@@ -288,6 +289,10 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
 
   Type *Ty = LHS->getType();
   assert(Ty == RHS->getType() && "expandCheck operands have different types?");
+
+  if (SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS))
+    return Builder.getTrue();
+
   Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt);
   Value *RHSV = Expander.expandCodeFor(RHS, Ty, InsertAt);
   return Builder.CreateICmp(Pred, LHSV, RHSV);
@@ -303,7 +308,7 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
   DEBUG(ICI->dump());
 
   // parseLoopStructure guarantees that the latch condition is:
-  //   ++i u< latchLimit or ++i s< latchLimit
+  //   ++i <pred> latchLimit, where <pred> is u<, u<=, s<, or s<=.
   // We are looking for the range checks of the form:
   //   i u< guardLimit
   auto RangeCheck = parseLoopICmp(ICI);
@@ -327,15 +332,27 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
   assert(RangeCheckIV->getStepRecurrence(*SE)->isOne() && "must be one");
   const SCEV *Start = RangeCheckIV->getStart();
 
-  // Generate the widened condition. See the file header comment for reasoning.
-  // If the latch condition is unsigned:
-  //   i.start u< guardLimit && latchLimit u<= guardLimit
-  // If the latch condition is signed:
-  //   i.start u< guardLimit && latchLimit s<= guardLimit
-
-  auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred)
-                            ? ICmpInst::ICMP_SLE
-                            : ICmpInst::ICMP_ULE;
+  // Generate the widened condition:
+  //   i.start u< guardLimit && latchLimit <pred> guardLimit
+  // where <pred> depends on the latch condition predicate. See the file
+  // header comment for the reasoning.
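// [Editor's illustration; it mirrors the switch that follows and is not part
// of the patch] Why each latch predicate maps to a matching limit check: an
// inclusive latch ("++i u<= latchLimit") runs one iteration further than the
// strict form, so proving the guard on the last iteration needs the strictly
// stronger fact latchLimit u< guardLimit, while a strict latch only needs
// latchLimit u<= guardLimit. The signed cases are analogous.

#include "llvm/IR/Instructions.h"
#include "llvm/Support/ErrorHandling.h"

static llvm::ICmpInst::Predicate limitCheckPredFor(llvm::ICmpInst::Predicate P) {
  using llvm::ICmpInst;
  switch (P) {
  case ICmpInst::ICMP_ULT: return ICmpInst::ICMP_ULE; // strict -> inclusive
  case ICmpInst::ICMP_ULE: return ICmpInst::ICMP_ULT; // inclusive -> strict
  case ICmpInst::ICMP_SLT: return ICmpInst::ICMP_SLE;
  case ICmpInst::ICMP_SLE: return ICmpInst::ICMP_SLT;
  default: llvm_unreachable("unsupported latch predicate");
  }
}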
+ ICmpInst::Predicate LimitCheckPred; + switch (LatchCheck.Pred) { + case ICmpInst::ICMP_ULT: + LimitCheckPred = ICmpInst::ICMP_ULE; + break; + case ICmpInst::ICMP_ULE: + LimitCheckPred = ICmpInst::ICMP_ULT; + break; + case ICmpInst::ICMP_SLT: + LimitCheckPred = ICmpInst::ICMP_SLE; + break; + case ICmpInst::ICMP_SLE: + LimitCheckPred = ICmpInst::ICMP_SLT; + break; + default: + llvm_unreachable("Unsupported loop latch!"); + } auto CanExpand = [this](const SCEV *S) { return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE); @@ -345,10 +362,10 @@ Optional LoopPredication::widenICmpRangeCheck(ICmpInst *ICI, return None; Instruction *InsertAt = Preheader->getTerminator(); - auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck->Pred, - Start, RangeCheck->Limit, InsertAt); auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred, LatchCheck.Limit, RangeCheck->Limit, InsertAt); + auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck->Pred, + Start, RangeCheck->Limit, InsertAt); return Builder.CreateAnd(FirstIterationCheck, LimitCheck); } @@ -443,7 +460,9 @@ Optional LoopPredication::parseLoopLatchICmp() { } if (Result->Pred != ICmpInst::ICMP_ULT && - Result->Pred != ICmpInst::ICMP_SLT) { + Result->Pred != ICmpInst::ICMP_SLT && + Result->Pred != ICmpInst::ICMP_ULE && + Result->Pred != ICmpInst::ICMP_SLE) { DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred << ")!\n"); return None; diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index fc0216e76a5bb..d1a54b8779500 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -1,4 +1,4 @@ -//===-- LoopReroll.cpp - Loop rerolling pass ------------------------------===// +//===- LoopReroll.cpp - Loop rerolling pass -------------------------------===// // // The LLVM Compiler Infrastructure // @@ -11,22 +11,42 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APInt.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -34,6 +54,13 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include +#include +#include 
+#include +#include +#include +#include using namespace llvm; @@ -127,6 +154,7 @@ NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), // br %cmp, header, exit namespace { + enum IterationLimits { /// The maximum number of iterations that we'll try and reroll. IL_MaxRerollIterations = 32, @@ -139,6 +167,7 @@ namespace { class LoopReroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid + LoopReroll() : LoopPass(ID) { initializeLoopRerollPass(*PassRegistry::getPassRegistry()); } @@ -158,11 +187,12 @@ namespace { DominatorTree *DT; bool PreserveLCSSA; - typedef SmallVector SmallInstructionVector; - typedef SmallSet SmallInstructionSet; + using SmallInstructionVector = SmallVector; + using SmallInstructionSet = SmallSet; // Map between induction variable and its increment DenseMap IVToIncMap; + // For loop with multiple induction variable, remember the one used only to // control the loop. Instruction *LoopControlIV; @@ -171,8 +201,7 @@ namespace { // representing a reduction. Only the last value may be used outside the // loop. struct SimpleLoopReduction { - SimpleLoopReduction(Instruction *P, Loop *L) - : Valid(false), Instructions(1, P) { + SimpleLoopReduction(Instruction *P, Loop *L) : Instructions(1, P) { assert(isa(P) && "First reduction instruction must be a PHI"); add(L); } @@ -204,8 +233,8 @@ namespace { return Instructions.size()-1; } - typedef SmallInstructionVector::iterator iterator; - typedef SmallInstructionVector::const_iterator const_iterator; + using iterator = SmallInstructionVector::iterator; + using const_iterator = SmallInstructionVector::const_iterator; iterator begin() { assert(Valid && "Using invalid reduction"); @@ -221,7 +250,7 @@ namespace { const_iterator end() const { return Instructions.end(); } protected: - bool Valid; + bool Valid = false; SmallInstructionVector Instructions; void add(Loop *L); @@ -230,7 +259,7 @@ namespace { // The set of all reductions, and state tracking of possible reductions // during loop instruction processing. struct ReductionTracker { - typedef SmallVector SmallReductionVector; + using SmallReductionVector = SmallVector; // Add a new possible reduction. void addSLR(SimpleLoopReduction &SLR) { PossibleReds.push_back(SLR); } @@ -342,6 +371,7 @@ namespace { struct DAGRootSet { Instruction *BaseInst; SmallInstructionVector Roots; + // The instructions between IV and BaseInst (but not including BaseInst). SmallInstructionSet SubsumedInsts; }; @@ -361,15 +391,17 @@ namespace { /// Stage 1: Find all the DAG roots for the induction variable. bool findRoots(); + /// Stage 2: Validate if the found roots are valid. bool validate(ReductionTracker &Reductions); + /// Stage 3: Assuming validate() returned true, perform the /// replacement. /// @param IterCount The maximum iteration count of L. void replace(const SCEV *IterCount); protected: - typedef MapVector UsesTy; + using UsesTy = MapVector; void findRootsRecursive(Instruction *IVU, SmallInstructionSet SubsumedInsts); @@ -412,22 +444,29 @@ namespace { // The loop induction variable. Instruction *IV; + // Loop step amount. int64_t Inc; + // Loop reroll count; if Inc == 1, this records the scaling applied // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ; // If Inc is not 1, Scale = Inc. uint64_t Scale; + // The roots themselves. SmallVector RootSets; + // All increment instructions for IV. 
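// [Editor's illustration, not part of the patch] The Scale member documented
// above is easiest to see at the C level: with Inc == 1 and Scale == 2, the
// left-hand loop below is the manually unrolled source, and the right-hand
// one is what LoopReroll recovers. Function names are hypothetical; n is
// assumed even so the two functions have identical behavior.

static void scaledBy2(int *a, const int *b, int n) { // a[i*2+0], a[i*2+1] form
  for (int i = 0; i < n / 2; ++i) {
    a[i * 2 + 0] = 3 * b[i * 2 + 0];
    a[i * 2 + 1] = 3 * b[i * 2 + 1];
  }
}

static void rerolled(int *a, const int *b, int n) {  // single-IV equivalent
  for (int i = 0; i < n; ++i)
    a[i] = 3 * b[i];
}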
SmallInstructionVector LoopIncs; + // Map of all instructions in the loop (in order) to the iterations // they are used in (or specially, IL_All for instructions // used in the loop increment mechanism). UsesTy Uses; + // Map between induction variable and its increment DenseMap &IVToIncMap; + Instruction *LoopControlIV; }; @@ -446,9 +485,11 @@ namespace { bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, ReductionTracker &Reductions); }; -} + +} // end anonymous namespace char LoopReroll::ID = 0; + INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -1069,7 +1110,6 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po } return true; - } /// Get the next instruction in "In" that is a member of set Val. @@ -1124,7 +1164,7 @@ static bool isIgnorableInst(const Instruction *I) { switch (II->getIntrinsicID()) { default: return false; - case llvm::Intrinsic::annotation: + case Intrinsic::annotation: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: // TODO: the following intrinsics may also be whitelisted: @@ -1407,8 +1447,8 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { BaseIt = nextInstr(0, Uses, Visited); RootIt = nextInstr(Iter, Uses, Visited); } - assert (BaseIt == Uses.end() && RootIt == Uses.end() && - "Mismatched set sizes!"); + assert(BaseIt == Uses.end() && RootIt == Uses.end() && + "Mismatched set sizes!"); } DEBUG(dbgs() << "LRR: Matched all iteration increments for " << diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 6462e3fb85629..bbb179d3790c4 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -65,7 +65,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -80,13 +82,18 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" @@ -98,7 +105,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include @@ -107,8 +113,8 @@ #include #include #include +#include #include -#include #include using namespace llvm; @@ -160,15 +166,14 @@ namespace { struct MemAccessTy { /// Used in situations where the accessed memory type is unknown. 
- static const unsigned UnknownAddressSpace = ~0u; + static const unsigned UnknownAddressSpace = + std::numeric_limits::max(); - Type *MemTy; - unsigned AddrSpace; + Type *MemTy = nullptr; + unsigned AddrSpace = UnknownAddressSpace; - MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {} - - MemAccessTy(Type *Ty, unsigned AS) : - MemTy(Ty), AddrSpace(AS) {} + MemAccessTy() = default; + MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {} bool operator==(MemAccessTy Other) const { return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace; @@ -209,7 +214,7 @@ namespace { /// Map register candidates to information about how they are used. class RegUseTracker { - typedef DenseMap RegUsesTy; + using RegUsesTy = DenseMap; RegUsesTy RegUsesMap; SmallVector RegSequence; @@ -225,8 +230,9 @@ class RegUseTracker { void clear(); - typedef SmallVectorImpl::iterator iterator; - typedef SmallVectorImpl::const_iterator const_iterator; + using iterator = SmallVectorImpl::iterator; + using const_iterator = SmallVectorImpl::const_iterator; + iterator begin() { return RegSequence.begin(); } iterator end() { return RegSequence.end(); } const_iterator begin() const { return RegSequence.begin(); } @@ -299,16 +305,16 @@ namespace { /// satisfying a use. It may include broken-out immediates and scaled registers. struct Formula { /// Global base address used for complex addressing. - GlobalValue *BaseGV; + GlobalValue *BaseGV = nullptr; /// Base offset for complex addressing. - int64_t BaseOffset; + int64_t BaseOffset = 0; /// Whether any complex addressing has a base register. - bool HasBaseReg; + bool HasBaseReg = false; /// The scale of any complex addressing. - int64_t Scale; + int64_t Scale = 0; /// The list of "base" registers for this use. When this is non-empty. The /// canonical representation of a formula is @@ -328,16 +334,14 @@ struct Formula { /// The 'scaled' register for this use. This should be non-null when Scale is /// not zero. - const SCEV *ScaledReg; + const SCEV *ScaledReg = nullptr; /// An additional constant offset which added near the use. This requires a /// temporary register, but the offset itself can live in an add immediate /// field rather than a register. - int64_t UnfoldedOffset; + int64_t UnfoldedOffset = 0; - Formula() - : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0), - ScaledReg(nullptr), UnfoldedOffset(0) {} + Formula() = default; void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); @@ -955,6 +959,7 @@ class LSRUse; /// accurate cost model. static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F); + // Get the cost of the scaling factor used in F for LU. static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F, @@ -1025,11 +1030,11 @@ class Cost { /// equivalent, possibly strength-reduced, replacement. struct LSRFixup { /// The instruction which will be updated. - Instruction *UserInst; + Instruction *UserInst = nullptr; /// The operand of the instruction which will be replaced. The operand may be /// used more than once; every instance will be replaced. - Value *OperandValToReplace; + Value *OperandValToReplace = nullptr; /// If this user is to use the post-incremented value of an induction /// variable, this variable is non-null and holds the loop associated with the @@ -1039,11 +1044,11 @@ struct LSRFixup { /// A constant offset to be added to the LSRUse expression. 
This allows /// multiple fixups to share the same LSRUse with different offsets, for /// example in an unrolled loop. - int64_t Offset; + int64_t Offset = 0; - bool isUseFullyOutsideLoop(const Loop *L) const; + LSRFixup() = default; - LSRFixup(); + bool isUseFullyOutsideLoop(const Loop *L) const; void print(raw_ostream &OS) const; void dump() const; @@ -1093,7 +1098,7 @@ class LSRUse { // TODO: Add a generic icmp too? }; - typedef PointerIntPair SCEVUseKindPair; + using SCEVUseKindPair = PointerIntPair; KindType Kind; MemAccessTy AccessTy; @@ -1102,25 +1107,25 @@ class LSRUse { SmallVector Fixups; /// Keep track of the min and max offsets of the fixups. - int64_t MinOffset; - int64_t MaxOffset; + int64_t MinOffset = std::numeric_limits::max(); + int64_t MaxOffset = std::numeric_limits::min(); /// This records whether all of the fixups using this LSRUse are outside of /// the loop, in which case some special-case heuristics may be used. - bool AllFixupsOutsideLoop; + bool AllFixupsOutsideLoop = true; /// RigidFormula is set to true to guarantee that this use will be associated /// with a single formula--the one that initially matched. Some SCEV /// expressions cannot be expanded. This allows LSR to consider the registers /// used by those expressions without the need to expand them later after /// changing the formula. - bool RigidFormula; + bool RigidFormula = false; /// This records the widest use type for any fixup using this /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max /// fixup widths to be equivalent, because the narrower one may be relying on /// the implicit truncation to truncate away bogus bits. - Type *WidestFixupType; + Type *WidestFixupType = nullptr; /// A list of ways to build a value that can satisfy this user. After the /// list is populated, one of these is selected heuristically and used to @@ -1130,10 +1135,7 @@ class LSRUse { /// The set of register candidates used by all formulae in this LSRUse. SmallPtrSet Regs; - LSRUse(KindType K, MemAccessTy AT) - : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), - AllFixupsOutsideLoop(true), RigidFormula(false), - WidestFixupType(nullptr) {} + LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {} LSRFixup &getNewFixup() { Fixups.push_back(LSRFixup()); @@ -1339,14 +1341,14 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, /// Set this cost to a losing value. void Cost::Lose() { - C.Insns = ~0u; - C.NumRegs = ~0u; - C.AddRecCost = ~0u; - C.NumIVMuls = ~0u; - C.NumBaseAdds = ~0u; - C.ImmCost = ~0u; - C.SetupCost = ~0u; - C.ScaleCost = ~0u; + C.Insns = std::numeric_limits::max(); + C.NumRegs = std::numeric_limits::max(); + C.AddRecCost = std::numeric_limits::max(); + C.NumIVMuls = std::numeric_limits::max(); + C.NumBaseAdds = std::numeric_limits::max(); + C.ImmCost = std::numeric_limits::max(); + C.SetupCost = std::numeric_limits::max(); + C.ScaleCost = std::numeric_limits::max(); } /// Choose the lower cost. @@ -1383,10 +1385,6 @@ LLVM_DUMP_METHOD void Cost::dump() const { } #endif -LSRFixup::LSRFixup() - : UserInst(nullptr), OperandValToReplace(nullptr), - Offset(0) {} - /// Test whether this fixup always uses its value outside of the given loop. bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { // PHI nodes use their value in their incoming blocks. @@ -1579,7 +1577,8 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset // Offs is the ICmp immediate. 
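// Why the cast in the negation just below is safe even for the most
// negative value: converting to uint64_t first makes the negation wrap
// modulo 2^64 instead of overflowing. A standalone sketch (assumes the
// usual two's-complement representation):
#include <cstdint>

inline int64_t negateHandlingMin(int64_t V) {
  // For V == INT64_MIN the result converts back to INT64_MIN; for every
  // other value this is an ordinary negation, with no signed-overflow UB.
  return -(uint64_t)V;
}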
if (Scale == 0) - // The cast does the right thing with INT64_MIN. + // The cast does the right thing with + // std::numeric_limits::min(). BaseOffset = -(uint64_t)BaseOffset; return TTI.isLegalICmpImmediate(BaseOffset); } @@ -1777,22 +1776,21 @@ struct IVInc { Value* IVOperand; const SCEV *IncExpr; - IVInc(Instruction *U, Value *O, const SCEV *E): - UserInst(U), IVOperand(O), IncExpr(E) {} + IVInc(Instruction *U, Value *O, const SCEV *E) + : UserInst(U), IVOperand(O), IncExpr(E) {} }; // The list of IV increments in program order. We typically add the head of a // chain without finding subsequent links. struct IVChain { - SmallVector Incs; - const SCEV *ExprBase; - - IVChain() : ExprBase(nullptr) {} + SmallVector Incs; + const SCEV *ExprBase = nullptr; + IVChain() = default; IVChain(const IVInc &Head, const SCEV *Base) - : Incs(1, Head), ExprBase(Base) {} + : Incs(1, Head), ExprBase(Base) {} - typedef SmallVectorImpl::const_iterator const_iterator; + using const_iterator = SmallVectorImpl::const_iterator; // Return the first increment in the chain. const_iterator begin() const { @@ -1834,13 +1832,13 @@ class LSRInstance { LoopInfo &LI; const TargetTransformInfo &TTI; Loop *const L; - bool Changed; + bool Changed = false; /// This is the insert position that the current loop's induction variable /// increment should be placed. In simple loops, this is the latch block's /// terminator. But in more complicated cases, this is a position which will /// dominate all the in-loop post-increment users. - Instruction *IVIncInsertPos; + Instruction *IVIncInsertPos = nullptr; /// Interesting factors between use strides. /// @@ -1886,7 +1884,7 @@ class LSRInstance { void CollectFixupsAndInitialFormulae(); // Support for sharing of LSRUses between LSRFixups. - typedef DenseMap UseMapTy; + using UseMapTy = DenseMap; UseMapTy UseMap; bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, @@ -2127,7 +2125,7 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { /// unfortunately this can come up even for loops where the user didn't use /// a C do-while loop. For example, seemingly well-behaved top-test loops /// will commonly be lowered like this: -// +/// /// if (n > 0) { /// i = 0; /// do { @@ -2161,7 +2159,6 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { /// This function solves this problem by detecting this type of loop and /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting /// the instructions for the maximum computation. -/// ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check that the loop matches the pattern we're looking for. if (Cond->getPredicate() != CmpInst::ICMP_EQ && @@ -2301,7 +2298,6 @@ LSRInstance::OptimizeLoopTermCond() { // Otherwise treat this as a rotated loop. for (BasicBlock *ExitingBlock : ExitingBlocks) { - // Get the terminating condition for the loop if possible. If we // can, we want to change it to use a post-incremented version of its // induction variable, to allow coalescing the live ranges for the IV into @@ -3465,7 +3461,6 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, for (SmallVectorImpl::const_iterator J = AddOps.begin(), JE = AddOps.end(); J != JE; ++J) { - // Loop-variant "unknown" values are uninteresting; we won't be able to // do anything meaningful with them. 
if (isa(*J) && !SE.isLoopInvariant(*J, L)) @@ -3698,7 +3693,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, // Check each interesting stride. for (int64_t Factor : Factors) { // Check that the multiplication doesn't overflow. - if (Base.BaseOffset == INT64_MIN && Factor == -1) + if (Base.BaseOffset == std::numeric_limits::min() && Factor == -1) continue; int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor; if (NewBaseOffset / Factor != Base.BaseOffset) @@ -3710,7 +3705,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, // Check that multiplying with the use offset doesn't overflow. int64_t Offset = LU.MinOffset; - if (Offset == INT64_MIN && Factor == -1) + if (Offset == std::numeric_limits::min() && Factor == -1) continue; Offset = (uint64_t)Offset * Factor; if (Offset / Factor != LU.MinOffset) @@ -3748,7 +3743,8 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, // Check that multiplying with the unfolded offset doesn't overflow. if (F.UnfoldedOffset != 0) { - if (F.UnfoldedOffset == INT64_MIN && Factor == -1) + if (F.UnfoldedOffset == std::numeric_limits::min() && + Factor == -1) continue; F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor; if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset) @@ -3872,7 +3868,7 @@ struct WorkItem { const SCEV *OrigReg; WorkItem(size_t LI, int64_t I, const SCEV *R) - : LUIdx(LI), Imm(I), OrigReg(R) {} + : LUIdx(LI), Imm(I), OrigReg(R) {} void print(raw_ostream &OS) const; void dump() const; @@ -3895,7 +3891,8 @@ LLVM_DUMP_METHOD void WorkItem::dump() const { /// opportunities between them. void LSRInstance::GenerateCrossUseConstantOffsets() { // Group the registers by their value without any added constant offset. - typedef std::map ImmMapTy; + using ImmMapTy = std::map; + DenseMap Map; DenseMap UsedByIndicesMap; SmallVector Sequence; @@ -4099,8 +4096,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { // Collect the best formula for each unique set of shared registers. This // is reset for each use. - typedef DenseMap, size_t, UniquifierDenseMapInfo> - BestFormulaeTy; + using BestFormulaeTy = + DenseMap, size_t, UniquifierDenseMapInfo>; + BestFormulaeTy BestFormulae; for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { @@ -4187,7 +4185,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { } // This is a rough guess that seems to work fairly well. -static const size_t ComplexityLimit = UINT16_MAX; +static const size_t ComplexityLimit = std::numeric_limits::max(); /// Estimate the worst-case number of solutions the solver might have to /// consider. It almost never considers this many solutions because it prune the @@ -4371,7 +4369,8 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() { "from the Formulae with the same Scale and ScaledReg.\n"); // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse. 
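// The overflow checks in GenerateICmpZeroScales above all follow the same
// shape: rule out the one UB case (INT64_MIN * -1), multiply in unsigned
// arithmetic so a wrap is well defined, then confirm the product
// round-trips through division. A standalone sketch (assumes Factor != 0,
// as the interesting-factors list holds only nonzero strides):
#include <cstdint>
#include <limits>

inline bool scaleWithoutOverflow(int64_t Offset, int64_t Factor,
                                 int64_t &Result) {
  if (Offset == std::numeric_limits<int64_t>::min() && Factor == -1)
    return false;                     // would overflow; -INT64_MIN is UB
  Result = (uint64_t)Offset * Factor; // wraps instead of overflowing
  return Result / Factor == Offset;   // a wrap fails the round-trip check
}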
- typedef DenseMap, size_t> BestFormulaeTy; + using BestFormulaeTy = DenseMap, size_t>; + BestFormulaeTy BestFormulae; #ifndef NDEBUG bool ChangedFormulae = false; @@ -4493,7 +4492,6 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() { /// Use3: /// reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted /// reg(c) + reg({b,+,1}) 1 + 2/3 - void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() { if (EstimateSearchSpaceComplexity() < ComplexityLimit) return; @@ -4588,7 +4586,6 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() { print_uses(dbgs())); } - /// Pick a register which seems likely to be profitable, and then in any use /// which has any reference to that register, delete all formulae which do not /// reference that register. @@ -5235,8 +5232,7 @@ void LSRInstance::ImplementSolution( LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI) - : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false), - IVIncInsertPos(nullptr) { + : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; @@ -5487,6 +5483,7 @@ PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM, } char LoopStrengthReduce::ID = 0; + INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", "Loop Strength Reduction", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index a7822c9c791d0..7b1d6446a24a5 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1,4 +1,4 @@ -//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===// +//===- LoopUnroll.cpp - Loop unroller pass --------------------------------===// // // The LLVM Compiler Infrastructure // @@ -13,30 +13,55 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LoopUnrollPass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/LoopUnrollAnalyzer.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/IR/DataLayout.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" +#include 
"llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" -#include +#include +#include +#include +#include +#include +#include #include using namespace llvm; @@ -135,7 +160,7 @@ static cl::opt UnrollRevisitChildLoops( /// A magic value for use with the Threshold parameter to indicate /// that the loop unroll should be performed regardless of how much /// code expansion would result. -static const unsigned NoThreshold = UINT_MAX; +static const unsigned NoThreshold = std::numeric_limits::max(); /// Gather the various unrolling parameters based on the defaults, compiler /// flags, TTI overrides and user specified parameters. @@ -155,8 +180,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( UP.Count = 0; UP.PeelCount = 0; UP.DefaultUnrollRuntimeCount = 8; - UP.MaxCount = UINT_MAX; - UP.FullUnrollMaxCount = UINT_MAX; + UP.MaxCount = std::numeric_limits::max(); + UP.FullUnrollMaxCount = std::numeric_limits::max(); UP.BEInsns = 2; UP.Partial = false; UP.Runtime = false; @@ -222,6 +247,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( } namespace { + /// A struct to densely store the state of an instruction after unrolling at /// each iteration. /// @@ -237,25 +263,27 @@ struct UnrolledInstState { /// Hashing and equality testing for a set of the instruction states. struct UnrolledInstStateKeyInfo { - typedef DenseMapInfo PtrInfo; - typedef DenseMapInfo> PairInfo; + using PtrInfo = DenseMapInfo; + using PairInfo = DenseMapInfo>; + static inline UnrolledInstState getEmptyKey() { return {PtrInfo::getEmptyKey(), 0, 0, 0}; } + static inline UnrolledInstState getTombstoneKey() { return {PtrInfo::getTombstoneKey(), 0, 0, 0}; } + static inline unsigned getHashValue(const UnrolledInstState &S) { return PairInfo::getHashValue({S.I, S.Iteration}); } + static inline bool isEqual(const UnrolledInstState &LHS, const UnrolledInstState &RHS) { return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration}); } }; -} -namespace { struct EstimatedUnrollCost { /// \brief The estimated cost after unrolling. unsigned UnrolledCost; @@ -264,7 +292,8 @@ struct EstimatedUnrollCost { /// rolled form. unsigned RolledDynamicCost; }; -} + +} // end anonymous namespace /// \brief Figure out if the loop is worth full unrolling. /// @@ -286,7 +315,8 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, // We want to be able to scale offsets by the trip count and add more offsets // to them without checking for overflows, and we already don't want to // analyze *massive* trip counts, so we force the max to be reasonably small. - assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) && + assert(UnrollMaxIterationsCountToAnalyze < + (unsigned)(std::numeric_limits::max() / 2) && "The unroll iterations max is too large!"); // Only analyze inner loops. We can't properly estimate cost of nested loops @@ -649,43 +679,6 @@ static unsigned UnrollCountPragmaValue(const Loop *L) { return 0; } -// Remove existing unroll metadata and add unroll disable metadata to -// indicate the loop has already been unrolled. 
This prevents a loop -// from being unrolled more than is directed by a pragma if the loop -// unrolling pass is run more than once (which it generally is). -static void SetLoopAlreadyUnrolled(Loop *L) { - MDNode *LoopID = L->getLoopID(); - // First remove any existing loop unrolling metadata. - SmallVector MDs; - // Reserve first location for self reference to the LoopID metadata node. - MDs.push_back(nullptr); - - if (LoopID) { - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - bool IsUnrollMetadata = false; - MDNode *MD = dyn_cast(LoopID->getOperand(i)); - if (MD) { - const MDString *S = dyn_cast(MD->getOperand(0)); - IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); - } - if (!IsUnrollMetadata) - MDs.push_back(LoopID->getOperand(i)); - } - } - - // Add unroll(disable) metadata to disable future unrolling. - LLVMContext &Context = L->getHeader()->getContext(); - SmallVector DisableOperands; - DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable")); - MDNode *DisableNode = MDNode::get(Context, DisableOperands); - MDs.push_back(DisableNode); - - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. - NewLoopID->replaceOperandWith(0, NewLoopID); - L->setLoopID(NewLoopID); -} - // Computes the boosting factor for complete unrolling. // If fully unrolling the loop would save a lot of RolledDynamicCost, it would // be beneficial to fully unroll the loop even if unrolledcost is large. We @@ -693,7 +686,7 @@ static void SetLoopAlreadyUnrolled(Loop *L) { // the unroll threshold. static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost, unsigned MaxPercentThresholdBoost) { - if (Cost.RolledDynamicCost >= UINT_MAX / 100) + if (Cost.RolledDynamicCost >= std::numeric_limits::max() / 100) return 100; else if (Cost.UnrolledCost != 0) // The boosting factor is RolledDynamicCost / UnrolledCost @@ -842,11 +835,14 @@ static bool computeUnrollCount( } if (UP.Count < 2) { if (PragmaEnableUnroll) - ORE->emit( - OptimizationRemarkMissed(DEBUG_TYPE, "UnrollAsDirectedTooLarge", - L->getStartLoc(), L->getHeader()) - << "Unable to unroll loop as directed by unroll(enable) pragma " - "because unrolled size is too large."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "UnrollAsDirectedTooLarge", + L->getStartLoc(), L->getHeader()) + << "Unable to unroll loop as directed by unroll(enable) " + "pragma " + "because unrolled size is too large."; + }); UP.Count = 0; } } else { @@ -856,22 +852,27 @@ static bool computeUnrollCount( UP.Count = UP.MaxCount; if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && UP.Count != TripCount) - ORE->emit( - OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollAsDirectedTooLarge", - L->getStartLoc(), L->getHeader()) - << "Unable to fully unroll loop as directed by unroll pragma because " - "unrolled size is too large."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "FullUnrollAsDirectedTooLarge", + L->getStartLoc(), L->getHeader()) + << "Unable to fully unroll loop as directed by unroll pragma " + "because " + "unrolled size is too large."; + }); return ExplicitUnroll; } assert(TripCount == 0 && "All cases when TripCount is constant should be covered here."); if (PragmaFullUnroll) - ORE->emit( - OptimizationRemarkMissed(DEBUG_TYPE, - "CantFullUnrollAsDirectedRuntimeTripCount", - L->getStartLoc(), L->getHeader()) - << "Unable to fully unroll loop as directed by unroll(full) pragma " - "because loop has a runtime 
trip count."); + ORE->emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount", + L->getStartLoc(), L->getHeader()) + << "Unable to fully unroll loop as directed by unroll(full) " + "pragma " + "because loop has a runtime trip count."; + }); // 6th priority is runtime unrolling. // Don't unroll a runtime trip count loop when it is disabled. @@ -920,19 +921,23 @@ static bool computeUnrollCount( "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"); + using namespace ore; + if (PragmaCount > 0 && !UP.AllowRemainder) - ORE->emit( - OptimizationRemarkMissed(DEBUG_TYPE, - "DifferentUnrollCountFromDirected", - L->getStartLoc(), L->getHeader()) - << "Unable to unroll loop the number of times directed by " - "unroll_count pragma because remainder loop is restricted " - "(that could architecture specific or because the loop " - "contains a convergent instruction) and so must have an unroll " - "count that divides the loop trip multiple of " - << NV("TripMultiple", TripMultiple) << ". Unrolling instead " - << NV("UnrollCount", UP.Count) << " time(s)."); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "DifferentUnrollCountFromDirected", + L->getStartLoc(), L->getHeader()) + << "Unable to unroll loop the number of times directed by " + "unroll_count pragma because remainder loop is restricted " + "(that could architecture specific or because the loop " + "contains a convergent instruction) and so must have an " + "unroll " + "count that divides the loop trip multiple of " + << NV("TripMultiple", TripMultiple) << ". Unrolling instead " + << NV("UnrollCount", UP.Count) << " time(s)."; + }); } if (UP.Count > UP.MaxCount) @@ -954,7 +959,7 @@ static LoopUnrollResult tryToUnrollLoop( << "] Loop %" << L->getHeader()->getName() << "\n"); if (HasUnrollDisablePragma(L)) return LoopUnrollResult::Unmodified; - if (!L->isLoopSimplifyForm()) { + if (!L->isLoopSimplifyForm()) { DEBUG( dbgs() << " Not unrolling loop which is not in loop-simplify form.\n"); return LoopUnrollResult::Unmodified; @@ -1058,15 +1063,25 @@ static LoopUnrollResult tryToUnrollLoop( // we had, so we don't want to unroll or peel again. if (UnrollResult != LoopUnrollResult::FullyUnrolled && (IsCountSetExplicitly || UP.PeelCount)) - SetLoopAlreadyUnrolled(L); + L->setLoopAlreadyUnrolled(); return UnrollResult; } namespace { + class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid + + int OptLevel; + Optional ProvidedCount; + Optional ProvidedThreshold; + Optional ProvidedAllowPartial; + Optional ProvidedRuntime; + Optional ProvidedUpperBound; + Optional ProvidedAllowPeeling; + LoopUnroll(int OptLevel = 2, Optional Threshold = None, Optional Count = None, Optional AllowPartial = None, Optional Runtime = None, @@ -1079,14 +1094,6 @@ class LoopUnroll : public LoopPass { initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } - int OptLevel; - Optional ProvidedCount; - Optional ProvidedThreshold; - Optional ProvidedAllowPartial; - Optional ProvidedRuntime; - Optional ProvidedUpperBound; - Optional ProvidedAllowPeeling; - bool runOnLoop(Loop *L, LPPassManager &LPM) override { if (skipLoop(L)) return false; @@ -1118,7 +1125,6 @@ class LoopUnroll : public LoopPass { /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG... 
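// The remark hunks above all make the same mechanical change: ORE->emit()
// now takes a lambda, so the OptimizationRemarkMissed object is only
// constructed when remarks are actually enabled for the function. A
// self-contained sketch of the pattern (the helper name and message are
// illustrative, not from the patch):
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/DiagnosticInfo.h"

static void emitExampleRemark(llvm::OptimizationRemarkEmitter &ORE,
                              llvm::Loop *L) {
  ORE.emit([&]() {
    return llvm::OptimizationRemarkMissed("loop-unroll", "ExampleRemark",
                                          L->getStartLoc(), L->getHeader())
           << "example message";
  });
}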
- /// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); @@ -1127,9 +1133,11 @@ class LoopUnroll : public LoopPass { getLoopAnalysisUsage(AU); } }; -} + +} // end anonymous namespace char LoopUnroll::ID = 0; + INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopPass) @@ -1152,7 +1160,7 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count, } Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) { - return llvm::createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0); + return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0); } PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, @@ -1304,6 +1312,9 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, // for unrolling is only needed to get optimization remarks emitted in // a forward order. Loop &L = *Worklist.pop_back_val(); +#ifndef NDEBUG + Loop *ParentL = L.getParentLoop(); +#endif // The API here is quite complex to call, but there are only two interesting // states we support: partial and full (or "simple") unrolling. However, to @@ -1326,7 +1337,6 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, // The parent must not be damaged by unrolling! #ifndef NDEBUG - Loop *ParentL = L.getParentLoop(); if (Result != LoopUnrollResult::Unmodified && ParentL) ParentL->verifyLoop(); #endif diff --git a/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/lib/Transforms/Scalar/LoopVersioningLICM.cpp index c23d891b6504a..53b25e688e822 100644 --- a/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -1,4 +1,4 @@ -//===----------- LoopVersioningLICM.cpp - LICM Loop Versioning ------------===// +//===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===// // // The LLVM Compiler Infrastructure // @@ -60,41 +60,41 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" -#include "llvm/IR/PatternMatch.h" -#include "llvm/IR/PredIteratorCache.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include 
"llvm/Transforms/Utils/LoopVersioning.h" -#include "llvm/Transforms/Utils/ValueMapper.h" +#include +#include + +using namespace llvm; #define DEBUG_TYPE "loop-versioning-licm" -static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable"; -using namespace llvm; +static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable"; /// Threshold minimum allowed percentage for possible /// invariant instructions in a loop. @@ -143,9 +143,16 @@ void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *MDString, } namespace { + struct LoopVersioningLICM : public LoopPass { static char ID; + LoopVersioningLICM() + : LoopPass(ID), LoopDepthThreshold(LVLoopDepthThreshold), + InvariantThreshold(LVInvarThreshold) { + initializeLoopVersioningLICMPass(*PassRegistry::getPassRegistry()); + } + bool runOnLoop(Loop *L, LPPassManager &LPM) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -161,13 +168,6 @@ struct LoopVersioningLICM : public LoopPass { AU.addPreserved(); } - LoopVersioningLICM() - : LoopPass(ID), AA(nullptr), SE(nullptr), LAA(nullptr), LAI(nullptr), - CurLoop(nullptr), LoopDepthThreshold(LVLoopDepthThreshold), - InvariantThreshold(LVInvarThreshold), LoadAndStoreCounter(0), - InvariantCounter(0), IsReadOnlyLoop(true) { - initializeLoopVersioningLICMPass(*PassRegistry::getPassRegistry()); - } StringRef getPassName() const override { return "Loop Versioning for LICM"; } void reset() { @@ -191,30 +191,49 @@ struct LoopVersioningLICM : public LoopPass { }; private: - AliasAnalysis *AA; // Current AliasAnalysis information - ScalarEvolution *SE; // Current ScalarEvolution - LoopAccessLegacyAnalysis *LAA; // Current LoopAccessAnalysis - const LoopAccessInfo *LAI; // Current Loop's LoopAccessInfo + // Current AliasAnalysis information + AliasAnalysis *AA = nullptr; + + // Current ScalarEvolution + ScalarEvolution *SE = nullptr; + + // Current LoopAccessAnalysis + LoopAccessLegacyAnalysis *LAA = nullptr; + + // Current Loop's LoopAccessInfo + const LoopAccessInfo *LAI = nullptr; + + // The current loop we are working on. + Loop *CurLoop = nullptr; + + // AliasSet information for the current loop. + std::unique_ptr CurAST; - Loop *CurLoop; // The current loop we are working on. - std::unique_ptr - CurAST; // AliasSet information for the current loop. + // Maximum loop nest threshold + unsigned LoopDepthThreshold; - unsigned LoopDepthThreshold; // Maximum loop nest threshold - float InvariantThreshold; // Minimum invariant threshold - unsigned LoadAndStoreCounter; // Counter to track num of load & store - unsigned InvariantCounter; // Counter to track num of invariant - bool IsReadOnlyLoop; // Read only loop marker. + // Minimum invariant threshold + float InvariantThreshold; + + // Counter to track num of load & store + unsigned LoadAndStoreCounter = 0; + + // Counter to track num of invariant + unsigned InvariantCounter = 0; + + // Read only loop marker. + bool IsReadOnlyLoop = true; bool isLegalForVersioning(); bool legalLoopStructure(); bool legalLoopInstructions(); bool legalLoopMemoryAccesses(); bool isLoopAlreadyVisited(); - void setNoAliasToLoop(Loop *); - bool instructionSafeForVersioning(Instruction *); + void setNoAliasToLoop(Loop *VerLoop); + bool instructionSafeForVersioning(Instruction *I); }; -} + +} // end anonymous namespace /// \brief Check loop structure and confirms it's good for LoopVersioningLICM. 
bool LoopVersioningLICM::legalLoopStructure() { @@ -225,7 +244,7 @@ bool LoopVersioningLICM::legalLoopStructure() { return false; } // Loop should be innermost loop, if not return false. - if (CurLoop->getSubLoops().size()) { + if (!CurLoop->getSubLoops().empty()) { DEBUG(dbgs() << " loop is not innermost\n"); return false; } @@ -562,6 +581,7 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) { } char LoopVersioningLICM::ID = 0; + INITIALIZE_PASS_BEGIN(LoopVersioningLICM, "loop-versioning-licm", "Loop Versioning For LICM", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0eb4e19896bb5..a4b4330bfedb1 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -14,10 +14,12 @@ #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" @@ -25,6 +27,8 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -41,6 +45,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -54,6 +59,7 @@ #include #include #include +#include using namespace llvm; @@ -225,15 +231,18 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { namespace { class MemsetRanges { + using range_iterator = SmallVectorImpl::iterator; + /// A sorted list of the memset ranges. SmallVector Ranges; - typedef SmallVectorImpl::iterator range_iterator; + const DataLayout &DL; public: MemsetRanges(const DataLayout &DL) : DL(DL) {} - typedef SmallVectorImpl::const_iterator const_iterator; + using const_iterator = SmallVectorImpl::const_iterator; + const_iterator begin() const { return Ranges.begin(); } const_iterator end() const { return Ranges.end(); } bool empty() const { return Ranges.empty(); } @@ -259,7 +268,6 @@ class MemsetRanges { void addRange(int64_t Start, int64_t Size, Value *Ptr, unsigned Alignment, Instruction *Inst); - }; } // end anonymous namespace @@ -356,10 +364,10 @@ class MemCpyOptLegacyPass : public FunctionPass { } }; -char MemCpyOptLegacyPass::ID = 0; - } // end anonymous namespace +char MemCpyOptLegacyPass::ID = 0; + /// The public interface to this file... FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); } @@ -450,7 +458,6 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst, // emit memset's for anything big enough to be worthwhile. Instruction *AMemSet = nullptr; for (const MemsetRange &Range : Ranges) { - if (Range.TheStores.size() == 1) continue; // If it is profitable to lower this range to memset, do so now. 
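The MergeICmps change that follows does two notable things. First, at the source level, the chains it recognizes are consecutive field-by-field equality tests, which it merges into a single memcmp once the loads are proven contiguous. A sketch of the kind of code that qualifies (illustrative; assumes no padding between fields):

struct PairABC { int a, b, c; };
inline bool allFieldsEqual(const PairABC &X, const PairABC &Y) {
  // Before the pass: three compare blocks feeding a phi.
  // After the pass: effectively memcmp(&X, &Y, 3 * sizeof(int)) == 0.
  return X.a == Y.a && X.b == Y.b && X.c == Y.c;
}

Second, the new BCEAtom::operator< in this change illustrates a general rule for keeping a pass deterministic: never let heap addresses decide an order that affects output. A standalone sketch of the same comparator shape (plain types standing in for Value* and APInt):

#include <string>
#include <tuple>

struct AtomSketch {
  std::string Name;  // stands in for Base()->getName()
  const void *Base;  // stands in for the base Value*
  long Offset;       // stands in for the APInt offset

  bool operator<(const AtomSketch &O) const {
    // Names are reproducible from run to run; the raw pointer only breaks
    // ties between identically named (e.g. unnamed) bases, and the offset
    // orders atoms sharing the same base.
    const int NameCmp = Name.compare(O.Name);
    if (NameCmp != 0)
      return NameCmp < 0;
    return std::tie(Base, Offset) < std::tie(O.Base, O.Offset);
  }
};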
diff --git a/lib/Transforms/Scalar/MergeICmps.cpp b/lib/Transforms/Scalar/MergeICmps.cpp index 7bd730014c837..1244a9776fac2 100644 --- a/lib/Transforms/Scalar/MergeICmps.cpp +++ b/lib/Transforms/Scalar/MergeICmps.cpp @@ -22,18 +22,20 @@ //===----------------------------------------------------------------------===// +#include +#include +#include +#include #include "llvm/ADT/APSInt.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" -#include -#include -#include -#include using namespace llvm; @@ -41,8 +43,6 @@ namespace { #define DEBUG_TYPE "mergeicmps" -#define MERGEICMPS_DOT_ON - // A BCE atom. struct BCEAtom { BCEAtom() : GEP(nullptr), LoadI(nullptr), Offset() {} @@ -50,7 +50,21 @@ struct BCEAtom { const Value *Base() const { return GEP ? GEP->getPointerOperand() : nullptr; } bool operator<(const BCEAtom &O) const { - return Base() == O.Base() ? Offset.slt(O.Offset) : Base() < O.Base(); + assert(Base() && "invalid atom"); + assert(O.Base() && "invalid atom"); + // Just ordering by (Base(), Offset) is sufficient. However because this + // means that the ordering will depend on the addresses of the base + // values, which are not reproducible from run to run. To guarantee + // stability, we use the names of the values if they exist; we sort by: + // (Base.getName(), Base(), Offset). + const int NameCmp = Base()->getName().compare(O.Base()->getName()); + if (NameCmp == 0) { + if (Base() == O.Base()) { + return Offset.slt(O.Offset); + } + return Base() < O.Base(); + } + return NameCmp < 0; } GetElementPtrInst *GEP; @@ -99,15 +113,16 @@ BCEAtom visitICmpLoadOperand(Value *const Val) { // A basic block with a comparison between two BCE atoms. // Note: the terminology is misleading: the comparison is symmetric, so there -// is no real {l/r}hs. To break the symmetry, we use the smallest atom as Lhs. +// is no real {l/r}hs. What we want though is to have the same base on the +// left (resp. right), so that we can detect consecutive loads. To ensure this +// we put the smallest atom on the left. class BCECmpBlock { public: BCECmpBlock() {} BCECmpBlock(BCEAtom L, BCEAtom R, int SizeBits) : Lhs_(L), Rhs_(R), SizeBits_(SizeBits) { - if (Rhs_ < Lhs_) - std::swap(Rhs_, Lhs_); + if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_); } bool IsValid() const { @@ -152,17 +167,13 @@ bool BCECmpBlock::doesOtherWork() const { // Note: The GEPs and/or loads are not necessarily in the same block. for (const Instruction &Inst : *BB) { if (const auto *const GEP = dyn_cast(&Inst)) { - if (!(Lhs_.GEP == GEP || Rhs_.GEP == GEP)) - return true; + if (!(Lhs_.GEP == GEP || Rhs_.GEP == GEP)) return true; } else if (const auto *const L = dyn_cast(&Inst)) { - if (!(Lhs_.LoadI == L || Rhs_.LoadI == L)) - return true; + if (!(Lhs_.LoadI == L || Rhs_.LoadI == L)) return true; } else if (const auto *const C = dyn_cast(&Inst)) { - if (C != CmpI) - return true; + if (C != CmpI) return true; } else if (const auto *const Br = dyn_cast(&Inst)) { - if (Br != BranchI) - return true; + if (Br != BranchI) return true; } else { return true; } @@ -179,11 +190,9 @@ BCECmpBlock visitICmp(const ICmpInst *const CmpI, << (ExpectedPredicate == ICmpInst::ICMP_EQ ? 
"eq" : "ne") << "\n"); auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0)); - if (!Lhs.Base()) - return {}; + if (!Lhs.Base()) return {}; auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1)); - if (!Rhs.Base()) - return {}; + if (!Rhs.Base()) return {}; return BCECmpBlock(std::move(Lhs), std::move(Rhs), CmpI->getOperand(0)->getType()->getScalarSizeInBits()); } @@ -194,11 +203,9 @@ BCECmpBlock visitICmp(const ICmpInst *const CmpI, // BCE atoms, returns the comparison. BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block, const BasicBlock *const PhiBlock) { - if (Block->empty()) - return {}; + if (Block->empty()) return {}; auto *const BranchI = dyn_cast(Block->getTerminator()); - if (!BranchI) - return {}; + if (!BranchI) return {}; DEBUG(dbgs() << "branch\n"); if (BranchI->isUnconditional()) { // In this case, we expect an incoming value which is the result of the @@ -206,8 +213,7 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block, // that this does not mean that this is the last incoming value, blocks // can be reordered). auto *const CmpI = dyn_cast(Val); - if (!CmpI) - return {}; + if (!CmpI) return {}; DEBUG(dbgs() << "icmp\n"); auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ); Result.CmpI = CmpI; @@ -218,12 +224,10 @@ BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block, // chained). const auto *const Const = dyn_cast(Val); DEBUG(dbgs() << "const\n"); - if (!Const->isZero()) - return {}; + if (!Const->isZero()) return {}; DEBUG(dbgs() << "false\n"); auto *const CmpI = dyn_cast(BranchI->getCondition()); - if (!CmpI) - return {}; + if (!CmpI) return {}; DEBUG(dbgs() << "icmp\n"); assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch"); BasicBlock *const FalseBlock = BranchI->getSuccessor(1); @@ -359,8 +363,7 @@ void BCECmpChain::dump() const { << Comparison.Rhs().Offset << " (" << (Comparison.SizeBits() / 8) << " bytes)\"];\n"; const Value *const Val = Phi_.getIncomingValueForBlock(Comparison.BB); - if (I > 0) - errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n"; + if (I > 0) errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n"; errs() << " \"" << I << "\" -> \"Phi\" [label=\"" << *Val << "\"];\n"; } errs() << " \"Phi\" [label=\"Phi\"];\n"; @@ -379,8 +382,7 @@ bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI) { break; } } - if (!AtLeastOneMerged) - return false; + if (!AtLeastOneMerged) return false; } // Remove phi references to comparison blocks, they will be rebuilt as we @@ -444,10 +446,9 @@ void BCECmpChain::mergeComparisons(ArrayRef Comparisons, IRBuilder<> Builder(BB); const auto &DL = Phi.getModule()->getDataLayout(); - Value *const MemCmpCall = - emitMemCmp(FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP, - ConstantInt::get(DL.getIntPtrType(Context), TotalSize), - Builder, DL, TLI); + Value *const MemCmpCall = emitMemCmp( + FirstComparison.Lhs().GEP, FirstComparison.Rhs().GEP, ConstantInt::get(DL.getIntPtrType(Context), TotalSize), + Builder, DL, TLI); Value *const MemCmpIsZero = Builder.CreateICmpEQ( MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0)); @@ -561,8 +562,7 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) { // last block and reconstruct the order. BasicBlock *LastBlock = nullptr; for (unsigned I = 0; I < Phi.getNumIncomingValues(); ++I) { - if (isa(Phi.getIncomingValue(I))) - continue; + if (isa(Phi.getIncomingValue(I))) continue; if (LastBlock) { // There are several non-constant values. 
DEBUG(dbgs() << "skip: several non-constant values\n"); @@ -582,8 +582,7 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI) { const auto Blocks = getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues()); - if (Blocks.empty()) - return false; + if (Blocks.empty()) return false; BCECmpChain CmpChain(Blocks, Phi); if (CmpChain.size() < 2) { @@ -605,22 +604,30 @@ class MergeICmps : public FunctionPass { bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; const auto &TLI = getAnalysis().getTLI(); - auto PA = runImpl(F, &TLI); + const auto &TTI = getAnalysis().getTTI(F); + auto PA = runImpl(F, &TLI, &TTI); return !PA.areAllPreserved(); } private: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); } - PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI); + PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI); }; -PreservedAnalyses MergeICmps::runImpl(Function &F, - const TargetLibraryInfo *TLI) { +PreservedAnalyses MergeICmps::runImpl(Function &F, const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI) { DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n"); + // We only try merging comparisons if the target wants to expand memcmp later. + // The rationale is to avoid turning small chains into memcmp calls. + unsigned MaxLoadSize; + if (!TTI->enableMemCmpExpansion(MaxLoadSize)) return PreservedAnalyses::all(); + bool MadeChange = false; for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) { @@ -629,8 +636,7 @@ PreservedAnalyses MergeICmps::runImpl(Function &F, MadeChange |= processPhi(*Phi, TLI); } - if (MadeChange) - return PreservedAnalyses::none(); + if (MadeChange) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } @@ -640,8 +646,8 @@ char MergeICmps::ID = 0; INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); } - diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index d0bfe36038973..b026c8d692c3d 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -77,19 +77,45 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/NaryReassociate.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" 
+#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" +#include +#include + using namespace llvm; using namespace PatternMatch; #define DEBUG_TYPE "nary-reassociate" namespace { + class NaryReassociateLegacyPass : public FunctionPass { public: static char ID; @@ -101,6 +127,7 @@ class NaryReassociateLegacyPass : public FunctionPass { bool doInitialization(Module &M) override { return false; } + bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -118,9 +145,11 @@ class NaryReassociateLegacyPass : public FunctionPass { private: NaryReassociatePass Impl; }; -} // anonymous namespace + +} // end anonymous namespace char NaryReassociateLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(NaryReassociateLegacyPass, "nary-reassociate", "Nary reassociation", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 8e7b1551a162b..a7f27939b1715 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -1,4 +1,4 @@ -//===---- NewGVN.cpp - Global Value Numbering Pass --------------*- C++ -*-===// +//===- NewGVN.cpp - Global Value Numbering Pass ---------------------------===// // // The LLVM Compiler Infrastructure // @@ -6,6 +6,7 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file implements the new LLVM's Global Value Numbering pass. /// GVN partitions values computed by a function into congruence classes. @@ -48,38 +49,81 @@ /// published algorithms are O(Instructions). Instead, we use a technique that /// is O(number of operations with the same value number), enabling us to skip /// trying to eliminate things that have unique value numbers. 
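// The congruence-class idea described above, in source terms: expressions
// that always compute the same value share a class, and the class leader
// replaces the other members (illustrative function, not pass code):
inline int gvnExample(int A, int B) {
  int X = A + B;
  int Y = A + B; // same expression: X and Y land in one congruence class
  return X - Y;  // once Y is replaced by leader X, this folds to 0
}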
+// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/NewGVN.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSA.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ArrayRecycler.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/PointerLikeTypeTraits.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVNExpression.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PredicateInfo.h" #include "llvm/Transforms/Utils/VNCoercion.h" -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + using namespace llvm; -using namespace PatternMatch; using namespace llvm::GVNExpression; using namespace llvm::VNCoercion; + #define DEBUG_TYPE "newgvn" STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted"); @@ -117,6 +161,7 @@ static cl::opt EnablePhiOfOps("enable-phi-of-ops", cl::init(true), // Anchor methods. namespace llvm { namespace GVNExpression { + Expression::~Expression() = default; BasicExpression::~BasicExpression() = default; CallExpression::~CallExpression() = default; @@ -124,10 +169,12 @@ LoadExpression::~LoadExpression() = default; StoreExpression::~StoreExpression() = default; AggregateValueExpression::~AggregateValueExpression() = default; PHIExpression::~PHIExpression() = default; -} -} + +} // end namespace GVNExpression +} // end namespace llvm namespace { + // Tarjan's SCC finding algorithm with Nuutila's improvements // SCCIterator is actually fairly complex for the simple thing we want. // It also wants to hand us SCC's that are unrelated to the phi node we ask @@ -137,7 +184,6 @@ namespace { // instructions, // not generic values (arguments, etc). 
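// The phi cycles this SCC walk is after, in source terms: values that
// merely circulate through phis are equivalent to whatever entered the
// cycle (illustrative; the loop body deliberately changes nothing):
inline int phiCycleExample(int A, int N) {
  int X = A, Y = A;
  for (int I = 0; I < N; ++I) {
    int T = X; // after SSA construction, X and Y become a cycle of phis
    X = Y;     // that only ever carry the incoming value A
    Y = T;
  }
  return X; // provably equal to A once the cycle is detected
}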
struct TarjanSCC { - TarjanSCC() : Components(1) {} void Start(const Instruction *Start) { @@ -192,15 +238,19 @@ struct TarjanSCC { Stack.push_back(I); } } + unsigned int DFSNum = 1; SmallPtrSet InComponent; DenseMap Root; SmallVector Stack; + // Store the components as vector of ptr sets, because we need the topo order // of SCC's, but not individual member order SmallVector, 8> Components; + DenseMap ValueToComponent; }; + // Congruence classes represent the set of expressions/instructions // that are all the same *during some scope in the function*. // That is, because of the way we perform equality propagation, and @@ -249,7 +299,9 @@ class CongruenceClass { explicit CongruenceClass(unsigned ID) : ID(ID) {} CongruenceClass(unsigned ID, Value *Leader, const Expression *E) : ID(ID), RepLeader(Leader), DefiningExpr(E) {} + unsigned getID() const { return ID; } + // True if this class has no members left. This is mainly used for assertion // purposes, and for skipping empty classes. bool isDead() const { @@ -257,6 +309,7 @@ class CongruenceClass { // perspective, it's really dead. return empty() && memory_empty(); } + // Leader functions Value *getLeader() const { return RepLeader; } void setLeader(Value *Leader) { RepLeader = Leader; } @@ -264,7 +317,6 @@ class CongruenceClass { return NextLeader; } void resetNextLeader() { NextLeader = {nullptr, ~0}; } - void addPossibleNextLeader(std::pair LeaderPair) { if (LeaderPair.second < NextLeader.second) NextLeader = LeaderPair; @@ -299,6 +351,7 @@ class CongruenceClass { iterator_range memory() const { return make_range(memory_begin(), memory_end()); } + void memory_insert(const MemoryMemberType *M) { MemoryMembers.insert(M); } void memory_erase(const MemoryMemberType *M) { MemoryMembers.erase(M); } @@ -338,35 +391,48 @@ class CongruenceClass { private: unsigned ID; + // Representative leader. Value *RepLeader = nullptr; + // The most dominating leader after our current leader, because the member set // is not sorted and is expensive to keep sorted all the time. std::pair NextLeader = {nullptr, ~0U}; + // If this is represented by a store, the value of the store. Value *RepStoredValue = nullptr; + // If this class contains MemoryDefs or MemoryPhis, this is the leading memory // access. const MemoryAccess *RepMemoryAccess = nullptr; + // Defining Expression. const Expression *DefiningExpr = nullptr; + // Actual members of this class. MemberSet Members; + // This is the set of MemoryPhis that exist in the class. MemoryDefs and // MemoryUses have real instructions representing them, so we only need to // track MemoryPhis here. MemoryMemberSet MemoryMembers; + // Number of stores in this congruence class. // This is used so we can detect store equivalence changes properly. 
int StoreCount = 0; }; -} // namespace + +} // end anonymous namespace namespace llvm { + struct ExactEqualsExpression { const Expression &E; + explicit ExactEqualsExpression(const Expression &E) : E(E) {} + hash_code getComputedHash() const { return E.getComputedHash(); } + bool operator==(const Expression &Other) const { return E.exactlyEquals(Other); } @@ -378,17 +444,21 @@ template <> struct DenseMapInfo { Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; return reinterpret_cast(Val); } + static const Expression *getTombstoneKey() { auto Val = static_cast(~1U); Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; return reinterpret_cast(Val); } + static unsigned getHashValue(const Expression *E) { return E->getComputedHash(); } + static unsigned getHashValue(const ExactEqualsExpression &E) { return E.getComputedHash(); } + static bool isEqual(const ExactEqualsExpression &LHS, const Expression *RHS) { if (RHS == getTombstoneKey() || RHS == getEmptyKey()) return false; @@ -410,9 +480,11 @@ template <> struct DenseMapInfo { return *LHS == *RHS; } }; + } // end namespace llvm namespace { + class NewGVN { Function &F; DominatorTree *DT; @@ -449,17 +521,22 @@ class NewGVN { // Value Mappings. DenseMap ValueToClass; DenseMap ValueToExpression; + // Value PHI handling, used to make equivalence between phi(op, op) and // op(phi, phi). // These mappings just store various data that would normally be part of the // IR. - DenseSet PHINodeUses; + SmallPtrSet PHINodeUses; + DenseMap OpSafeForPHIOfOps; + // Map a temporary instruction we created to a parent block. DenseMap TempToBlock; + // Map between the already in-program instructions and the temporary phis we // created that they are known equivalent to. DenseMap RealToTemp; + // In order to know when we should re-process instructions that have // phi-of-ops, we track the set of expressions that they needed as // leaders. When we discover new leaders for those expressions, we process the @@ -471,23 +548,32 @@ class NewGVN { mutable DenseMap> AdditionalUsers; DenseMap> ExpressionToPhiOfOps; - // Map from basic block to the temporary operations we created - DenseMap> PHIOfOpsPHIs; + // Map from temporary operation to MemoryAccess. DenseMap TempToMemory; + // Set of all temporary instructions we created. // Note: This will include instructions that were just created during value // numbering. The way to test if something is using them is to check // RealToTemp. - DenseSet AllTempInstructions; + // This is the set of instructions to revisit on a reachability change. At + // the end of the main iteration loop it will contain at least all the phi of + // ops instructions that will be changed to phis, as well as regular phis. + // During the iteration loop, it may contain other things, such as phi of ops + // instructions that used edge reachability to reach a result, and so need to + // be revisited when the edge changes, independent of whether the phi they + // depended on changes. + DenseMap> RevisitOnReachabilityChange; + // Mapping from predicate info we used to the instructions we used it with. // In order to correctly ensure propagation, we must keep track of what // comparisons we used, so that when the values of the comparisons change, we // propagate the information to the places we used the comparison. mutable DenseMap> PredicateToUsers; + // the same reasoning as PredicateToUsers. When we skip MemoryAccesses for // stores, we no longer can rely solely on the def-use chains of MemorySSA. 
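// Minimal shape of a DenseMapInfo specialization like the Expression one
// above: two reserved sentinel keys that can never be real values, plus
// hash and equality hooks (the key type and hash below are illustrative):
#include "llvm/ADT/DenseMapInfo.h"

struct IdKeySketch { unsigned Id; };

namespace llvm {
template <> struct DenseMapInfo<IdKeySketch> {
  static inline IdKeySketch getEmptyKey() { return {~0U}; }
  static inline IdKeySketch getTombstoneKey() { return {~0U - 1}; }
  static unsigned getHashValue(const IdKeySketch &K) { return K.Id * 37U; }
  static bool isEqual(const IdKeySketch &L, const IdKeySketch &R) {
    return L.Id == R.Id;
  }
};
} // end namespace llvm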
mutable DenseMap> @@ -515,6 +601,7 @@ class NewGVN { enum InstCycleState { ICS_Unknown, ICS_CycleFree, ICS_Cycle }; mutable DenseMap InstCycleState; + // Expression to class mapping. using ExpressionClassMap = DenseMap; ExpressionClassMap ExpressionToClass; @@ -571,6 +658,7 @@ class NewGVN { : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), PredInfo(make_unique(F, *DT, *AC)), SQ(DL, TLI, DT, AC) { } + bool runGVN(); private: @@ -578,7 +666,13 @@ class NewGVN { const Expression *createExpression(Instruction *) const; const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *, Instruction *) const; - PHIExpression *createPHIExpression(Instruction *, bool &HasBackEdge, + + // Our canonical form for phi arguments is a pair of incoming value, incoming + // basic block. + using ValPair = std::pair; + + PHIExpression *createPHIExpression(ArrayRef, const Instruction *, + BasicBlock *, bool &HasBackEdge, bool &OriginalOpsConstant) const; const DeadExpression *createDeadExpression() const; const VariableExpression *createVariableExpression(Value *) const; @@ -607,6 +701,7 @@ class NewGVN { CC->setMemoryLeader(MA); return CC; } + CongruenceClass *ensureLeaderOfMemoryClass(MemoryAccess *MA) { auto *CC = getMemoryClass(MA); if (CC->getMemoryLeader() != MA) @@ -620,16 +715,18 @@ class NewGVN { ValueToClass[Member] = CClass; return CClass; } + void initializeCongruenceClasses(Function &F); - const Expression *makePossiblePhiOfOps(Instruction *, + const Expression *makePossiblePHIOfOps(Instruction *, SmallPtrSetImpl &); Value *findLeaderForInst(Instruction *ValueOp, SmallPtrSetImpl &Visited, MemoryAccess *MemAccess, Instruction *OrigInst, BasicBlock *PredBB); - - bool OpIsSafeForPHIOfOps(Value *Op, Instruction *OrigInst, - const BasicBlock *PHIBlock, + bool OpIsSafeForPHIOfOpsHelper(Value *V, const BasicBlock *PHIBlock, + SmallPtrSetImpl &Visited, + SmallVectorImpl &Worklist); + bool OpIsSafeForPHIOfOps(Value *Op, const BasicBlock *PHIBlock, SmallPtrSetImpl &); void addPhiOfOps(PHINode *Op, BasicBlock *BB, Instruction *ExistingValue); void removePhiOfOps(Instruction *I, PHINode *PHITemp); @@ -649,7 +746,10 @@ class NewGVN { const Expression *performSymbolicLoadEvaluation(Instruction *) const; const Expression *performSymbolicStoreEvaluation(Instruction *) const; const Expression *performSymbolicCallEvaluation(Instruction *) const; - const Expression *performSymbolicPHIEvaluation(Instruction *) const; + void sortPHIOps(MutableArrayRef Ops) const; + const Expression *performSymbolicPHIEvaluation(ArrayRef, + Instruction *I, + BasicBlock *PHIBlock) const; const Expression *performSymbolicAggrValueEvaluation(Instruction *) const; const Expression *performSymbolicCmpEvaluation(Instruction *) const; const Expression *performSymbolicPredicateInfoEvaluation(Instruction *) const; @@ -696,7 +796,7 @@ class NewGVN { const BasicBlock *) const; // New instruction creation. 
- void handleNewInstruction(Instruction *){}; + void handleNewInstruction(Instruction *) {} // Various instruction touch utilities template @@ -732,6 +832,7 @@ class NewGVN { MemoryAccess *getDefiningAccess(const MemoryAccess *) const; MemoryPhi *getMemoryAccess(const BasicBlock *) const; template T *getMinDFSOfRange(const Range &) const; + unsigned InstrToDFSNum(const Value *V) const { assert(isa(V) && "This should not be used for MemoryAccesses"); return InstrDFS.lookup(V); @@ -740,7 +841,9 @@ class NewGVN { unsigned InstrToDFSNum(const MemoryAccess *MA) const { return MemoryToDFSNum(MA); } + Value *InstrFromDFSNum(unsigned DFSNum) { return DFSToInstr[DFSNum]; } + // Given a MemoryAccess, return the relevant instruction DFS number. Note: // This deliberately takes a value so it can be used with Use's, which will // auto-convert to Value's but not to MemoryAccess's. @@ -751,12 +854,15 @@ class NewGVN { ? InstrToDFSNum(cast(MA)->getMemoryInst()) : InstrDFS.lookup(MA); } + bool isCycleFree(const Instruction *) const; bool isBackedge(BasicBlock *From, BasicBlock *To) const; + // Debug counter info. When verifying, we have to reset the value numbering // debug counter to the same state it started in to get the same results. std::pair StartingVNCounter; }; + } // end anonymous namespace template @@ -848,50 +954,59 @@ static bool isCopyOfAPHI(const Value *V) { return CO && isa(CO); } -PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, +// Sort PHI Operands into a canonical order. What we use here is an RPO +// order. The BlockInstRange numbers are generated in an RPO walk of the basic +// blocks. +void NewGVN::sortPHIOps(MutableArrayRef Ops) const { + std::sort(Ops.begin(), Ops.end(), [&](const ValPair &P1, const ValPair &P2) { + return BlockInstRange.lookup(P1.second).first < + BlockInstRange.lookup(P2.second).first; + }); +} + +// Return true if V is a value that will always be available (IE can +// be placed anywhere) in the function. We don't do globals here +// because they are often worse to put in place. +static bool alwaysAvailable(Value *V) { + return isa(V) || isa(V); +} + +// Create a PHIExpression from an array of {incoming edge, value} pairs. I is +// the original instruction we are creating a PHIExpression for (but may not be +// a phi node). We require, as an invariant, that all the PHIOperands in the +// same block are sorted the same way. sortPHIOps will sort them into a +// canonical order. +PHIExpression *NewGVN::createPHIExpression(ArrayRef PHIOperands, + const Instruction *I, + BasicBlock *PHIBlock, + bool &HasBackedge, bool &OriginalOpsConstant) const { - BasicBlock *PHIBlock = getBlockForValue(I); - auto *PN = cast(I); - auto *E = - new (ExpressionAllocator) PHIExpression(PN->getNumOperands(), PHIBlock); + unsigned NumOps = PHIOperands.size(); + auto *E = new (ExpressionAllocator) PHIExpression(NumOps, PHIBlock); E->allocateOperands(ArgRecycler, ExpressionAllocator); - E->setType(I->getType()); - E->setOpcode(I->getOpcode()); - - // NewGVN assumes the operands of a PHI node are in a consistent order across - // PHIs. LLVM doesn't seem to always guarantee this. While we need to fix - // this in LLVM at some point we don't want GVN to find wrong congruences. - // Therefore, here we sort uses in predecessor order. - // We're sorting the values by pointer. In theory this might be cause of - // non-determinism, but here we don't rely on the ordering for anything - // significant, e.g. we don't create new instructions based on it so we're - // fine. 
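// Illustration (editor's addition, not part of the patch): sortPHIOps above
// canonicalizes phi operands by the RPO number of the incoming block (via
// BlockInstRange), replacing the pointer-order sort whose removal is shown
// below. A standalone sketch of the idea; BasicBlock is left opaque and
// RPONumber is a hypothetical precomputed block-to-RPO-index map:

#include <algorithm>
#include <map>
#include <utility>
#include <vector>

struct BasicBlock; // opaque stand-in

// {incoming value id, incoming block}, mirroring NewGVN's ValPair.
using ValPair = std::pair<int, BasicBlock *>;

// Sort operands by their predecessor's RPO number so equivalent phis always
// present their operands in one deterministic, canonical order.
void sortByRPO(std::vector<ValPair> &Ops,
               const std::map<BasicBlock *, unsigned> &RPONumber) {
  std::sort(Ops.begin(), Ops.end(),
            [&](const ValPair &A, const ValPair &B) {
              return RPONumber.at(A.second) < RPONumber.at(B.second);
            });
}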
-  SmallVector<const Use *, 4> PHIOperands;
-  for (const Use &U : PN->operands())
-    PHIOperands.push_back(&U);
-  std::sort(PHIOperands.begin(), PHIOperands.end(),
-            [&](const Use *U1, const Use *U2) {
-              return PN->getIncomingBlock(*U1) < PN->getIncomingBlock(*U2);
-            });
+  E->setType(PHIOperands.begin()->first->getType());
+  E->setOpcode(Instruction::PHI);

   // Filter out unreachable phi operands.
-  auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) {
-    auto *BB = PN->getIncomingBlock(*U);
-    if (isCopyOfPHI(*U, PN))
-      return false;
+  auto Filtered = make_filter_range(PHIOperands, [&](const ValPair &P) {
+    auto *BB = P.second;
+    if (auto *PHIOp = dyn_cast<PHINode>(I))
+      if (isCopyOfPHI(P.first, PHIOp))
+        return false;
     if (!ReachableEdges.count({BB, PHIBlock}))
       return false;
     // Things in TOPClass are equivalent to everything.
-    if (ValueToClass.lookup(*U) == TOPClass)
+    if (ValueToClass.lookup(P.first) == TOPClass)
       return false;
-    OriginalOpsConstant = OriginalOpsConstant && isa<Constant>(*U);
+    OriginalOpsConstant = OriginalOpsConstant && isa<Constant>(P.first);
     HasBackedge = HasBackedge || isBackedge(BB, PHIBlock);
-    return lookupOperandLeader(*U) != PN;
+    return lookupOperandLeader(P.first) != I;
   });
-  std::transform(
-      Filtered.begin(), Filtered.end(), op_inserter(E),
-      [&](const Use *U) -> Value * { return lookupOperandLeader(*U); });
+  std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
+                 [&](const ValPair &P) -> Value * {
+                   return lookupOperandLeader(P.first);
+                 });
   return E;
 }
@@ -1139,7 +1254,7 @@ NewGVN::createCallExpression(CallInst *CI, const MemoryAccess *MA) const {
 bool NewGVN::someEquivalentDominates(const Instruction *Inst,
                                      const Instruction *U) const {
   auto *CC = ValueToClass.lookup(Inst);
-  // This must be an instruction because we are only called from phi nodes
+  // This must be an instruction because we are only called from phi nodes
   // in the case that the value it needs to check against is an instruction.

   // The most likely candidates for dominance are the leader and the next leader.
@@ -1157,6 +1272,8 @@ bool NewGVN::someEquivalentDominates(const Instruction *Inst,
   // any of these siblings.
   if (!CC)
     return false;
+  if (alwaysAvailable(CC->getLeader()))
+    return true;
   if (DT->dominates(cast<Instruction>(CC->getLeader()), U))
     return true;
   if (CC->getNextLeader().first &&
@@ -1310,7 +1427,6 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
             getConstantStoreValueForLoad(C, Offset, LoadType, DL));
       }
     }
-
   } else if (auto *DepLI = dyn_cast<LoadInst>(DepInst)) {
     // Can't forward from non-atomic to atomic without violating memory model.
     if (LI->isAtomic() > DepLI->isAtomic())
@@ -1326,7 +1442,6 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
         return createConstantExpression(PossibleConstant);
       }
     }
-
   } else if (auto *DepMI = dyn_cast<MemIntrinsic>(DepInst)) {
     int Offset = analyzeLoadFromClobberingMemInst(LoadType, LoadPtr, DepMI, DL);
     if (Offset >= 0) {
@@ -1526,7 +1641,6 @@ const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I) const {

 // Retrieve the memory class for a given MemoryAccess.
 CongruenceClass *NewGVN::getMemoryClass(const MemoryAccess *MA) const {
-
   auto *Result = MemoryAccessToClass.lookup(MA);
   assert(Result && "Should have found memory class");
   return Result;
@@ -1610,7 +1724,10 @@ bool NewGVN::isCycleFree(const Instruction *I) const {
 }

 // Evaluate PHI nodes symbolically and create an expression result.
-const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { +const Expression * +NewGVN::performSymbolicPHIEvaluation(ArrayRef PHIOps, + Instruction *I, + BasicBlock *PHIBlock) const { // True if one of the incoming phi edges is a backedge. bool HasBackedge = false; // All constant tracks the state of whether all the *original* phi operands @@ -1618,8 +1735,8 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // change in value of the phi is guaranteed not to later change the value of // the phi. IE it can't be v = phi(undef, v+1) bool OriginalOpsConstant = true; - auto *E = cast( - createPHIExpression(I, HasBackedge, OriginalOpsConstant)); + auto *E = cast(createPHIExpression( + PHIOps, I, PHIBlock, HasBackedge, OriginalOpsConstant)); // We match the semantics of SimplifyPhiNode from InstructionSimplify here. // See if all arguments are the same. // We track if any were undef because they need special handling. @@ -1728,6 +1845,7 @@ NewGVN::performSymbolicAggrValueEvaluation(Instruction *I) const { return createAggregateValueExpression(I); } + const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { assert(isa(I) && "Expected a cmp instruction."); @@ -1825,7 +1943,6 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { return createConstantExpression( ConstantInt::getFalse(CI->getType())); } - } else { // Just handle the ne and eq cases, where if we have the same // operands, we may know something. @@ -1849,13 +1966,6 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) const { return createExpression(I); } -// Return true if V is a value that will always be available (IE can -// be placed anywhere) in the function. We don't do globals here -// because they are often worse to put in place. -static bool alwaysAvailable(Value *V) { - return isa(V) || isa(V); -} - // Substitute and symbolize the value before value numbering. const Expression * NewGVN::performSymbolicEvaluation(Value *V, @@ -1875,9 +1985,15 @@ NewGVN::performSymbolicEvaluation(Value *V, case Instruction::InsertValue: E = performSymbolicAggrValueEvaluation(I); break; - case Instruction::PHI: - E = performSymbolicPHIEvaluation(I); - break; + case Instruction::PHI: { + SmallVector Ops; + auto *PN = cast(I); + for (unsigned i = 0; i < PN->getNumOperands(); ++i) + Ops.push_back({PN->getIncomingValue(i), PN->getIncomingBlock(i)}); + // Sort to ensure the invariant createPHIExpression requires is met. + sortPHIOps(Ops); + E = performSymbolicPHIEvaluation(Ops, I, getBlockForValue(I)); + } break; case Instruction::Call: E = performSymbolicCallEvaluation(I); break; @@ -1887,13 +2003,13 @@ NewGVN::performSymbolicEvaluation(Value *V, case Instruction::Load: E = performSymbolicLoadEvaluation(I); break; - case Instruction::BitCast: { + case Instruction::BitCast: E = createExpression(I); - } break; + break; case Instruction::ICmp: - case Instruction::FCmp: { + case Instruction::FCmp: E = performSymbolicCmpEvaluation(I); - } break; + break; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -2220,7 +2336,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, // For a given expression, mark the phi of ops instructions that could have // changed as a result. 
void NewGVN::markPhiOfOpsChanged(const Expression *E) {
-  touchAndErase(ExpressionToPhiOfOps, ExactEqualsExpression(*E));
+  touchAndErase(ExpressionToPhiOfOps, E);
 }

 // Perform congruence finding on a given value numbering expression.
@@ -2341,14 +2457,11 @@ void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
       if (MemoryAccess *MemPhi = getMemoryAccess(To))
         TouchedInstructions.set(InstrToDFSNum(MemPhi));

-      auto BI = To->begin();
-      while (isa<PHINode>(BI)) {
-        TouchedInstructions.set(InstrToDFSNum(&*BI));
-        ++BI;
-      }
-      for_each_found(PHIOfOpsPHIs, To, [&](const PHINode *I) {
-        TouchedInstructions.set(InstrToDFSNum(I));
-      });
+      // FIXME: We should just add a union op on a Bitvector and
+      // SparseBitVector. We can do it word by word faster than we are doing it
+      // here.
+      for (auto InstNum : RevisitOnReachabilityChange[To])
+        TouchedInstructions.set(InstNum);
     }
   }
 }
@@ -2449,10 +2562,13 @@ void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
 void NewGVN::removePhiOfOps(Instruction *I, PHINode *PHITemp) {
   InstrDFS.erase(PHITemp);
   // It's still a temp instruction. We keep it in the array so it gets erased.
-  // However, it's no longer used by I, or in the block/
-  PHIOfOpsPHIs[getBlockForValue(PHITemp)].erase(PHITemp);
+  // However, it's no longer used by I, or in the block
   TempToBlock.erase(PHITemp);
   RealToTemp.erase(I);
+  // We don't remove the users from the phi node uses. This wastes a little
+  // time, but such is life. We could use two sets to track which were there
+  // at the start of NewGVN, and which were added, but right now the cost of
+  // tracking is more than the cost of checking for more phi of ops.
 }

 // Add PHI Op in BB as a PHI of operations version of ExistingValue.
@@ -2460,9 +2576,13 @@ void NewGVN::addPhiOfOps(PHINode *Op, BasicBlock *BB,
                          Instruction *ExistingValue) {
   InstrDFS[Op] = InstrToDFSNum(ExistingValue);
   AllTempInstructions.insert(Op);
-  PHIOfOpsPHIs[BB].insert(Op);
   TempToBlock[Op] = BB;
   RealToTemp[ExistingValue] = Op;
+  // Add all users to phi node use, as they are now uses of the phi of ops phis
+  // and may themselves be phi of ops.
+  for (auto *U : ExistingValue->users())
+    if (auto *UI = dyn_cast<Instruction>(U))
+      PHINodeUses.insert(UI);
 }

 static bool okayForPHIOfOps(const Instruction *I) {
@@ -2472,21 +2592,17 @@ static bool okayForPHIOfOps(const Instruction *I) {
          isa(I);
 }

-// Return true if this operand will be safe to use for phi of ops.
-//
-// The reason some operands are unsafe is that we are not trying to recursively
-// translate everything back through phi nodes. We actually expect some lookups
-// of expressions to fail. In particular, a lookup where the expression cannot
-// exist in the predecessor. This is true even if the expression, as shown, can
-// be determined to be constant.
-bool NewGVN::OpIsSafeForPHIOfOps(Value *V, Instruction *OrigInst,
-                                 const BasicBlock *PHIBlock,
-                                 SmallPtrSetImpl<const Value *> &Visited) {
+bool NewGVN::OpIsSafeForPHIOfOpsHelper(
+    Value *V, const BasicBlock *PHIBlock,
+    SmallPtrSetImpl<const Value *> &Visited,
+    SmallVectorImpl<Instruction *> &Worklist) {
+
   if (!isa<Instruction>(V))
     return true;
   auto OISIt = OpSafeForPHIOfOps.find(V);
   if (OISIt != OpSafeForPHIOfOps.end())
     return OISIt->second;
+
   // Keep walking until we either dominate the phi block, or hit a phi, or run
   // out of things to check.
if (DT->properlyDominates(getBlockForValue(V), PHIBlock)) { @@ -2498,23 +2614,43 @@ bool NewGVN::OpIsSafeForPHIOfOps(Value *V, Instruction *OrigInst, OpSafeForPHIOfOps.insert({V, false}); return false; } - for (auto Op : cast(V)->operand_values()) { + + auto *OrigI = cast(V); + for (auto *Op : OrigI->operand_values()) { if (!isa(Op)) continue; - // See if we already know the answer for this node. - auto OISIt = OpSafeForPHIOfOps.find(Op); + // Stop now if we find an unsafe operand. + auto OISIt = OpSafeForPHIOfOps.find(OrigI); if (OISIt != OpSafeForPHIOfOps.end()) { if (!OISIt->second) { OpSafeForPHIOfOps.insert({V, false}); return false; } + continue; } if (!Visited.insert(Op).second) continue; - if (!OpIsSafeForPHIOfOps(Op, OrigInst, PHIBlock, Visited)) { - OpSafeForPHIOfOps.insert({V, false}); + Worklist.push_back(cast(Op)); + } + return true; +} + +// Return true if this operand will be safe to use for phi of ops. +// +// The reason some operands are unsafe is that we are not trying to recursively +// translate everything back through phi nodes. We actually expect some lookups +// of expressions to fail. In particular, a lookup where the expression cannot +// exist in the predecessor. This is true even if the expression, as shown, can +// be determined to be constant. +bool NewGVN::OpIsSafeForPHIOfOps(Value *V, const BasicBlock *PHIBlock, + SmallPtrSetImpl &Visited) { + SmallVector Worklist; + if (!OpIsSafeForPHIOfOpsHelper(V, PHIBlock, Visited, Worklist)) + return false; + while (!Worklist.empty()) { + auto *I = Worklist.pop_back_val(); + if (!OpIsSafeForPHIOfOpsHelper(I, PHIBlock, Visited, Worklist)) return false; - } } OpSafeForPHIOfOps.insert({V, true}); return true; @@ -2561,7 +2697,7 @@ Value *NewGVN::findLeaderForInst(Instruction *TransInst, // When we see an instruction that is an op of phis, generate the equivalent phi // of ops form. const Expression * -NewGVN::makePossiblePhiOfOps(Instruction *I, +NewGVN::makePossiblePHIOfOps(Instruction *I, SmallPtrSetImpl &Visited) { if (!okayForPHIOfOps(I)) return nullptr; @@ -2589,24 +2725,32 @@ NewGVN::makePossiblePhiOfOps(Instruction *I, SmallPtrSet VisitedOps; // Convert op of phis to phi of ops - for (auto &Op : I->operands()) { - if (!isa(Op)) - continue; + for (auto *Op : I->operand_values()) { + if (!isa(Op)) { + auto *ValuePHI = RealToTemp.lookup(Op); + if (!ValuePHI) + continue; + DEBUG(dbgs() << "Found possible dependent phi of ops\n"); + Op = ValuePHI; + } auto *OpPHI = cast(Op); // No point in doing this for one-operand phis. if (OpPHI->getNumOperands() == 1) continue; if (!DebugCounter::shouldExecute(PHIOfOpsCounter)) return nullptr; - SmallVector, 4> Ops; + SmallVector Ops; + SmallPtrSet Deps; auto *PHIBlock = getBlockForValue(OpPHI); - for (auto PredBB : OpPHI->blocks()) { + RevisitOnReachabilityChange[PHIBlock].reset(InstrToDFSNum(I)); + for (unsigned PredNum = 0; PredNum < OpPHI->getNumOperands(); ++PredNum) { + auto *PredBB = OpPHI->getIncomingBlock(PredNum); Value *FoundVal = nullptr; // We could just skip unreachable edges entirely but it's tricky to do // with rewriting existing phi nodes. if (ReachableEdges.count({PredBB, PHIBlock})) { - // Clone the instruction, create an expression from it, and see if we - // have a leader. + // Clone the instruction, create an expression from it that is + // translated back into the predecessor, and see if we have a leader. 
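// Illustration (editor's addition, not part of the patch; the cloning logic
// of makePossiblePHIOfOps resumes below): the OpIsSafeForPHIOfOpsHelper /
// OpIsSafeForPHIOfOps pair above replaces self-recursion with an explicit
// worklist, so deep operand chains cannot overflow the call stack. The same
// shape, on a hypothetical Node graph:

#include <unordered_set>
#include <vector>

struct Node {
  bool Unsafe = false;
  std::vector<Node *> Operands;
};

// Returns false as soon as any transitively reachable node is unsafe. The
// Visited set memoizes work and breaks cycles; the worklist replaces the
// recursive calls.
bool allSafe(Node *Root) {
  std::vector<Node *> Worklist = {Root};
  std::unordered_set<Node *> Visited;
  while (!Worklist.empty()) {
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue; // already examined
    if (N->Unsafe)
      return false;
    for (Node *Op : N->Operands)
      Worklist.push_back(Op);
  }
  return true;
}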
Instruction *ValueOp = I->clone();
       if (MemAccess)
         TempToMemory.insert({ValueOp, MemAccess});
@@ -2614,16 +2758,22 @@ NewGVN::makePossiblePhiOfOps(Instruction *I,
         VisitedOps.clear();
         for (auto &Op : ValueOp->operands()) {
           auto *OrigOp = &*Op;
-          Op = Op->DoPHITranslation(PHIBlock, PredBB);
-          // When this operand changes, it could change whether there is a
-          // leader for us or not.
-          addAdditionalUsers(Op, I);
+          // When these operands change, it could change whether there is a
+          // leader for us or not, so we have to add additional users.
+          if (isa<PHINode>(Op)) {
+            Op = Op->DoPHITranslation(PHIBlock, PredBB);
+            if (Op != OrigOp && Op != I)
+              Deps.insert(Op);
+          } else if (auto *ValuePHI = RealToTemp.lookup(Op)) {
+            if (getBlockForValue(ValuePHI) == PHIBlock)
+              Op = ValuePHI->getIncomingValue(PredNum);
+          }
           // If we phi-translated the op, it must be safe.
-          SafeForPHIOfOps = SafeForPHIOfOps &&
-                            (Op != OrigOp ||
-                             OpIsSafeForPHIOfOps(Op, I, PHIBlock, VisitedOps));
+          SafeForPHIOfOps =
+              SafeForPHIOfOps &&
+              (Op != OrigOp || OpIsSafeForPHIOfOps(Op, PHIBlock, VisitedOps));
         }
-        // FIXME: For those things that are not safe We could generate
+        // FIXME: For those things that are not safe we could generate
         // expressions all the way down, and see if this comes out to a
         // constant. For anything where that is true, and unsafe, we should
         // have made a phi-of-ops (or value numbered it equivalent to something)
@@ -2639,12 +2789,23 @@ NewGVN::makePossiblePhiOfOps(Instruction *I,
                      << getBlockName(PredBB)
                      << " because the block is unreachable\n");
         FoundVal = UndefValue::get(I->getType());
+        RevisitOnReachabilityChange[PHIBlock].set(InstrToDFSNum(I));
       }

       Ops.push_back({FoundVal, PredBB});
       DEBUG(dbgs() << "Found phi of ops operand " << *FoundVal << " in "
                    << getBlockName(PredBB) << "\n");
     }
+    for (auto Dep : Deps)
+      addAdditionalUsers(Dep, I);
+    sortPHIOps(Ops);
+    auto *E = performSymbolicPHIEvaluation(Ops, I, PHIBlock);
+    if (isa<ConstantExpression>(E) || isa<VariableExpression>(E)) {
+      DEBUG(dbgs()
+            << "Not creating real PHI of ops because it simplified to existing "
+               "value or constant\n");
+      return E;
+    }
     auto *ValuePHI = RealToTemp.lookup(I);
     bool NewPHI = false;
     if (!ValuePHI) {
@@ -2665,10 +2826,11 @@ NewGVN::makePossiblePhiOfOps(Instruction *I,
         ++i;
       }
     }
-
+    RevisitOnReachabilityChange[PHIBlock].set(InstrToDFSNum(I));
     DEBUG(dbgs() << "Created phi of ops " << *ValuePHI << " for " << *I
                  << "\n");
-    return performSymbolicEvaluation(ValuePHI, Visited);
+
+    return E;
   }
   return nullptr;
 }
@@ -2714,8 +2876,11 @@ void NewGVN::initializeCongruenceClasses(Function &F) {
       if (MD && isa<StoreInst>(MD->getMemoryInst()))
         TOPClass->incStoreCount();
     }
+
+    // FIXME: This is trying to discover which instructions are uses of phi
+    // nodes. We should move this into one of the myriad of places that walk
+    // all the operands already.
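// Illustration (editor's addition, not part of the patch): the shape of the
// transformation makePossiblePHIOfOps performs, on hand-written IR. Given an
// operation whose operand is a phi ("op of phis"):
//
//   merge:
//     %p = phi i32 [ %a, %left ], [ %b, %right ]
//     %r = add i32 %p, 1
//
// it builds the equivalent "phi of ops", with the translated operation
// value-numbered in each predecessor:
//
//   left:   ; leader of (a + 1), found or represented by a temporary
//   right:  ; leader of (b + 1)
//   merge:  %r' = phi i32 [ (a+1), %left ], [ (b+1), %right ]
//
// and, per the code above, the phi is only made real when the symbolic
// evaluation does not already simplify it to a constant or an existing value.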
for (auto &I : *BB) { - // TODO: Move to helper if (isa(&I)) for (auto *U : I.users()) if (auto *UInst = dyn_cast(U)) @@ -2773,7 +2938,6 @@ void NewGVN::cleanupTables() { ExpressionToPhiOfOps.clear(); TempToBlock.clear(); TempToMemory.clear(); - PHIOfOpsPHIs.clear(); PHINodeUses.clear(); OpSafeForPHIOfOps.clear(); ReachableBlocks.clear(); @@ -2789,6 +2953,7 @@ void NewGVN::cleanupTables() { MemoryAccessToClass.clear(); PredicateToUsers.clear(); MemoryToUsers.clear(); + RevisitOnReachabilityChange.clear(); } // Assign local DFS number mapping to instructions, and leave space for Value @@ -2812,6 +2977,8 @@ std::pair NewGVN::assignDFSNumbers(BasicBlock *B, markInstructionForDeletion(&I); continue; } + if (isa(&I)) + RevisitOnReachabilityChange[B].set(End); InstrDFS[&I] = End++; DFSToInstr.emplace_back(&I); } @@ -2833,6 +3000,7 @@ void NewGVN::updateProcessedCount(const Value *V) { } #endif } + // Evaluate MemoryPhi nodes symbolically, just like PHI nodes void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) { // If all the arguments are the same, the MemoryPhi has the same value as the @@ -2901,7 +3069,7 @@ void NewGVN::valueNumberInstruction(Instruction *I) { // Make a phi of ops if necessary if (Symbolized && !isa(Symbolized) && !isa(Symbolized) && PHINodeUses.count(I)) { - auto *PHIE = makePossiblePhiOfOps(I, Visited); + auto *PHIE = makePossiblePHIOfOps(I, Visited); // If we created a phi of ops, use it. // If we couldn't create one, make sure we don't leave one lying around if (PHIE) { @@ -2910,7 +3078,6 @@ void NewGVN::valueNumberInstruction(Instruction *I) { removePhiOfOps(I, Op); } } - } else { // Mark the instruction as unused so we don't value number it again. InstrDFS[I] = 0; @@ -3024,7 +3191,7 @@ void NewGVN::verifyMemoryCongruency() const { // so we don't process them. if (auto *MemPHI = dyn_cast(Pair.first)) { for (auto &U : MemPHI->incoming_values()) { - if (Instruction *I = dyn_cast(U.get())) { + if (auto *I = dyn_cast(&*U)) { if (!isInstructionTriviallyDead(I)) return true; } @@ -3319,11 +3486,13 @@ struct NewGVN::ValueDFS { int DFSIn = 0; int DFSOut = 0; int LocalNum = 0; + // Only one of Def and U will be set. // The bool in the Def tells us whether the Def is the stored value of a // store. PointerIntPair Def; Use *U = nullptr; + bool operator<(const ValueDFS &Other) const { // It's not enough that any given field be less than - we have sets // of fields that need to be evaluated together to give a proper ordering. @@ -3558,7 +3727,6 @@ void NewGVN::markInstructionForDeletion(Instruction *I) { } void NewGVN::replaceInstruction(Instruction *I, Value *V) { - DEBUG(dbgs() << "Replacing " << *I << " with " << *V << "\n"); patchAndReplaceAllUsesWith(I, V); // We save the actual erasing to avoid invalidating memory @@ -3579,7 +3747,9 @@ class ValueDFSStack { ValueStack.emplace_back(V); DFSStack.emplace_back(DFSIn, DFSOut); } + bool empty() const { return DFSStack.empty(); } + bool isInScope(int DFSIn, int DFSOut) const { if (empty()) return false; @@ -3603,7 +3773,8 @@ class ValueDFSStack { SmallVector ValueStack; SmallVector, 8> DFSStack; }; -} + +} // end anonymous namespace // Given an expression, get the congruence class for it. CongruenceClass *NewGVN::getClassForExpression(const Expression *E) const { @@ -3679,36 +3850,39 @@ bool NewGVN::eliminateInstructions(Function &F) { // Go through all of our phi nodes, and kill the arguments associated with // unreachable edges. 
-  auto ReplaceUnreachablePHIArgs = [&](PHINode &PHI, BasicBlock *BB) {
-    for (auto &Operand : PHI.incoming_values())
-      if (!ReachableEdges.count({PHI.getIncomingBlock(Operand), BB})) {
-        DEBUG(dbgs() << "Replacing incoming value of " << PHI << " for block "
-                     << getBlockName(PHI.getIncomingBlock(Operand))
+  auto ReplaceUnreachablePHIArgs = [&](PHINode *PHI, BasicBlock *BB) {
+    for (auto &Operand : PHI->incoming_values())
+      if (!ReachableEdges.count({PHI->getIncomingBlock(Operand), BB})) {
+        DEBUG(dbgs() << "Replacing incoming value of " << *PHI << " for block "
+                     << getBlockName(PHI->getIncomingBlock(Operand))
                      << " with undef due to it being unreachable\n");
-        Operand.set(UndefValue::get(PHI.getType()));
+        Operand.set(UndefValue::get(PHI->getType()));
       }
   };
-  SmallPtrSet<BasicBlock *, 8> BlocksWithPhis;
-  for (auto &B : F)
-    if ((!B.empty() && isa<PHINode>(*B.begin())) ||
-        (PHIOfOpsPHIs.find(&B) != PHIOfOpsPHIs.end()))
-      BlocksWithPhis.insert(&B);
+  // Replace unreachable phi arguments.
+  // At this point, RevisitOnReachabilityChange only contains:
+  //
+  // 1. PHIs
+  // 2. Temporaries that will convert to PHIs
+  // 3. Operations that are affected by an unreachable edge but do not fit into
+  // 1 or 2 (rare).
+  // So it is a slight overshoot of what we want. We could make it exact by
+  // using two SparseBitVectors per block.
   DenseMap<const BasicBlock *, unsigned> ReachablePredCount;
-  for (auto KV : ReachableEdges)
+  for (auto &KV : ReachableEdges)
     ReachablePredCount[KV.getEnd()]++;
-  for (auto *BB : BlocksWithPhis)
-    // TODO: It would be faster to use getNumIncomingBlocks() on a phi node in
-    // the block and subtract the pred count, but it's more complicated.
-    if (ReachablePredCount.lookup(BB) !=
-        unsigned(std::distance(pred_begin(BB), pred_end(BB)))) {
-      for (auto II = BB->begin(); isa<PHINode>(II); ++II) {
-        auto &PHI = cast<PHINode>(*II);
+  for (auto &BBPair : RevisitOnReachabilityChange) {
+    for (auto InstNum : BBPair.second) {
+      auto *Inst = InstrFromDFSNum(InstNum);
+      auto *PHI = dyn_cast<PHINode>(Inst);
+      PHI = PHI ? PHI : dyn_cast_or_null<PHINode>(RealToTemp.lookup(Inst));
+      if (!PHI)
+        continue;
+      auto *BB = BBPair.first;
+      if (ReachablePredCount.lookup(BB) != PHI->getNumIncomingValues())
         ReplaceUnreachablePHIArgs(PHI, BB);
-      }
-      for_each_found(PHIOfOpsPHIs, BB, [&](PHINode *PHI) {
-        ReplaceUnreachablePHIArgs(*PHI, BB);
-      });
     }
+  }

   // Map to store the use counts
   DenseMap<const Value *, unsigned int> UseCounts;
@@ -4003,12 +4177,16 @@ bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const {
 }

 namespace {
+
 class NewGVNLegacyPass : public FunctionPass {
 public:
-  static char ID; // Pass identification, replacement for typeid.
+  // Pass identification, replacement for typeid.
+  static char ID;
+
   NewGVNLegacyPass() : FunctionPass(ID) {
     initializeNewGVNLegacyPassPass(*PassRegistry::getPassRegistry());
   }
+
   bool runOnFunction(Function &F) override;

 private:
@@ -4022,7 +4200,8 @@ class NewGVNLegacyPass : public FunctionPass {
     AU.addPreserved();
   }
 };
-} // namespace
+
+} // end anonymous namespace

 bool NewGVNLegacyPass::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;
@@ -4036,6 +4215,8 @@ bool NewGVNLegacyPass::runOnFunction(Function &F) {
       .runGVN();
 }

+char NewGVNLegacyPass::ID = 0;
+
 INITIALIZE_PASS_BEGIN(NewGVNLegacyPass, "newgvn", "Global Value Numbering",
                       false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
@@ -4047,8 +4228,6 @@ INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_END(NewGVNLegacyPass, "newgvn", "Global Value Numbering",
                     false, false)

-char NewGVNLegacyPass::ID = 0;
-
 // createGVNPass - The public interface to this file.
FunctionPass *llvm::createNewGVNPass() { return new NewGVNLegacyPass(); } diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 58832447e1e16..a44ca333fee69 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -21,28 +21,44 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/Reassociate.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include +#include +#include + using namespace llvm; using namespace reassociate; @@ -54,7 +70,6 @@ STATISTIC(NumFactor , "Number of multiplies factored"); #ifndef NDEBUG /// Print out the expression identified in the Ops list. -/// static void PrintOps(Instruction *I, const SmallVectorImpl &Ops) { Module *M = I->getModule(); dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " " @@ -354,7 +369,7 @@ static void IncorporateWeight(APInt &LHS, const APInt &RHS, unsigned Opcode) { } } -typedef std::pair RepeatedValue; +using RepeatedValue = std::pair; /// Given an associative binary expression, return the leaf /// nodes in Ops along with their weights (how many times the leaf occurs). The @@ -429,7 +444,6 @@ typedef std::pair RepeatedValue; /// that have all uses inside the expression (i.e. only used by non-leaf nodes /// of the expression) if it can turn them into binary operators of the right /// type and thus make the expression bigger. - static bool LinearizeExprTree(BinaryOperator *I, SmallVectorImpl &Ops) { DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); @@ -467,12 +481,12 @@ static bool LinearizeExprTree(BinaryOperator *I, // Leaves - Keeps track of the set of putative leaves as well as the number of // paths to each leaf seen so far. - typedef DenseMap LeafMap; + using LeafMap = DenseMap; LeafMap Leaves; // Leaf -> Total weight so far. - SmallVector LeafOrder; // Ensure deterministic leaf output order. + SmallVector LeafOrder; // Ensure deterministic leaf output order. #ifndef NDEBUG - SmallPtrSet Visited; // For sanity checking the iteration scheme. + SmallPtrSet Visited; // For sanity checking the iteration scheme. 
#endif while (!Worklist.empty()) { std::pair P = Worklist.pop_back_val(); @@ -770,7 +784,7 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I, break; ExpressionChanged->moveBefore(I); ExpressionChanged = cast(*ExpressionChanged->user_begin()); - } while (1); + } while (true); // Throw away any left over nodes from the original expression. for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i) @@ -793,7 +807,6 @@ static Value *NegateValue(Value *V, Instruction *BI, return ConstantExpr::getNeg(C); } - // We are trying to expose opportunity for reassociation. One of the things // that we want to do to achieve this is to push a negation as deep into an // expression chain as possible, to expose the add instructions. In practice, @@ -910,7 +923,6 @@ BreakUpSubtract(Instruction *Sub, SetVector> &ToRedo) { // // Calculate the negative value of Operand 1 of the sub instruction, // and set it as the RHS of the add instruction we just made. - // Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo); BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub); Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op. @@ -1154,7 +1166,6 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, // If it was successful, true is returned, and the "R" and "C" is returned // via "Res" and "ConstOpnd", respectively; otherwise, false is returned, // and both "Res" and "ConstOpnd" remain unchanged. -// bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd, Value *&Res) { // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 @@ -1180,7 +1191,6 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, RedoInsts.insert(T); return true; } - // Helper function of OptimizeXor(). 
It tries to simplify // "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a @@ -1227,7 +1237,6 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, Res = createAndInstr(I, X, C3); ConstOpnd ^= C1; - } else if (Opnd1->isOrExpr()) { // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2 // @@ -1346,7 +1355,6 @@ Value *ReassociatePass::OptimizeXor(Instruction *I, // step 3.2: When previous and current operands share the same symbolic // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" - // if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) { // Remove previous operand PrevOpnd->Invalidate(); @@ -2251,10 +2259,13 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) { } namespace { + class ReassociateLegacyPass : public FunctionPass { ReassociatePass Impl; + public: static char ID; // Pass identification, replacement for typeid + ReassociateLegacyPass() : FunctionPass(ID) { initializeReassociateLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -2273,9 +2284,11 @@ namespace { AU.addPreserved(); } }; -} + +} // end anonymous namespace char ReassociateLegacyPass::ID = 0; + INITIALIZE_PASS(ReassociateLegacyPass, "reassociate", "Reassociate expressions", false, false) diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 4b8ddb7cc24f0..1ca77cfec3292 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -421,6 +421,11 @@ findBaseDefiningValueOfVector(Value *I) { if (auto *GEP = dyn_cast(I)) return findBaseDefiningValue(GEP->getPointerOperand()); + // If the pointer comes through a bitcast of a vector of pointers to + // a vector of another type of pointer, then look through the bitcast + if (auto *BC = dyn_cast(I)) + return findBaseDefiningValue(BC->getOperand(0)); + // A PHI or Select is a base defining value. The outer findBasePointer // algorithm is responsible for constructing a base value for this BDV. 
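// Illustration (editor's addition, not part of the patch): the new bitcast
// case above lets vector base-pointer discovery walk through lane-preserving
// casts between vectors of pointers. Hand-written IR for the shape it now
// handles (addrspace(1) standing in for GC-managed pointers):
//
//   %base = select i1 %c, <2 x i64 addrspace(1)*> %u, <2 x i64 addrspace(1)*> %v
//   %cast = bitcast <2 x i64 addrspace(1)*> %base to <2 x i8 addrspace(1)*>
//
// findBaseDefiningValueOfVector(%cast) now looks through %cast and reaches
// %base, the select that actually defines the base, instead of stopping at
// the bitcast.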
assert((isa(I) || isa(I)) && diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 4822cf7cce0fe..067af7f2cd3ad 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -18,30 +18,48 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/SCCP.h" +#include "llvm/Transforms/Scalar/SCCP.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueLatticeUtils.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Utils/Local.h" -#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "sccp" @@ -54,6 +72,7 @@ STATISTIC(IPNumArgsElimed ,"Number of arguments constant propagated by IPSCCP"); STATISTIC(IPNumGlobalConst, "Number of globals found to be constant by IPSCCP"); namespace { + /// LatticeVal class - This class represents the different lattice values that /// an LLVM value may occupy. It is a simple class with value semantics. /// @@ -88,9 +107,11 @@ class LatticeVal { LatticeVal() : Val(nullptr, unknown) {} bool isUnknown() const { return getLatticeValue() == unknown; } + bool isConstant() const { return getLatticeValue() == constant || getLatticeValue() == forcedconstant; } + bool isOverdefined() const { return getLatticeValue() == overdefined; } Constant *getConstant() const { @@ -154,10 +175,6 @@ class LatticeVal { Val.setPointer(V); } }; -} // end anonymous namespace. - - -namespace { //===----------------------------------------------------------------------===// // @@ -167,37 +184,36 @@ namespace { class SCCPSolver : public InstVisitor { const DataLayout &DL; const TargetLibraryInfo *TLI; - SmallPtrSet BBExecutable; // The BBs that are executable. - DenseMap ValueState; // The state each value is in. + SmallPtrSet BBExecutable; // The BBs that are executable. + DenseMap ValueState; // The state each value is in. /// StructValueState - This maintains ValueState for values that have /// StructType, for example for formal arguments, calls, insertelement, etc. - /// - DenseMap, LatticeVal> StructValueState; + DenseMap, LatticeVal> StructValueState; /// GlobalValue - If we are tracking any values for the contents of a global /// variable, we keep a mapping from the constant accessor to the element of /// the global, to the currently known value. 
If the value becomes
  /// overdefined, its entry is simply removed from this map.
-  DenseMap<GlobalVariable*, LatticeVal> TrackedGlobals;
+  DenseMap<GlobalVariable *, LatticeVal> TrackedGlobals;

   /// TrackedRetVals - If we are tracking arguments into and the return
   /// value out of a function, it will have an entry in this map, indicating
   /// what the known return value for the function is.
-  DenseMap<Function*, LatticeVal> TrackedRetVals;
+  DenseMap<Function *, LatticeVal> TrackedRetVals;

   /// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
   /// that return multiple values.
-  DenseMap<std::pair<Function*, unsigned>, LatticeVal> TrackedMultipleRetVals;
+  DenseMap<std::pair<Function *, unsigned>, LatticeVal> TrackedMultipleRetVals;

   /// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
   /// represented here for efficient lookup.
-  SmallPtrSet<Function*, 16> MRVFunctionsTracked;
+  SmallPtrSet<Function *, 16> MRVFunctionsTracked;

   /// TrackingIncomingArguments - This is the set of functions for whose
   /// arguments we make optimistic assumptions about and try to prove as
   /// constants.
-  SmallPtrSet<Function*, 16> TrackingIncomingArguments;
+  SmallPtrSet<Function *, 16> TrackingIncomingArguments;

   /// The reason for two worklists is that overdefined is the lowest state
   /// on the lattice, and moving things to overdefined as fast as possible
   /// makes SCCP converge much faster.
   /// By having a separate worklist, we accomplish this because everything
   /// possibly overdefined will become overdefined at the soonest possible
   /// point.
-  SmallVector<Value*, 64> OverdefinedInstWorkList;
-  SmallVector<Value*, 64> InstWorkList;
-
+  SmallVector<Value *, 64> OverdefinedInstWorkList;
+  SmallVector<Value *, 64> InstWorkList;

-  SmallVector<BasicBlock*, 64> BBWorkList;  // The BasicBlock work list
+  // The BasicBlock work list
+  SmallVector<BasicBlock *, 64> BBWorkList;

   /// KnownFeasibleEdges - Entries in this set are edges which have already had
   /// PHI nodes retriggered.
-  typedef std::pair<BasicBlock*, BasicBlock*> Edge;
+  using Edge = std::pair<BasicBlock *, BasicBlock *>;
   DenseSet<Edge> KnownFeasibleEdges;
+
 public:
   SCCPSolver(const DataLayout &DL, const TargetLibraryInfo *tli)
       : DL(DL), TLI(tli) {}
@@ -263,8 +280,13 @@
     TrackingIncomingArguments.insert(F);
   }

+  /// Returns true if the given function is in the solver's set of
+  /// argument-tracked functions.
+  bool isArgumentTrackedFunction(Function *F) {
+    return TrackingIncomingArguments.count(F);
+  }
+
   /// Solve - Solve for constants and executable blocks.
-  ///
   void Solve();

   /// ResolvedUndefsIn - While solving the dataflow for a function, we assume
@@ -297,7 +319,6 @@
   }

   /// getTrackedRetVals - Get the inferred return value map.
-  ///
   const DenseMap<Function *, LatticeVal> &getTrackedRetVals() {
     return TrackedRetVals;
   }
@@ -349,7 +370,6 @@
   // markConstant - Make a value be marked as "constant". If the value
   // is not already a constant, add it to the instruction work list so that
   // the users of the instruction are updated later.
-  //
   void markConstant(LatticeVal &IV, Value *V, Constant *C) {
     if (!IV.markConstant(C)) return;
     DEBUG(dbgs() << "markConstant: " << *C << ": " << *V << '\n');
@@ -369,7 +389,6 @@
     pushToWorkList(IV, V);
   }

-
   // markOverdefined - Make a value be marked as "overdefined". If the
   // value is not already overdefined, add it to the overdefined instruction
   // work list so that the users of the instruction are updated later.
@@ -402,7 +421,6 @@
     mergeInValue(ValueState[V], V, MergeWithV);
   }

-
   /// getValueState - Return the LatticeVal object that corresponds to the
   /// value.
This function handles the case when the value hasn't been seen yet /// by properly seeding constants etc. @@ -457,7 +475,6 @@ class SCCPSolver : public InstVisitor { return LV; } - /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB /// work list if it is not already executable. void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) { @@ -480,18 +497,15 @@ class SCCPSolver : public InstVisitor { // getFeasibleSuccessors - Return a vector of booleans to indicate which // successors are reachable from a given terminator instruction. - // void getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl &Succs); // isEdgeFeasible - Return true if the control flow edge from the 'From' basic // block to the 'To' basic block is currently feasible. - // bool isEdgeFeasible(BasicBlock *From, BasicBlock *To); // OperandChangedState - This method is invoked on all of the users of an // instruction that was just changed state somehow. Based on this // information, we need to update the specified user of this instruction. - // void OperandChangedState(Instruction *I) { if (BBExecutable.count(I->getParent())) // Inst is executable? visit(*I); @@ -506,6 +520,7 @@ class SCCPSolver : public InstVisitor { void visitPHINode(PHINode &I); // Terminators + void visitReturnInst(ReturnInst &I); void visitTerminatorInst(TerminatorInst &TI); @@ -515,26 +530,32 @@ class SCCPSolver : public InstVisitor { void visitCmpInst(CmpInst &I); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); + void visitCatchSwitchInst(CatchSwitchInst &CPI) { markOverdefined(&CPI); visitTerminatorInst(CPI); } // Instructions that cannot be folded away. + void visitStoreInst (StoreInst &I); void visitLoadInst (LoadInst &I); void visitGetElementPtrInst(GetElementPtrInst &I); + void visitCallInst (CallInst &I) { visitCallSite(&I); } + void visitInvokeInst (InvokeInst &II) { visitCallSite(&II); visitTerminatorInst(II); } + void visitCallSite (CallSite CS); void visitResumeInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } void visitFenceInst (FenceInst &I) { /*returns void*/ } + void visitInstruction(Instruction &I) { // All the instructions we don't do any special handling for just // go to overdefined. @@ -545,10 +566,8 @@ class SCCPSolver : public InstVisitor { } // end anonymous namespace - // getFeasibleSuccessors - Return a vector of booleans to indicate which // successors are reachable from a given terminator instruction. -// void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl &Succs) { Succs.resize(TI.getNumSuccessors()); @@ -631,10 +650,8 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, llvm_unreachable("SCCP: Don't know how to handle this terminator!"); } - // isEdgeFeasible - Return true if the control flow edge from the 'From' basic // block to the 'To' basic block is currently feasible. -// bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { assert(BBExecutable.count(To) && "Dest should always be alive!"); @@ -710,7 +727,6 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { // destination executable // 7. If a conditional branch has a value that is overdefined, make all // successors executable. -// void SCCPSolver::visitPHINode(PHINode &PN) { // If this PN returns a struct, just mark the result overdefined. // TODO: We could do a lot better than this if code actually uses this. 
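// Illustration (editor's addition, not part of the patch): the solver's
// LatticeVal is a three-level lattice (unknown -> constant -> overdefined;
// forcedconstant aside). A minimal sketch of the merge rule that visitPHINode
// applies across executable incoming edges -- the Lattice type and its int
// payload are hypothetical simplifications of the class above:

enum class State { Unknown, Constant, Overdefined };

struct Lattice {
  State S = State::Unknown;
  int C = 0; // constant payload, meaningful only when S == State::Constant
};

// merge(A, B): unknown is the identity; agreeing constants stay constant;
// disagreeing constants, or anything overdefined, collapse to overdefined.
// This mirrors the phi rules in the numbered comment above.
Lattice merge(Lattice A, Lattice B) {
  if (A.S == State::Unknown)
    return B;
  if (B.S == State::Unknown)
    return A;
  if (A.S == State::Constant && B.S == State::Constant && A.C == B.C)
    return A;
  return {State::Overdefined, 0};
}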
@@ -730,7 +746,6 @@ void SCCPSolver::visitPHINode(PHINode &PN) { // constant, and they agree with each other, the PHI becomes the identical // constant. If they are constant and don't agree, the PHI is overdefined. // If there are no executable operands, the PHI remains unknown. - // Constant *OperandVal = nullptr; for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { LatticeVal IV = getValueState(PN.getIncomingValue(i)); @@ -761,7 +776,6 @@ void SCCPSolver::visitPHINode(PHINode &PN) { // arguments that agree with each other(and OperandVal is the constant) or // OperandVal is null because there are no defined incoming arguments. If // this is the case, the PHI remains unknown. - // if (OperandVal) markConstant(&PN, OperandVal); // Acquire operand value } @@ -789,7 +803,6 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F, getStructValueState(ResultOp, i)); - } } @@ -820,7 +833,6 @@ void SCCPSolver::visitCastInst(CastInst &I) { } } - void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { // If this returns a struct, mark all elements over defined, we don't track // structs in structs. @@ -969,7 +981,6 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { } } - markOverdefined(&I); } @@ -998,7 +1009,6 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { // Handle getelementptr instructions. If all operands are constants then we // can turn this into a getelementptr ConstantExpr. -// void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) { if (ValueState[&I].isOverdefined()) return; @@ -1044,7 +1054,6 @@ void SCCPSolver::visitStoreInst(StoreInst &SI) { TrackedGlobals.erase(I); // No need to keep tracking this! } - // Handle load instructions. If the operand is a constant pointer to a constant // global, we can replace the load with the loaded constant value! void SCCPSolver::visitLoadInst(LoadInst &I) { @@ -1108,7 +1117,6 @@ void SCCPSolver::visitCallSite(CallSite CS) { // a declaration, maybe we can constant fold it. if (F && F->isDeclaration() && !I->getType()->isStructTy() && canConstantFoldCallTo(CS, F)) { - SmallVector Operands; for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end(); AI != E; ++AI) { @@ -1360,7 +1368,6 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // undef & X -> 0. X could be zero. markForcedConstant(&I, Constant::getNullValue(ITy)); return true; - case Instruction::Or: // Both operands undef -> undef if (Op0LV.isUnknown() && Op1LV.isUnknown()) @@ -1368,7 +1375,6 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // undef | X -> -1. X could be -1. markForcedConstant(&I, Constant::getAllOnesValue(ITy)); return true; - case Instruction::Xor: // undef ^ undef -> 0; strictly speaking, this is not strictly // necessary, but we try to be nice to people who expect this @@ -1379,7 +1385,6 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { } // undef ^ X -> undef break; - case Instruction::SDiv: case Instruction::UDiv: case Instruction::SRem: @@ -1397,7 +1402,6 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // undef % X -> 0. X could be 1. markForcedConstant(&I, Constant::getNullValue(ITy)); return true; - case Instruction::AShr: // X >>a undef -> undef. 
if (Op1LV.isUnknown()) break; @@ -1464,7 +1468,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { markOverdefined(&I); return true; case Instruction::Call: - case Instruction::Invoke: { + case Instruction::Invoke: // There are two reasons a call can have an undef result // 1. It could be tracked. // 2. It could be constant-foldable. @@ -1478,7 +1482,6 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // we do not know what return values are valid. markOverdefined(&I); return true; - } default: // If we don't know what should happen here, conservatively mark it // overdefined. @@ -1561,7 +1564,8 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { Constant *Const = nullptr; if (V->getType()->isStructTy()) { std::vector IVs = Solver.getStructLatticeValueFor(V); - if (any_of(IVs, [](const LatticeVal &LV) { return LV.isOverdefined(); })) + if (llvm::any_of(IVs, + [](const LatticeVal &LV) { return LV.isOverdefined(); })) return false; std::vector ConstVals; auto *ST = dyn_cast(V->getType()); @@ -1588,7 +1592,6 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) { // runSCCP() - Run the Sparse Conditional Constant Propagation algorithm, // and return true if the function was modified. -// static bool runSCCP(Function &F, const DataLayout &DL, const TargetLibraryInfo *TLI) { DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); @@ -1628,7 +1631,6 @@ static bool runSCCP(Function &F, const DataLayout &DL, // Iterate over all of the instructions in a function, replacing them with // constants if we have found them to be of constant values. - // for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) { Instruction *Inst = &*BI++; if (Inst->getType()->isVoidTy() || isa(Inst)) @@ -1659,6 +1661,7 @@ PreservedAnalyses SCCPPass::run(Function &F, FunctionAnalysisManager &AM) { } namespace { + //===--------------------------------------------------------------------===// // /// SCCP Class - This class uses the SCCPSolver to implement a per-function @@ -1666,18 +1669,20 @@ namespace { /// class SCCPLegacyPass : public FunctionPass { public: + // Pass identification, replacement for typeid + static char ID; + + SCCPLegacyPass() : FunctionPass(ID) { + initializeSCCPLegacyPassPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); } - static char ID; // Pass identification, replacement for typeid - SCCPLegacyPass() : FunctionPass(ID) { - initializeSCCPLegacyPassPass(*PassRegistry::getPassRegistry()); - } // runOnFunction - Run the Sparse Conditional Constant Propagation // algorithm, and return true if the function was modified. - // bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; @@ -1687,9 +1692,11 @@ class SCCPLegacyPass : public FunctionPass { return runSCCP(F, DL, TLI); } }; + } // end anonymous namespace char SCCPLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(SCCPLegacyPass, "sccp", "Sparse Conditional Constant Propagation", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -1699,38 +1706,11 @@ INITIALIZE_PASS_END(SCCPLegacyPass, "sccp", // createSCCPPass - This is the public interface to this file. FunctionPass *llvm::createSCCPPass() { return new SCCPLegacyPass(); } -static bool AddressIsTaken(const GlobalValue *GV) { - // Delete any dead constantexpr klingons. 
- GV->removeDeadConstantUsers(); - - for (const Use &U : GV->uses()) { - const User *UR = U.getUser(); - if (const auto *SI = dyn_cast(UR)) { - if (SI->getOperand(0) == GV || SI->isVolatile()) - return true; // Storing addr of GV. - } else if (isa(UR) || isa(UR)) { - // Make sure we are calling the function, not passing the address. - ImmutableCallSite CS(cast(UR)); - if (!CS.isCallee(&U)) - return true; - } else if (const auto *LI = dyn_cast(UR)) { - if (LI->isVolatile()) - return true; - } else if (isa(UR)) { - // blockaddress doesn't take the address of the function, it takes addr - // of label. - } else { - return true; - } - } - return false; -} - static void findReturnsToZap(Function &F, - SmallPtrSet &AddressTakenFunctions, - SmallVector &ReturnsToZap) { + SmallVector &ReturnsToZap, + SCCPSolver &Solver) { // We can only do this if we know that nothing else can call the function. - if (!F.hasLocalLinkage() || AddressTakenFunctions.count(&F)) + if (!Solver.isArgumentTrackedFunction(&F)) return; for (BasicBlock &BB : F) @@ -1743,39 +1723,22 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, const TargetLibraryInfo *TLI) { SCCPSolver Solver(DL, TLI); - // AddressTakenFunctions - This set keeps track of the address-taken functions - // that are in the input. As IPSCCP runs through and simplifies code, - // functions that were address taken can end up losing their - // address-taken-ness. Because of this, we keep track of their addresses from - // the first pass so we can use them for the later simplification pass. - SmallPtrSet AddressTakenFunctions; - // Loop over all functions, marking arguments to those with their addresses // taken or that are external as overdefined. - // for (Function &F : M) { if (F.isDeclaration()) continue; - // If this is an exact definition of this function, then we can propagate - // information about its result into callsites of it. - // Don't touch naked functions. They may contain asm returning a - // value we don't see, so we may end up interprocedurally propagating - // the return value incorrectly. - if (F.hasExactDefinition() && !F.hasFnAttribute(Attribute::Naked)) + // Determine if we can track the function's return values. If so, add the + // function to the solver's set of return-tracked functions. + if (canTrackReturnsInterprocedurally(&F)) Solver.AddTrackedFunction(&F); - // If this function only has direct calls that we can see, we can track its - // arguments and return value aggressively, and can assume it is not called - // unless we see evidence to the contrary. - if (F.hasLocalLinkage()) { - if (F.hasAddressTaken()) { - AddressTakenFunctions.insert(&F); - } - else { - Solver.AddArgumentTrackedFunction(&F); - continue; - } + // Determine if we can track the function's arguments. If so, add the + // function to the solver's set of argument-tracked functions. + if (canTrackArgumentsInterprocedurally(&F)) { + Solver.AddArgumentTrackedFunction(&F); + continue; } // Assume the function is called. @@ -1786,13 +1749,14 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, Solver.markOverdefined(&AI); } - // Loop over global variables. We inform the solver about any internal global - // variables that do not have their 'addresses taken'. If they don't have - // their addresses taken, we can propagate constants through them. - for (GlobalVariable &G : M.globals()) - if (!G.isConstant() && G.hasLocalLinkage() && - G.hasDefinitiveInitializer() && !AddressIsTaken(&G)) + // Determine if we can track any of the module's global variables. 
If so, add + // the global variables we can track to the solver's set of tracked global + // variables. + for (GlobalVariable &G : M.globals()) { + G.removeDeadConstantUsers(); + if (canTrackGlobalVariableInterprocedurally(&G)) Solver.TrackValueOfGlobalVariable(&G); + } // Solve for constants. bool ResolvedUndefs = true; @@ -1809,7 +1773,6 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, // Iterate over all of the instructions in the module, replacing them with // constants if we have found them to be of constant values. - // SmallVector BlocksToErase; for (Function &F : M) { @@ -1897,7 +1860,7 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, Function *F = I.first; if (I.second.isOverdefined() || F->getReturnType()->isVoidTy()) continue; - findReturnsToZap(*F, AddressTakenFunctions, ReturnsToZap); + findReturnsToZap(*F, ReturnsToZap, Solver); } for (const auto &F : Solver.getMRVFunctionsTracked()) { @@ -1905,7 +1868,7 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, "The return type should be a struct"); StructType *STy = cast(F->getReturnType()); if (Solver.isStructLatticeConstant(F, STy)) - findReturnsToZap(*F, AddressTakenFunctions, ReturnsToZap); + findReturnsToZap(*F, ReturnsToZap, Solver); } // Zap all returns which we've identified as zap to change. @@ -1943,6 +1906,7 @@ PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) { } namespace { + //===--------------------------------------------------------------------===// // /// IPSCCP Class - This class implements interprocedural Sparse Conditional @@ -1969,9 +1933,11 @@ class IPSCCPLegacyPass : public ModulePass { AU.addRequired(); } }; + } // end anonymous namespace char IPSCCPLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(IPSCCPLegacyPass, "ipsccp", "Interprocedural Sparse Conditional Constant Propagation", false, false) diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index d11855f2f3a93..34ed126155be7 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -1,4 +1,4 @@ -//===--- Scalarizer.cpp - Scalarize vector operations ---------------------===// +//===- Scalarizer.cpp - Scalarize vector operations -----------------------===// // // The LLVM Compiler Infrastructure // @@ -14,36 +14,59 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Options.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include +#include +#include +#include using namespace llvm; #define DEBUG_TYPE "scalarizer" namespace { + // Used to store the scattered form of a vector. -typedef SmallVector ValueVector; +using ValueVector = SmallVector; // Used to map a vector Value to its scattered form. 
We use std::map // because we want iterators to persist across insertion and because the // values are relatively large. -typedef std::map ScatterMap; +using ScatterMap = std::map; // Lists Instructions that have been replaced with scalar implementations, // along with a pointer to their scattered forms. -typedef SmallVector, 16> GatherList; +using GatherList = SmallVector, 16>; // Provides a very limited vector-like interface for lazily accessing one // component of a scattered vector or vector pointer. class Scatterer { public: - Scatterer() {} + Scatterer() = default; // Scatter V into Size components. If new instructions are needed, // insert them before BBI in BB. If Cache is nonnull, use it to cache @@ -71,10 +94,12 @@ class Scatterer { // called Name that compares X and Y in the same way as FCI. struct FCmpSplitter { FCmpSplitter(FCmpInst &fci) : FCI(fci) {} + Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, const Twine &Name) const { return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name); } + FCmpInst &FCI; }; @@ -82,10 +107,12 @@ struct FCmpSplitter { // called Name that compares X and Y in the same way as ICI. struct ICmpSplitter { ICmpSplitter(ICmpInst &ici) : ICI(ici) {} + Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, const Twine &Name) const { return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name); } + ICmpInst &ICI; }; @@ -93,16 +120,18 @@ struct ICmpSplitter { // a binary operator like BO called Name with operands X and Y. struct BinarySplitter { BinarySplitter(BinaryOperator &bo) : BO(bo) {} + Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, const Twine &Name) const { return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name); } + BinaryOperator &BO; }; // Information about a load or store that we're scalarizing. struct VectorLayout { - VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {} + VectorLayout() = default; // Return the alignment of element I. uint64_t getElemAlign(unsigned I) { @@ -110,16 +139,16 @@ struct VectorLayout { } // The type of the vector. - VectorType *VecTy; + VectorType *VecTy = nullptr; // The type of each element. - Type *ElemTy; + Type *ElemTy = nullptr; // The alignment of the vector. - uint64_t VecAlign; + uint64_t VecAlign = 0; // The size of each element. - uint64_t ElemSize; + uint64_t ElemSize = 0; }; class Scalarizer : public FunctionPass, @@ -127,8 +156,7 @@ class Scalarizer : public FunctionPass, public: static char ID; - Scalarizer() : - FunctionPass(ID) { + Scalarizer() : FunctionPass(ID) { initializeScalarizerPass(*PassRegistry::getPassRegistry()); } @@ -137,19 +165,19 @@ class Scalarizer : public FunctionPass, // InstVisitor methods. They return true if the instruction was scalarized, // false if nothing changed. 
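A note on the typedef-to-using conversions running through this hunk and several later ones: the two spellings declare exactly the same alias, but the using form reads left to right and, unlike typedef, extends to alias templates. A toy illustration (names invented for the example, not from the patch):

#include <map>
#include <type_traits>
#include <vector>

// Old spelling: the alias name is buried inside the declarator.
typedef std::map<int, std::vector<int>> AdjacencyListOld;

// New spelling: alias name first, aliased type second.
using AdjacencyList = std::map<int, std::vector<int>>;

// Alias templates are only expressible with 'using'.
template <typename T> using Matrix = std::vector<std::vector<T>>;

static_assert(std::is_same<AdjacencyListOld, AdjacencyList>::value,
              "both spellings name the same type");

int main() {
  Matrix<double> M(2, std::vector<double>(2, 0.0)); // 2x2 zero matrix
  AdjacencyList G;
  G[0] = {1, 2}; // node 0 has edges to nodes 1 and 2
  return M.empty() || G.empty() ? 1 : 0;
}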
- bool visitInstruction(Instruction &) { return false; } + bool visitInstruction(Instruction &I) { return false; } bool visitSelectInst(SelectInst &SI); - bool visitICmpInst(ICmpInst &); - bool visitFCmpInst(FCmpInst &); - bool visitBinaryOperator(BinaryOperator &); - bool visitGetElementPtrInst(GetElementPtrInst &); - bool visitCastInst(CastInst &); - bool visitBitCastInst(BitCastInst &); - bool visitShuffleVectorInst(ShuffleVectorInst &); - bool visitPHINode(PHINode &); - bool visitLoadInst(LoadInst &); - bool visitStoreInst(StoreInst &); - bool visitCallInst(CallInst &I); + bool visitICmpInst(ICmpInst &ICI); + bool visitFCmpInst(FCmpInst &FCI); + bool visitBinaryOperator(BinaryOperator &BO); + bool visitGetElementPtrInst(GetElementPtrInst &GEPI); + bool visitCastInst(CastInst &CI); + bool visitBitCastInst(BitCastInst &BCI); + bool visitShuffleVectorInst(ShuffleVectorInst &SVI); + bool visitPHINode(PHINode &PHI); + bool visitLoadInst(LoadInst &LI); + bool visitStoreInst(StoreInst &SI); + bool visitCallInst(CallInst &ICI); static void registerOptions() { // This is disabled by default because having separate loads and stores @@ -162,11 +190,12 @@ class Scalarizer : public FunctionPass, } private: - Scatterer scatter(Instruction *, Value *); - void gather(Instruction *, const ValueVector &); + Scatterer scatter(Instruction *Point, Value *V); + void gather(Instruction *Op, const ValueVector &CV); bool canTransferMetadata(unsigned Kind); - void transferMetadata(Instruction *, const ValueVector &); - bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &); + void transferMetadata(Instruction *Op, const ValueVector &CV); + bool getVectorLayout(Type *Ty, unsigned Alignment, VectorLayout &Layout, + const DataLayout &DL); bool finish(); template bool splitBinary(Instruction &, const T &); @@ -179,9 +208,10 @@ class Scalarizer : public FunctionPass, bool ScalarizeLoadStore; }; -char Scalarizer::ID = 0; } // end anonymous namespace +char Scalarizer::ID = 0; + INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer", "Scalarize vector operations", false, false) @@ -222,7 +252,7 @@ Value *Scatterer::operator[](unsigned I) { // Search through a chain of InsertElementInsts looking for element I. // Record other elements in the cache. The new V is still suitable // for all uncached indices. 
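For readers new to this pass: "scattering" a vector value materializes one scalar per lane, and "gathering" reassembles the lanes afterwards. The per-lane rewrite for a binary operator can be sketched with the public IRBuilder API roughly as follows. This is an illustration of the idea only, not the pass's code path; the real pass caches scattered components in ScatterMap and defers erasing dead instructions until finish():

#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Rewrite a vector binary operator as one scalar operator per lane.
static void scalarizeBinOp(BinaryOperator &BO) {
  auto *VT = dyn_cast<VectorType>(BO.getType());
  if (!VT)
    return; // already scalar; nothing to do
  IRBuilder<> Builder(&BO);
  Value *Result = UndefValue::get(VT);
  for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
    // Extract lane I of both operands, combine them, write the lane back.
    Value *L = Builder.CreateExtractElement(BO.getOperand(0), I);
    Value *R = Builder.CreateExtractElement(BO.getOperand(1), I);
    Value *Elem = Builder.CreateBinOp(BO.getOpcode(), L, R,
                                      Twine(BO.getName()) + ".i" + Twine(I));
    Result = Builder.CreateInsertElement(Result, Elem, I);
  }
  BO.replaceAllUsesWith(Result);
  BO.eraseFromParent();
}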
- for (;;) { + while (true) { InsertElementInst *Insert = dyn_cast<InsertElementInst>(V); if (!Insert) break; diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 84675f41cdd5e..4593f2351229e 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1,4 +1,4 @@ -//===-- SeparateConstOffsetFromGEP.cpp - ------------------------*- C++ -*-===// +//===- SeparateConstOffsetFromGEP.cpp -------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -156,27 +156,44 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" +#include <cassert> +#include <cstdint> +#include <string> using namespace llvm; using namespace llvm::PatternMatch; @@ -185,6 +202,7 @@ static cl::opt<bool> DisableSeparateConstOffsetFromGEP( "disable-separate-const-offset-from-gep", cl::init(false), cl::desc("Do not separate the constant offset from a GEP instruction"), cl::Hidden); + // Setting this flag may emit false positives when the input module already // contains dead instructions. Therefore, we set it only in unit tests that are // free of dead code. @@ -219,6 +237,7 @@ class ConstantOffsetExtractor { /// garbage-collect unused instructions in UserChain. static Value *Extract(Value *Idx, GetElementPtrInst *GEP, User *&UserChainTail, const DominatorTree *DT); + /// Looks for a constant offset from the given GEP index without extracting /// it. It returns the numeric value of the extracted constant offset (0 if /// failed). The meanings of the arguments are the same as in Extract. @@ -229,6 +248,7 @@ class ConstantOffsetExtractor { ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT) : IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) { } + /// Searches the expression that computes V for a non-zero constant C s.t. /// V can be reassociated into the form V' + C. If the search is /// successful, returns C and updates UserChain as a def-use chain from C to V; /// otherwise, UserChain is empty. /// /// \p V The given expression. /// \p SignExtended Whether V will be sign-extended in the computation of the /// GEP index. /// \p ZeroExtended Whether V will be zero-extended in the computation of the /// GEP index. /// \p NonNegative Whether V is guaranteed to be /// non-negative. Leveraging this, we can better split /// inbounds GEPs.
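The DisableSeparateConstOffsetFromGEP flag defined in the hunk above follows LLVM's standard cl::opt pattern for hidden developer switches. A minimal self-contained sketch of the same pattern (the flag name here is hypothetical):

#include "llvm/Support/CommandLine.h"

using namespace llvm;

// A hidden boolean flag, defaulting to false, in the style of
// -disable-separate-const-offset-from-gep.
static cl::opt<bool> DisableMyTransform(
    "disable-my-transform", cl::init(false),
    cl::desc("Do not run the hypothetical transform"), cl::Hidden);

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv, "cl::opt demo\n");
  // After parsing, the option reads as a plain bool.
  return DisableMyTransform ? 1 : 0;
}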
APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative); + /// A helper function to look into both operands of a binary operator. APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended, bool ZeroExtended); + /// After finding the constant offset C from the GEP index I, we build a new /// index I' s.t. I' + C = I. This function builds and returns the new /// index I' according to UserChain produced by function "find". @@ -263,6 +285,7 @@ class ConstantOffsetExtractor { /// (sext(a) + sext(b)) + 5. /// Given this form, we know I' is sext(a) + sext(b). Value *rebuildWithoutConstOffset(); + /// After the first step of rebuilding the GEP index without the constant /// offset, distribute s/zext to the operands of all operators in UserChain. /// e.g., zext(sext(a + (b + 5)) (assuming no overflow) => @@ -279,8 +302,10 @@ class ConstantOffsetExtractor { /// UserChain.size() - 1, and is decremented during /// the recursion. Value *distributeExtsAndCloneChain(unsigned ChainIndex); + /// Reassociates the GEP index to the form I' + C and returns I'. Value *removeConstOffset(unsigned ChainIndex); + /// A helper function to apply ExtInsts, a list of s/zext, to value V. /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function /// returns "sext i32 (zext i16 V to i32) to i64". @@ -303,10 +328,14 @@ class ConstantOffsetExtractor { /// /// This path helps to rebuild the new GEP index. SmallVector UserChain; + /// A data structure used in rebuildWithoutConstOffset. Contains all /// sext/zext instructions along UserChain. SmallVector ExtInsts; - Instruction *IP; /// Insertion position of cloned instructions. + + /// Insertion position of cloned instructions. + Instruction *IP; + const DataLayout &DL; const DominatorTree *DT; }; @@ -317,9 +346,10 @@ class ConstantOffsetExtractor { class SeparateConstOffsetFromGEP : public FunctionPass { public: static char ID; + SeparateConstOffsetFromGEP(const TargetMachine *TM = nullptr, bool LowerGEP = false) - : FunctionPass(ID), DL(nullptr), DT(nullptr), TM(TM), LowerGEP(LowerGEP) { + : FunctionPass(ID), TM(TM), LowerGEP(LowerGEP) { initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry()); } @@ -336,12 +366,14 @@ class SeparateConstOffsetFromGEP : public FunctionPass { DL = &M.getDataLayout(); return false; } + bool runOnFunction(Function &F) override; private: /// Tries to split the given GEP into a variadic base and a constant offset, /// and returns true if the splitting succeeds. bool splitGEP(GetElementPtrInst *GEP); + /// Lower a GEP with multiple indices into multiple GEPs with a single index. /// Function splitGEP already split the original GEP into a variadic part and /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the @@ -351,6 +383,7 @@ class SeparateConstOffsetFromGEP : public FunctionPass { /// \p AccumulativeByteOffset The constant offset. void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset); + /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form. /// Function splitGEP already split the original GEP into a variadic part and /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the @@ -360,12 +393,14 @@ class SeparateConstOffsetFromGEP : public FunctionPass { /// \p AccumulativeByteOffset The constant offset. void lowerToArithmetics(GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset); + /// Finds the constant offset within each index and accumulates them. 
If /// LowerGEP is true, it finds in indices of both sequential and structure /// types, otherwise it only finds in sequential indices. The output /// NeedsExtraction indicates whether we successfully find a non-zero constant /// offset. int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction); + /// Canonicalize array indices to pointer-size integers. This helps to /// simplify the logic of splitting a GEP. For example, if a + b is a /// pointer-size integer, we have @@ -382,6 +417,7 @@ class SeparateConstOffsetFromGEP : public FunctionPass { /// /// Verified in @i32_add in split-gep.ll bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP); + /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow. /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting /// the constant offset. After extraction, it becomes desirable to reunion the @@ -392,8 +428,10 @@ class SeparateConstOffsetFromGEP : public FunctionPass { /// => constant extraction &a[sext(i) + sext(j)] + 5 /// => reunion &a[sext(i +nsw j)] + 5 bool reuniteExts(Function &F); + /// A helper that reunites sexts in an instruction. bool reuniteExts(Instruction *I); + /// Find the closest dominator of that is equivalent to . Instruction *findClosestMatchingDominator(const SCEV *Key, Instruction *Dominatee); @@ -401,27 +439,33 @@ class SeparateConstOffsetFromGEP : public FunctionPass { void verifyNoDeadCode(Function &F); bool hasMoreThanOneUseInLoop(Value *v, Loop *L); + // Swap the index operand of two GEP. void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second); + // Check if it is safe to swap operand of two GEP. bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second, Loop *CurLoop); - const DataLayout *DL; - DominatorTree *DT; + const DataLayout *DL = nullptr; + DominatorTree *DT = nullptr; ScalarEvolution *SE; const TargetMachine *TM; LoopInfo *LI; TargetLibraryInfo *TLI; + /// Whether to lower a GEP with multiple indices into arithmetic operations or /// multiple GEPs with a single index. bool LowerGEP; + DenseMap> DominatingExprs; }; -} // anonymous namespace + +} // end anonymous namespace char SeparateConstOffsetFromGEP::ID = 0; + INITIALIZE_PASS_BEGIN( SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 3ef119ec05de3..6f38e5d11b58b 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -129,7 +129,6 @@ static bool mergeEmptyReturnBlocks(Function &F) { /// Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, - AssumptionCache *AC, const SimplifyCFGOptions &Options) { bool Changed = false; bool LocalChange = true; @@ -145,7 +144,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, // Loop over all of the basic blocks and remove them if they are unneeded. 
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(&*BBIt++, TTI, AC, Options, &LoopHeaders)) { + if (simplifyCFG(&*BBIt++, TTI, Options, &LoopHeaders)) { LocalChange = true; ++NumSimpl; } @@ -156,11 +155,10 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, } static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, - AssumptionCache *AC, const SimplifyCFGOptions &Options) { bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); - EverChanged |= iterativelySimplifyCFG(F, TTI, AC, Options); + EverChanged |= iterativelySimplifyCFG(F, TTI, Options); // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -174,15 +172,17 @@ static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, return true; do { - EverChanged = iterativelySimplifyCFG(F, TTI, AC, Options); + EverChanged = iterativelySimplifyCFG(F, TTI, Options); EverChanged |= removeUnreachableBlocks(F); } while (EverChanged); return true; } +// FIXME: The new pass manager always creates a "late" simplifycfg pass using +// this default constructor. SimplifyCFGPass::SimplifyCFGPass() - : Options(UserBonusInstThreshold, true, false) {} + : Options(UserBonusInstThreshold, true, true, false) {} SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &PassOptions) : Options(PassOptions) {} @@ -190,9 +190,8 @@ SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &PassOptions) PreservedAnalyses SimplifyCFGPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TTI = AM.getResult(F); - auto &AC = AM.getResult(F); - - if (!simplifyFunctionCFG(F, TTI, &AC, Options)) + Options.AC = &AM.getResult(F); + if (!simplifyFunctionCFG(F, TTI, Options)) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); @@ -203,12 +202,15 @@ namespace { struct BaseCFGSimplifyPass : public FunctionPass { std::function PredicateFtor; int BonusInstThreshold; + bool ForwardSwitchCondToPhi; bool ConvertSwitchToLookupTable; bool KeepCanonicalLoops; - BaseCFGSimplifyPass(int T, bool ConvertSwitch, bool KeepLoops, + BaseCFGSimplifyPass(int T, bool ForwardSwitchCond, bool ConvertSwitch, + bool KeepLoops, std::function Ftor, char &ID) : FunctionPass(ID), PredicateFtor(std::move(Ftor)), + ForwardSwitchCondToPhi(ForwardSwitchCond), ConvertSwitchToLookupTable(ConvertSwitch), KeepCanonicalLoops(KeepLoops) { BonusInstThreshold = (T == -1) ? 
UserBonusInstThreshold : T; @@ -221,9 +223,10 @@ struct BaseCFGSimplifyPass : public FunctionPass { &getAnalysis().getAssumptionCache(F); const TargetTransformInfo &TTI = getAnalysis().getTTI(F); - return simplifyFunctionCFG( - F, TTI, AC, - {BonusInstThreshold, ConvertSwitchToLookupTable, KeepCanonicalLoops}); + return simplifyFunctionCFG(F, TTI, + {BonusInstThreshold, ForwardSwitchCondToPhi, + ConvertSwitchToLookupTable, KeepCanonicalLoops, + AC}); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -238,7 +241,7 @@ struct CFGSimplifyPass : public BaseCFGSimplifyPass { CFGSimplifyPass(int T = -1, std::function Ftor = nullptr) - : BaseCFGSimplifyPass(T, false, true, Ftor, ID) { + : BaseCFGSimplifyPass(T, false, false, true, Ftor, ID) { initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); } }; @@ -248,7 +251,7 @@ struct LateCFGSimplifyPass : public BaseCFGSimplifyPass { LateCFGSimplifyPass(int T = -1, std::function Ftor = nullptr) - : BaseCFGSimplifyPass(T, true, false, Ftor, ID) { + : BaseCFGSimplifyPass(T, true, true, false, Ftor, ID) { initializeLateCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); } }; diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 0cccb415efdb1..2972e1cff9a47 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -1,4 +1,4 @@ -//===-- StructurizeCFG.cpp ------------------------------------------------===// +//===- StructurizeCFG.cpp -------------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,49 +7,72 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPass.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include +#include +#include using namespace llvm; using namespace llvm::PatternMatch; #define DEBUG_TYPE "structurizecfg" +// The name for newly created blocks. +static const char *const FlowBlockName = "Flow"; + namespace { // Definition of the complex types used in this pass. 
-typedef std::pair BBValuePair; +using BBValuePair = std::pair; -typedef SmallVector RNVector; -typedef SmallVector BBVector; -typedef SmallVector BranchVector; -typedef SmallVector BBValueVector; +using RNVector = SmallVector; +using BBVector = SmallVector; +using BranchVector = SmallVector; +using BBValueVector = SmallVector; -typedef SmallPtrSet BBSet; +using BBSet = SmallPtrSet; -typedef MapVector PhiMap; -typedef MapVector BB2BBVecMap; +using PhiMap = MapVector; +using BB2BBVecMap = MapVector; -typedef DenseMap BBPhiMap; -typedef DenseMap BBPredicates; -typedef DenseMap PredMap; -typedef DenseMap BB2BBMap; - -// The name for newly created blocks. -static const char *const FlowBlockName = "Flow"; +using BBPhiMap = DenseMap; +using BBPredicates = DenseMap; +using PredMap = DenseMap; +using BB2BBMap = DenseMap; /// Finds the nearest common dominator of a set of BasicBlocks. /// @@ -736,7 +759,6 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed, changeExit(PrevNode, Node->getEntry(), true); } PrevNode = Node; - } else { // Insert extra prefix node (or reuse last one) BasicBlock *Flow = needPrefix(false); diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 8a57e71509fde..2376867555578 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -60,7 +60,7 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" @@ -255,8 +255,10 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls, } if (SafeToTail) { using namespace ore; - ORE->emit(OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI) - << "marked as tail call candidate (readnone)"); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI) + << "marked as tail call candidate (readnone)"; + }); CI->setTailCall(); Modified = true; continue; @@ -301,8 +303,10 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls, if (Visited[CI->getParent()] != ESCAPED) { // If the escape point was part way through the block, calls after the // escape point wouldn't have been put into DeferredTails. - ORE->emit(OptimizationRemark(DEBUG_TYPE, "tailcall", CI) - << "marked as tail call candidate"); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "tailcall", CI) + << "marked as tail call candidate"; + }); CI->setTailCall(); Modified = true; } else { @@ -554,8 +558,10 @@ static bool eliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, Function *F = BB->getParent(); using namespace ore; - ORE->emit(OptimizationRemark(DEBUG_TYPE, "tailcall-recursion", CI) - << "transforming tail recursion into loop"); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "tailcall-recursion", CI) + << "transforming tail recursion into loop"; + }); // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. 
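The ORE->emit changes in the TailRecursionElimination hunks above replace an eagerly constructed remark with a callable that builds the remark only when emission is actually enabled. The same lazy-callback pattern, reduced to plain C++ (RemarkSink is invented for the illustration; it is not the ORE API):

#include <iostream>
#include <string>

// A sink that only pays for message construction when enabled,
// mirroring the lazy ORE->emit([&]() { ... }) idiom.
class RemarkSink {
  bool Enabled;

public:
  explicit RemarkSink(bool Enabled) : Enabled(Enabled) {}

  template <typename MakerT> void emit(MakerT Maker) {
    if (Enabled)
      std::cout << Maker() << '\n'; // Maker() runs only when needed
  }
};

int main() {
  RemarkSink Sink(/*Enabled=*/false);
  Sink.emit([&] {
    // Potentially expensive formatting; skipped entirely when disabled.
    return std::string("marked as tail call candidate");
  });
  return 0;
}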
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp index 4c9746b8c691e..0f0668f24db56 100644 --- a/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/lib/Transforms/Utils/AddDiscriminators.cpp @@ -50,31 +50,45 @@ // // For more details about DWARF discriminators, please visit // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/AddDiscriminators.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringRef.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include using namespace llvm; #define DEBUG_TYPE "add-discriminators" +// Command line option to disable discriminator generation even in the +// presence of debug information. This is only needed when debugging +// debug info generation issues. +static cl::opt NoDiscriminators( + "no-discriminators", cl::init(false), + cl::desc("Disable generation of discriminator information.")); + namespace { + // The legacy pass of AddDiscriminators. struct AddDiscriminatorsLegacyPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid + AddDiscriminatorsLegacyPass() : FunctionPass(ID) { initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -85,18 +99,12 @@ struct AddDiscriminatorsLegacyPass : public FunctionPass { } // end anonymous namespace char AddDiscriminatorsLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators", "Add DWARF path discriminators", false, false) INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators", "Add DWARF path discriminators", false, false) -// Command line option to disable discriminator generation even in the -// presence of debug information. This is only needed when debugging -// debug info generation issues. -static cl::opt NoDiscriminators( - "no-discriminators", cl::init(false), - cl::desc("Disable generation of discriminator information.")); - // Create the legacy AddDiscriminatorsPass. 
FunctionPass *llvm::createAddDiscriminatorsPass() { return new AddDiscriminatorsLegacyPass(); @@ -166,11 +174,11 @@ static bool addDiscriminators(Function &F) { bool Changed = false; - typedef std::pair Location; - typedef DenseSet BBSet; - typedef DenseMap LocationBBMap; - typedef DenseMap LocationDiscriminatorMap; - typedef DenseSet LocationSet; + using Location = std::pair; + using BBSet = DenseSet; + using LocationBBMap = DenseMap; + using LocationDiscriminatorMap = DenseMap; + using LocationSet = DenseSet; LocationBBMap LBM; LocationDiscriminatorMap LDM; @@ -242,6 +250,7 @@ static bool addDiscriminators(Function &F) { bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) { return addDiscriminators(F); } + PreservedAnalyses AddDiscriminatorsPass::run(Function &F, FunctionAnalysisManager &AM) { if (!addDiscriminators(F)) diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 4aed897d64138..e9c14c93a9ad7 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -1,4 +1,4 @@ -//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===// +//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===// // // The LLVM Compiler Infrastructure // @@ -17,19 +17,33 @@ #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/Local.h" +#include +#include using namespace llvm; #define DEBUG_TYPE "bypass-slow-division" namespace { + struct QuotRemPair { Value *Quotient; Value *Remainder; @@ -46,15 +60,11 @@ namespace { Value *Quotient = nullptr; Value *Remainder = nullptr; }; -} -namespace llvm { - typedef DenseMap DivCacheTy; - typedef DenseMap BypassWidthsTy; - typedef SmallPtrSet VisitedSetTy; -} +using DivCacheTy = DenseMap; +using BypassWidthsTy = DenseMap; +using VisitedSetTy = SmallPtrSet; -namespace { enum ValueRange { /// Operand definitely fits into BypassType. No runtime checks are needed. VALRNG_KNOWN_SHORT, @@ -84,17 +94,21 @@ class FastDivInsertionTask { return SlowDivOrRem->getOpcode() == Instruction::SDiv || SlowDivOrRem->getOpcode() == Instruction::SRem; } + bool isDivisionOp() { return SlowDivOrRem->getOpcode() == Instruction::SDiv || SlowDivOrRem->getOpcode() == Instruction::UDiv; } + Type *getSlowType() { return SlowDivOrRem->getType(); } public: FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths); + Value *getReplacement(DivCacheTy &Cache); }; -} // anonymous namespace + +} // end anonymous namespace FastDivInsertionTask::FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths) { @@ -193,7 +207,7 @@ bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) { C = dyn_cast(cast(Op1)->getOperand(0)); return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth(); } - case Instruction::PHI: { + case Instruction::PHI: // Stop IR traversal in case of a crazy input code. 
This limits recursion // depth. if (Visited.size() >= 16) @@ -209,7 +223,6 @@ bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) { return getValueRange(V, Visited) == VALRNG_LIKELY_LONG || isa(V); }); - } default: return false; } @@ -339,6 +352,11 @@ Optional FastDivInsertionTask::insertFastDivAndRem() { Value *Dividend = SlowDivOrRem->getOperand(0); Value *Divisor = SlowDivOrRem->getOperand(1); + if (isa(Divisor)) { + // Keep division by a constant for DAGCombiner. + return None; + } + VisitedSetTy SetL; ValueRange DividendRange = getValueRange(Dividend, SetL); if (DividendRange == VALRNG_LIKELY_LONG) @@ -354,9 +372,7 @@ Optional FastDivInsertionTask::insertFastDivAndRem() { if (DividendShort && DivisorShort) { // If both operands are known to be short then just replace the long - // division with a short one in-place. Since we're not introducing control - // flow in this case, narrowing the division is always a win, even if the - // divisor is a constant (and will later get replaced by a multiplication). + // division with a short one in-place. IRBuilder<> Builder(SlowDivOrRem); Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); @@ -366,16 +382,7 @@ Optional FastDivInsertionTask::insertFastDivAndRem() { Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); return QuotRemPair(ExtDiv, ExtRem); - } - - if (isa(Divisor)) { - // If the divisor is not a constant, DAGCombiner will convert it to a - // multiplication by a magic constant. It isn't clear if it is worth - // introducing control flow to get a narrower multiply. - return None; - } - - if (DividendShort && !isSignedOp()) { + } else if (DividendShort && !isSignedOp()) { // If the division is unsigned and Dividend is known to be short, then // either // 1) Divisor is less or equal to Dividend, and the result can be computed diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 1189714dfab10..0bcf58bd490c8 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -14,34 +14,57 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/CodeExtractor.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/RegionIterator.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" #include 
"llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include +#include +#include +#include +#include #include +#include +#include + using namespace llvm; #define DEBUG_TYPE "code-extractor" @@ -109,7 +132,6 @@ buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT) { // Loop over the blocks, adding them to our set-vector, and aborting with an // empty set if we encounter invalid blocks. for (BasicBlock *BB : BBs) { - // If this block is dead, don't process it. if (DT && !DT->isReachableFromEntry(BB)) continue; @@ -140,14 +162,13 @@ CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {} + BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)), - NumExitBlocks(~0U) {} + BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)) {} /// definedInRegion - Return true if the specified value is defined in the /// extracted region. @@ -202,7 +223,6 @@ bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( if (Blocks.count(&BB)) continue; for (Instruction &II : BB) { - if (isa(II)) continue; @@ -373,7 +393,6 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, // Follow the bitcast. Instruction *MarkerAddr = nullptr; for (User *U : AI->users()) { - if (U->stripInBoundsConstantOffsets() == AI) { SinkLifeStart = false; HoistLifeEnd = false; @@ -407,7 +426,6 @@ void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, void CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &SinkCands) const { - for (BasicBlock *BB : Blocks) { // If a used value is defined outside the region, it's an input. If an // instruction is used outside the region, it's an output. @@ -457,7 +475,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. - BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT); + BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT); // We only want to code extract the second block now, and it becomes the new // header of the region. 
@@ -525,7 +543,6 @@ void CodeExtractor::splitReturnBlocks() { /// constructFunction - make a function based on inputs and outputs, as follows: /// f(in0, ..., inN, out0, ..., outN) -/// Function *CodeExtractor::constructFunction(const ValueSet &inputs, const ValueSet &outputs, BasicBlock *header, @@ -544,7 +561,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, default: RetTy = Type::getInt16Ty(header->getContext()); break; } - std::vector<Type*> paramTy; + std::vector<Type *> paramTy; // Add the types of the input values to the function's argument list for (Value *value : inputs) { @@ -620,7 +637,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, } else RewriteVal = &*AI++; - std::vector<User*> Users(inputs[i]->user_begin(), inputs[i]->user_end()); + std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end()); for (User *use : Users) if (Instruction *inst = dyn_cast<Instruction>(use)) if (Blocks.count(inst->getParent())) @@ -639,7 +656,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, // Rewrite branches to basic blocks outside of the loop to new dummy blocks // within the new function. This must be done before we lose track of which // blocks were originally in the code region. - std::vector<User*> Users(header->user_begin(), header->user_end()); + std::vector<User *> Users(header->user_begin(), header->user_end()); for (unsigned i = 0, e = Users.size(); i != e; ++i) // The BasicBlock which contains the branch is not in the region; // modify the branch target to a new block @@ -651,19 +668,6 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, return newFunction; } -/// FindPhiPredForUseInBlock - Given a value and a basic block, find a PHI -/// that uses the value within the basic block, and return the predecessor -/// block associated with that use, or return 0 if none is found. -static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { - for (Use &U : Used->uses()) { - PHINode *P = dyn_cast<PHINode>(U.getUser()); - if (P && P->getParent() == BB) - return P->getIncomingBlock(U); - } - - return nullptr; -} - /// emitCallAndSwitchStatement - This method sets up the caller side by adding /// the call instruction, splitting any PHI nodes in the header block as /// necessary. @@ -672,7 +676,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, ValueSet &inputs, ValueSet &outputs) { // Emit a call to the new function, passing in: *pointer to struct (if // aggregating parameters), or plain inputs and allocated memory for outputs - std::vector<Value*> params, StructValues, ReloadOutputs, Reloads; + std::vector<Value *> params, StructValues, ReloadOutputs, Reloads; Module *M = newFunction->getParent(); LLVMContext &Context = M->getContext(); @@ -702,7 +706,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, StructType *StructArgTy = nullptr; AllocaInst *Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { - std::vector<Type*> ArgTypes; + std::vector<Type *> ArgTypes; for (ValueSet::iterator v = StructValues.begin(), ve = StructValues.end(); v != ve; ++v) ArgTypes.push_back((*v)->getType()); @@ -736,7 +740,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, if (!AggregateArgs) std::advance(OutputArgBegin, inputs.size()); - // Reload the outputs passed in by reference + // Reload the outputs passed in by reference.
+ Function::arg_iterator OAI = OutputArgBegin; for (unsigned i = 0, e = outputs.size(); i != e; ++i) { Value *Output = nullptr; if (AggregateArgs) { @@ -753,12 +758,40 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); Reloads.push_back(load); codeReplacer->getInstList().push_back(load); - std::vector<User*> Users(outputs[i]->user_begin(), outputs[i]->user_end()); + std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end()); for (unsigned u = 0, e = Users.size(); u != e; ++u) { Instruction *inst = cast<Instruction>(Users[u]); if (!Blocks.count(inst->getParent())) inst->replaceUsesOfWith(outputs[i], load); } + + // Store to argument right after the definition of output value. + auto *OutI = dyn_cast<Instruction>(outputs[i]); + if (!OutI) + continue; + // Find proper insertion point. + Instruction *InsertPt = OutI->getNextNode(); + // Assume that no other non-PHI instructions are interleaved among the PHIs. + if (isa<PHINode>(InsertPt)) + InsertPt = InsertPt->getParent()->getFirstNonPHI(); + + assert(OAI != newFunction->arg_end() && "Number of output arguments should match " "the amount of defined values"); + if (AggregateArgs) { + Value *Idx[2]; + Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); + Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); + GetElementPtrInst *GEP = GetElementPtrInst::Create( StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertPt); + new StoreInst(outputs[i], GEP, InsertPt); + // Since there should be only one struct argument aggregating + // all the output values, we shouldn't increment OAI, which always + // points to the struct argument, in this case. + } else { + new StoreInst(outputs[i], &*OAI, InsertPt); + ++OAI; + } } // Now we can emit a switch statement using the call as a value. @@ -771,7 +804,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // over all of the blocks in the extracted region, updating any terminator // instructions in the to-be-extracted region that branch to blocks that are // not in the region to be extracted. - std::map<BasicBlock*, BasicBlock*> ExitBlockMap; + std::map<BasicBlock *, BasicBlock *> ExitBlockMap; unsigned switchVal = 0; for (BasicBlock *Block : Blocks) { @@ -801,75 +834,12 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, break; } - ReturnInst *NTRet = ReturnInst::Create(Context, brVal, NewTarget); + ReturnInst::Create(Context, brVal, NewTarget); // Update the switch instruction. TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), SuccNum), OldTarget); - - // Restore values just before we exit - Function::arg_iterator OAI = OutputArgBegin; - for (unsigned out = 0, e = outputs.size(); out != e; ++out) { - // For an invoke, the normal destination is the only one that is - // dominated by the result of the invocation - BasicBlock *DefBlock = cast<Instruction>(outputs[out])->getParent(); - - bool DominatesDef = true; - - BasicBlock *NormalDest = nullptr; - if (auto *Invoke = dyn_cast<InvokeInst>(outputs[out])) - NormalDest = Invoke->getNormalDest(); - - if (NormalDest) { - DefBlock = NormalDest; - - // Make sure we are looking at the original successor block, not - // at a newly inserted exit block, which won't be in the dominator - // info. - for (const auto &I : ExitBlockMap) - if (DefBlock == I.second) { - DefBlock = I.first; - break; - } - - // In the extract block case, if the block we are extracting ends - // with an invoke instruction, make sure that we don't emit a - // store of the invoke value for the unwind block.
- if (!DT && DefBlock != OldTarget) - DominatesDef = false; - } - - if (DT) { - DominatesDef = DT->dominates(DefBlock, OldTarget); - - // If the output value is used by a phi in the target block, - // then we need to test for dominance of the phi's predecessor - // instead. Unfortunately, this a little complicated since we - // have already rewritten uses of the value to uses of the reload. - BasicBlock* pred = FindPhiPredForUseInBlock(Reloads[out], - OldTarget); - if (pred && DT && DT->dominates(DefBlock, pred)) - DominatesDef = true; - } - - if (DominatesDef) { - if (AggregateArgs) { - Value *Idx[2]; - Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); - Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), - FirstOut+out); - GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(), - NTRet); - new StoreInst(outputs[out], GEP, NTRet); - } else { - new StoreInst(outputs[out], &*OAI, NTRet); - } - } - // Advance output iterator even if we don't emit a store - if (!AggregateArgs) ++OAI; - } } // rewrite the original branch instruction with this new target @@ -940,8 +910,8 @@ void CodeExtractor::calculateNewCallTerminatorWeights( BasicBlock *CodeReplacer, DenseMap &ExitWeights, BranchProbabilityInfo *BPI) { - typedef BlockFrequencyInfoImplBase::Distribution Distribution; - typedef BlockFrequencyInfoImplBase::BlockNode BlockNode; + using Distribution = BlockFrequencyInfoImplBase::Distribution; + using BlockNode = BlockFrequencyInfoImplBase::BlockNode; // Update the branch weights for the exit block. TerminatorInst *TI = CodeReplacer->getTerminator(); @@ -1044,7 +1014,7 @@ Function *CodeExtractor::extractCodeRegion() { } // Calculate the exit blocks for the extracted region and the total exit - // weights for each of those blocks. + // weights for each of those blocks. DenseMap ExitWeights; SmallPtrSet ExitBlocks; for (BasicBlock *Block : Blocks) { @@ -1097,8 +1067,8 @@ Function *CodeExtractor::extractCodeRegion() { // Look at all successors of the codeReplacer block. If any of these blocks // had PHI nodes in them, we need to update the "from" block to be the code // replacer, not the original block in the extracted region. 
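Stepping back from these CodeExtractor hunks: the extracted function's parameters come from findInputsOutputs, which classifies the values that cross the region boundary. Inputs are values defined outside the chosen blocks but used inside; outputs are defined inside and used outside. The heart of that classification amounts to roughly the following shape (a simplified sketch; the real method also honors the sink candidates computed by findAllocas):

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

using ValueSet = SetVector<Value *>;
using BlockSet = SmallPtrSet<BasicBlock *, 8>;

// Values defined in the caller (arguments, or instructions outside the
// region) that are used inside become inputs; instructions defined in
// the region with a user outside of it become outputs.
static void findRegionInputsOutputs(const BlockSet &Blocks, ValueSet &Inputs,
                                    ValueSet &Outputs) {
  auto DefinedInCaller = [&](Value *V) {
    if (isa<Argument>(V))
      return true;
    if (auto *I = dyn_cast<Instruction>(V))
      return Blocks.count(I->getParent()) == 0;
    return false; // constants and globals need no plumbing
  };

  for (BasicBlock *BB : Blocks)
    for (Instruction &I : *BB) {
      for (Value *Op : I.operands())
        if (DefinedInCaller(Op))
          Inputs.insert(Op);
      for (User *U : I.users())
        if (auto *UI = dyn_cast<Instruction>(U))
          if (!Blocks.count(UI->getParent())) {
            Outputs.insert(&I);
            break;
          }
    }
}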
- std::vector Succs(succ_begin(codeReplacer), - succ_end(codeReplacer)); + std::vector Succs(succ_begin(codeReplacer), + succ_end(codeReplacer)); for (unsigned i = 0, e = Succs.size(); i != e; ++i) for (BasicBlock::iterator I = Succs[i]->begin(); isa(I); ++I) { PHINode *PN = cast(I); diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index 1328f2f3ec012..a65c3bac5e549 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -12,19 +12,33 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Evaluator.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include #define DEBUG_TYPE "evaluator" @@ -193,7 +207,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB) { // This is the main evaluation loop. - while (1) { + while (true) { Constant *InstResult = nullptr; DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); @@ -318,7 +332,6 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n"); } else if (LoadInst *LI = dyn_cast(CurInst)) { - if (!LI->isSimple()) { DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); return false; // no volatile/atomic accesses. @@ -344,9 +357,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, return false; // Cannot handle array allocs. } Type *Ty = AI->getAllocatedType(); - AllocaTmps.push_back( - make_unique(Ty, false, GlobalValue::InternalLinkage, - UndefValue::get(Ty), AI->getName())); + AllocaTmps.push_back(llvm::make_unique( + Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), + AI->getName())); InstResult = AllocaTmps.back().get(); DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); } else if (isa(CurInst) || isa(CurInst)) { @@ -559,7 +572,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, BasicBlock::iterator CurInst = CurBB->begin(); - while (1) { + while (true) { BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. 
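Evaluator::EvaluateFunction, whose tail is visible here, is a classic interpreter driver: evaluate one block, receive either a successor block or a return, and bail out if a block ever repeats, since a looping initializer cannot be folded in reasonable time. The control flow in skeleton form (a generic sketch; EvalBlock stands in for EvaluateBlock):

#include <functional>
#include <set>

// One step evaluates a block and reports the successor (or null on
// return). The driver loops until the function returns or until a
// block would execute twice, mirroring EvaluateFunction's NextBB logic.
template <typename Block>
bool runToReturn(Block *Entry,
                 const std::function<bool(Block *, Block **)> &EvalBlock) {
  std::set<Block *> Executed;
  Block *Cur = Entry;
  while (true) {
    Block *Next = nullptr;
    if (!EvalBlock(Cur, &Next))
      return false; // hit something we cannot evaluate
    if (!Next)
      return true;  // the function returned
    if (!Executed.insert(Next).second)
      return false; // loop detected; refuse to evaluate
    Cur = Next;
  }
}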
DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); @@ -594,4 +607,3 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, CurBB = NextBB; } } - diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 435eff3bef47e..5fdcc6d1d7279 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -1,4 +1,4 @@ -//===- FlatternCFG.cpp - Code to perform CFG flattening ---------------===// +//===- FlatternCFG.cpp - Code to perform CFG flattening -------------------===// // // The LLVM Compiler Infrastructure // @@ -14,25 +14,37 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include + using namespace llvm; #define DEBUG_TYPE "flattencfg" namespace { + class FlattenCFGOpt { AliasAnalysis *AA; + /// \brief Use parallel-and or parallel-or to generate conditions for /// conditional branches. bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder); + /// \brief If \param BB is the merge block of an if-region, attempt to merge /// the if-region with an adjacent if-region upstream if two if-regions /// contain identical instructions. bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder); + /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which /// are from two if-regions whose entry blocks are \p Head1 and \p /// Head2. \returns true if \p Block1 and \p Block2 contain identical @@ -43,9 +55,11 @@ class FlattenCFGOpt { public: FlattenCFGOpt(AliasAnalysis *AA) : AA(AA) {} + bool run(BasicBlock *BB); }; -} + +} // end anonymous namespace /// If \param [in] BB has more than one predecessor that is a conditional /// branch, attempt to use parallel and/or for the branch condition. \returns @@ -120,7 +134,6 @@ class FlattenCFGOpt { /// In Case 1, \param BB (BB4) has an unconditional branch (BB3) as /// its predecessor. In Case 2, \param BB (BB3) only has conditional branches /// as its predecessors. -/// bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { PHINode *PHI = dyn_cast(BB->begin()); if (PHI) @@ -237,8 +250,8 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { // Do branch inversion. BasicBlock *CurrBlock = LastCondBlock; bool EverChanged = false; - for (;CurrBlock != FirstCondBlock; - CurrBlock = CurrBlock->getSinglePredecessor()) { + for (; CurrBlock != FirstCondBlock; + CurrBlock = CurrBlock->getSinglePredecessor()) { BranchInst *BI = dyn_cast(CurrBlock->getTerminator()); CmpInst *CI = dyn_cast(BI->getCondition()); if (!CI) @@ -309,7 +322,6 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { // in the 2nd if-region to compare. \returns true if \param Block1 and \param /// Block2 have identical instructions and do not have memory reference alias /// with \param Head2. 
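At the source level, the parallel-and/parallel-or rewrite that FlattenParallelAndOr performs corresponds to collapsing a chain of short-circuit branches into a single branch on a combined condition, which is legal only when the later conditions are safe to evaluate unconditionally. In C-level terms (intuition only; the pass operates on IR, not on source):

// Before: two conditional branches, two basic blocks.
int before(int a, int b) {
  if (a == 0)
    if (b == 0)
      return 1;
  return 0;
}

// After flattening: one branch on a combined condition. The bitwise
// '&' evaluates both comparisons unconditionally, which is only sound
// here because neither comparison has side effects or can trap.
int after(int a, int b) {
  if ((a == 0) & (b == 0))
    return 1;
  return 0;
}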
-/// bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, BasicBlock *Block1, BasicBlock *Block2) { @@ -330,7 +342,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, BasicBlock::iterator iter2 = Block2->begin(); BasicBlock::iterator end2 = Block2->getTerminator()->getIterator(); - while (1) { + while (true) { if (iter1 == end1) { if (iter2 != end2) return false; @@ -384,7 +396,6 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, /// To: /// if (a || b) /// statement; -/// bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { BasicBlock *IfTrue2, *IfFalse2; Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); @@ -475,8 +486,7 @@ bool FlattenCFGOpt::run(BasicBlock *BB) { /// FlattenCFG - This function is used to flatten a CFG. For /// example, it uses parallel-and and parallel-or mode to collapse -// if-conditions and merge if-regions with identical statements. -/// +/// if-conditions and merge if-regions with identical statements. bool llvm::FlattenCFG(BasicBlock *BB, AliasAnalysis *AA) { return FlattenCFGOpt(AA).run(BB); } diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp index 4a2be3a531767..bddcbd86e914d 100644 --- a/lib/Transforms/Utils/FunctionComparator.cpp +++ b/lib/Transforms/Utils/FunctionComparator.cpp @@ -13,13 +13,41 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/FunctionComparator.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include using namespace llvm; @@ -160,7 +188,6 @@ int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L, /// For more details see declaration comments. 
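All of FunctionComparator's cmp* helpers, including cmpConstants below, follow a memcmp-style contract: negative, zero, or positive for less-than, equal, and greater-than, so field-wise results can be chained with early returns. The convention in miniature (a standalone sketch, not the class's actual members):

#include <cassert>

// memcmp-style three-way comparison: -1, 0, or 1. Results of field-wise
// comparisons can be chained, as cmpConstants does for constant kinds.
template <typename T> static int cmpNumbers(T L, T R) {
  if (L < R)
    return -1;
  if (L > R)
    return 1;
  return 0;
}

struct Point {
  int X, Y;
};

static int cmpPoints(const Point &L, const Point &R) {
  if (int Res = cmpNumbers(L.X, R.X)) // first differing field decides
    return Res;
  return cmpNumbers(L.Y, R.Y);
}

int main() {
  assert(cmpPoints({1, 2}, {1, 3}) < 0);
  assert(cmpPoints({2, 0}, {1, 9}) > 0);
  assert(cmpPoints({1, 2}, {1, 2}) == 0);
  return 0;
}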
int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) const { - Type *TyL = L->getType(); Type *TyR = R->getType(); @@ -226,8 +253,8 @@ int FunctionComparator::cmpConstants(const Constant *L, if (!L->isNullValue() && R->isNullValue()) return -1; - auto GlobalValueL = const_cast(dyn_cast(L)); - auto GlobalValueR = const_cast(dyn_cast(R)); + auto GlobalValueL = const_cast(dyn_cast(L)); + auto GlobalValueR = const_cast(dyn_cast(R)); if (GlobalValueL && GlobalValueR) { return cmpGlobalValues(GlobalValueL, GlobalValueR); } @@ -401,10 +428,9 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { case Type::TokenTyID: return 0; - case Type::PointerTyID: { + case Type::PointerTyID: assert(PTyL && PTyR && "Both types must be pointers here."); return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace()); - } case Type::StructTyID: { StructType *STyL = cast(TyL); @@ -637,7 +663,6 @@ int FunctionComparator::cmpOperations(const Instruction *L, // Read method declaration comments for more details. int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) const { - unsigned int ASL = GEPL->getPointerAddressSpace(); unsigned int ASR = GEPR->getPointerAddressSpace(); @@ -869,15 +894,19 @@ namespace { // buffer. class HashAccumulator64 { uint64_t Hash; + public: // Initialize to random constant, so the state isn't zero. HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } + void add(uint64_t V) { - Hash = llvm::hashing::detail::hash_16_bytes(Hash, V); + Hash = hashing::detail::hash_16_bytes(Hash, V); } + // No finishing is required, because the entire hash value is used. uint64_t getHash() { return Hash; } }; + } // end anonymous namespace // A function hash is calculated by considering only the number of arguments and @@ -919,5 +948,3 @@ FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { } return H.getHash(); } - - diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 2a18c140c7886..6b1391e0c80ee 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -12,11 +12,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -26,25 +30,46 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Attributes.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" 
+#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include +#include +#include +#include +#include +#include +#include +#include using namespace llvm; @@ -62,28 +87,37 @@ bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime) { return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime); } + bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, AAResults *CalleeAAR, bool InsertLifetime) { return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime); } namespace { + /// A class for recording information about inlining a landing pad. class LandingPadInliningInfo { - BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind. - BasicBlock *InnerResumeDest; ///< Destination for the callee's resume. - LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke. - PHINode *InnerEHValuesPHI; ///< PHI for EH values from landingpad insts. + /// Destination of the invoke's unwind. + BasicBlock *OuterResumeDest; + + /// Destination for the callee's resume. + BasicBlock *InnerResumeDest = nullptr; + + /// LandingPadInst associated with the invoke. + LandingPadInst *CallerLPad = nullptr; + + /// PHI for EH values from landingpad insts. + PHINode *InnerEHValuesPHI = nullptr; + SmallVector UnwindDestPHIValues; public: LandingPadInliningInfo(InvokeInst *II) - : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr), - CallerLPad(nullptr), InnerEHValuesPHI(nullptr) { + : OuterResumeDest(II->getUnwindDest()) { // If there are PHI nodes in the unwind destination block, we need to keep // track of which values came into them from the invoke before removing // the edge from this block. - llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock *InvokeBB = II->getParent(); BasicBlock::iterator I = OuterResumeDest->begin(); for (; isa(I); ++I) { // Save the value to use for this edge. @@ -126,7 +160,8 @@ namespace { } } }; -} // anonymous namespace + +} // end anonymous namespace /// Get or create a target for the branch from ResumeInsts. BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { @@ -189,7 +224,7 @@ static Value *getParentPad(Value *EHPad) { return cast(EHPad)->getParentPad(); } -typedef DenseMap UnwindDestMemoTy; +using UnwindDestMemoTy = DenseMap; /// Helper for getUnwindDestToken that does the descendant-ward part of /// the search. @@ -617,7 +652,7 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, // track of which values came into them from the invoke before removing the // edge from this block. SmallVector UnwindDestPHIValues; - llvm::BasicBlock *InvokeBB = II->getParent(); + BasicBlock *InvokeBB = II->getParent(); for (Instruction &I : *UnwindDest) { // Save the value to use for this edge. PHINode *PHI = dyn_cast(&I); @@ -1359,6 +1394,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, } } } + /// Update the block frequencies of the caller after a callee has been inlined. 
/// /// Each block cloned into the caller has its block frequency scaled by the @@ -1848,8 +1884,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. - if (AllocaArraySize != ~0ULL && - UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + if (AllocaArraySize != std::numeric_limits::max() && + std::numeric_limits::max() / AllocaArraySize >= + AllocaTypeSize) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } @@ -1980,7 +2017,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // match the callee's return type, we also need to change the return type of // the intrinsic. if (Caller->getReturnType() == TheCall->getType()) { - auto NewEnd = remove_if(Returns, [](ReturnInst *RI) { + auto NewEnd = llvm::remove_if(Returns, [](ReturnInst *RI) { return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; }); Returns.erase(NewEnd, Returns.end()); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 21412dcf68e41..fd3367710f347 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1,4 +1,4 @@ -//===-- Local.cpp - Functions to perform local transformations ------------===// +//===- Local.cpp - Functions to perform local transformations -------------===// // // The LLVM Compiler Infrastructure // @@ -13,42 +13,74 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include 
"llvm/IR/ValueHandle.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include +#include + using namespace llvm; using namespace llvm::PatternMatch; @@ -282,7 +314,6 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, return false; } - //===----------------------------------------------------------------------===// // Local dead code elimination. // @@ -541,7 +572,6 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, // Control Flow Graph Restructuring. // - /// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this /// method is called when we're about to delete Pred as a predecessor of BB. If /// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred. @@ -578,12 +608,10 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { } } - /// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its /// predecessor is known to have one successor (DestBB!). Eliminate the edge /// between them, moving the instructions in the predecessor into DestBB and /// deleting the predecessor block. -/// void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { // If BB has single-entry PHI nodes, fold them. while (PHINode *PN = dyn_cast(DestBB->begin())) { @@ -602,7 +630,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { if (DestBB->hasAddressTaken()) { BlockAddress *BA = BlockAddress::get(DestBB); Constant *Replacement = - ConstantInt::get(llvm::Type::getInt32Ty(BA->getContext()), 1); + ConstantInt::get(Type::getInt32Ty(BA->getContext()), 1); BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(Replacement, BA->getType())); BA->destroyConstant(); @@ -640,7 +668,6 @@ static bool CanMergeValues(Value *First, Value *Second) { /// almost-empty BB ending in an unconditional branch to Succ, into Succ. /// /// Assumption: Succ is the single successor for BB. -/// static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); @@ -696,8 +723,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { return true; } -typedef SmallVector PredBlockVector; -typedef DenseMap IncomingValueMap; +using PredBlockVector = SmallVector; +using IncomingValueMap = DenseMap; /// \brief Determines the value to use as the phi node input for a block. /// @@ -927,7 +954,6 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { /// nodes in this block. This doesn't try to be clever about PHI nodes /// which differ only in the order of the incoming values, but instcombine /// orders them so it usually won't matter. -/// bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { // This implementation doesn't currently consider undef operands // specially. Theoretically, two phis which are identical except for @@ -937,9 +963,11 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { static PHINode *getEmptyKey() { return DenseMapInfo::getEmptyKey(); } + static PHINode *getTombstoneKey() { return DenseMapInfo::getTombstoneKey(); } + static unsigned getHashValue(PHINode *PN) { // Compute a hash value on the operands. 
Instcombine will likely have // sorted them, which helps expose duplicates, but we have to check all @@ -948,6 +976,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { hash_combine_range(PN->value_op_begin(), PN->value_op_end()), hash_combine_range(PN->block_begin(), PN->block_end()))); } + static bool isEqual(PHINode *LHS, PHINode *RHS) { if (LHS == getEmptyKey() || LHS == getTombstoneKey() || RHS == getEmptyKey() || RHS == getTombstoneKey()) @@ -984,7 +1013,6 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { /// often possible though. If alignment is important, a more reliable approach /// is to simply align all global variables and allocation instructions to /// their preferred alignment from the beginning. -/// static unsigned enforceKnownAlignment(Value *V, unsigned Align, unsigned PrefAlign, const DataLayout &DL) { @@ -1068,7 +1096,7 @@ static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr, // Since we can't guarantee that the original dbg.declare intrinsic // is removed by LowerDbgDeclare(), we need to make sure that we are // not inserting the same dbg.value intrinsic over and over. - llvm::BasicBlock::InstListType::iterator PrevI(I); + BasicBlock::InstListType::iterator PrevI(I); if (PrevI != I->getParent()->getInstList().begin()) { --PrevI; if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(PrevI)) @@ -1353,7 +1381,6 @@ void llvm::salvageDebugInfo(Instruction &I) { // need to mark the expression with a DW_OP_stack_value. if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) { auto *DIExpr = DVI->getExpression(); - DIBuilder DIB(M, /*AllowUnresolved*/ false); // GEP offsets are i32 and thus always fit into an int64_t. DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset.getSExtValue(), @@ -1368,7 +1395,6 @@ void llvm::salvageDebugInfo(Instruction &I) { for (auto *DVI : DbgValues) { // Rewrite the load into DW_OP_deref. auto *DIExpr = DVI->getExpression(); - DIBuilder DIB(M, /*AllowUnresolved*/ false); DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); DVI->setOperand(0, MDWrap(I.getOperand(0))); DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); @@ -1488,7 +1514,6 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, static bool markAliveBlocks(Function &F, SmallPtrSetImpl<BasicBlock*> &Reachable) { - SmallVector<BasicBlock*, 128> Worklist; BasicBlock *BB = &F.front(); Worklist.push_back(BB); @@ -1594,13 +1619,16 @@ static bool markAliveBlocks(Function &F, static CatchPadInst *getEmptyKey() { return DenseMapInfo<CatchPadInst *>::getEmptyKey(); } + static CatchPadInst *getTombstoneKey() { return DenseMapInfo<CatchPadInst *>::getTombstoneKey(); } + static unsigned getHashValue(CatchPadInst *CatchPad) { return static_cast<unsigned>(hash_combine_range( CatchPad->value_op_begin(), CatchPad->value_op_end())); } + static bool isEqual(CatchPadInst *LHS, CatchPadInst *RHS) { if (LHS == getEmptyKey() || LHS == getTombstoneKey() || RHS == getEmptyKey() || RHS == getTombstoneKey()) @@ -1910,6 +1938,7 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, } namespace { + /// A potential constituent of a bitreverse or bswap expression. See /// collectBitParts for a fuller explanation. struct BitPart { @@ -1919,12 +1948,14 @@ struct BitPart { /// The Value that this is a bitreverse/bswap of. Value *Provider; + /// The "provenance" of each bit. Provenance[A] = B means that bit A /// in Provider becomes bit B in the result of this expression. SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128.
enum { Unset = -1 }; }; + } // end anonymous namespace /// Analyze the specified subexpression and see if it is capable of providing @@ -1950,7 +1981,6 @@ struct BitPart { /// /// Because we pass around references into \c BPS, we must use a container that /// does not invalidate internal references (std::map instead of DenseMap). -/// static const Optional & collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, std::map> &BPS) { diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index ab8ae73891327..2994401f3cda4 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -22,7 +22,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" @@ -460,18 +460,22 @@ LoopUnrollResult llvm::UnrollLoop( // Report the unrolling decision. if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() - << " with trip count " << TripCount << "!\n"); - ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), - L->getHeader()) - << "completely unrolled loop with " - << NV("UnrollCount", TripCount) << " iterations"); + << " with trip count " << TripCount << "!\n"); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), + L->getHeader()) + << "completely unrolled loop with " << NV("UnrollCount", TripCount) + << " iterations"; + }); } else if (PeelCount) { DEBUG(dbgs() << "PEELING loop %" << Header->getName() << " with iteration count " << PeelCount << "!\n"); - ORE->emit(OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), - L->getHeader()) - << " peeled loop by " << NV("PeelCount", PeelCount) - << " iterations"); + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), + L->getHeader()) + << " peeled loop by " << NV("PeelCount", PeelCount) + << " iterations"; + }); } else { auto DiagBuilder = [&]() { OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 1ff3811e23648..9114120bd2b0d 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -401,35 +401,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, return NewLoop; // Add unroll disable metadata to disable future unrolling for this loop. - SmallVector MDs; - // Reserve first location for self reference to the LoopID metadata node. - MDs.push_back(nullptr); - MDNode *LoopID = NewLoop->getLoopID(); - if (LoopID) { - // First remove any existing loop unrolling metadata. 
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - bool IsUnrollMetadata = false; - MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i)); - if (MD) { - const MDString *S = dyn_cast<MDString>(MD->getOperand(0)); - IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll."); - } - if (!IsUnrollMetadata) - MDs.push_back(LoopID->getOperand(i)); - } - } - - LLVMContext &Context = NewLoop->getHeader()->getContext(); - SmallVector<Metadata *, 1> DisableOperands; - DisableOperands.push_back(MDString::get(Context, - "llvm.loop.unroll.disable")); - MDNode *DisableNode = MDNode::get(Context, DisableOperands); - MDs.push_back(DisableNode); - - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. - NewLoopID->replaceOperandWith(0, NewLoopID); - NewLoop->setLoopID(NewLoopID); + NewLoop->setLoopAlreadyUnrolled(); return NewLoop; } else diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index bd89b6b2630a3..13c0bfbcb2e95 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -1137,6 +1137,128 @@ llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { return Worklist; } +void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, + ScalarEvolution *SE, + LoopInfo *LI) { + assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); + auto *Preheader = L->getLoopPreheader(); + assert(Preheader && "Preheader should exist!"); + + // Now that we know the removal is safe, remove the loop by changing the + // branch from the preheader to go to the single exit block. + // + // Because we're deleting a large chunk of code at once, the sequence in which + // we remove things is very important to avoid invalidation issues. + + // Tell ScalarEvolution that the loop is deleted. Do this before + // deleting the loop so that ScalarEvolution can look at the loop + // to determine what it needs to clean up. + if (SE) + SE->forgetLoop(L); + + auto *ExitBlock = L->getUniqueExitBlock(); + assert(ExitBlock && "Should have a unique exit block!"); + assert(L->hasDedicatedExits() && "Loop should have dedicated exits!"); + + auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator()); + assert(OldBr && "Preheader must end with a branch"); + assert(OldBr->isUnconditional() && "Preheader must have a single successor"); + // Connect the preheader to the exit block. Keep the old edge to the header + // around to perform the dominator tree update in two separate steps + // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge + // preheader -> header. + // + // + // 0. Preheader 1. Preheader 2. Preheader + // | | | | + // V | V | + // Header <--\ | Header <--\ | Header <--\ + // | | | | | | | | | | | + // | V | | | V | | | V | + // | Body --/ | | Body --/ | | Body --/ + // V V V V V + // Exit Exit Exit + // + // By doing this in two separate steps we can perform the dominator tree + // update without using the batch update API. + // + // Even when the loop is never executed, we cannot remove the edge from the + // source block to the exit block. Consider the case where the unexecuted loop + // branches back to an outer loop. If we deleted the loop and removed the edge + // coming to this inner loop, this will break the outer loop structure (by + // deleting the backedge of the outer loop). If the outer loop is indeed a + // non-loop, it will be deleted in a future iteration of the loop deletion pass.
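The code that follows implements the diagram directly; in IR terms the preheader's terminator evolves like this (block labels illustrative):

  ; 0. original preheader terminator
  br label %header
  ; 1. never-taken conditional branch: adds preheader->exit while keeping
  ;    preheader->header alive for the two-step dominator tree update
  br i1 false, label %header, label %exit
  ; 2. after DT->insertEdge(Preheader, ExitBlock) and
  ;    DT->deleteEdge(Preheader, L->getHeader()), only the exit edge remains
  br label %exit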
+ IRBuilder<> Builder(OldBr); + Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock); + // Remove the old branch. The conditional branch becomes a new terminator. + OldBr->eraseFromParent(); + + // Rewrite phis in the exit block to get their inputs from the Preheader + // instead of the exiting block. + BasicBlock::iterator BI = ExitBlock->begin(); + while (PHINode *P = dyn_cast(BI)) { + // Set the zero'th element of Phi to be from the preheader and remove all + // other incoming values. Given the loop has dedicated exits, all other + // incoming values must be from the exiting blocks. + int PredIndex = 0; + P->setIncomingBlock(PredIndex, Preheader); + // Removes all incoming values from all other exiting blocks (including + // duplicate values from an exiting block). + // Nuke all entries except the zero'th entry which is the preheader entry. + // NOTE! We need to remove Incoming Values in the reverse order as done + // below, to keep the indices valid for deletion (removeIncomingValues + // updates getNumIncomingValues and shifts all values down into the operand + // being deleted). + for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i) + P->removeIncomingValue(e - i, false); + + assert((P->getNumIncomingValues() == 1 && + P->getIncomingBlock(PredIndex) == Preheader) && + "Should have exactly one value and that's from the preheader!"); + ++BI; + } + + // Disconnect the loop body by branching directly to its exit. + Builder.SetInsertPoint(Preheader->getTerminator()); + Builder.CreateBr(ExitBlock); + // Remove the old branch. + Preheader->getTerminator()->eraseFromParent(); + + if (DT) { + // Update the dominator tree by informing it about the new edge from the + // preheader to the exit. + DT->insertEdge(Preheader, ExitBlock); + // Inform the dominator tree about the removed edge. + DT->deleteEdge(Preheader, L->getHeader()); + } + + // Remove the block from the reference counting scheme, so that we can + // delete it freely later. + for (auto *Block : L->blocks()) + Block->dropAllReferences(); + + if (LI) { + // Erase the instructions and the blocks without having to worry + // about ordering because we already dropped the references. + // NOTE: This iteration is safe because erasing the block does not remove + // its entry from the loop's block list. We do that in the next section. + for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end(); + LpI != LpE; ++LpI) + (*LpI)->eraseFromParent(); + + // Finally, the blocks from loopinfo. This has to happen late because + // otherwise our loop iterators won't work. + + SmallPtrSet blocks; + blocks.insert(L->block_begin(), L->block_end()); + for (BasicBlock *BB : blocks) + LI->removeBlock(BB); + + // The last step is to update LoopInfo now that we've eliminated this loop. + LI->erase(L); + } +} + /// Returns true if the instruction in a loop is guaranteed to execute at least /// once. 
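The NOTE above about removing incoming values in reverse generalizes to any container that shifts elements down as entries are erased: deleting from the highest index first keeps the not-yet-deleted indices valid. A standalone illustration with std::vector:

  #include <cassert>
  #include <vector>

  int main() {
    // Five incoming values; keep only index 0, the way the phi loop above
    // keeps only the preheader entry.
    std::vector<int> Incoming = {10, 11, 12, 13, 14};

    // Erase indices e, e-1, ..., 1. Erasing back-to-front means the indices
    // of entries still scheduled for deletion never shift.
    for (unsigned i = 0, e = Incoming.size() - 1; i != e; ++i)
      Incoming.erase(Incoming.begin() + (e - i));

    assert(Incoming.size() == 1 && Incoming[0] == 10);
    return 0;
  }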
bool llvm::isGuaranteedToExecute(const Instruction &Inst, diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 890afbc46e636..344cb35df9869 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -13,46 +13,65 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "lower-switch" namespace { + struct IntRange { int64_t Low, High; }; - // Return true iff R is covered by Ranges. - static bool IsInRanges(const IntRange &R, - const std::vector &Ranges) { - // Note: Ranges must be sorted, non-overlapping and non-adjacent. - - // Find the first range whose High field is >= R.High, - // then check if the Low field is <= R.Low. If so, we - // have a Range that covers R. - auto I = std::lower_bound( - Ranges.begin(), Ranges.end(), R, - [](const IntRange &A, const IntRange &B) { return A.High < B.High; }); - return I != Ranges.end() && I->Low <= R.Low; - } + +} // end anonymous namespace + +// Return true iff R is covered by Ranges. +static bool IsInRanges(const IntRange &R, + const std::vector &Ranges) { + // Note: Ranges must be sorted, non-overlapping and non-adjacent. + + // Find the first range whose High field is >= R.High, + // then check if the Low field is <= R.Low. If so, we + // have a Range that covers R. + auto I = std::lower_bound( + Ranges.begin(), Ranges.end(), R, + [](const IntRange &A, const IntRange &B) { return A.High < B.High; }); + return I != Ranges.end() && I->Low <= R.Low; +} + +namespace { /// Replace all SwitchInst instructions with chained branch instructions. class LowerSwitch : public FunctionPass { public: - static char ID; // Pass identification, replacement for typeid + // Pass identification, replacement for typeid + static char ID; + LowerSwitch() : FunctionPass(ID) { initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); } @@ -68,8 +87,9 @@ namespace { : Low(low), High(high), BB(bb) {} }; - typedef std::vector CaseVector; - typedef std::vector::iterator CaseItr; + using CaseVector = std::vector; + using CaseItr = std::vector::iterator; + private: void processSwitchInst(SwitchInst *SI, SmallPtrSetImpl &DeleteList); @@ -86,22 +106,24 @@ namespace { /// The comparison function for sorting the switch case values in the vector. /// WARNING: Case ranges should be disjoint! 
struct CaseCmp { - bool operator () (const LowerSwitch::CaseRange& C1, - const LowerSwitch::CaseRange& C2) { - + bool operator()(const LowerSwitch::CaseRange& C1, + const LowerSwitch::CaseRange& C2) { const ConstantInt* CI1 = cast<ConstantInt>(C1.Low); const ConstantInt* CI2 = cast<ConstantInt>(C2.High); return CI1->getValue().slt(CI2->getValue()); } }; -} + +} // end anonymous namespace char LowerSwitch::ID = 0; -INITIALIZE_PASS(LowerSwitch, "lowerswitch", - "Lower SwitchInst's to branches", false, false) // Publicly exposed interface to pass... char &llvm::LowerSwitchID = LowerSwitch::ID; + +INITIALIZE_PASS(LowerSwitch, "lowerswitch", + "Lower SwitchInst's to branches", false, false) + // createLowerSwitchPass - Interface to this file... FunctionPass *llvm::createLowerSwitchPass() { return new LowerSwitch(); @@ -136,6 +158,7 @@ bool LowerSwitch::runOnFunction(Function &F) { static raw_ostream& operator<<(raw_ostream &O, const LowerSwitch::CaseVector &C) LLVM_ATTRIBUTE_USED; + static raw_ostream& operator<<(raw_ostream &O, const LowerSwitch::CaseVector &C) { O << "["; @@ -186,7 +209,7 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB, } // Remove incoming values in the reverse order to prevent invalidating // *successive* index. - for (unsigned III : reverse(Indices)) + for (unsigned III : llvm::reverse(Indices)) PN->removeIncomingValue(III); } } @@ -294,8 +317,7 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound, /// value, so the jump to the "default" branch is warranted. BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, BasicBlock* OrigBlock, - BasicBlock* Default) -{ + BasicBlock* Default) { Function* F = OrigBlock->getParent(); BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock"); F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf); @@ -442,7 +464,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, unsigned MaxPop = 0; BasicBlock *PopSucc = nullptr; - IntRange R = { INT64_MIN, INT64_MAX }; + IntRange R = {std::numeric_limits<int64_t>::min(), + std::numeric_limits<int64_t>::max()}; UnreachableRanges.push_back(R); for (const auto &I : Cases) { int64_t Low = I.Low->getSExtValue(); @@ -457,8 +480,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, assert(Low > LastRange.Low); LastRange.High = Low - 1; } - if (High != INT64_MAX) { - IntRange R = { High + 1, INT64_MAX }; + if (High != std::numeric_limits<int64_t>::max()) { + IntRange R = { High + 1, std::numeric_limits<int64_t>::max() }; UnreachableRanges.push_back(R); } @@ -487,8 +510,8 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI, assert(MaxPop > 0 && PopSucc); Default = PopSucc; Cases.erase( - remove_if(Cases, - [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }), + llvm::remove_if( + Cases, [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }), Cases.end()); // If there are no cases left, just branch.
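IsInRanges near the top of this file leans entirely on its precondition (Ranges sorted, non-overlapping, non-adjacent) so that a single binary search on High suffices. The same logic, self-contained with a quick check (plain std types only):

  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  #include <vector>

  struct IntRange { int64_t Low, High; };

  // Requires: Ranges sorted, non-overlapping, and non-adjacent.
  static bool IsInRanges(const IntRange &R,
                         const std::vector<IntRange> &Ranges) {
    // Find the first range whose High is >= R.High; it covers R iff it also
    // starts at or below R.Low.
    auto I = std::lower_bound(
        Ranges.begin(), Ranges.end(), R,
        [](const IntRange &A, const IntRange &B) { return A.High < B.High; });
    return I != Ranges.end() && I->Low <= R.Low;
  }

  int main() {
    std::vector<IntRange> Ranges = {{0, 9}, {20, 29}, {40, 49}};
    assert(IsInRanges({22, 25}, Ranges)); // Inside {20, 29}.
    assert(!IsInRanges({8, 12}, Ranges)); // Straddles the 10..19 gap.
    return 0;
  }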
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index b659a2e4463ff..29f289b62da0e 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -15,12 +15,17 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include + using namespace llvm; #define DEBUG_TYPE "mem2reg" @@ -33,7 +38,7 @@ static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function bool Changed = false; - while (1) { + while (true) { Allocas.clear(); // Find allocas that are safe to promote, by looking at all instructions in @@ -65,15 +70,17 @@ PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) { } namespace { + struct PromoteLegacyPass : public FunctionPass { - static char ID; // Pass identification, replacement for typeid + // Pass identification, replacement for typeid + static char ID; + PromoteLegacyPass() : FunctionPass(ID) { initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry()); } // runOnFunction - To run this pass, first we calculate the alloca // instructions that are safe for promotion, then we promote each one. - // bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; @@ -89,10 +96,12 @@ struct PromoteLegacyPass : public FunctionPass { AU.addRequired(); AU.setPreservesCFG(); } - }; -} // end of anonymous namespace +}; + +} // end anonymous namespace char PromoteLegacyPass::ID = 0; + INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to " "Register", false, false) @@ -102,7 +111,6 @@ INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register", false, false) // createPromoteMemoryToRegister - Provide an entry point to create this pass. 
-// FunctionPass *llvm::createPromoteMemoryToRegisterPass() { return new PromoteLegacyPass(); } diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index 2ef3d6336ae2b..ba4b7f3cc2639 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -243,7 +243,7 @@ std::string llvm::getUniqueModuleId(Module *M) { bool ExportsSymbols = false; auto AddGlobal = [&](GlobalValue &GV) { if (GV.isDeclaration() || GV.getName().startswith("llvm.") || - !GV.hasExternalLinkage()) + !GV.hasExternalLinkage() || GV.hasComdat()) return; ExportsSymbols = true; Md5.update(GV.getName()); diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index e2ba5c4cfbbd7..fcd3bd08482a5 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -21,25 +21,38 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Metadata.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include <algorithm> +#include <cassert> +#include <iterator> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "mem2reg" @@ -103,7 +116,7 @@ struct AllocaInfo { bool OnlyUsedInOneBlock; Value *AllocaPointerVal; - TinyPtrVector<DbgDeclareInst*> DbgDeclares; + TinyPtrVector<DbgDeclareInst *> DbgDeclares; void clear() { DefiningBlocks.clear(); @@ -154,10 +167,11 @@ struct AllocaInfo { // Data package used by RenamePass() class RenamePassData { public: - typedef std::vector<Value *> ValVector; + using ValVector = std::vector<Value *>; RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V) : BB(B), Pred(P), Values(std::move(V)) {} + BasicBlock *BB; BasicBlock *Pred; ValVector Values; @@ -216,12 +230,15 @@ class LargeBlockInfo { struct PromoteMem2Reg { /// The alloca instructions being promoted. std::vector<AllocaInst *> Allocas; + DominatorTree &DT; DIBuilder DIB; + /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. AssumptionCache *AC; const SimplifyQuery SQ; + /// Reverse mapping of Allocas. DenseMap<AllocaInst *, unsigned> AllocaLookup; @@ -248,7 +265,6 @@ struct PromoteMem2Reg { SmallVector<TinyPtrVector<DbgDeclareInst *>, 8> AllocaDbgDeclares; /// The set of basic blocks the renamer has already visited. - /// SmallPtrSet<BasicBlock *, 16> Visited; /// Contains a stable numbering of basic blocks to avoid non-deterministic @@ -291,7 +307,7 @@ struct PromoteMem2Reg { bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); }; -} // end of anonymous namespace +} // end anonymous namespace /// Given a LoadInst LI this adds assume(LI != null) after it.
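For context while reading the hunks below, which show only this helper's signature and its callers: a sketch of what such an addAssumeNonNull helper can look like against the LLVM C++ API of this vintage (reconstructed for illustration; the body is not part of the patch):

  static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
    Function *AssumeIntrinsic =
        Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
    // Materialize "LI != null" immediately after the load...
    ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
                                         Constant::getNullValue(LI->getType()));
    LoadNotNull->insertAfter(LI);
    // ...and feed it to llvm.assume so the nonnull fact survives the load's
    // deletion; register it so SimplifyInstruction's cache can find it.
    CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
    CI->insertAfter(LoadNotNull);
    AC->registerAssumption(CI);
  }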
static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { @@ -373,7 +389,6 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, Info.UsingBlocks.push_back(StoreBB); continue; } - } else if (LI->getParent() != StoreBB && !DT.dominates(StoreBB, LI->getParent())) { // If the load and store are in different blocks, use BB dominance to @@ -395,7 +410,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // that information when we erase this Load. So we preserve // it with an assume. if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) + !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); LI->replaceAllUsesWith(ReplVal); @@ -451,7 +466,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // make it efficient to get the index of various operations in the block. // Walk the use-def list of the alloca, getting the locations of all stores. - typedef SmallVector, 64> StoresByIndexTy; + using StoresByIndexTy = SmallVector, 64>; StoresByIndexTy StoresByIndex; for (User *U : AI->users()) @@ -491,7 +506,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // information when we erase it. So we preserve it with an assume. Value *ReplVal = std::prev(I)->second->getOperand(0); if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) + !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); LI->replaceAllUsesWith(ReplVal); @@ -598,7 +613,6 @@ void PromoteMem2Reg::run() { // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. - // Unique the set of defining blocks for efficient lookup. SmallPtrSet DefBlocks; DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); @@ -635,14 +649,12 @@ void PromoteMem2Reg::run() { // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. - // RenamePassData::ValVector Values(Allocas.size()); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary - // std::vector RenamePassWorkList; RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values)); do { @@ -705,7 +717,6 @@ void PromoteMem2Reg::run() { // hasn't traversed. If this is the case, the PHI nodes may not // have incoming values for all predecessors. Loop over all PHI nodes we have // created, inserting undef values if they are missing any incoming values. - // for (DenseMap, PHINode *>::iterator I = NewPhiNodes.begin(), E = NewPhiNodes.end(); @@ -770,7 +781,6 @@ void PromoteMem2Reg::ComputeLiveInBlocks( AllocaInst *AI, AllocaInfo &Info, const SmallPtrSetImpl &DefBlocks, SmallPtrSetImpl &LiveInBlocks) { - // To determine liveness, we must iterate through the predecessors of blocks // where the def is live. Blocks are added to the worklist if we need to // check their predecessors. Start with all the using blocks. @@ -932,7 +942,7 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, // that information when we erase this Load. So we preserve // it with an assume. 
if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && - !llvm::isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT)) + !isKnownNonZero(V, SQ.DL, 0, AC, LI, &DT)) addAssumeNonNull(AC, LI); // Anything using the load now uses the current value. diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 6ccf54e49dd31..e4b20b0faa15c 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -15,7 +15,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/BasicBlock.h" @@ -39,12 +38,13 @@ using namespace llvm; #define DEBUG_TYPE "ssaupdater" -typedef DenseMap AvailableValsTy; +using AvailableValsTy = DenseMap; + static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast(AV); } -SSAUpdater::SSAUpdater(SmallVectorImpl *NewPHI) +SSAUpdater::SSAUpdater(SmallVectorImpl *NewPHI) : InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { @@ -72,7 +72,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { } static bool IsEquivalentPHI(PHINode *PHI, - SmallDenseMap &ValueMapping) { + SmallDenseMap &ValueMapping) { unsigned PHINumValues = PHI->getNumIncomingValues(); if (PHINumValues != ValueMapping.size()) return false; @@ -100,7 +100,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we have the hard case. Get the live-in values for each // predecessor. - SmallVector, 8> PredValues; + SmallVector, 8> PredValues; Value *SingularValue = nullptr; // We can get our predecessor info by walking the pred_iterator list, but it @@ -145,8 +145,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we do need a PHI: check to see if we already have one available // in this block that produces the right value. if (isa(BB->begin())) { - SmallDenseMap ValueMapping(PredValues.begin(), - PredValues.end()); + SmallDenseMap ValueMapping(PredValues.begin(), + PredValues.end()); PHINode *SomePHI; for (BasicBlock::iterator It = BB->begin(); (SomePHI = dyn_cast(It)); ++It) { @@ -218,11 +218,11 @@ namespace llvm { template<> class SSAUpdaterTraits { public: - typedef BasicBlock BlkT; - typedef Value *ValT; - typedef PHINode PhiT; + using BlkT = BasicBlock; + using ValT = Value *; + using PhiT = PHINode; + using BlkSucc_iterator = succ_iterator; - typedef succ_iterator BlkSucc_iterator; static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); } static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); } @@ -253,7 +253,7 @@ class SSAUpdaterTraits { /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds /// vector, set Info->NumPreds, and allocate space in Info->Preds. static void FindPredecessorBlocks(BasicBlock *BB, - SmallVectorImpl *Preds) { + SmallVectorImpl *Preds) { // We can get our predecessor info by walking the pred_iterator list, // but it is relatively slow. If we already have PHI nodes in this // block, walk one of them to get the predecessor list instead. @@ -293,7 +293,6 @@ class SSAUpdaterTraits { } /// ValueIsPHI - Check if a value is a PHI. 
- /// static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) { return dyn_cast<PHINode>(Val); } @@ -333,7 +332,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { //===----------------------------------------------------------------------===// LoadAndStorePromoter:: -LoadAndStorePromoter(ArrayRef<const Instruction*> Insts, +LoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S, StringRef BaseName) : SSA(S) { if (Insts.empty()) return; @@ -349,11 +348,11 @@ LoadAndStorePromoter(ArrayRef<const Instruction *> Insts, } void LoadAndStorePromoter:: -run(const SmallVectorImpl<Instruction*> &Insts) const { +run(const SmallVectorImpl<Instruction *> &Insts) const { // First step: bucket up uses of the alloca by the block they occur in. // This is important because we have to handle multiple defs/uses in a block // ourselves: SSAUpdater is purely for cross-block references. - DenseMap<BasicBlock*, TinyPtrVector<Instruction*>> UsesByBlock; + DenseMap<BasicBlock *, TinyPtrVector<Instruction *>> UsesByBlock; for (Instruction *User : Insts) UsesByBlock[User->getParent()].push_back(User); @@ -361,12 +360,12 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { // Okay, now we can iterate over all the blocks in the function with uses, // processing them. Keep track of which loads are loading a live-in value. // Walk the uses in the use-list order to be deterministic. - SmallVector<LoadInst*, 32> LiveInLoads; - DenseMap<Value*, Value*> ReplacedLoads; + SmallVector<LoadInst *, 32> LiveInLoads; + DenseMap<Value *, Value *> ReplacedLoads; for (Instruction *User : Insts) { BasicBlock *BB = User->getParent(); - TinyPtrVector<Instruction*> &BlockUses = UsesByBlock[BB]; + TinyPtrVector<Instruction *> &BlockUses = UsesByBlock[BB]; // If this block has already been processed, ignore this repeat use. if (BlockUses.empty()) continue; @@ -489,7 +488,7 @@ run(const SmallVectorImpl<Instruction *> &Insts) const { bool LoadAndStorePromoter::isInstInList(Instruction *I, - const SmallVectorImpl<Instruction*> &Insts) + const SmallVectorImpl<Instruction *> &Insts) const { return is_contained(Insts, I); } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index d3e7d70b1a9f6..5e38e0e7ca430 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -22,12 +22,14 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" @@ -35,8 +37,8 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" @@ -53,6 +55,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" @@ -73,6 +76,7 @@ #include #include #include +#include #include #include @@ -141,12 +145,13 @@ namespace { // The first field contains the value that the switch produces when a certain // case group is selected, and the second field is a vector containing the // cases composing the case group.
-typedef SmallVector>, 2> - SwitchCaseResultVectorTy; +using SwitchCaseResultVectorTy = + SmallVector>, 2>; + // The first field contains the phi node that generates a result of the switch // and the second field contains the value generated for a certain case in the // switch for that PHI. -typedef SmallVector, 4> SwitchCaseResultsTy; +using SwitchCaseResultsTy = SmallVector, 4>; /// ValueEqualityComparisonCase - Represents a case of a switch. struct ValueEqualityComparisonCase { @@ -167,7 +172,6 @@ struct ValueEqualityComparisonCase { class SimplifyCFGOpt { const TargetTransformInfo &TTI; const DataLayout &DL; - AssumptionCache *AC; SmallPtrSetImpl *LoopHeaders; const SimplifyCFGOptions &Options; @@ -193,10 +197,9 @@ class SimplifyCFGOpt { public: SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL, - AssumptionCache *AC, SmallPtrSetImpl *LoopHeaders, const SimplifyCFGOptions &Opts) - : TTI(TTI), DL(DL), AC(AC), LoopHeaders(LoopHeaders), Options(Opts) {} + : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {} bool run(BasicBlock *BB); }; @@ -436,18 +439,24 @@ namespace { /// fail. struct ConstantComparesGatherer { const DataLayout &DL; - Value *CompValue; /// Value found for the switch comparison - Value *Extra; /// Extra clause to be checked before the switch - SmallVector Vals; /// Set of integers to match in switch - unsigned UsedICmps; /// Number of comparisons matched in the and/or chain + + /// Value found for the switch comparison + Value *CompValue = nullptr; + + /// Extra clause to be checked before the switch + Value *Extra = nullptr; + + /// Set of integers to match in switch + SmallVector Vals; + + /// Number of comparisons matched in the and/or chain + unsigned UsedICmps = 0; /// Construct and compute the result for the comparison instruction Cond - ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) - : DL(DL), CompValue(nullptr), Extra(nullptr), UsedICmps(0) { + ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) { gather(Cond); } - /// Prevent copy ConstantComparesGatherer(const ConstantComparesGatherer &) = delete; ConstantComparesGatherer & operator=(const ConstantComparesGatherer &) = delete; @@ -485,7 +494,6 @@ struct ConstantComparesGatherer { // (x & ~2^z) == y --> x == y || x == y|2^z // This undoes a transformation done by instcombine to fuse 2 compares. if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) { - // It's a little bit hard to see why the following transformations are // correct. Here is a CVC3 program to verify them for 64-bit values: @@ -1277,9 +1285,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, // I1 and I2 are being combined into a single instruction. Its debug // location is the merged locations of the original instructions. - if (!isa(I1)) - I1->setDebugLoc( - DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc())); + I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); I2->eraseFromParent(); Changed = true; @@ -1533,20 +1539,20 @@ static bool sinkLastInstruction(ArrayRef Blocks) { I0->getOperandUse(O).set(NewOperands[O]); I0->moveBefore(&*BBEnd->getFirstInsertionPt()); - // The debug location for the "common" instruction is the merged locations of - // all the commoned instructions. We start with the original location of the - // "common" instruction and iteratively merge each location in the loop below. - const DILocation *Loc = I0->getDebugLoc(); - // Update metadata and IR flags, and merge debug locations. 
for (auto *I : Insts) if (I != I0) { - Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc()); + // The debug location for the "common" instruction is the merged locations + // of all the commoned instructions. We start with the original location + // of the "common" instruction and iteratively merge each location in the + // loop below. + // This is an N-way merge, which will be inefficient if I0 is a CallInst. + // However, since an N-way merge for a CallInst is rare, we use the + // simplified API instead of the more complex one. + I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc()); combineMetadataForCSE(I0, I); I0->andIRFlags(I); } - if (!isa<CallInst>(I0)) - I0->setDebugLoc(Loc); if (!isa(I0)) { // canSinkLastInstruction checked that all instructions were used by @@ -1580,9 +1586,9 @@ namespace { ArrayRef<BasicBlock*> Blocks; SmallVector<Instruction*,4> Insts; bool Fail; + public: - LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : - Blocks(Blocks) { + LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) { reset(); } @@ -1606,7 +1612,7 @@ namespace { return !Fail; } - void operator -- () { + void operator--() { if (Fail) return; for (auto *&Inst : Insts) { @@ -2030,9 +2036,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, Value *S = Builder.CreateSelect( BrCond, TrueV, FalseV, TrueV->getName() + "." + FalseV->getName(), BI); SpeculatedStore->setOperand(0, S); - SpeculatedStore->setDebugLoc( - DILocation::getMergedLocation( - BI->getDebugLoc(), SpeculatedStore->getDebugLoc())); + SpeculatedStore->applyMergedLocation(BI->getDebugLoc(), + SpeculatedStore->getDebugLoc()); } // Metadata can be dependent on the condition we are hoisting above. @@ -3024,7 +3029,6 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, // We model triangles as a type of diamond with a nullptr "true" block. // Triangles are canonicalized so that the fallthrough edge is represented by // a true condition, as in the diagram above. - // BasicBlock *PTB = PBI->getSuccessor(0); BasicBlock *PFB = PBI->getSuccessor(1); BasicBlock *QTB = QBI->getSuccessor(0); @@ -3487,10 +3491,9 @@ static bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { /// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. -static bool TryToSimplifyUncondBranchWithICmpInIt( +static bool tryToSimplifyUncondBranchWithICmpInIt( ICmpInst *ICI, IRBuilder<> &Builder, const DataLayout &DL, - const TargetTransformInfo &TTI, AssumptionCache *AC, - const SimplifyCFGOptions &Options) { + const TargetTransformInfo &TTI, const SimplifyCFGOptions &Options) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too @@ -3525,7 +3528,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; } // Ok, the block is reachable from the default dest. If the constant we're @@ -3541,7 +3544,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away.
- return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; } // The use of the icmp has to be in the 'end' block, by the only PHI node in @@ -4339,7 +4342,7 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { /// Compute masked bits for the condition of a switch /// and use it to remove dead cases. -static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, +static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); @@ -4452,38 +4455,59 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, /// Try to forward the condition of a switch instruction to a phi node /// dominated by the switch, if that would mean that some of the destination -/// blocks of the switch can be folded away. -/// Returns true if a change is made. +/// blocks of the switch can be folded away. Return true if a change is made. static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { - typedef DenseMap> ForwardingNodesMap; - ForwardingNodesMap ForwardingNodes; + using ForwardingNodesMap = DenseMap>; - for (auto Case : SI->cases()) { + ForwardingNodesMap ForwardingNodes; + BasicBlock *SwitchBlock = SI->getParent(); + bool Changed = false; + for (auto &Case : SI->cases()) { ConstantInt *CaseValue = Case.getCaseValue(); BasicBlock *CaseDest = Case.getCaseSuccessor(); - int PhiIndex; - PHINode *PHI = - FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIndex); - if (!PHI) - continue; + // Replace phi operands in successor blocks that are using the constant case + // value rather than the switch condition variable: + // switchbb: + // switch i32 %x, label %default [ + // i32 17, label %succ + // ... + // succ: + // %r = phi i32 ... [ 17, %switchbb ] ... + // --> + // %r = phi i32 ... [ %x, %switchbb ] ... + + for (Instruction &InstInCaseDest : *CaseDest) { + auto *Phi = dyn_cast(&InstInCaseDest); + if (!Phi) break; + + // This only works if there is exactly 1 incoming edge from the switch to + // a phi. If there is >1, that means multiple cases of the switch map to 1 + // value in the phi, and that phi value is not the switch condition. Thus, + // this transform would not make sense (the phi would be invalid because + // a phi can't have different incoming values from the same block). + int SwitchBBIdx = Phi->getBasicBlockIndex(SwitchBlock); + if (Phi->getIncomingValue(SwitchBBIdx) == CaseValue && + count(Phi->blocks(), SwitchBlock) == 1) { + Phi->setIncomingValue(SwitchBBIdx, SI->getCondition()); + Changed = true; + } + } - ForwardingNodes[PHI].push_back(PhiIndex); + // Collect phi nodes that are indirectly using this switch's case constants. 
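The indirect forwarding collected next is the FindPHIForConditionForwarding case; in IR it looks like this (labels illustrative): each case reaches the merge block through its own successor, and the phi's incoming constant equals the case value, so once two or more such indexes exist every matching incoming value can be rewritten to the switch condition:

    switch i32 %x, label %default [
      i32 5, label %bb5
      i32 7, label %bb7
    ]
  bb5:
    br label %merge
  bb7:
    br label %merge
  merge:
    %r = phi i32 [ 5, %bb5 ], [ 7, %bb7 ], [ 0, %default ]
    ; becomes: %r = phi i32 [ %x, %bb5 ], [ %x, %bb7 ], [ 0, %default ]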
+ int PhiIdx; + if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx)) + ForwardingNodes[Phi].push_back(PhiIdx); } - bool Changed = false; - - for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(), - E = ForwardingNodes.end(); - I != E; ++I) { - PHINode *Phi = I->first; - SmallVectorImpl &Indexes = I->second; - + for (auto &ForwardingNode : ForwardingNodes) { + PHINode *Phi = ForwardingNode.first; + SmallVectorImpl &Indexes = ForwardingNode.second; if (Indexes.size() < 2) continue; - for (size_t I = 0, E = Indexes.size(); I != E; ++I) - Phi->setIncomingValue(Indexes[I], SI->getCondition()); + for (int Index : Indexes) + Phi->setIncomingValue(Index, SI->getCondition()); Changed = true; } @@ -4766,8 +4790,8 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, /// If the switch is only used to initialize one or more /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. -static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, - AssumptionCache *AC, const DataLayout &DL, +static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, const TargetTransformInfo &TTI) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; @@ -4839,18 +4863,18 @@ class SwitchLookupTable { } Kind; // For SingleValueKind, this is the single value. - Constant *SingleValue; + Constant *SingleValue = nullptr; // For BitMapKind, this is the bitmap. - ConstantInt *BitMap; - IntegerType *BitMapElementTy; + ConstantInt *BitMap = nullptr; + IntegerType *BitMapElementTy = nullptr; // For LinearMapKind, these are the constants used to derive the value. - ConstantInt *LinearOffset; - ConstantInt *LinearMultiplier; + ConstantInt *LinearOffset = nullptr; + ConstantInt *LinearMultiplier = nullptr; // For ArrayKind, this is the array. - GlobalVariable *Array; + GlobalVariable *Array = nullptr; }; } // end anonymous namespace @@ -4858,9 +4882,7 @@ class SwitchLookupTable { SwitchLookupTable::SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl> &Values, - Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) - : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), - LinearOffset(nullptr), LinearMultiplier(nullptr), Array(nullptr) { + Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) { assert(Values.size() && "Can't build lookup table without values!"); assert(TableSize >= Values.size() && "Can't fit values in table!"); @@ -5106,7 +5128,6 @@ static void reuseTableCompare( User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl> &Values) { - ICmpInst *CmpInst = dyn_cast(PhiUser); if (!CmpInst) return; @@ -5201,8 +5222,10 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, ConstantInt *MaxCaseVal = CI->getCaseValue(); BasicBlock *CommonDest = nullptr; - typedef SmallVector, 4> ResultListTy; + + using ResultListTy = SmallVector, 4>; SmallDenseMap ResultLists; + SmallDenseMap DefaultResults; SmallDenseMap ResultTypes; SmallVector PHIs; @@ -5215,7 +5238,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, MaxCaseVal = CaseVal; // Resulting value at phi nodes for this case value. 
- typedef SmallVector, 4> ResultsTy; + using ResultsTy = SmallVector, 4>; ResultsTy Results; if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest, Results, DL, TTI)) @@ -5455,7 +5478,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, // First, transform the values such that they start at zero and ascend. int64_t Base = Values[0]; for (auto &V : Values) - V -= Base; + V -= (uint64_t)(Base); // Now we have signed numbers that have been shifted so that, given enough // precision, there are no negative values. Since the rest of the transform @@ -5520,12 +5543,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // see if that predecessor totally determines the outcome of this switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; Value *Cond = SI->getCondition(); if (SelectInst *Select = dyn_cast(Cond)) if (SimplifySwitchOnSelect(SI, Select)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; // If the block only contains the switch, see if we can fold the block // away into any preds. @@ -5535,22 +5558,22 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { ++BBI; if (SI == &*BBI) if (FoldValueComparisonIntoPredecessors(SI, Builder)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; } // Try to transform the switch into an icmp and a branch. if (TurnSwitchRangeIntoICmp(SI, Builder)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; // Remove unreachable cases. - if (EliminateDeadSwitchCases(SI, AC, DL)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + if (eliminateDeadSwitchCases(SI, Options.AC, DL)) + return simplifyCFG(BB, TTI, Options) | true; - if (SwitchToSelect(SI, Builder, AC, DL, TTI)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + if (switchToSelect(SI, Builder, DL, TTI)) + return simplifyCFG(BB, TTI, Options) | true; - if (ForwardSwitchConditionToPHI(SI)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI)) + return simplifyCFG(BB, TTI, Options) | true; // The conversion from switch to lookup tables results in difficult-to-analyze // code and makes pruning branches much harder. This is a problem if the @@ -5559,10 +5582,10 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { // optimisation pipeline. 
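A note on the `V -= (uint64_t)(Base);` change in the ReduceSwitchRange hunk above. The cast matters for language-level correctness; a minimal sketch (the helper name is invented, values are hypothetical):

    #include <cstdint>

    int64_t shiftToZeroBase(int64_t V, int64_t Base) {
      // "V - Base" in signed arithmetic overflows (undefined behavior) when
      // the case values span more than half of the 64-bit range. Doing the
      // subtraction in uint64_t wraps modulo 2^64 instead, which is well
      // defined and yields the intended bit pattern on two's-complement
      // targets.
      return (int64_t)((uint64_t)V - (uint64_t)Base);
    }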
if (Options.ConvertSwitchToLookupTable && SwitchToLookupTable(SI, Builder, DL, TTI)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; if (ReduceSwitchRange(SI, Builder, DL, TTI)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; return false; } @@ -5600,7 +5623,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { if (SelectInst *SI = dyn_cast(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; } return Changed; } @@ -5642,8 +5665,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, LandingPadInst *LPad2 = dyn_cast(I); if (!LPad2 || !LPad2->isIdenticalTo(LPad)) continue; - for (++I; isa(I); ++I) { - } + for (++I; isa(I); ++I) + ; BranchInst *BI2 = dyn_cast(I); if (!BI2 || !BI2->isIdenticalTo(BI)) continue; @@ -5710,16 +5733,15 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, for (++I; isa(I); ++I) ; if (I->isTerminator() && - TryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, AC, - Options)) + tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder, DL, TTI, Options)) return true; } // See if we can merge an empty landing pad block with another which is // equivalent. if (LandingPadInst *LPad = dyn_cast(I)) { - for (++I; isa(I); ++I) { - } + for (++I; isa(I); ++I) + ; if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB)) return true; } @@ -5729,7 +5751,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; return false; } @@ -5754,7 +5776,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg intrinsics. @@ -5764,14 +5786,14 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { ++I; if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; } else if (&*I == cast(BI->getCondition())) { ++I; // Ignore dbg intrinsics. while (isa(I)) ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; } } @@ -5798,7 +5820,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { : ConstantInt::getFalse(BB->getContext()); BI->setCondition(CI); RecursivelyDeleteTriviallyDeadInstructions(OldCond); - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; } } } @@ -5807,7 +5829,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. 
if (FoldBranchToCommonDest(BI, Options.BonusInstThreshold)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if @@ -5816,7 +5838,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { if (HoistThenElseCodeToIf(BI, TTI)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. @@ -5824,7 +5846,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally @@ -5833,22 +5855,22 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; } // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast(BI->getCondition())) if (PN->getParent() == BI->getParent()) - if (FoldCondBranchOnPHI(BI, DL, AC)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + if (FoldCondBranchOnPHI(BI, DL, Options.AC)) + return simplifyCFG(BB, TTI, Options) | true; // Scan predecessor blocks for conditional branches. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; // Look for diamond patterns. if (MergeCondStores) @@ -5856,7 +5878,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (BranchInst *PBI = dyn_cast(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) if (mergeConditionalStores(PBI, BI, DL)) - return SimplifyCFG(BB, TTI, AC, Options) | true; + return simplifyCFG(BB, TTI, Options) | true; return false; } @@ -5965,7 +5987,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. 
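For context on the Options.AC and Options.BonusInstThreshold references that replace loose parameters throughout these hunks, a schematic reconstruction of the options struct the call sites rely on. The field set is inferred from the call sites in this patch, not copied from the actual header, so treat it as a sketch:

    class AssumptionCache; // from llvm/Analysis/AssumptionCache.h

    struct SimplifyCFGOptions {
      int BonusInstThreshold = 1;
      bool ForwardSwitchCondToPhi = false;
      bool ConvertSwitchToLookupTable = false;
      bool NeedCanonicalLoop = true;
      AssumptionCache *AC = nullptr;

      // Fluent setters let each caller enable only the transforms it wants,
      // e.g. SimplifyCFGOptions().forwardSwitchCondToPhi(true)
      //                          .setAssumptionCache(AC)
      SimplifyCFGOptions &forwardSwitchCondToPhi(bool B) {
        ForwardSwitchCondToPhi = B;
        return *this;
      }
      SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) {
        AC = Cache;
        return *this;
      }
    };

Bundling the knobs this way is what lets the new Options.ForwardSwitchCondToPhi test above gate ForwardSwitchConditionToPHI per pass-pipeline position instead of running it unconditionally.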
- // if (MergeBlockIntoPredecessor(BB)) return true; @@ -6012,10 +6033,10 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { return Changed; } -bool llvm::SimplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, - AssumptionCache *AC, const SimplifyCFGOptions &Options, +bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, + const SimplifyCFGOptions &Options, SmallPtrSetImpl *LoopHeaders) { - return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), AC, LoopHeaders, + return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), LoopHeaders, Options) .run(BB); } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index cef8fe1a614ab..08b84927c674d 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -19,7 +19,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" @@ -55,15 +55,17 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; - + SCEVExpander &Rewriter; SmallVectorImpl &DeadInsts; bool Changed; public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, - LoopInfo *LI, SmallVectorImpl &Dead) - : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { + LoopInfo *LI, SCEVExpander &Rewriter, + SmallVectorImpl &Dead) + : L(Loop), LI(LI), SE(SE), DT(DT), Rewriter(Rewriter), DeadInsts(Dead), + Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -77,7 +79,7 @@ namespace { Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); - bool foldConstantSCEV(Instruction *UseInst); + bool replaceIVUserWithLoopInvariant(Instruction *UseInst); bool eliminateOverflowIntrinsic(CallInst *CI); bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); @@ -536,28 +538,38 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, return false; } -/// Replace the UseInst with a constant if possible -bool SimplifyIndvar::foldConstantSCEV(Instruction *I) { +static Instruction *GetLoopInvariantInsertPosition(Loop *L, Instruction *Hint) { + if (auto *BB = L->getLoopPreheader()) + return BB->getTerminator(); + + return Hint; +} + +/// Replace the UseInst with a constant if possible. +bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) { if (!SE->isSCEVable(I->getType())) return false; // Get the symbolic expression for this instruction. const SCEV *S = SE->getSCEV(I); - const Loop *L = LI->getLoopFor(I->getParent()); - S = SE->getSCEVAtScope(S, L); + if (!SE->isLoopInvariant(S, L)) + return false; - if (auto *C = dyn_cast(S)) { - I->replaceAllUsesWith(C->getValue()); - DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I - << " with constant: " << *C << '\n'); - ++NumFoldedUser; - Changed = true; - DeadInsts.emplace_back(I); - return true; - } + // Do not generate something ridiculous even if S is loop invariant. 
+ if (Rewriter.isHighCostExpansion(S, L, I)) + return false; - return false; + auto *IP = GetLoopInvariantInsertPosition(L, I); + auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP); + + I->replaceAllUsesWith(Invariant); + DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I + << " with loop invariant: " << *S << '\n'); + ++NumFoldedUser; + Changed = true; + DeadInsts.emplace_back(I); + return true; } /// Eliminate any operation that SCEV can prove is an identity function. @@ -695,7 +707,7 @@ bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO, /// Add all uses of Def to the current IV's worklist. static void pushIVUsers( - Instruction *Def, + Instruction *Def, Loop *L, SmallPtrSet &Simplified, SmallVectorImpl< std::pair > &SimpleIVUsers) { @@ -706,8 +718,19 @@ static void pushIVUsers( // Also ensure unique worklist users. // If Def is a LoopPhi, it may not be in the Simplified set, so check for // self edges first. - if (UI != Def && Simplified.insert(UI).second) - SimpleIVUsers.push_back(std::make_pair(UI, Def)); + if (UI == Def) + continue; + + // Only change the current Loop, do not change the other parts (e.g. other + // Loops). + if (!L->contains(UI)) + continue; + + // Do not push the same instruction more than once. + if (!Simplified.insert(UI).second) + continue; + + SimpleIVUsers.push_back(std::make_pair(UI, Def)); } } @@ -757,7 +780,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { // Push users of the current LoopPhi. In rare cases, pushIVUsers may be // called multiple times for the same LoopPhi. This is the proper thing to // do for loop header phis that use each other. - pushIVUsers(CurrIV, Simplified, SimpleIVUsers); + pushIVUsers(CurrIV, L, Simplified, SimpleIVUsers); while (!SimpleIVUsers.empty()) { std::pair UseOper = @@ -767,8 +790,9 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { // Bypass back edges to avoid extra work. if (UseInst == CurrIV) continue; - // Try to replace UseInst with a constant before any other simplifications - if (foldConstantSCEV(UseInst)) + // Try to replace UseInst with a loop invariant before any other + // simplifications. + if (replaceIVUserWithLoopInvariant(UseInst)) continue; Instruction *IVOperand = UseOper.second; @@ -784,7 +808,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { continue; if (eliminateIVUser(UseOper.first, IVOperand)) { - pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); continue; } @@ -794,7 +818,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { (isa(BO) && strengthenRightShift(BO, IVOperand))) { // re-queue uses of the now modified binary operator and fall // through to the checks that remain. - pushIVUsers(IVOperand, Simplified, SimpleIVUsers); + pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers); } } @@ -804,7 +828,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { continue; } if (isSimpleIVUser(UseOper.first, L, SE)) { - pushIVUsers(UseOper.first, Simplified, SimpleIVUsers); + pushIVUsers(UseOper.first, L, Simplified, SimpleIVUsers); } } } @@ -817,8 +841,9 @@ void IVVisitor::anchor() { } /// by using ScalarEvolution to analyze the IV's recurrence. 
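An editorial illustration (IR invented) of the replaceIVUserWithLoopInvariant hunks above: when SCEV proves an in-loop value loop-invariant, the expression is re-expanded at the preheader terminator and the in-loop instruction's uses are redirected, with isHighCostExpansion vetoing expansions (for example division chains) that would cost more than they save:

    //   Before                            After
    //   preheader:                        preheader:
    //     br label %loop                    %inv = mul i64 %a, %b ; expanded
    //                                       br label %loop
    //   loop:                             loop:
    //     %x = mul i64 %a, %b               ; %x is dead; its users now
    //     %use = add i64 %x, 1              %use = add i64 %inv, 1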
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl &Dead, - IVVisitor *V) { - SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); + SCEVExpander &Rewriter, IVVisitor *V) { + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Rewriter, + Dead); SIV.simplifyUsers(CurrIV, V); return SIV.hasChanged(); } @@ -827,9 +852,13 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, /// loop. This does not actually change or add IVs. bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, LoopInfo *LI, SmallVectorImpl &Dead) { + SCEVExpander Rewriter(*SE, SE->getDataLayout(), "indvars"); +#ifndef NDEBUG + Rewriter.setDebugType(DEBUG_TYPE); +#endif bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { - Changed |= simplifyUsersOfIV(cast(I), SE, DT, LI, Dead); + Changed |= simplifyUsersOfIV(cast(I), SE, DT, LI, Dead, Rewriter); } return Changed; } diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 2ea15f65cef9a..f3d4f2ef38d78 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -20,7 +20,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 22c078a8d2fae..33117659489cf 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -19,7 +19,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" @@ -485,8 +485,10 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, uint64_t LenTrue = GetStringLength(SI->getTrueValue(), CharSize); uint64_t LenFalse = GetStringLength(SI->getFalseValue(), CharSize); if (LenTrue && LenFalse) { - ORE.emit(OptimizationRemark("instcombine", "simplify-libcalls", CI) - << "folded strlen(select) to select of constants"); + ORE.emit([&]() { + return OptimizationRemark("instcombine", "simplify-libcalls", CI) + << "folded strlen(select) to select of constants"; + }); return B.CreateSelect(SI->getCondition(), ConstantInt::get(CI->getType(), LenTrue - 1), ConstantInt::get(CI->getType(), LenFalse - 1)); diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp index e9a368f4faa4e..07157069518ad 100644 --- a/lib/Transforms/Utils/SplitModule.cpp +++ b/lib/Transforms/Utils/SplitModule.cpp @@ -13,32 +13,51 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "split-module" - #include "llvm/Transforms/Utils/SplitModule.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include 
"llvm/ADT/StringRef.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalIndirectSymbol.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Module.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include +#include +#include +#include #include +#include +#include using namespace llvm; +#define DEBUG_TYPE "split-module" + namespace { -typedef EquivalenceClasses ClusterMapType; -typedef DenseMap ComdatMembersType; -typedef DenseMap ClusterIDMapType; -} + +using ClusterMapType = EquivalenceClasses; +using ComdatMembersType = DenseMap; +using ClusterIDMapType = DenseMap; + +} // end anonymous namespace static void addNonConstUser(ClusterMapType &GVtoClusterMap, const GlobalValue *GV, const User *U) { @@ -147,7 +166,8 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, for (unsigned i = 0; i < N; ++i) BalancinQueue.push(std::make_pair(i, 0)); - typedef std::pair SortType; + using SortType = std::pair; + SmallVector Sets; SmallPtrSet Visited; diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp index 20107553665f6..9da862db6a78a 100644 --- a/lib/Transforms/Utils/SymbolRewriter.cpp +++ b/lib/Transforms/Utils/SymbolRewriter.cpp @@ -1,4 +1,4 @@ -//===- SymbolRewriter.cpp - Symbol Rewriter ---------------------*- C++ -*-===// +//===- SymbolRewriter.cpp - Symbol Rewriter -------------------------------===// // // The LLVM Compiler Infrastructure // @@ -57,22 +57,37 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "symbol-rewriter" #include "llvm/Transforms/Utils/SymbolRewriter.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" -#include "llvm/IR/LegacyPassManager.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/YAMLParser.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include using namespace llvm; using namespace SymbolRewriter; +#define DEBUG_TYPE "symbol-rewriter" + static cl::list RewriteMapFiles("rewrite-map-file", cl::desc("Symbol Rewrite Map"), cl::value_desc("filename")); @@ -92,8 +107,9 @@ static void rewriteComdat(Module &M, GlobalObject *GO, } namespace { + template + ValueType *(Module::*Get)(StringRef) const> class ExplicitRewriteDescriptor : public RewriteDescriptor { public: const std::string Source; @@ -110,8 +126,10 @@ class ExplicitRewriteDescriptor : public RewriteDescriptor { } }; +} // end anonymous namespace + template + 
ValueType *(Module::*Get)(StringRef) const> bool ExplicitRewriteDescriptor::performOnModule(Module &M) { bool Changed = false; if (ValueType *S = (M.*Get)(Source)) { @@ -128,10 +146,12 @@ bool ExplicitRewriteDescriptor::performOnModule(Module &M) { return Changed; } +namespace { + template ::iterator> - (llvm::Module::*Iterator)()> + (Module::*Iterator)()> class PatternRewriteDescriptor : public RewriteDescriptor { public: const std::string Pattern; @@ -147,10 +167,12 @@ class PatternRewriteDescriptor : public RewriteDescriptor { } }; +} // end anonymous namespace + template ::iterator> - (llvm::Module::*Iterator)()> + (Module::*Iterator)()> bool PatternRewriteDescriptor:: performOnModule(Module &M) { bool Changed = false; @@ -178,55 +200,52 @@ performOnModule(Module &M) { return Changed; } +namespace { + /// Represents a rewrite for an explicitly named (function) symbol. Both the /// source function name and target function name of the transformation are /// explicitly spelt out. -typedef ExplicitRewriteDescriptor - ExplicitRewriteFunctionDescriptor; +using ExplicitRewriteFunctionDescriptor = + ExplicitRewriteDescriptor; /// Represents a rewrite for an explicitly named (global variable) symbol. Both /// the source variable name and target variable name are spelt out. This /// applies only to module level variables. -typedef ExplicitRewriteDescriptor - ExplicitRewriteGlobalVariableDescriptor; +using ExplicitRewriteGlobalVariableDescriptor = + ExplicitRewriteDescriptor; /// Represents a rewrite for an explicitly named global alias. Both the source /// and target name are explicitly spelt out. -typedef ExplicitRewriteDescriptor - ExplicitRewriteNamedAliasDescriptor; +using ExplicitRewriteNamedAliasDescriptor = + ExplicitRewriteDescriptor; /// Represents a rewrite for a regular expression based pattern for functions. /// A pattern for the function name is provided and a transformation for that /// pattern to determine the target function name create the rewrite rule. -typedef PatternRewriteDescriptor - PatternRewriteFunctionDescriptor; +using PatternRewriteFunctionDescriptor = + PatternRewriteDescriptor; /// Represents a rewrite for a global variable based upon a matching pattern. /// Each global variable matching the provided pattern will be transformed as /// described in the transformation pattern for the target. Applies only to /// module level variables. -typedef PatternRewriteDescriptor - PatternRewriteGlobalVariableDescriptor; +using PatternRewriteGlobalVariableDescriptor = + PatternRewriteDescriptor; /// PatternRewriteNamedAliasDescriptor - represents a rewrite for global /// aliases which match a given pattern. The provided transformation will be /// applied to each of the matching names. 
-typedef PatternRewriteDescriptor - PatternRewriteNamedAliasDescriptor; -} // namespace +using PatternRewriteNamedAliasDescriptor = + PatternRewriteDescriptor; + +} // end anonymous namespace bool RewriteMapParser::parse(const std::string &MapFile, RewriteDescriptorList *DL) { @@ -497,6 +516,7 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K, } namespace { + class RewriteSymbolsLegacyPass : public ModulePass { public: static char ID; // Pass identification, replacement for typeid @@ -510,9 +530,11 @@ class RewriteSymbolsLegacyPass : public ModulePass { RewriteSymbolPass Impl; }; +} // end anonymous namespace + char RewriteSymbolsLegacyPass::ID = 0; -RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID), Impl() { +RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) { initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -523,9 +545,7 @@ RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass( bool RewriteSymbolsLegacyPass::runOnModule(Module &M) { return Impl.runImpl(M); } -} -namespace llvm { PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) { if (!runImpl(M)) return PreservedAnalyses::all(); @@ -550,7 +570,6 @@ void RewriteSymbolPass::loadAndParseMapFiles() { for (const auto &MapFile : MapFiles) Parser.parse(MapFile, &Descriptors); } -} INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols", "Rewrite Symbols", false, false) diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 930972924c3c0..8c9ecbc3503e2 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -13,17 +13,36 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include +#include +#include +#include + using namespace llvm; // Out of line method to get vtable etc for class. @@ -85,7 +104,6 @@ struct MappingContext { : VM(&VM), Materializer(Materializer) {} }; -class MDNodeMapper; class Mapper { friend class MDNodeMapper; @@ -175,7 +193,7 @@ class MDNodeMapper { /// Data about a node in \a UniquedGraph. struct Data { bool HasChanged = false; - unsigned ID = ~0u; + unsigned ID = std::numeric_limits::max(); TempMDNode Placeholder; }; @@ -316,7 +334,7 @@ class MDNodeMapper { void remapOperands(MDNode &N, OperandMapper mapOperand); }; -} // end namespace +} // end anonymous namespace Value *Mapper::mapValue(const Value *V) { ValueToValueMapTy::iterator I = getVM().find(V); @@ -579,6 +597,7 @@ void MDNodeMapper::remapOperands(MDNode &N, OperandMapper mapOperand) { } namespace { + /// An entry in the worklist for the post-order traversal. 
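Before the POTWorklistEntry declaration that follows, a self-contained sketch (names and types invented) of the iterative post-order pattern such an entry supports: each worklist entry pairs a node with a cursor into its operands, so the walk can pause at a child and resume where it left off. The real mapper additionally tracks already-visited nodes; a tree is assumed here to keep the sketch short:

    #include <cstddef>
    #include <functional>
    #include <vector>

    struct Node {
      std::vector<Node *> Ops;
    };

    // Visit every node reachable from Root in post-order (children first).
    void postorder(Node *Root, const std::function<void(Node *)> &Visit) {
      struct Entry {
        Node *N;
        size_t Op = 0; // next operand to descend into
      };
      std::vector<Entry> Stack{{Root}};
      while (!Stack.empty()) {
        Entry &E = Stack.back();
        if (E.Op < E.N->Ops.size())
          Stack.push_back({E.N->Ops[E.Op++]}); // descend into the next child
        else {
          Visit(E.N); // all children done: emit the node
          Stack.pop_back();
        }
      }
    }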
struct POTWorklistEntry { MDNode *N; ///< Current node. @@ -590,7 +609,8 @@ struct POTWorklistEntry { POTWorklistEntry(MDNode &N) : N(&N), Op(N.op_begin()) {} }; -} // end namespace + +} // end anonymous namespace bool MDNodeMapper::createPOT(UniquedGraph &G, const MDNode &FirstN) { assert(G.Info.empty() && "Expected a fresh traversal"); @@ -653,7 +673,7 @@ void MDNodeMapper::UniquedGraph::propagateChanges() { if (D.HasChanged) continue; - if (none_of(N->operands(), [&](const Metadata *Op) { + if (llvm::none_of(N->operands(), [&](const Metadata *Op) { auto Where = Info.find(Op); return Where != Info.end() && Where->second.HasChanged; })) @@ -752,10 +772,11 @@ struct MapMetadataDisabler { MapMetadataDisabler(ValueToValueMapTy &VM) : VM(VM) { VM.disableMapMetadata(); } + ~MapMetadataDisabler() { VM.enableMapMetadata(); } }; -} // end namespace +} // end anonymous namespace Optional Mapper::mapSimpleMetadata(const Metadata *MD) { // If the value already exists in the map, use it. @@ -1037,11 +1058,13 @@ class FlushingMapper { explicit FlushingMapper(void *pImpl) : M(*getAsMapper(pImpl)) { assert(!M.hasWorkToDo() && "Expected to be flushed"); } + ~FlushingMapper() { M.flush(); } + Mapper *operator->() const { return &M; } }; -} // end namespace +} // end anonymous namespace ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, diff --git a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index 9cf66382b5817..2ec4f6ca9e7f0 100644 --- a/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -1,4 +1,4 @@ -//===----- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer ----------===// +//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===// // // The LLVM Compiler Infrastructure // @@ -6,47 +6,66 @@ // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/Triple.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Vectorize.h" +#include +#include +#include +#include +#include using namespace llvm; #define DEBUG_TYPE "load-store-vectorizer" + STATISTIC(NumVectorInstructions, "Number of vector accesses generated"); STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized"); -namespace { - // FIXME: Assuming stack alignment of 4 is always good enough static const unsigned StackAdjustedAlignment = 4; -typedef SmallVector InstrList; -typedef MapVector InstrListMap; + +namespace { + +using InstrList = SmallVector; +using InstrListMap = MapVector; class Vectorizer { Function &F; @@ -163,7 +182,10 @@ class LoadStoreVectorizer : public FunctionPass { AU.setPreservesCFG(); } }; -} + +} // end anonymous namespace + +char LoadStoreVectorizer::ID = 0; INITIALIZE_PASS_BEGIN(LoadStoreVectorizer, DEBUG_TYPE, "Vectorize load and Store instructions", false, false) @@ -175,8 +197,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE, "Vectorize load and store instructions", false, false) -char LoadStoreVectorizer::ID = 0; - Pass *llvm::createLoadStoreVectorizerPass() { return new LoadStoreVectorizer(); } @@ -605,7 +625,7 @@ Vectorizer::collectInstructions(BasicBlock *BB) { continue; // Make sure all the users of a vector are constant-index extracts. - if (isa(Ty) && !all_of(LI->users(), [](const User *U) { + if (isa(Ty) && !llvm::all_of(LI->users(), [](const User *U) { const ExtractElementInst *EEI = dyn_cast(U); return EEI && isa(EEI->getOperand(1)); })) @@ -614,7 +634,6 @@ Vectorizer::collectInstructions(BasicBlock *BB) { // Save the load locations. 
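An aside on the user check above (editorial, IR invented): loads and stores of vector type stay vectorization candidates only when every user extracts a constant lane, since the vectorizer must know statically which lane each use reads:

    //   %v = load <2 x i32>, <2 x i32>* %p
    //   %a = extractelement <2 x i32> %v, i32 0    ; constant lane: accepted
    //   %b = extractelement <2 x i32> %v, i32 %i   ; variable lane: rejected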
Value *ObjPtr = GetUnderlyingObject(Ptr, DL); LoadRefs[ObjPtr].push_back(LI); - } else if (StoreInst *SI = dyn_cast(&I)) { if (!SI->isSimple()) continue; @@ -639,7 +658,7 @@ Vectorizer::collectInstructions(BasicBlock *BB) { if (TySize > VecRegSize / 2) continue; - if (isa(Ty) && !all_of(SI->users(), [](const User *U) { + if (isa(Ty) && !llvm::all_of(SI->users(), [](const User *U) { const ExtractElementInst *EEI = dyn_cast(U); return EEI && isa(EEI->getOperand(1)); })) diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 2b3ea8bfdbf7e..0e380322c0033 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -48,63 +48,95 @@ #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "VPlan.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" -#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/Verifier.h" #include "llvm/Pass.h" -#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include 
"llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" -#include "llvm/Transforms/Vectorize.h" #include +#include +#include +#include #include -#include +#include +#include +#include +#include #include +#include +#include using namespace llvm; -using namespace llvm::PatternMatch; #define LV_NAME "loop-vectorize" #define DEBUG_TYPE LV_NAME @@ -247,15 +279,14 @@ createMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, namespace { -// Forward declarations. -class LoopVectorizeHints; class LoopVectorizationLegality; class LoopVectorizationCostModel; class LoopVectorizationRequirements; class VPInterleaveRecipe; class VPReplicateRecipe; class VPWidenIntOrFpInductionRecipe; -class VPWidenRecipe; + +} // end anonymous namespace /// Returns true if the given loop body has a cycle, excluding the loop /// itself. @@ -330,7 +361,6 @@ static unsigned getMemInstAddressSpace(Value *I) { /// type is irregular if its allocated size doesn't equal the store size of an /// element of the corresponding vector type at the given vectorization factor. static bool hasIrregularType(Type *Ty, const DataLayout &DL, unsigned VF) { - // Determine if an array of VF elements of type Ty is "bitcast compatible" // with a vector. if (VF > 1) { @@ -368,9 +398,8 @@ static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) { : ConstantFP::get(Ty, C); } -} // end anonymous namespace - namespace llvm { + /// InnerLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). /// This class performs the widening of scalars into vectors, or multiple @@ -396,10 +425,9 @@ class InnerLoopVectorizer { LoopVectorizationCostModel *CM) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor), - Builder(PSE.getSE()->getContext()), Induction(nullptr), - OldInduction(nullptr), VectorLoopValueMap(UnrollFactor, VecWidth), - TripCount(nullptr), VectorTripCount(nullptr), Legal(LVL), Cost(CM), - AddedSafetyChecks(false) {} + Builder(PSE.getSE()->getContext()), + VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM) {} + virtual ~InnerLoopVectorizer() = default; /// Create a new empty loop. Unlink the old loop and connect the new one. /// Return the pre-header block of the new loop. @@ -414,12 +442,10 @@ class InnerLoopVectorizer { // Return true if any runtime check is added. bool areSafetyChecksAdded() { return AddedSafetyChecks; } - virtual ~InnerLoopVectorizer() {} - /// A type for vectorized values in the new loop. Each value from the /// original loop, when vectorized, is represented by UF vector values in the /// new unrolled loop, where UF is the unroll factor. - typedef SmallVector VectorParts; + using VectorParts = SmallVector; /// A helper function that computes the predicate of the block BB, assuming /// that the header block of the loop is set to True. It returns the *entry* @@ -479,20 +505,22 @@ class InnerLoopVectorizer { void vectorizeInterleaveGroup(Instruction *Instr); protected: + friend class LoopVectorizationPlanner; + /// A small list of PHINodes. - typedef SmallVector PhiVector; + using PhiVector = SmallVector; /// A type for scalarized values in the new loop. 
Each value from the /// original loop, when scalarized, is represented by UF x VF scalar values /// in the new unrolled loop, where UF is the unroll factor and VF is the /// vectorization factor. - typedef SmallVector, 2> ScalarParts; + using ScalarParts = SmallVector, 2>; // When we if-convert we need to create edge masks. We have to cache values // so that we don't end up with exponential recursion/IR. - typedef DenseMap, VectorParts> - EdgeMaskCacheTy; - typedef DenseMap BlockMaskCacheTy; + using EdgeMaskCacheTy = + DenseMap, VectorParts>; + using BlockMaskCacheTy = DenseMap; /// Set up the values of the IVs correctly when exiting the vector loop. void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, @@ -593,9 +621,11 @@ class InnerLoopVectorizer { /// Emit a bypass check to see if the vector trip count is zero, including if /// it overflows. void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass); + /// Emit a bypass check to see if all of the SCEV assumptions we've /// had to make are correct. void emitSCEVChecks(Loop *L, BasicBlock *Bypass); + /// Emit bypass checks to check any memory assumptions we may have made. void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass); @@ -623,22 +653,30 @@ class InnerLoopVectorizer { /// The original loop. Loop *OrigLoop; + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. Applies /// dynamic knowledge to simplify SCEV expressions and converts them to a /// more usable form. PredicatedScalarEvolution &PSE; + /// Loop Info. LoopInfo *LI; + /// Dominator Tree. DominatorTree *DT; + /// Alias Analysis. AliasAnalysis *AA; + /// Target Library Info. const TargetLibraryInfo *TLI; + /// Target Transform Info. const TargetTransformInfo *TTI; + /// Assumption Cache. AssumptionCache *AC; + /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; @@ -664,23 +702,30 @@ class InnerLoopVectorizer { /// The vector-loop preheader. BasicBlock *LoopVectorPreHeader; + /// The scalar-loop preheader. BasicBlock *LoopScalarPreHeader; + /// Middle Block between the vector and the scalar. BasicBlock *LoopMiddleBlock; + /// The ExitBlock of the scalar loop. BasicBlock *LoopExitBlock; + /// The vector loop body. BasicBlock *LoopVectorBody; + /// The scalar loop body. BasicBlock *LoopScalarBody; + /// A list of all bypass blocks. The first block is the entry of the loop. SmallVector LoopBypassBlocks; /// The new Induction variable which was added to the new block. - PHINode *Induction; + PHINode *Induction = nullptr; + /// The induction variable of the old basic block. - PHINode *OldInduction; + PHINode *OldInduction = nullptr; /// Maps values from the original loop to their corresponding values in the /// vectorized loop. A key value can map to either vector values, scalar @@ -690,12 +735,15 @@ class InnerLoopVectorizer { /// Store instructions that were predicated. SmallVector PredicatedInstructions; + EdgeMaskCacheTy EdgeMaskCache; BlockMaskCacheTy BlockMaskCache; + /// Trip count of the original loop. - Value *TripCount; + Value *TripCount = nullptr; + /// Trip count of the widened loop (TripCount - TripCount % (VF*UF)) - Value *VectorTripCount; + Value *VectorTripCount = nullptr; /// The legality analysis. LoopVectorizationLegality *Legal; @@ -704,13 +752,11 @@ class InnerLoopVectorizer { LoopVectorizationCostModel *Cost; // Record whether runtime checks are added. - bool AddedSafetyChecks; + bool AddedSafetyChecks = false; // Holds the end values for each induction variable. 
We save the end values // so we can later fix-up the external users of the induction variables. DenseMap IVEndValues; - - friend class LoopVectorizationPlanner; }; class InnerLoopUnroller : public InnerLoopVectorizer { @@ -733,6 +779,8 @@ class InnerLoopUnroller : public InnerLoopVectorizer { Value *reverseVector(Value *Vec) override; }; +} // end namespace llvm + /// \brief Look for a meaningful debug location on the instruction or it's /// operands. static Instruction *getDebugLocFromInstOrOperands(Instruction *I) { @@ -802,8 +850,6 @@ void InnerLoopVectorizer::addMetadata(ArrayRef To, } } -} // namespace llvm - namespace { /// \brief The group of interleaved loads/stores sharing the same stride and @@ -835,7 +881,7 @@ namespace { class InterleaveGroup { public: InterleaveGroup(Instruction *Instr, int Stride, unsigned Align) - : Align(Align), SmallestKey(0), LargestKey(0), InsertPos(Instr) { + : Align(Align), InsertPos(Instr) { assert(Align && "The alignment should be non-zero"); Factor = std::abs(Stride); @@ -913,8 +959,8 @@ class InterleaveGroup { bool Reverse; unsigned Align; DenseMap Members; - int SmallestKey; - int LargestKey; + int SmallestKey = 0; + int LargestKey = 0; // To avoid breaking dependences, vectorized instructions of an interleave // group should be inserted at either the first load or the last store in @@ -942,8 +988,7 @@ class InterleavedAccessInfo { public: InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT, LoopInfo *LI) - : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(nullptr), - RequiresScalarEpilogue(false) {} + : PSE(PSE), TheLoop(L), DT(DT), LI(LI) {} ~InterleavedAccessInfo() { SmallSet DelSet; @@ -985,15 +1030,16 @@ class InterleavedAccessInfo { /// The interleaved access analysis can also add new predicates (for example /// by versioning strides of pointers). PredicatedScalarEvolution &PSE; + Loop *TheLoop; DominatorTree *DT; LoopInfo *LI; - const LoopAccessInfo *LAI; + const LoopAccessInfo *LAI = nullptr; /// True if the loop may contain non-reversed interleaved groups with /// out-of-bounds accesses. We ensure we don't speculatively access memory /// out-of-bounds by executing at least one scalar epilogue iteration. - bool RequiresScalarEpilogue; + bool RequiresScalarEpilogue = false; /// Holds the relationships between the members and the interleave group. DenseMap InterleaveGroupMap; @@ -1004,21 +1050,26 @@ class InterleavedAccessInfo { /// \brief The descriptor for a strided memory access. struct StrideDescriptor { + StrideDescriptor() = default; StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size, unsigned Align) : Stride(Stride), Scev(Scev), Size(Size), Align(Align) {} - StrideDescriptor() = default; - // The access's stride. It is negative for a reverse access. int64_t Stride = 0; - const SCEV *Scev = nullptr; // The scalar expression of this access - uint64_t Size = 0; // The size of the memory object. - unsigned Align = 0; // The alignment of this access. + + // The scalar expression of this access. + const SCEV *Scev = nullptr; + + // The size of the memory object. + uint64_t Size = 0; + + // The alignment of this access. + unsigned Align = 0; }; /// \brief A type for holding instructions and their stride descriptors. - typedef std::pair StrideEntry; + using StrideEntry = std::pair; /// \brief Create a new interleave group with the given instruction \p Instr, /// stride \p Stride and alignment \p Align. 
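A brief note on the cleanup pattern in the hunks above (SmallestKey/LargestKey here, and the many fields switched to `= nullptr`/`= 0`/`= false` throughout this patch), with illustrative code that is not from the patch: in-class default member initializers apply to every constructor, so constructors that only set some fields cannot leave the rest indeterminate:

    struct InterleaveGroupLike { // invented name, shaped after the class above
      unsigned Align;            // still set explicitly by each constructor
      int SmallestKey = 0;       // defaulted once, for all constructors
      int LargestKey = 0;

      explicit InterleaveGroupLike(unsigned Align) : Align(Align) {}
      // A constructor added later picks up SmallestKey/LargestKey == 0 free.
    };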
@@ -1069,7 +1120,6 @@ class InterleavedAccessInfo { /// not necessary or is prevented because \p A and \p B may be dependent. bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A, StrideEntry *B) const { - // Code motion for interleaved accesses can potentially hoist strided loads // and sink strided stores. The code below checks the legality of the // following two conditions: @@ -1162,18 +1212,21 @@ class LoopVectorizeHints { /// Vectorization width. Hint Width; + /// Vectorization interleave factor. Hint Interleave; + /// Vectorization forced Hint Force; /// Already Vectorized Hint IsVectorized; + /// Return the loop metadata prefix. static StringRef Prefix() { return "llvm.loop."; } /// True if there is any unsafe math in the loop. - bool PotentiallyUnsafe; + bool PotentiallyUnsafe = false; public: enum ForceKind { @@ -1188,8 +1241,7 @@ class LoopVectorizeHints { HK_WIDTH), Interleave("interleave.count", DisableInterleaving, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), - IsVectorized("isvectorized", 0, HK_ISVECTORIZED), - PotentiallyUnsafe(false), TheLoop(L), ORE(ORE) { + IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. getHintsFromMetadata(); @@ -1248,25 +1300,30 @@ class LoopVectorizeHints { /// Dumps all the hint information. void emitRemarkWithHints() const { using namespace ore; - if (Force.Value == LoopVectorizeHints::FK_Disabled) - ORE.emit(OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", + + ORE.emit([&]() { + if (Force.Value == LoopVectorizeHints::FK_Disabled) + return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", TheLoop->getStartLoc(), TheLoop->getHeader()) - << "loop not vectorized: vectorization is explicitly disabled"); - else { - OptimizationRemarkMissed R(LV_NAME, "MissedDetails", - TheLoop->getStartLoc(), TheLoop->getHeader()); - R << "loop not vectorized"; - if (Force.Value == LoopVectorizeHints::FK_Enabled) { - R << " (Force=" << NV("Force", true); - if (Width.Value != 0) - R << ", Vector Width=" << NV("VectorWidth", Width.Value); - if (Interleave.Value != 0) - R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); - R << ")"; + << "loop not vectorized: vectorization is explicitly disabled"; + else { + OptimizationRemarkMissed R(LV_NAME, "MissedDetails", + TheLoop->getStartLoc(), + TheLoop->getHeader()); + R << "loop not vectorized"; + if (Force.Value == LoopVectorizeHints::FK_Enabled) { + R << " (Force=" << NV("Force", true); + if (Width.Value != 0) + R << ", Vector Width=" << NV("VectorWidth", Width.Value); + if (Interleave.Value != 0) + R << ", Interleave Count=" + << NV("InterleaveCount", Interleave.Value); + R << ")"; + } + return R; } - ORE.emit(R); - } + }); } unsigned getWidth() const { return Width.Value; } @@ -1390,7 +1447,7 @@ class LoopVectorizeHints { /// Sets current hints into loop metadata, keeping other values intact. void writeHintsToMetadata(ArrayRef HintTypes) { - if (HintTypes.size() == 0) + if (HintTypes.empty()) return; // Reserve the first element to LoopID (see below). @@ -1426,6 +1483,8 @@ class LoopVectorizeHints { OptimizationRemarkEmitter &ORE; }; +} // end anonymous namespace + static void emitMissedWarning(Function *F, Loop *L, const LoopVectorizeHints &LH, OptimizationRemarkEmitter *ORE) { @@ -1447,6 +1506,8 @@ static void emitMissedWarning(Function *F, Loop *L, } } +namespace { + /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. 
/// This class does not look at the profitability of vectorization, only the @@ -1469,22 +1530,20 @@ class LoopVectorizationLegality { std::function *GetLAA, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H) - : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TTI(TTI), DT(DT), - GetLAA(GetLAA), LAI(nullptr), ORE(ORE), InterleaveInfo(PSE, L, DT, LI), - PrimaryInduction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false), - Requirements(R), Hints(H) {} + : TheLoop(L), PSE(PSE), TLI(TLI), TTI(TTI), DT(DT), GetLAA(GetLAA), + ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. - typedef DenseMap ReductionList; + using ReductionList = DenseMap; /// InductionList saves induction variables and maps them to the /// induction descriptor. - typedef MapVector InductionList; + using InductionList = MapVector; /// RecurrenceSet contains the phi nodes that are recurrences other than /// inductions and reductions. - typedef SmallPtrSet RecurrenceSet; + using RecurrenceSet = SmallPtrSet; /// Returns true if it is legal to vectorize this loop. /// This does not mean that it is profitable to vectorize this @@ -1571,21 +1630,25 @@ class LoopVectorizationLegality { bool isLegalMaskedStore(Type *DataType, Value *Ptr) { return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType); } + /// Returns true if the target machine supports masked load operation /// for the given \p DataType and kind of access to \p Ptr. bool isLegalMaskedLoad(Type *DataType, Value *Ptr) { return isConsecutivePtr(Ptr) && TTI->isLegalMaskedLoad(DataType); } + /// Returns true if the target machine supports masked scatter operation /// for the given \p DataType. bool isLegalMaskedScatter(Type *DataType) { return TTI->isLegalMaskedScatter(DataType); } + /// Returns true if the target machine supports masked gather operation /// for the given \p DataType. bool isLegalMaskedGather(Type *DataType) { return TTI->isLegalMaskedGather(DataType); } + /// Returns true if the target machine can represent \p V as a masked gather /// or scatter operation. bool isLegalGatherOrScatter(Value *V) { @@ -1601,6 +1664,7 @@ class LoopVectorizationLegality { /// Returns true if vector representation of the instruction \p I /// requires mask. bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); } + unsigned getNumStores() const { return LAI->getNumStores(); } unsigned getNumLoads() const { return LAI->getNumLoads(); } unsigned getNumPredStores() const { return NumPredStores; } @@ -1666,27 +1730,34 @@ class LoopVectorizationLegality { return LAI ? &LAI->getSymbolicStrides() : nullptr; } - unsigned NumPredStores; + unsigned NumPredStores = 0; /// The loop that we evaluate. Loop *TheLoop; + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. /// Applies dynamic knowledge to simplify SCEV expressions in the context /// of existing SCEV assumptions. The analysis will also add a minimal set /// of new predicates if this is required to enable vectorization and /// unrolling. PredicatedScalarEvolution &PSE; + /// Target Library Info. TargetLibraryInfo *TLI; + /// Target Transform Info const TargetTransformInfo *TTI; + /// Dominator Tree. DominatorTree *DT; + // LoopAccess analysis. std::function *GetLAA; + // And the loop-accesses info corresponding to this loop. This pointer is // null until canVectorizeMemory sets it up. 
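Context for the ORE.emit changes in the emitRemarkWithHints hunk above (and the matching SimplifyLibCalls hunk earlier): passing a callable instead of a ready-made remark defers construction until the emitter knows remarks are enabled. A sketch reusing names from the hunk; the remark name "Example" is invented and the exact overload set is assumed:

    // Eager form: the remark (and its string payload) is constructed even
    // when remark output is disabled.
    ORE.emit(OptimizationRemarkMissed(LV_NAME, "Example",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "built unconditionally");

    // Lazy form used by this patch: the lambda runs only when the emitter
    // decides a remark will actually be recorded.
    ORE.emit([&]() {
      return OptimizationRemarkMissed(LV_NAME, "Example",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "built on demand";
    });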
- const LoopAccessInfo *LAI; + const LoopAccessInfo *LAI = nullptr; + /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; @@ -1698,27 +1769,32 @@ class LoopVectorizationLegality { /// Holds the primary induction variable. This is the counter of the /// loop. - PHINode *PrimaryInduction; + PHINode *PrimaryInduction = nullptr; + /// Holds the reduction variables. ReductionList Reductions; + /// Holds all of the induction variables that we found in the loop. /// Notice that inductions don't need to start at zero and that induction /// variables can be pointers. InductionList Inductions; + /// Holds the phi nodes that are first-order recurrences. RecurrenceSet FirstOrderRecurrences; + /// Holds instructions that need to sink past other instructions to handle /// first-order recurrences. DenseMap SinkAfter; + /// Holds the widest induction type encountered. - Type *WidestIndTy; + Type *WidestIndTy = nullptr; /// Allowed outside users. This holds the induction and reduction /// vars which can be accessed from outside the loop. SmallPtrSet AllowedExit; /// Can we assume the absence of NaNs. - bool HasFunNoNaNAttr; + bool HasFunNoNaNAttr = false; /// Vectorization requirements that will go through late-evaluation. LoopVectorizationRequirements *Requirements; @@ -1756,9 +1832,13 @@ class LoopVectorizationCostModel { /// Information about vectorization costs struct VectorizationFactor { - unsigned Width; // Vector width with best cost - unsigned Cost; // Cost of the loop with that width + // Vector width with best cost + unsigned Width; + + // Cost of the loop with that width + unsigned Cost; }; + /// \return The most profitable vectorization factor and the cost of that VF. /// This method checks every power of two up to MaxVF. If UserVF is not ZERO /// then this vectorization factor will be selected if vectorization is @@ -1797,8 +1877,10 @@ class LoopVectorizationCostModel { struct RegisterUsage { /// Holds the number of loop invariant values that are used in the loop. unsigned LoopInvariantRegs; + /// Holds the maximum number of concurrent live intervals in the loop. unsigned MaxLocalUsers; + /// Holds the number of instructions in the loop. unsigned NumInstructions; }; @@ -1911,7 +1993,6 @@ class LoopVectorizationCostModel { /// is an induction variable. Such a truncate will be removed by adding a new /// induction variable with the destination type. bool isOptimizableIVTruncate(Instruction *I, unsigned VF) { - // If the instruction is not a truncate, return false. auto *Trunc = dyn_cast(I); if (!Trunc) @@ -1962,7 +2043,7 @@ class LoopVectorizationCostModel { /// is /// false, then all operations will be scalarized (i.e. no vectorization has /// actually taken place). - typedef std::pair VectorizationCostTy; + using VectorizationCostTy = std::pair; /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different @@ -2019,7 +2100,7 @@ class LoopVectorizationCostModel { /// A type representing the costs for instructions if they were to be /// scalarized rather than vectorized. The entries are Instruction-Cost /// pairs. - typedef DenseMap ScalarCostsTy; + using ScalarCostsTy = DenseMap; /// A set containing all BasicBlocks that are known to present after /// vectorization as a predicated block. @@ -2071,37 +2152,47 @@ class LoopVectorizationCostModel { /// Keeps cost model vectorization decision and cost for instructions. /// Right now it is used for memory instructions only. 
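[Editor's note: members such as `LAI`, `PrimaryInduction`, `WidestIndTy`, and `HasFunNoNaNAttr` move from the constructor's init list to in-class default member initializers above. A standalone before/after sketch of the idiom, with hypothetical names; constructors then only mention members that genuinely vary.]

```cpp
// Before: every constructor must remember to initialize each member.
struct LegacyStyle {
  LegacyStyle() : Counter(0), Cache(nullptr), SawNaN(false) {}
  unsigned Counter;
  void *Cache;
  bool SawNaN;
};

// After: the defaults live next to the declarations, so new
// constructors cannot accidentally leave a member uninitialized.
struct ModernStyle {
  ModernStyle() = default;
  unsigned Counter = 0;
  void *Cache = nullptr;
  bool SawNaN = false;
};

int main() {
  ModernStyle M;
  return M.Counter; // 0: the default member initializer applied
}
```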
- typedef DenseMap, - std::pair> - DecisionList; + using DecisionList = DenseMap, + std::pair>; DecisionList WideningDecisions; public: /// The loop that we evaluate. Loop *TheLoop; + /// Predicated scalar evolution analysis. PredicatedScalarEvolution &PSE; + /// Loop Info analysis. LoopInfo *LI; + /// Vectorization legality. LoopVectorizationLegality *Legal; + /// Vector target information. const TargetTransformInfo &TTI; + /// Target Library Info. const TargetLibraryInfo *TLI; + /// Demanded bits analysis. DemandedBits *DB; + /// Assumption cache. AssumptionCache *AC; + /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; const Function *TheFunction; + /// Loop Vectorize Hint. const LoopVectorizeHints *Hints; + /// Values to ignore in the cost model. SmallPtrSet ValuesToIgnore; + /// Values to ignore in the cost model when VF > 1. SmallPtrSet VecValuesToIgnore; }; @@ -2109,6 +2200,7 @@ class LoopVectorizationCostModel { } // end anonymous namespace namespace llvm { + /// InnerLoopVectorizer vectorizes loops which contain only one basic /// LoopVectorizationPlanner - drives the vectorization process after having /// passed Legality checks. @@ -2137,16 +2229,15 @@ class LoopVectorizationPlanner { SmallVector VPlans; - unsigned BestVF; - unsigned BestUF; + unsigned BestVF = 0; + unsigned BestUF = 0; public: LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM) - : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), - BestVF(0), BestUF(0) {} + : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM) {} ~LoopVectorizationPlanner() { while (!VPlans.empty()) { @@ -2182,8 +2273,11 @@ class LoopVectorizationPlanner { /// adjustable end. The range includes start and excludes end, e.g.,: /// [1, 9) = {1, 2, 4, 8} struct VFRange { - const unsigned Start; // A power of 2. - unsigned End; // Need not be a power of 2. If End <= Start range is empty. + // A power of 2. + const unsigned Start; + + // Need not be a power of 2. If End <= Start range is empty. + unsigned End; }; /// Test a \p Predicate on a \p Range of VF's. Return the value of applying @@ -2215,14 +2309,13 @@ class LoopVectorizationPlanner { VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I, VFRange &Range); - /// Check if \I can be widened within the given VF \p Range. If \I can be - /// widened for Range.Start, extend \p LastWidenRecipe to include \p I if - /// possible or else build a new VPWidenRecipe for it, and return the - /// VPWidenRecipe that includes \p I. If \p I cannot be widened for - /// Range.Start \return null. Range.End may be decreased to ensure same - /// decision from \p Range.Start to \p Range.End. - VPWidenRecipe *tryToWiden(Instruction *I, VPWidenRecipe *LastWidenRecipe, - VFRange &Range); + /// Check if \p I can be widened within the given VF \p Range. If \p I can be + /// widened for \p Range.Start, check if the last recipe of \p VPBB can be + /// extended to include \p I or else build a new VPWidenRecipe for it and + /// append it to \p VPBB. Return true if \p I can be widened for Range.Start, + /// false otherwise. Range.End may be decreased to ensure same decision from + /// \p Range.Start to \p Range.End. + bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range); /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it /// is predicated. 
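[Editor's note: `WideningDecisions` above keys a `DenseMap` on an (instruction, VF) pair. LLVM provides a `DenseMapInfo` specialization for `std::pair`, so pair keys work out of the box. A hedged sketch of that shape; the key and value types here are stand-ins, not the exact ones from the patch.]

```cpp
#include "llvm/ADT/DenseMap.h"
#include <utility>

using namespace llvm;

// A per-(id, VF) decision cache keyed on a pair. DenseMapInfo for
// std::pair is provided as long as both elements have one.
using DecisionCache = DenseMap<std::pair<unsigned, unsigned>, int>;

int lookupOrCompute(DecisionCache &Cache, unsigned Id, unsigned VF) {
  auto Key = std::make_pair(Id, VF);
  auto It = Cache.find(Key);
  if (It != Cache.end())
    return It->second;           // cached decision
  int Decision = (Id % 2) ? 1 : 0; // placeholder computation
  Cache[Key] = Decision;
  return Decision;
}
```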
\return \p VPBB augmented with this new recipe if \p I is @@ -2245,7 +2338,7 @@ class LoopVectorizationPlanner { VPlan *buildVPlan(VFRange &Range); }; -} // namespace llvm +} // end namespace llvm namespace { @@ -2263,8 +2356,7 @@ namespace { /// followed by a non-expert user. class LoopVectorizationRequirements { public: - LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) - : NumRuntimePointerChecks(0), UnsafeAlgebraInst(nullptr), ORE(ORE) {} + LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {} void addUnsafeAlgebraInst(Instruction *I) { // First unsafe algebra instruction. @@ -2278,12 +2370,14 @@ class LoopVectorizationRequirements { const char *PassName = Hints.vectorizeAnalysisPassName(); bool Failed = false; if (UnsafeAlgebraInst && !Hints.allowReordering()) { - ORE.emit( - OptimizationRemarkAnalysisFPCommute(PassName, "CantReorderFPOps", - UnsafeAlgebraInst->getDebugLoc(), - UnsafeAlgebraInst->getParent()) - << "loop not vectorized: cannot prove it is safe to reorder " - "floating-point operations"); + ORE.emit([&]() { + return OptimizationRemarkAnalysisFPCommute( + PassName, "CantReorderFPOps", + UnsafeAlgebraInst->getDebugLoc(), + UnsafeAlgebraInst->getParent()) + << "loop not vectorized: cannot prove it is safe to reorder " + "floating-point operations"; + }); Failed = true; } @@ -2294,11 +2388,13 @@ class LoopVectorizationRequirements { NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; if ((ThresholdReached && !Hints.allowReordering()) || PragmaThresholdReached) { - ORE.emit(OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", + ORE.emit([&]() { + return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", L->getStartLoc(), L->getHeader()) << "loop not vectorized: cannot prove it is safe to reorder " - "memory operations"); + "memory operations"; + }); DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); Failed = true; } @@ -2307,13 +2403,15 @@ class LoopVectorizationRequirements { } private: - unsigned NumRuntimePointerChecks; - Instruction *UnsafeAlgebraInst; + unsigned NumRuntimePointerChecks = 0; + Instruction *UnsafeAlgebraInst = nullptr; /// Interface to emit optimization remarks. OptimizationRemarkEmitter &ORE; }; +} // end anonymous namespace + static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl &V) { if (L.empty()) { if (!hasCyclesInLoopBody(L)) @@ -2324,11 +2422,15 @@ static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl &V) { addAcyclicInnerLoop(*InnerL, V); } +namespace { + /// The LoopVectorize Pass. 
struct LoopVectorize : public FunctionPass { /// Pass identification, replacement for typeid static char ID; + LoopVectorizePass Impl; + explicit LoopVectorize(bool NoUnrolling = false, bool AlwaysVectorize = true) : FunctionPass(ID) { Impl.DisableUnrolling = NoUnrolling; @@ -2336,8 +2438,6 @@ struct LoopVectorize : public FunctionPass { initializeLoopVectorizePass(*PassRegistry::getPassRegistry()); } - LoopVectorizePass Impl; - bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; @@ -2486,11 +2586,10 @@ bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const { auto *I = cast(U); return (OrigLoop->contains(I) && shouldScalarizeInstruction(I)); }; - return any_of(IV->users(), isScalarInst); + return llvm::any_of(IV->users(), isScalarInst); } void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) { - assert((IV->getType()->isIntegerTy() || IV != OldInduction) && "Primary induction variable must have an integer type"); @@ -2644,7 +2743,6 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step, void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, Value *EntryVal, const InductionDescriptor &ID) { - // We shouldn't have to build scalar steps if we aren't vectorizing. assert(VF > 1 && "VF should be greater than one"); @@ -2683,7 +2781,6 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step, } int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { - const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); @@ -2714,7 +2811,6 @@ Value *InnerLoopVectorizer::getOrCreateVectorValue(Value *V, unsigned Part) { // instead. If it has been scalarized, and we actually need the value in // vector form, we will construct the vector values on demand. if (VectorLoopValueMap.hasAnyScalarValue(V)) { - Value *ScalarValue = VectorLoopValueMap.getScalarValue(V, {Part, 0}); // If we've scalarized a value, that value should be an instruction. @@ -2912,7 +3008,6 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) { // Vectorize the interleaved load group. if (isa(Instr)) { - // For each unroll part, create a wide load for the group. SmallVector NewLoads; for (unsigned Part = 0; Part < UF; Part++) { @@ -3645,22 +3740,27 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, } namespace { + struct CSEDenseMapInfo { static bool canHandle(const Instruction *I) { return isa(I) || isa(I) || isa(I) || isa(I); } + static inline Instruction *getEmptyKey() { return DenseMapInfo::getEmptyKey(); } + static inline Instruction *getTombstoneKey() { return DenseMapInfo::getTombstoneKey(); } + static unsigned getHashValue(const Instruction *I) { assert(canHandle(I) && "Unknown instruction!"); return hash_combine(I->getOpcode(), hash_combine_range(I->value_op_begin(), I->value_op_end())); } + static bool isEqual(const Instruction *LHS, const Instruction *RHS) { if (LHS == getEmptyKey() || RHS == getEmptyKey() || LHS == getTombstoneKey() || RHS == getTombstoneKey()) @@ -3668,7 +3768,8 @@ struct CSEDenseMapInfo { return LHS->isIdenticalTo(RHS); } }; -} + +} // end anonymous namespace ///\brief Perform cse of induction variable instructions. static void cse(BasicBlock *BB) { @@ -3800,7 +3901,6 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() { // For every instruction `I` in MinBWs, truncate the operands, create a // truncated version of `I` and reextend its result. InstCombine runs // later and will remove any ext/trunc pairs. 
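[Editor's note: the unqualified `any_of`/`all_of` calls become explicitly `llvm::`-qualified in the hunks above, keeping name lookup unambiguous where `std::` overloads could also be found. A short sketch of the range-based helper from `STLExtras.h`, which takes a whole range rather than an iterator pair.]

```cpp
#include "llvm/ADT/STLExtras.h"
#include <vector>

bool anyNegative(const std::vector<int> &Vals) {
  // llvm::any_of takes the range directly; qualifying the call avoids
  // any ambiguity with std::any_of, which takes an iterator pair.
  return llvm::any_of(Vals, [](int V) { return V < 0; });
}
```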
- // SmallPtrSet Erased; for (const auto &KV : Cost->getMinimalBitwidths()) { // If the value wasn't vectorized, we must maintain the original scalar @@ -3977,7 +4077,6 @@ void InnerLoopVectorizer::fixCrossIterationPHIs() { } void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) { - // This is the second phase of vectorizing first-order recurrences. An // overview of the transformation is described below. Suppose we have the // following loop. @@ -4235,7 +4334,8 @@ void InnerLoopVectorizer::fixReduction(PHINode *Phi) { // entire expression in the smaller type. if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) { Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); - Builder.SetInsertPoint(LoopVectorBody->getTerminator()); + Builder.SetInsertPoint( + LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator()); VectorParts RdxParts(UF); for (unsigned Part = 0; Part < UF; ++Part) { RdxParts[Part] = VectorLoopValueMap.getVectorValue(LoopExitInst, Part); @@ -4341,7 +4441,6 @@ void InnerLoopVectorizer::fixLCSSAPHIs() { } void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { - // The basic block and loop containing the predicated instruction. auto *PredBB = PredInst->getParent(); auto *VectorLoop = LI->getLoopFor(PredBB); @@ -4370,7 +4469,6 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { // through the worklist doesn't sink a single instruction. bool Changed; do { - // Add the instructions that need to be reanalyzed to the worklist, and // reset the changed indicator. Worklist.insert(InstsToReanalyze.begin(), InstsToReanalyze.end()); @@ -4389,7 +4487,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { // It's legal to sink the instruction if all its uses occur in the // predicated block. Otherwise, there's nothing to do yet, and we may // need to reanalyze the instruction. - if (!all_of(I->uses(), isBlockOfUsePredicated)) { + if (!llvm::all_of(I->uses(), isBlockOfUsePredicated)) { InstsToReanalyze.push_back(I); continue; } @@ -4635,7 +4733,6 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) { // values in the vector mapping with initVector, as we do for other // instructions. for (unsigned Part = 0; Part < UF; ++Part) { - // The pointer operand of the new GEP. If it's loop-invariant, we // won't broadcast it. auto *Ptr = @@ -5168,7 +5265,6 @@ void LoopVectorizationLegality::addInductionPhi( } DEBUG(dbgs() << "LV: Found an induction variable.\n"); - return; } bool LoopVectorizationLegality::canVectorizeInstrs() { @@ -5319,7 +5415,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { << "value cannot be used outside the loop"); return false; } - } // next instr. } @@ -5342,7 +5437,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { - // We should not collect Scalars more than once per VF. Right now, this // function is called from collectUniformsAndScalars(), which already does // this check. Collecting Scalars for VF=1 does not make any sense. @@ -5385,7 +5479,6 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // place the pointer in ScalarPtrs. Otherwise, the pointer is placed in // PossibleNonScalarPtrs. auto evaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) { - // We only care about bitcast and getelementptr instructions contained in // the loop. 
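[Editor's note: in the `fixReduction` hunk above, the truncation code is now inserted at the terminator of the vector loop's latch, resolved through `LoopInfo`, rather than at `LoopVectorBody`'s own terminator; presumably so the code still lands on the back edge if the vector body grows extra blocks. A hedged sketch of resolving that insert point, assuming the block is inside a loop with a unique latch.]

```cpp
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Position a builder at the latch terminator of the loop containing BB.
void setInsertAtLatch(IRBuilder<> &Builder, LoopInfo &LI, BasicBlock *BB) {
  if (Loop *L = LI.getLoopFor(BB))          // innermost loop holding BB
    if (BasicBlock *Latch = L->getLoopLatch()) // null if latch not unique
      Builder.SetInsertPoint(Latch->getTerminator());
}
```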
if (!isLoopVaryingBitCastOrGEP(Ptr)) @@ -5400,7 +5493,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // If the use of the pointer will be a scalar use, and all users of the // pointer are memory accesses, place the pointer in ScalarPtrs. Otherwise, // place the pointer in PossibleNonScalarPtrs. - if (isScalarUse(MemAccess, Ptr) && all_of(I->users(), [&](User *U) { + if (isScalarUse(MemAccess, Ptr) && llvm::all_of(I->users(), [&](User *U) { return isa(U) || isa(U); })) ScalarPtrs.insert(I); @@ -5472,7 +5565,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { if (!isLoopVaryingBitCastOrGEP(Dst->getOperand(0))) continue; auto *Src = cast(Dst->getOperand(0)); - if (all_of(Src->users(), [&](User *U) -> bool { + if (llvm::all_of(Src->users(), [&](User *U) -> bool { auto *J = cast(U); return !TheLoop->contains(J) || Worklist.count(J) || ((isa(J) || isa(J)) && @@ -5499,7 +5592,7 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // Determine if all users of the induction variable are scalar after // vectorization. - auto ScalarInd = all_of(Ind->users(), [&](User *U) -> bool { + auto ScalarInd = llvm::all_of(Ind->users(), [&](User *U) -> bool { auto *I = cast(U); return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I); }); @@ -5508,10 +5601,11 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // Determine if all users of the induction variable update instruction are // scalar after vectorization. - auto ScalarIndUpdate = all_of(IndUpdate->users(), [&](User *U) -> bool { - auto *I = cast(U); - return I == Ind || !TheLoop->contains(I) || Worklist.count(I); - }); + auto ScalarIndUpdate = + llvm::all_of(IndUpdate->users(), [&](User *U) -> bool { + auto *I = cast(U); + return I == Ind || !TheLoop->contains(I) || Worklist.count(I); + }); if (!ScalarIndUpdate) continue; @@ -5571,7 +5665,6 @@ bool LoopVectorizationLegality::memoryInstructionCanBeWidened(Instruction *I, } void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { - // We should not collect Uniforms more than once per VF. Right now, // this function is called from collectUniformsAndScalars(), which // already does this check. Collecting Uniforms for VF=1 does not make any @@ -5634,7 +5727,6 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // the getelementptr won't remain uniform. for (auto *BB : TheLoop->blocks()) for (auto &I : *BB) { - // If there's no pointer operand, there's nothing to do. auto *Ptr = dyn_cast_or_null(getPointerOperand(&I)); if (!Ptr) @@ -5642,9 +5734,10 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // True if all users of Ptr are memory accesses that have Ptr as their // pointer operand. - auto UsersAreMemAccesses = all_of(Ptr->users(), [&](User *U) -> bool { - return getPointerOperand(U) == Ptr; - }); + auto UsersAreMemAccesses = + llvm::all_of(Ptr->users(), [&](User *U) -> bool { + return getPointerOperand(U) == Ptr; + }); // Ensure the memory instruction will not be scalarized or used by // gather/scatter, making its pointer operand non-uniform. 
If the pointer @@ -5680,7 +5773,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { if (isOutOfScope(OV)) continue; auto *OI = cast(OV); - if (all_of(OI->users(), [&](User *U) -> bool { + if (llvm::all_of(OI->users(), [&](User *U) -> bool { auto *J = cast(U); return !TheLoop->contains(J) || Worklist.count(J) || (OI == getPointerOperand(J) && isUniformDecision(J, VF)); @@ -5709,7 +5802,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // Determine if all users of the induction variable are uniform after // vectorization. - auto UniformInd = all_of(Ind->users(), [&](User *U) -> bool { + auto UniformInd = llvm::all_of(Ind->users(), [&](User *U) -> bool { auto *I = cast(U); return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I) || isVectorizedMemAccessUse(I, Ind); @@ -5719,11 +5812,12 @@ void LoopVectorizationCostModel::collectLoopUniforms(unsigned VF) { // Determine if all users of the induction variable update instruction are // uniform after vectorization. - auto UniformIndUpdate = all_of(IndUpdate->users(), [&](User *U) -> bool { - auto *I = cast(U); - return I == Ind || !TheLoop->contains(I) || Worklist.count(I) || - isVectorizedMemAccessUse(I, IndUpdate); - }); + auto UniformIndUpdate = + llvm::all_of(IndUpdate->users(), [&](User *U) -> bool { + auto *I = cast(U); + return I == Ind || !TheLoop->contains(I) || Worklist.count(I) || + isVectorizedMemAccessUse(I, IndUpdate); + }); if (!UniformIndUpdate) continue; @@ -5742,9 +5836,10 @@ bool LoopVectorizationLegality::canVectorizeMemory() { InterleaveInfo.setLAI(LAI); const OptimizationRemarkAnalysis *LAR = LAI->getReport(); if (LAR) { - OptimizationRemarkAnalysis VR(Hints->vectorizeAnalysisPassName(), - "loop not vectorized: ", *LAR); - ORE->emit(VR); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), + "loop not vectorized: ", *LAR); + }); } if (!LAI->canVectorizeMemory()) return false; @@ -5840,7 +5935,6 @@ bool LoopVectorizationLegality::blockCanBePredicated( void InterleavedAccessInfo::collectConstStrideAccesses( MapVector &AccessStrideInfo, const ValueToValueMap &Strides) { - auto &DL = TheLoop->getHeader()->getModule()->getDataLayout(); // Since it's desired that the load/store instructions be maintained in @@ -5994,7 +6088,6 @@ void InterleavedAccessInfo::analyzeInterleaving( // but not with (4). If we did, the dependent access (3) would be within // the boundaries of the (2, 4) group. if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) { - // If a dependence exists and A is already in a group, we know that A // must be a store since A precedes B and WAR dependences are allowed. // Thus, A would be sunk below B. We release A's group to prevent this @@ -6092,9 +6185,7 @@ void InterleavedAccessInfo::analyzeInterleaving( // This means that we can forcefully peel the loop in order to only have to // check the first pointer for no-wrap. When we'll change to use Assume=true // we'll only need at most one runtime check per interleaved group. - // for (InterleaveGroup *Group : LoadGroups) { - // Case 1: A full group. Can Skip the checks; For full groups, if the wide // load would wrap around the address space we would do a memory access at // nullptr even without the transformation. 
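[Editor's note: throughout the patch, eager `ORE->emit(Remark)` calls become `ORE->emit([&]() { return Remark; })`, as in the `canVectorizeMemory` hunk above. The callable form defers building the remark, its `DebugLoc`, and its argument formatting until the emitter has confirmed remarks are enabled. A hedged sketch of the pattern with a stand-in remark name.]

```cpp
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/DiagnosticInfo.h"

using namespace llvm;

void reportSkip(OptimizationRemarkEmitter &ORE, Loop *L, const char *Pass) {
  // The lambda runs only if remarks for this pass are requested, so the
  // remark object and its string formatting are constructed lazily.
  ORE.emit([&]() {
    return OptimizationRemarkMissed(Pass, "NotBeneficial", L->getStartLoc(),
                                    L->getHeader())
           << "loop not transformed: deemed not beneficial";
  });
}
```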
@@ -6375,7 +6466,6 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() { unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, unsigned VF, unsigned LoopCost) { - // -- The interleave heuristics -- // We interleave the loop in order to expose ILP and reduce the loop overhead. // There are many micro-architectural considerations that we can't predict @@ -6463,7 +6553,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // Interleave if we vectorized this loop and there is a reduction that could // benefit from interleaving. - if (VF > 1 && Legal->getReductionVars()->size()) { + if (VF > 1 && !Legal->getReductionVars()->empty()) { DEBUG(dbgs() << "LV: Interleaving because of reductions.\n"); return IC; } @@ -6494,7 +6584,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // by this point), we can increase the critical path length if the loop // we're interleaving is inside another loop. Limit, by default to 2, so the // critical path only gets increased by one reduction operation. - if (Legal->getReductionVars()->size() && TheLoop->getLoopDepth() > 1) { + if (!Legal->getReductionVars()->empty() && TheLoop->getLoopDepth() > 1) { unsigned F = static_cast(MaxNestedScalarReductionIC); SmallIC = std::min(SmallIC, F); StoresIC = std::min(StoresIC, F); @@ -6513,7 +6603,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // Interleave if this is a large loop (small loops are already dealt with by // this point) that could benefit from interleaving. - bool HasReductions = (Legal->getReductionVars()->size() > 0); + bool HasReductions = !Legal->getReductionVars()->empty(); if (TTI.enableAggressiveInterleaving(HasReductions)) { DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n"); return IC; @@ -6551,7 +6641,8 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { // Each 'key' in the map opens a new interval. The values // of the map are the index of the 'last seen' usage of the // instruction that is the key. - typedef DenseMap IntervalMap; + using IntervalMap = DenseMap; + // Maps instruction to its index. DenseMap IdxToInstr; // Marks the end of each interval. @@ -6590,7 +6681,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { } // Saves the list of intervals that end with the index in 'key'. - typedef SmallVector InstrList; + using InstrList = SmallVector; DenseMap TransposeEnds; // Transpose the EndPoints to a list of values that end at each index. @@ -6685,7 +6776,6 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef VFs) { } void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) { - // If we aren't vectorizing the loop, or if we've already collected the // instructions to scalarize, there's nothing to do. Collection may already // have occurred if we have a user-selected VF and are now computing the @@ -6719,7 +6809,6 @@ void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) { int LoopVectorizationCostModel::computePredInstDiscount( Instruction *PredInst, DenseMap &ScalarCosts, unsigned VF) { - assert(!isUniformAfterVectorization(PredInst, VF) && "Instruction marked uniform-after-vectorization will be predicated"); @@ -6734,7 +6823,6 @@ int LoopVectorizationCostModel::computePredInstDiscount( // Returns true if the given instruction can be scalarized. 
auto canBeScalarized = [&](Instruction *I) -> bool { - // We only attempt to scalarize instructions forming a single-use chain // from the original predicated block that would otherwise be vectorized. // Although not strictly necessary, we give up on instructions we know will @@ -7028,7 +7116,6 @@ unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, unsigned LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, unsigned VF) { - // Calculate scalar cost only. Vectorization cost should be ready at this // moment. if (VF == 1) { @@ -7090,7 +7177,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) { } // Choose between Interleaving, Gather/Scatter or Scalarization. - unsigned InterleaveCost = UINT_MAX; + unsigned InterleaveCost = std::numeric_limits::max(); unsigned NumAccesses = 1; if (Legal->isAccessInterleaved(&I)) { auto Group = Legal->getInterleavedAccessGroup(&I); @@ -7107,7 +7194,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) { unsigned GatherScatterCost = Legal->isLegalGatherOrScatter(&I) ? getGatherScatterCost(&I, VF) * NumAccesses - : UINT_MAX; + : std::numeric_limits::max(); unsigned ScalarizationCost = getMemInstScalarizationCost(&I, VF) * NumAccesses; @@ -7165,7 +7252,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) { for (auto &Op : I->operands()) if (auto *InstOp = dyn_cast(Op)) if ((InstOp->getParent() == I->getParent()) && !isa(InstOp) && - AddrDefs.insert(InstOp).second == true) + AddrDefs.insert(InstOp).second) Worklist.push_back(InstOp); } @@ -7434,7 +7521,9 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } char LoopVectorize::ID = 0; + static const char lv_name[] = "Loop Vectorization"; + INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) @@ -7451,13 +7540,14 @@ INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { + Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) { return new LoopVectorize(NoUnrolling, AlwaysVectorize); } -} -bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { +} // end namespace llvm +bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { // Check if the pointer operand of a load or store instruction is // consecutive. if (auto *Ptr = getPointerOperand(Inst)) @@ -7480,7 +7570,6 @@ void LoopVectorizationCostModel::collectValuesToIgnore() { LoopVectorizationCostModel::VectorizationFactor LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) { - // Width 1 means no vectorize, cost 0 means uncomputed cost. 
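[Editor's note: the cost-decision hunk above replaces the `UINT_MAX` sentinel macro with the typed `std::numeric_limits` trait, and drops the redundant `== true` on `insert(...).second`. A tiny standalone example of the sentinel idiom.]

```cpp
#include <limits>

// A typed "no candidate yet" sentinel; unlike the UINT_MAX macro from
// <climits>, the trait stays correct if the cost type is ever changed.
unsigned cheapest(const unsigned *Costs, unsigned N) {
  unsigned Best = std::numeric_limits<unsigned>::max();
  for (unsigned I = 0; I < N; ++I)
    if (Costs[I] < Best)
      Best = Costs[I];
  return Best; // max() still set means "no candidate seen"
}
```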
const LoopVectorizationCostModel::VectorizationFactor NoVectorization = {1U, 0U}; @@ -7582,12 +7671,13 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions( for (auto &Induction : *Legal->getInductionVars()) { PHINode *Ind = Induction.first; auto *IndUpdate = cast(Ind->getIncomingValueForBlock(Latch)); - if (all_of(IndUpdate->users(), [&](User *U) -> bool { + if (llvm::all_of(IndUpdate->users(), [&](User *U) -> bool { return U == Ind || DeadInstructions.count(cast(U)); })) DeadInstructions.insert(IndUpdate); } } + Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; } Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; } @@ -7644,6 +7734,7 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) { } namespace { + /// VPWidenRecipe is a recipe for producing a copy of vector type for each /// Instruction in its ingredients independently, in order. This recipe covers /// most of the traditional vectorization cases where each ingredient transforms @@ -7660,7 +7751,7 @@ class VPWidenRecipe : public VPRecipeBase { Begin = End++; } - ~VPWidenRecipe() {} + ~VPWidenRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPRecipeBase *V) { @@ -7699,8 +7790,7 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase { public: VPWidenIntOrFpInductionRecipe(PHINode *IV, TruncInst *Trunc = nullptr) : VPRecipeBase(VPWidenIntOrFpInductionSC), IV(IV), Trunc(Trunc) {} - - ~VPWidenIntOrFpInductionRecipe() {} + ~VPWidenIntOrFpInductionRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPRecipeBase *V) { @@ -7733,8 +7823,7 @@ class VPWidenPHIRecipe : public VPRecipeBase { public: VPWidenPHIRecipe(PHINode *Phi) : VPRecipeBase(VPWidenPHISC), Phi(Phi) {} - - ~VPWidenPHIRecipe() {} + ~VPWidenPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPRecipeBase *V) { @@ -7761,8 +7850,7 @@ class VPInterleaveRecipe : public VPRecipeBase { public: VPInterleaveRecipe(const InterleaveGroup *IG) : VPRecipeBase(VPInterleaveSC), IG(IG) {} - - ~VPInterleaveRecipe() {} + ~VPInterleaveRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPRecipeBase *V) { @@ -7811,7 +7899,7 @@ class VPReplicateRecipe : public VPRecipeBase { AlsoPack = IsPredicated && !I->use_empty(); } - ~VPReplicateRecipe() {} + ~VPReplicateRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPRecipeBase *V) { @@ -7878,8 +7966,7 @@ class VPPredInstPHIRecipe : public VPRecipeBase { /// nodes after merging back from a Branch-on-Mask. VPPredInstPHIRecipe(Instruction *PredInst) : VPRecipeBase(VPPredInstPHISC), PredInst(PredInst) {} - - ~VPPredInstPHIRecipe() {} + ~VPPredInstPHIRecipe() override = default; /// Method to support type inquiry through isa, cast, and dyn_cast. 
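[Editor's note: the recipe destructors above change from empty bodies `~VPWidenRecipe() {}` to `~VPWidenRecipe() override = default;`. A standalone sketch with hypothetical names: `override` makes the compiler verify the base destructor is virtual, and `= default` avoids a user-provided body.]

```cpp
struct RecipeBase {
  virtual ~RecipeBase() = default; // virtual: deletable through base ptr
};

struct WidenRecipe : RecipeBase {
  // 'override' rejects this declaration if the base destructor ever
  // stops being virtual; '= default' keeps the destructor implicit.
  ~WidenRecipe() override = default;
};

int main() {
  RecipeBase *R = new WidenRecipe();
  delete R; // dispatches to ~WidenRecipe through the vtable
  return 0;
}
```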
static inline bool classof(const VPRecipeBase *V) { @@ -7896,6 +7983,7 @@ class VPPredInstPHIRecipe : public VPRecipeBase { << "\\l\""; } }; + } // end anonymous namespace bool LoopVectorizationPlanner::getDecisionAndClampRange( @@ -7987,11 +8075,10 @@ LoopVectorizationPlanner::tryToOptimizeInduction(Instruction *I, return nullptr; } -VPWidenRecipe *LoopVectorizationPlanner::tryToWiden( - Instruction *I, VPWidenRecipe *LastWidenRecipe, VFRange &Range) { - +bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB, + VFRange &Range) { if (Legal->isScalarWithPredication(I)) - return nullptr; + return false; auto IsVectorizableOpcode = [](unsigned Opcode) { switch (Opcode) { @@ -8040,13 +8127,13 @@ VPWidenRecipe *LoopVectorizationPlanner::tryToWiden( }; if (!IsVectorizableOpcode(I->getOpcode())) - return nullptr; + return false; if (CallInst *CI = dyn_cast(I)) { Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || ID == Intrinsic::lifetime_start)) - return nullptr; + return false; } auto willWiden = [&](unsigned VF) -> bool { @@ -8078,19 +8165,23 @@ VPWidenRecipe *LoopVectorizationPlanner::tryToWiden( }; if (!getDecisionAndClampRange(willWiden, Range)) - return nullptr; + return false; // Success: widen this instruction. We optimize the common case where // consecutive instructions can be represented by a single recipe. - if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I)) - return LastWidenRecipe; - return new VPWidenRecipe(I); + if (!VPBB->empty()) { + VPWidenRecipe *LastWidenRecipe = dyn_cast(&VPBB->back()); + if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I)) + return true; + } + + VPBB->appendRecipe(new VPWidenRecipe(I)); + return true; } VPBasicBlock *LoopVectorizationPlanner::handleReplication( Instruction *I, VFRange &Range, VPBasicBlock *VPBB, DenseMap &PredInst2Recipe) { - bool IsUniform = getDecisionAndClampRange( [&](unsigned VF) { return CM.isUniformAfterVectorization(I, VF); }, Range); @@ -8147,7 +8238,6 @@ LoopVectorizationPlanner::createReplicateRegion(Instruction *Instr, } VPlan *LoopVectorizationPlanner::buildVPlan(VFRange &Range) { - DenseMap &SinkAfter = Legal->getSinkAfter(); DenseMap SinkAfterInverse; @@ -8181,7 +8271,6 @@ VPlan *LoopVectorizationPlanner::buildVPlan(VFRange &Range) { auto *FirstVPBBForBB = new VPBasicBlock(BB->getName()); VPBB->setOneSuccessor(FirstVPBBForBB); VPBB = FirstVPBBForBB; - VPWidenRecipe *LastWidenRecipe = nullptr; std::vector Ingredients; @@ -8202,8 +8291,11 @@ VPlan *LoopVectorizationPlanner::buildVPlan(VFRange &Range) { if (IG && Instr != IG->getInsertPos() && Range.Start >= 2 && // Query is illegal for VF == 1 CM.getWideningDecision(Instr, Range.Start) == - LoopVectorizationCostModel::CM_Interleave) + LoopVectorizationCostModel::CM_Interleave) { + if (SinkAfterInverse.count(Instr)) + Ingredients.push_back(SinkAfterInverse.find(Instr)->second); continue; + } // Move instructions to handle first-order recurrences, step 1: avoid // handling this instruction until after we've handled the instruction it @@ -8249,12 +8341,8 @@ VPlan *LoopVectorizationPlanner::buildVPlan(VFRange &Range) { // Check if Instr is to be widened by a general VPWidenRecipe, after // having first checked for specific widening recipes that deal with // Interleave Groups, Inductions and Phi nodes. 
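[Editor's note: the rewritten `tryToWiden` below returns `bool` and, instead of threading a `LastWidenRecipe` pointer through the caller, inspects `VPBB->back()` to see whether the trailing recipe can absorb the instruction before creating a new one. A hedged sketch of that "extend the back element or open a new one" shape, using plain containers rather than the VPlan classes.]

```cpp
#include <string>
#include <vector>

// Group consecutive equal characters into runs, extending the trailing
// run when possible -- the same fold-into-back() shape tryToWiden uses.
struct Run {
  char C;
  unsigned Len = 0;
};

void appendChar(std::vector<Run> &Runs, char C) {
  if (!Runs.empty() && Runs.back().C == C) {
    ++Runs.back().Len; // extend the last run instead of making a new one
    return;
  }
  Runs.push_back({C, 1}); // otherwise open a fresh run
}

std::vector<Run> encode(const std::string &S) {
  std::vector<Run> Runs;
  for (char C : S)
    appendChar(Runs, C);
  return Runs;
}
```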
- if ((Recipe = tryToWiden(Instr, LastWidenRecipe, Range))) { - if (Recipe != LastWidenRecipe) - VPBB->appendRecipe(Recipe); - LastWidenRecipe = cast(Recipe); + if (tryToWiden(Instr, VPBB, Range)) continue; - } // Otherwise, if all widening options failed, Instruction is to be // replicated. This may create a successor for VPBB. @@ -8305,7 +8393,6 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const { } void VPReplicateRecipe::execute(VPTransformState &State) { - if (State.Instance) { // Generate a single instance. State.ILV->scalarizeInstruction(Ingredient, *State.Instance, IsPredicated); // Insert scalar instance packing it into a vector. @@ -8570,24 +8657,32 @@ bool LoopVectorizePass::processLoop(Loop *L) { const char *VAPassName = Hints.vectorizeAnalysisPassName(); if (!VectorizeLoop && !InterleaveLoop) { // Do not vectorize or interleaving the loop. - ORE->emit(OptimizationRemarkMissed(VAPassName, VecDiagMsg.first, - L->getStartLoc(), L->getHeader()) - << VecDiagMsg.second); - ORE->emit(OptimizationRemarkMissed(LV_NAME, IntDiagMsg.first, - L->getStartLoc(), L->getHeader()) - << IntDiagMsg.second); + ORE->emit([&]() { + return OptimizationRemarkMissed(VAPassName, VecDiagMsg.first, + L->getStartLoc(), L->getHeader()) + << VecDiagMsg.second; + }); + ORE->emit([&]() { + return OptimizationRemarkMissed(LV_NAME, IntDiagMsg.first, + L->getStartLoc(), L->getHeader()) + << IntDiagMsg.second; + }); return false; } else if (!VectorizeLoop && InterleaveLoop) { DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); - ORE->emit(OptimizationRemarkAnalysis(VAPassName, VecDiagMsg.first, - L->getStartLoc(), L->getHeader()) - << VecDiagMsg.second); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(VAPassName, VecDiagMsg.first, + L->getStartLoc(), L->getHeader()) + << VecDiagMsg.second; + }); } else if (VectorizeLoop && !InterleaveLoop) { DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " << DebugLocStr << '\n'); - ORE->emit(OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first, - L->getStartLoc(), L->getHeader()) - << IntDiagMsg.second); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first, + L->getStartLoc(), L->getHeader()) + << IntDiagMsg.second; + }); } else if (VectorizeLoop && InterleaveLoop) { DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " << DebugLocStr << '\n'); @@ -8597,6 +8692,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { LVP.setBestPlan(VF.Width, IC); using namespace ore; + if (!VectorizeLoop) { assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop, then @@ -8605,10 +8701,12 @@ bool LoopVectorizePass::processLoop(Loop *L) { &CM); LVP.executePlan(Unroller, DT); - ORE->emit(OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), - L->getHeader()) - << "interleaved loop (interleaved count: " - << NV("InterleaveCount", IC) << ")"); + ORE->emit([&]() { + return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(), + L->getHeader()) + << "interleaved loop (interleaved count: " + << NV("InterleaveCount", IC) << ")"; + }); } else { // If we decided that it is *legal* to vectorize the loop, then do it. InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, @@ -8623,11 +8721,13 @@ bool LoopVectorizePass::processLoop(Loop *L) { AddRuntimeUnrollDisableMetaData(L); // Report the vectorization decision. 
- ORE->emit(OptimizationRemark(LV_NAME, "Vectorized", L->getStartLoc(), - L->getHeader()) - << "vectorized loop (vectorization width: " - << NV("VectorizationFactor", VF.Width) - << ", interleaved count: " << NV("InterleaveCount", IC) << ")"); + ORE->emit([&]() { + return OptimizationRemark(LV_NAME, "Vectorized", L->getStartLoc(), + L->getHeader()) + << "vectorized loop (vectorization width: " + << NV("VectorizationFactor", VF.Width) + << ", interleaved count: " << NV("InterleaveCount", IC) << ")"; + }); } // Mark the loop as already vectorized to avoid vectorizing again. @@ -8643,7 +8743,6 @@ bool LoopVectorizePass::runImpl( DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_, std::function &GetLAA_, OptimizationRemarkEmitter &ORE_) { - SE = &SE_; LI = &LI_; TTI = &TTI_; @@ -8699,10 +8798,8 @@ bool LoopVectorizePass::runImpl( // Process each loop nest in the function. return Changed; - } - PreservedAnalyses LoopVectorizePass::run(Function &F, FunctionAnalysisManager &AM) { auto &SE = AM.getResult(F); diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index d201387debd33..5dcf5528ac92c 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6,6 +6,7 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// // This pass implements the Bottom Up SLP vectorizer. It detects consecutive // stores that can be put together into vector-stores. Next, it attempts to // construct vectorizable tree using the use-def chains. If a profitable tree @@ -39,7 +40,7 @@ #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -361,14 +362,17 @@ static Value *isOneOf(Value *OpValue, Value *Op) { } namespace { + /// Contains data for the instructions going to be vectorized. struct RawInstructionsData { /// Main Opcode of the instructions going to be vectorized. unsigned Opcode = 0; + /// The list of instructions have some instructions with alternate opcodes. bool HasAltOpcodes = false; }; -} // namespace + +} // end anonymous namespace /// Checks the list of the vectorized instructions \p VL and returns info about /// this list. @@ -392,19 +396,24 @@ static RawInstructionsData getMainOpcode(ArrayRef VL) { } namespace { + /// Main data required for vectorization of instructions. struct InstructionsState { /// The very first instruction in the list with the main opcode. Value *OpValue = nullptr; + /// The main opcode for the list of instructions. unsigned Opcode = 0; + /// Some of the instructions in the list have alternate opcodes. bool IsAltShuffle = false; + InstructionsState() = default; InstructionsState(Value *OpValue, unsigned Opcode, bool IsAltShuffle) : OpValue(OpValue), Opcode(Opcode), IsAltShuffle(IsAltShuffle) {} }; -} // namespace + +} // end anonymous namespace /// \returns analysis of the Instructions in \p VL described in /// InstructionsState, the Opcode that we suppose the whole list @@ -973,6 +982,7 @@ class BoUpSLP { return os; } #endif + friend struct GraphTraits; friend struct DOTGraphTraits; @@ -1176,9 +1186,9 @@ class BoUpSLP { /// The ID of the scheduling region. 
For a new vectorization iteration this /// is incremented which "removes" all ScheduleData from the region. - int SchedulingRegionID = 1; // Make sure that the initial SchedulingRegionID is greater than the // initial SchedulingRegionID in ScheduleData (which is 0). + int SchedulingRegionID = 1; }; /// Attaches the BlockScheduling structures to basic blocks. @@ -1212,6 +1222,7 @@ class BoUpSLP { unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt. unsigned MinVecRegSize; // Set by cl::opt (default: 128). + /// Instruction builder to construct the vectorized tree. IRBuilder<> Builder; @@ -4662,6 +4673,7 @@ class HorizontalReduction { RK_Max, /// Maximum reduction data. RK_UMax, /// Unsigned maximum reduction data. }; + /// Contains info about operation, like its opcode, left and right operands. class OperationData { /// Opcode of the instruction. @@ -4672,8 +4684,10 @@ class HorizontalReduction { /// Right operand of the reduction operation. Value *RHS = nullptr; + /// Kind of the reduction operation. ReductionKind Kind = RK_None; + /// True if float point min/max reduction has no NaNs. bool NoNaN = false; @@ -4725,7 +4739,7 @@ class HorizontalReduction { /// Construction for reduced values. They are identified by opcode only and /// don't have associated LHS/RHS values. - explicit OperationData(Value *V) : Kind(RK_None) { + explicit OperationData(Value *V) { if (auto *I = dyn_cast(V)) Opcode = I->getOpcode(); } @@ -4737,6 +4751,7 @@ class HorizontalReduction { : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind), NoNaN(NoNaN) { assert(Kind != RK_None && "One of the reduction operations is expected."); } + explicit operator bool() const { return Opcode; } /// Get the index of the first operand. @@ -5421,7 +5436,6 @@ class HorizontalReduction { /// starting from the last insertelement instruction. /// /// Returns true if it matches -/// static bool findBuildVector(InsertElementInst *LastInsertElem, SmallVectorImpl &BuildVector, SmallVectorImpl &BuildVectorOpds) { diff --git a/lib/Transforms/Vectorize/VPlan.cpp b/lib/Transforms/Vectorize/VPlan.cpp index 498f4c4f7f31e..f74426e5f3019 100644 --- a/lib/Transforms/Vectorize/VPlan.cpp +++ b/lib/Transforms/Vectorize/VPlan.cpp @@ -18,12 +18,29 @@ //===----------------------------------------------------------------------===// #include "VPlan.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include +#include +#include using namespace llvm; @@ -138,7 +155,6 @@ void VPBasicBlock::execute(VPTransformState *State) { SingleHPred->getExitBasicBlock() == PrevVPBB && PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */ !(Replica && getPredecessors().empty())) { /* C */ - NewBB = createEmptyBasicBlock(State->CFG); State->Builder.SetInsertPoint(NewBB); // Temporarily terminate with unreachable until CFG is rewired. 
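[Editor's note: in the SLPVectorizer hunk above, `OperationData` keeps its `explicit operator bool()`, so a reduction-op descriptor can be tested directly in a condition without leaking implicit integer conversions. A standalone sketch of the idiom with a hypothetical type.]

```cpp
// A lightweight descriptor that is "valid" only once an opcode is set.
struct OpData {
  unsigned Opcode = 0;

  // 'explicit' permits 'if (Data)' but forbids accidental conversions
  // such as 'int X = Data;' or 'Data + 1'.
  explicit operator bool() const { return Opcode != 0; }
};

int main() {
  OpData Empty;
  OpData Add{13};
  // Contextual conversion to bool is allowed in &&, ||, !, and if().
  return (Add && !Empty) ? 0 : 1;
}
```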
diff --git a/lib/Transforms/Vectorize/VPlan.h b/lib/Transforms/Vectorize/VPlan.h index 3c11fdeb07630..d43774dd36eb1 100644 --- a/lib/Transforms/Vectorize/VPlan.h +++ b/lib/Transforms/Vectorize/VPlan.h @@ -1,4 +1,4 @@ -//===- VPlan.h - Represent A Vectorizer Plan ------------------------------===// +//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,7 +6,7 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// +// /// \file /// This file contains the declarations of the Vectorization Plan base classes: /// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual @@ -18,34 +18,37 @@ /// 4. The VPlan class holding a candidate for vectorization; /// 5. The VPlanPrinter class providing a way to print a plan in dot format. /// These are documented in docs/VectorizationPlan.rst. -/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H #define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/Support/raw_ostream.h" - -// The (re)use of existing LoopVectorize classes is subject to future VPlan -// refactoring. -namespace { -// Forward declarations. -//class InnerLoopVectorizer; -class LoopVectorizationLegality; -class LoopVectorizationCostModel; -} // namespace +#include +#include +#include +#include +#include namespace llvm { -// Forward declarations. class BasicBlock; +class DominatorTree; class InnerLoopVectorizer; +class LoopInfo; +class raw_ostream; +class Value; class VPBasicBlock; +class VPRegionBlock; /// In what follows, the term "input IR" refers to code that is fed into the /// vectorizer whereas the term "output IR" refers to code that is generated by @@ -54,8 +57,11 @@ class VPBasicBlock; /// VPIteration represents a single point in the iteration space of the output /// (vectorized and/or unrolled) IR loop. struct VPIteration { - unsigned Part; ///< in [0..UF) - unsigned Lane; ///< in [0..VF) + /// in [0..UF) + unsigned Part; + + /// in [0..VF) + unsigned Lane; }; /// This is a helper struct for maintaining vectorization state. It's used for @@ -75,7 +81,6 @@ struct VPIteration { /// /// Entries from either map can be retrieved using the getVectorValue and /// getScalarValue functions, which assert that the desired value exists. - struct VectorizerValueMap { private: /// The unroll factor. Each entry in the vector map contains UF vector values. @@ -87,8 +92,8 @@ struct VectorizerValueMap { /// The vector and scalar map storage. We use std::map and not DenseMap /// because insertions to DenseMap invalidate its iterators. - typedef SmallVector VectorParts; - typedef SmallVector, 2> ScalarParts; + using VectorParts = SmallVector; + using ScalarParts = SmallVector, 2>; std::map VectorMapStorage; std::map ScalarMapStorage; @@ -193,12 +198,11 @@ struct VectorizerValueMap { /// VPTransformState holds information passed down when "executing" a VPlan, /// needed for generating the output IR. 
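[Editor's note: the VPlan.h hunk below replaces forward declarations that had been placed in an anonymous namespace — which gives every translation unit its own distinct types — with ordinary forward declarations inside `namespace llvm`, and trims includes accordingly. A minimal sketch of the header pattern, with hypothetical names.]

```cpp
// widget.h -- this header only stores a pointer to Gadget, so a
// forward declaration suffices and no #include "gadget.h" is needed.
namespace demo {

class Gadget; // forward declaration: the full definition can wait

class Widget {
public:
  explicit Widget(Gadget *G) : G(G) {}
  Gadget *gadget() const { return G; }

private:
  Gadget *G; // pointer members are fine with an incomplete type
};

} // end namespace demo
```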
struct VPTransformState { - - VPTransformState(unsigned VF, unsigned UF, class LoopInfo *LI, - class DominatorTree *DT, IRBuilder<> &Builder, - VectorizerValueMap &ValueMap, InnerLoopVectorizer *ILV) - : VF(VF), UF(UF), Instance(), LI(LI), DT(DT), Builder(Builder), - ValueMap(ValueMap), ILV(ILV) {} + VPTransformState(unsigned VF, unsigned UF, LoopInfo *LI, DominatorTree *DT, + IRBuilder<> &Builder, VectorizerValueMap &ValueMap, + InnerLoopVectorizer *ILV) + : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ValueMap(ValueMap), + ILV(ILV) {} /// The chosen Vectorization and Unroll Factors of the loop being vectorized. unsigned VF; @@ -213,25 +217,28 @@ struct VPTransformState { /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks. struct CFGState { /// The previous VPBasicBlock visited. Initially set to null. - VPBasicBlock *PrevVPBB; + VPBasicBlock *PrevVPBB = nullptr; + /// The previous IR BasicBlock created or used. Initially set to the new /// header BasicBlock. - BasicBlock *PrevBB; + BasicBlock *PrevBB = nullptr; + /// The last IR BasicBlock in the output IR. Set to the new latch /// BasicBlock, used for placing the newly created BasicBlocks. - BasicBlock *LastBB; + BasicBlock *LastBB = nullptr; + /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case /// of replication, maps the BasicBlock of the last replica created. SmallDenseMap VPBB2IRBB; - CFGState() : PrevVPBB(nullptr), PrevBB(nullptr), LastBB(nullptr) {} + CFGState() = default; } CFG; /// Hold a pointer to LoopInfo to register new basic blocks in the loop. - class LoopInfo *LI; + LoopInfo *LI; /// Hold a pointer to Dominator Tree to register new basic blocks in the loop. - class DominatorTree *DT; + DominatorTree *DT; /// Hold a reference to the IRBuilder used to generate output IR code. IRBuilder<> &Builder; @@ -241,7 +248,7 @@ struct VPTransformState { VectorizerValueMap &ValueMap; /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods. - class InnerLoopVectorizer *ILV; + InnerLoopVectorizer *ILV; }; /// VPBlockBase is the building block of the Hierarchical Control-Flow Graph. @@ -255,7 +262,7 @@ class VPBlockBase { /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if /// it is a topmost VPBlockBase. - class VPRegionBlock *Parent; + VPRegionBlock *Parent = nullptr; /// List of predecessor blocks. SmallVector Predecessors; @@ -291,18 +298,18 @@ class VPBlockBase { protected: VPBlockBase(const unsigned char SC, const std::string &N) - : SubclassID(SC), Name(N), Parent(nullptr) {} + : SubclassID(SC), Name(N) {} public: /// An enumeration for keeping track of the concrete subclass of VPBlockBase /// that are actually instantiated. Values of this enumeration are kept in the /// SubclassID field of the VPBlockBase objects. They are used for concrete /// type identification. - typedef enum { VPBasicBlockSC, VPRegionBlockSC } VPBlockTy; + using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC }; - typedef SmallVectorImpl VPBlocksTy; + using VPBlocksTy = SmallVectorImpl; - virtual ~VPBlockBase() {} + virtual ~VPBlockBase() = default; const std::string &getName() const { return Name; } @@ -437,14 +444,14 @@ class VPRecipeBase : public ilist_node_with_parent { const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). /// Each VPRecipe belongs to a single VPBasicBlock. 
- VPBasicBlock *Parent; + VPBasicBlock *Parent = nullptr; public: /// An enumeration for keeping track of the concrete subclass of VPRecipeBase /// that is actually instantiated. Values of this enumeration are kept in the /// SubclassID field of the VPRecipeBase objects. They are used for concrete /// type identification. - typedef enum { + using VPRecipeTy = enum { VPBranchOnMaskSC, VPInterleaveSC, VPPredInstPHISC, @@ -452,11 +459,10 @@ class VPRecipeBase : public ilist_node_with_parent { VPWidenIntOrFpInductionSC, VPWidenPHISC, VPWidenSC, - } VPRecipeTy; - - VPRecipeBase(const unsigned char SC) : SubclassID(SC), Parent(nullptr) {} + }; - virtual ~VPRecipeBase() {} + VPRecipeBase(const unsigned char SC) : SubclassID(SC) {} + virtual ~VPRecipeBase() = default; /// \return an ID for the concrete type of this object. /// This is used to implement the classof checks. This should not be used @@ -480,18 +486,26 @@ class VPRecipeBase : public ilist_node_with_parent { /// output IR instructions. class VPBasicBlock : public VPBlockBase { public: - typedef iplist RecipeListTy; + using RecipeListTy = iplist; private: /// The VPRecipes held in the order of output instructions to generate. RecipeListTy Recipes; public: + VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr) + : VPBlockBase(VPBasicBlockSC, Name.str()) { + if (Recipe) + appendRecipe(Recipe); + } + + ~VPBasicBlock() override { Recipes.clear(); } + /// Instruction iterators... - typedef RecipeListTy::iterator iterator; - typedef RecipeListTy::const_iterator const_iterator; - typedef RecipeListTy::reverse_iterator reverse_iterator; - typedef RecipeListTy::const_reverse_iterator const_reverse_iterator; + using iterator = RecipeListTy::iterator; + using const_iterator = RecipeListTy::const_iterator; + using reverse_iterator = RecipeListTy::reverse_iterator; + using const_reverse_iterator = RecipeListTy::const_reverse_iterator; //===--------------------------------------------------------------------===// /// Recipe iterator methods @@ -518,14 +532,6 @@ class VPBasicBlock : public VPBlockBase { return &VPBasicBlock::Recipes; } - VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr) - : VPBlockBase(VPBasicBlockSC, Name.str()) { - if (Recipe) - appendRecipe(Recipe); - } - - ~VPBasicBlock() { Recipes.clear(); } - /// Method to support type inquiry through isa, cast, and dyn_cast. static inline bool classof(const VPBlockBase *V) { return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC; @@ -581,7 +587,7 @@ class VPRegionBlock : public VPBlockBase { Exit->setParent(this); } - ~VPRegionBlock() { + ~VPRegionBlock() override { if (Entry) deleteCFG(Entry); } @@ -649,7 +655,7 @@ class VPlan { private: /// Add to the given dominator tree the header block and every new basic block /// that was created between it and the latch block, inclusive. - static void updateDominatorTree(class DominatorTree *DT, + static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB, BasicBlock *LoopLatchBB); }; @@ -667,11 +673,11 @@ class VPlanPrinter { unsigned Depth; unsigned TabWidth = 2; std::string Indent; - unsigned BID = 0; - SmallDenseMap BlockID; + VPlanPrinter(raw_ostream &O, VPlan &P) : OS(O), Plan(P) {} + /// Handle indentation. 
void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); } @@ -701,8 +707,6 @@ class VPlanPrinter { void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden, const Twine &Label); - VPlanPrinter(raw_ostream &O, VPlan &P) : OS(O), Plan(P) {} - void dump(); static void printAsIngredient(raw_ostream &O, Value *V); @@ -710,6 +714,7 @@ class VPlanPrinter { struct VPlanIngredient { Value *V; + VPlanIngredient(Value *V) : V(V) {} }; @@ -732,8 +737,8 @@ inline raw_ostream &operator<<(raw_ostream &OS, VPlan &Plan) { // graph of VPBlockBase nodes... template <> struct GraphTraits { - typedef VPBlockBase *NodeRef; - typedef SmallVectorImpl::iterator ChildIteratorType; + using NodeRef = VPBlockBase *; + using ChildIteratorType = SmallVectorImpl::iterator; static NodeRef getEntryNode(NodeRef N) { return N; } @@ -747,8 +752,8 @@ template <> struct GraphTraits { }; template <> struct GraphTraits { - typedef const VPBlockBase *NodeRef; - typedef SmallVectorImpl::const_iterator ChildIteratorType; + using NodeRef = const VPBlockBase *; + using ChildIteratorType = SmallVectorImpl::const_iterator; static NodeRef getEntryNode(NodeRef N) { return N; } @@ -765,11 +770,9 @@ template <> struct GraphTraits { // graph of VPBlockBase nodes... and to walk it in inverse order. Inverse order // for a VPBlockBase is considered to be when traversing the predecessors of a // VPBlockBase instead of its successors. -// - template <> struct GraphTraits> { - typedef VPBlockBase *NodeRef; - typedef SmallVectorImpl::iterator ChildIteratorType; + using NodeRef = VPBlockBase *; + using ChildIteratorType = SmallVectorImpl::iterator; static Inverse getEntryNode(Inverse B) { return B; @@ -784,6 +787,6 @@ template <> struct GraphTraits> { } }; -} // namespace llvm +} // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H diff --git a/lib/XRay/Trace.cpp b/lib/XRay/Trace.cpp index e1eb7a7f11723..e90396959fb2b 100644 --- a/lib/XRay/Trace.cpp +++ b/lib/XRay/Trace.cpp @@ -82,29 +82,59 @@ Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader, for (auto S = Data.drop_front(32); !S.empty(); S = S.drop_front(32)) { DataExtractor RecordExtractor(S, true, 8); uint32_t OffsetPtr = 0; - Records.emplace_back(); - auto &Record = Records.back(); - Record.RecordType = RecordExtractor.getU16(&OffsetPtr); - Record.CPU = RecordExtractor.getU8(&OffsetPtr); - auto Type = RecordExtractor.getU8(&OffsetPtr); - switch (Type) { - case 0: - Record.Type = RecordTypes::ENTER; - break; - case 1: - Record.Type = RecordTypes::EXIT; + switch (auto RecordType = RecordExtractor.getU16(&OffsetPtr)) { + case 0: { // Normal records. + Records.emplace_back(); + auto &Record = Records.back(); + Record.RecordType = RecordType; + Record.CPU = RecordExtractor.getU8(&OffsetPtr); + auto Type = RecordExtractor.getU8(&OffsetPtr); + switch (Type) { + case 0: + Record.Type = RecordTypes::ENTER; + break; + case 1: + Record.Type = RecordTypes::EXIT; + break; + case 2: + Record.Type = RecordTypes::TAIL_EXIT; + break; + case 3: + Record.Type = RecordTypes::ENTER_ARG; + break; + default: + return make_error( + Twine("Unknown record type '") + Twine(int{Type}) + "'", + std::make_error_code(std::errc::executable_format_error)); + } + Record.FuncId = RecordExtractor.getSigned(&OffsetPtr, sizeof(int32_t)); + Record.TSC = RecordExtractor.getU64(&OffsetPtr); + Record.TId = RecordExtractor.getU32(&OffsetPtr); break; - case 2: - Record.Type = RecordTypes::TAIL_EXIT; + } + case 1: { // Arg payload record. 
+ auto &Record = Records.back(); + // Advance two bytes to avoid padding. + OffsetPtr += 2; + int32_t FuncId = RecordExtractor.getSigned(&OffsetPtr, sizeof(int32_t)); + auto TId = RecordExtractor.getU32(&OffsetPtr); + if (Record.FuncId != FuncId || Record.TId != TId) + return make_error<StringError>( + Twine("Corrupted log, found payload following non-matching " + "function + thread record. Record for ") + + Twine(Record.FuncId) + " != " + Twine(FuncId), + std::make_error_code(std::errc::executable_format_error)); + // Advance another four bytes to avoid padding. + OffsetPtr += 4; + auto Arg = RecordExtractor.getU64(&OffsetPtr); + Record.CallArgs.push_back(Arg); break; + } default: return make_error<StringError>( - Twine("Unknown record type '") + Twine(int{Type}) + "'", + Twine("Unknown record type == ") + Twine(RecordType), std::make_error_code(std::errc::executable_format_error)); } - Record.FuncId = RecordExtractor.getSigned(&OffsetPtr, sizeof(int32_t)); - Record.TSC = RecordExtractor.getU64(&OffsetPtr); - Record.TId = RecordExtractor.getU32(&OffsetPtr); } return Error::success(); } @@ -234,8 +264,8 @@ Error processCustomEventMarker(FDRState &State, uint8_t RecordFirstByte, uint32_t DataSize = RecordExtractor.getU32(&OffsetPtr); uint64_t TSC = RecordExtractor.getU64(&OffsetPtr); - // FIXME: Actually represent the record through the API. For now we only skip - // through the data. + // FIXME: Actually represent the record through the API. For now we only + // skip through the data. (void)TSC; RecordSize = 16 + DataSize; return Error::success(); @@ -507,8 +537,8 @@ Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader, Records.clear(); std::transform(Trace.Records.begin(), Trace.Records.end(), std::back_inserter(Records), [&](const YAMLXRayRecord &R) { - return XRayRecord{R.RecordType, R.CPU, R.Type, - R.FuncId, R.TSC, R.TId, R.CallArgs}; + return XRayRecord{R.RecordType, R.CPU, R.Type, R.FuncId, + R.TSC, R.TId, R.CallArgs}; }); return Error::success(); } diff --git a/test/Analysis/ConstantFolding/cast-vector.ll b/test/Analysis/ConstantFolding/cast-vector.ll new file mode 100644 index 0000000000000..1aaf55a23b503 --- /dev/null +++ b/test/Analysis/ConstantFolding/cast-vector.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instsimplify -S | FileCheck %s + +; Test constant fold of constant expression GEP used by ptrtoint (the +; "offsetof-like expression" case). +; This used to hit an assert due to not supporting vectors in +; llvm::ConstantFoldCastInstruction when handling ptrtoint. +define <2 x i16> @test1() { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <2 x i16> ptrtoint (<2 x i32*> getelementptr ([10 x i32], [10 x i32]* null, <2 x i64> zeroinitializer, <2 x i64> ) to <2 x i16>) +; +entry: + %gep = getelementptr inbounds [10 x i32], [10 x i32]* null, i16 0, <2 x i16> + %vec = ptrtoint <2 x i32*> %gep to <2 x i16> + ret <2 x i16> %vec +} + +; Test constant fold of constant expression GEP used by ptrtoint (the +; "sizeof-like expression" case). +; This used to hit an assert due to not supporting vectors in +; llvm::ConstantFoldCastInstruction when handling ptrtoint.
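Both failure paths in the XRay loader above use the standard llvm::Error idiom: Error::success() on the happy path, otherwise make_error<StringError> carrying a message plus a std::error_code. A self-contained sketch of that convention, where checkRecordType is an illustrative helper and not part of the patch (the second cast-vector.ll test resumes just below):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <system_error>

using namespace llvm;

// Illustrative helper: the naive-format loader above only understands
// record types 0 (normal) and 1 (arg payload).
static Error checkRecordType(uint16_t RecordType) {
  if (RecordType > 1)
    return make_error<StringError>(
        Twine("Unknown record type == ") + Twine(RecordType),
        std::make_error_code(std::errc::executable_format_error));
  return Error::success(); // success is the absence of an error
}

Callers must consume the result, for example with handleErrors() or by returning it up the stack, since an Error destroyed unchecked asserts in debug builds.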
+define <2 x i16> @test2() { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret <2 x i16> ptrtoint (<2 x i32*> getelementptr (i32, i32* null, <2 x i64> ) to <2 x i16>) +; +entry: + %gep = getelementptr i32, i32* null, <2 x i16> + %vec = ptrtoint <2 x i32*> %gep to <2 x i16> + ret <2 x i16> %vec +} diff --git a/test/Analysis/CostModel/ARM/gep.ll b/test/Analysis/CostModel/ARM/gep.ll index 9d74da4c2d3b0..12e314e24073d 100644 --- a/test/Analysis/CostModel/ARM/gep.ll +++ b/test/Analysis/CostModel/ARM/gep.ll @@ -83,5 +83,8 @@ define void @test_geps(i32 %i) { ;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>, <4 x double>* %c12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 %i +;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds i8, i8* + %d0 = getelementptr inbounds i8, i8* undef, i32 -1 + ret void } diff --git a/test/Analysis/CostModel/X86/costmodel.ll b/test/Analysis/CostModel/X86/costmodel.ll index 19e7128ff4493..246dc12eb590a 100644 --- a/test/Analysis/CostModel/X86/costmodel.ll +++ b/test/Analysis/CostModel/X86/costmodel.ll @@ -45,6 +45,10 @@ define i64 @foo(i64 %arg) { ; CODESIZE: cost of 1 {{.*}} call %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef) + ; LATENCY: cost of 40 {{.*}} call void undef + ; CODESIZE: cost of 1 {{.*}} call void undef + call void undef() + ; LATENCY: cost of 1 {{.*}} ret ; CODESIZE: cost of 1 {{.*}} ret ret i64 undef diff --git a/test/Analysis/CostModel/X86/interleaved-load-i8.ll b/test/Analysis/CostModel/X86/interleaved-load-i8.ll index 382e5e5301d69..f43d73e646528 100644 --- a/test/Analysis/CostModel/X86/interleaved-load-i8.ll +++ b/test/Analysis/CostModel/X86/interleaved-load-i8.ll @@ -10,8 +10,8 @@ define i32 @doit_stride3(i8* nocapture readonly %Ptr, i32 %Nels) { ;CHECK: LV: Found an estimated cost of 11 for VF 2 For instruction: %0 = load i8 ;CHECK: LV: Found an estimated cost of 5 for VF 4 For instruction: %0 = load i8 ;CHECK: LV: Found an estimated cost of 10 for VF 8 For instruction: %0 = load i8 -;CHECK: LV: Found an estimated cost of 20 for VF 16 For instruction: %0 = load i8 -;CHECK: LV: Found an estimated cost of 45 for VF 32 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 13 for VF 16 For instruction: %0 = load i8 +;CHECK: LV: Found an estimated cost of 16 for VF 32 For instruction: %0 = load i8 entry: %cmp13 = icmp sgt i32 %Nels, 0 br i1 %cmp13, label %for.body.preheader, label %for.end diff --git a/test/Analysis/CostModel/X86/interleaved-store-i8.ll b/test/Analysis/CostModel/X86/interleaved-store-i8.ll index d8408c1527633..0923f131c004f 100644 --- a/test/Analysis/CostModel/X86/interleaved-store-i8.ll +++ b/test/Analysis/CostModel/X86/interleaved-store-i8.ll @@ -10,8 +10,8 @@ define void @doit_stride3(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr { ;CHECK: LV: Found an estimated cost of 8 for VF 2 For instruction: store i8 %conv4 ;CHECK: LV: Found an estimated cost of 9 for VF 4 For instruction: store i8 %conv4 ;CHECK: LV: Found an estimated cost of 12 for VF 8 For instruction: store i8 %conv4 -;CHECK: LV: Found an estimated cost of 19 for VF 16 For instruction: store i8 %conv4 -;CHECK: LV: Found an estimated cost of 35 for VF 32 For instruction: store i8 %conv4 +;CHECK: LV: Found an estimated cost of 13 for VF 16 For instruction: store i8 %conv4 +;CHECK: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %conv4 entry: %cmp14 = icmp sgt i32 %Nels, 0 br i1 %cmp14, label %for.body.lr.ph, label %for.end @@ -47,9 
+47,9 @@ define void @doit_stride4(i8* nocapture %Ptr, i32 %Nels) local_unnamed_addr { ;CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %conv7 ;CHECK: LV: Found an estimated cost of 13 for VF 2 For instruction: store i8 %conv7 ;CHECK: LV: Found an estimated cost of 10 for VF 4 For instruction: store i8 %conv7 -;CHECK: LV: Found an estimated cost of 17 for VF 8 For instruction: store i8 %conv7 -;CHECK: LV: Found an estimated cost of 22 for VF 16 For instruction: store i8 %conv7 -;CHECK: LV: Found an estimated cost of 44 for VF 32 For instruction: store i8 %conv7 +;CHECK: LV: Found an estimated cost of 11 for VF 8 For instruction: store i8 %conv7 +;CHECK: LV: Found an estimated cost of 12 for VF 16 For instruction: store i8 %conv7 +;CHECK: LV: Found an estimated cost of 16 for VF 32 For instruction: store i8 %conv7 entry: %cmp19 = icmp sgt i32 %Nels, 0 br i1 %cmp19, label %for.body.lr.ph, label %for.end diff --git a/test/Analysis/CostModel/X86/strided-load-i8.ll b/test/Analysis/CostModel/X86/strided-load-i8.ll index a97a32c5c9407..72c9398fe2d44 100755 --- a/test/Analysis/CostModel/X86/strided-load-i8.ll +++ b/test/Analysis/CostModel/X86/strided-load-i8.ll @@ -41,9 +41,9 @@ define void @load_i8_stride3() { ;CHECK: Found an estimated cost of 1 for VF 2 For instruction: %1 = load ;CHECK: Found an estimated cost of 1 for VF 4 For instruction: %1 = load ;CHECK: Found an estimated cost of 3 for VF 8 For instruction: %1 = load -;CHECK: Found an estimated cost of 8 for VF 16 For instruction: %1 = load -;CHECK: Found an estimated cost of 20 for VF 32 For instruction: %1 = load -;CHECK: Found an estimated cost of 39 for VF 64 For instruction: %1 = load +;CHECK: Found an estimated cost of 13 for VF 16 For instruction: %1 = load +;CHECK: Found an estimated cost of 16 for VF 32 For instruction: %1 = load +;CHECK: Found an estimated cost of 25 for VF 64 For instruction: %1 = load entry: br label %for.body diff --git a/test/Analysis/CostModel/X86/trunc.ll b/test/Analysis/CostModel/X86/trunc.ll index a270251c2b17a..8961f679c2f4b 100644 --- a/test/Analysis/CostModel/X86/trunc.ll +++ b/test/Analysis/CostModel/X86/trunc.ll @@ -36,6 +36,7 @@ define i32 @trunc_vXi32() { define i32 @trunc_vXi16() { ; SSE: cost of 0 {{.*}} %V2i64 = trunc ; AVX: cost of 0 {{.*}} %V2i64 = trunc + ; AVX512: cost of 0 {{.*}} %V2i64 = trunc %V2i64 = trunc <2 x i64> undef to <2 x i16> ; SSE: cost of 1 {{.*}} %V4i64 = trunc @@ -46,6 +47,7 @@ define i32 @trunc_vXi16() { ; SSE: cost of 3 {{.*}} %V8i64 = trunc ; AVX: cost of 0 {{.*}} %V8i64 = trunc + ; AVX512: cost of 1 {{.*}} %V8i64 = trunc %V8i64 = trunc <8 x i64> undef to <8 x i16> ; SSE2: cost of 3 {{.*}} %V4i32 = trunc @@ -79,6 +81,7 @@ define i32 @trunc_vXi16() { define i32 @trunc_vXi8() { ; SSE: cost of 0 {{.*}} %V2i64 = trunc ; AVX: cost of 0 {{.*}} %V2i64 = trunc + ; AVX512: cost of 0 {{.*}} %V2i64 = trunc %V2i64 = trunc <2 x i64> undef to <2 x i8> ; SSE: cost of 1 {{.*}} %V4i64 = trunc @@ -89,16 +92,19 @@ define i32 @trunc_vXi8() { ; SSE: cost of 3 {{.*}} %V8i64 = trunc ; AVX: cost of 0 {{.*}} %V8i64 = trunc + ; AVX512: cost of 0 {{.*}} %V8i64 = trunc %V8i64 = trunc <8 x i64> undef to <8 x i8> ; SSE: cost of 0 {{.*}} %V2i32 = trunc ; AVX: cost of 0 {{.*}} %V2i32 = trunc + ; AVX512: cost of 0 {{.*}} %V2i32 = trunc %V2i32 = trunc <2 x i32> undef to <2 x i8> ; SSE2: cost of 3 {{.*}} %V4i32 = trunc ; SSSE3: cost of 3 {{.*}} %V4i32 = trunc ; SSE42: cost of 1 {{.*}} %V4i32 = trunc ; AVX: cost of 1 {{.*}} %V4i32 = trunc + ; AVX512: cost of 1 {{.*}} %V4i32 = 
trunc %V4i32 = trunc <4 x i32> undef to <4 x i8> ; SSE2: cost of 4 {{.*}} %V8i32 = trunc @@ -111,30 +117,37 @@ define i32 @trunc_vXi8() { ; SSE: cost of 7 {{.*}} %V16i32 = trunc ; AVX: cost of 7 {{.*}} %V16i32 = trunc + ; AVX512: cost of 1 {{.*}} %V16i32 = trunc %V16i32 = trunc <16 x i32> undef to <16 x i8> ; SSE: cost of 0 {{.*}} %V2i16 = trunc ; AVX: cost of 0 {{.*}} %V2i16 = trunc + ; AVX512: cost of 0 {{.*}} %V2i16 = trunc %V2i16 = trunc <2 x i16> undef to <2 x i8> ; SSE2: cost of 4 {{.*}} %V4i16 = trunc ; SSSE3: cost of 4 {{.*}} %V4i16 = trunc ; SSE42: cost of 2 {{.*}} %V4i16 = trunc ; AVX: cost of 2 {{.*}} %V4i16 = trunc + ; AVX512: cost of 2 {{.*}} %V4i16 = trunc %V4i16 = trunc <4 x i16> undef to <4 x i8> ; SSE2: cost of 2 {{.*}} %V8i16 = trunc ; SSSE3: cost of 2 {{.*}} %V8i16 = trunc ; SSE42: cost of 1 {{.*}} %V8i16 = trunc ; AVX: cost of 1 {{.*}} %V8i16 = trunc + ; AVX512: cost of 1 {{.*}} %V8i16 = trunc %V8i16 = trunc <8 x i16> undef to <8 x i8> ; SSE: cost of 3 {{.*}} %V16i16 = trunc ; AVX: cost of 4 {{.*}} %V16i16 = trunc + ; AVX512: cost of 4 {{.*}} %V16i16 = trunc %V16i16 = trunc <16 x i16> undef to <16 x i8> ; SSE: cost of 7 {{.*}} %V32i16 = trunc ; AVX: cost of 9 {{.*}} %V32i16 = trunc + ; AVX512F: cost of 9 {{.*}} %V32i16 = trunc + ; AVX512BW: cost of 0 {{.*}} %V32i16 = trunc %V32i16 = trunc <32 x i16> undef to <32 x i8> ret i32 undef diff --git a/test/Analysis/GlobalsModRef/memset-escape.ll b/test/Analysis/GlobalsModRef/memset-escape.ll index 8da375ad87755..b26f31389058b 100644 --- a/test/Analysis/GlobalsModRef/memset-escape.ll +++ b/test/Analysis/GlobalsModRef/memset-escape.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -O1 -S -enable-non-lto-gmr=true | FileCheck %s +; RUN: opt < %s -O1 -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/test/Analysis/GlobalsModRef/no-escape.ll b/test/Analysis/GlobalsModRef/no-escape.ll index d813a92268c69..752763c43478e 100644 --- a/test/Analysis/GlobalsModRef/no-escape.ll +++ b/test/Analysis/GlobalsModRef/no-escape.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -globals-aa -S -enable-non-lto-gmr=true -licm | FileCheck %s +; RUN: opt < %s -basicaa -globals-aa -S -licm | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/test/Analysis/GlobalsModRef/pr12351.ll b/test/Analysis/GlobalsModRef/pr12351.ll index 5d299cd2e9170..5cabd6f1f1202 100644 --- a/test/Analysis/GlobalsModRef/pr12351.ll +++ b/test/Analysis/GlobalsModRef/pr12351.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -basicaa -globals-aa -gvn -S | FileCheck %s +; RUN: opt < %s -basicaa -globals-aa -gvn -S -disable-verify | FileCheck %s declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) define void @foo(i8* %x, i8* %y) { diff --git a/test/Analysis/GlobalsModRef/weak-interposition.ll b/test/Analysis/GlobalsModRef/weak-interposition.ll index 091aa74d52173..8e94d2b64943d 100644 --- a/test/Analysis/GlobalsModRef/weak-interposition.ll +++ b/test/Analysis/GlobalsModRef/weak-interposition.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -O1 -enable-non-lto-gmr=true < %s | FileCheck %s +; RUN: opt -S -O1 < %s | FileCheck %s @a = common global i32 0, align 4 diff --git a/test/Analysis/Lint/noop-cast-expr-no-pointer.ll b/test/Analysis/Lint/noop-cast-expr-no-pointer.ll new file mode 100644 index 0000000000000..b925871142824 --- /dev/null +++ b/test/Analysis/Lint/noop-cast-expr-no-pointer.ll @@ -0,0 +1,23 @@ +; RUN: opt -lint < %s + +; lint shouldn't crash on 
any of the below functions + +@g_1 = external global [3 x i32] +@g_2 = external global [2 x i32] + +define void @test1() { +entry: + tail call void @f1(i16 zext (i1 icmp eq (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @g_2, i64 0, i64 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* @g_1, i64 0, i64 1)) to i16)) + ret void +} + +declare void @f1(i16) + +define void @test2() { + tail call void inttoptr (i64 sext (i32 ptrtoint (void ()* @f2 to i32) to i64) to void ()*)() + + ret void +} + +declare void @f2() + diff --git a/test/Analysis/ScalarEvolution/max-be-count-not-constant.ll b/test/Analysis/ScalarEvolution/max-be-count-not-constant.ll new file mode 100644 index 0000000000000..b593fc269a7b0 --- /dev/null +++ b/test/Analysis/ScalarEvolution/max-be-count-not-constant.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Previously in this case the max backedge count would be computed as 1/0, which +; is correct but undesirable. It would also not fold as a constant, tripping +; asserts in SCEV. + +define void @pluto(i32 %arg) { +; CHECK-LABEL: Classifying expressions for: @pluto +; CHECK: Loop %bb2: max backedge-taken count is 2 +bb: + %tmp = ashr i32 %arg, 31 + %tmp1 = add nsw i32 %tmp, 2 + br label %bb2 + +bb2: ; preds = %bb2, %bb + %tmp3 = phi i32 [ 0, %bb ], [ %tmp4, %bb2 ] + %tmp4 = add nuw nsw i32 %tmp1, %tmp3 + %tmp5 = icmp ult i32 %tmp4, 2 + br i1 %tmp5, label %bb2, label %bb6 + +bb6: ; preds = %bb2 + ret void +} diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll index d87e7d033a1e1..240ff8de6d6e3 100644 --- a/test/Analysis/ScalarEvolution/max-trip-count.ll +++ b/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -288,3 +288,146 @@ loop.exit: exit: ret i32 0 } + +; The end bound of the loop can change between iterations, so the exact trip +; count is unknown, but SCEV can calculate the max trip count. +define void @changing_end_bound(i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: max backedge-taken count is 2147483646 +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add nsw i32 %iv, 1 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; Similar test as above, but unknown start value. +; Also, there's no nsw on the iv.next, but SCEV knows +; the termination condition is LT, so the IV cannot wrap. +define void @changing_end_bound2(i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: max backedge-taken count is -1 +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add i32 %iv, 1 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; changing end bound and greater than one stride +define void @changing_end_bound3(i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound3 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: max backedge-taken count is 1073741823 +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add nsw i32 %iv, 4 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; same as above test, but the IV can wrap around. +; so the max backedge taken count is unpredictable. +define void @changing_end_bound4(i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound4 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add i32 %iv, 4 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; unknown stride. Since it's not knownPositive, we do not estimate the max +; backedge taken count. +define void @changing_end_bound5(i32 %stride, i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound5 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. +entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add nsw i32 %iv, %stride + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} + +; negative stride value +define void @changing_end_bound6(i32 %start, i32* %n_addr, i32* %addr) { +; CHECK-LABEL: Determining loop execution counts for: @changing_end_bound6 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
+entry: + br label %loop + +loop: + %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop ] + %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ] + %val = load atomic i32, i32* %addr unordered, align 4 + fence acquire + %acc.next = add i32 %acc, %val + %iv.next = add nsw i32 %iv, -1 + %n = load atomic i32, i32* %n_addr unordered, align 4 + %cmp = icmp slt i32 %iv.next, %n + br i1 %cmp, label %loop, label %loop.exit + +loop.exit: + ret void +} diff --git a/test/Analysis/ValueTracking/known-signbit-shift.ll b/test/Analysis/ValueTracking/known-signbit-shift.ll index bf984cb7474ad..7e9f1c2e70cd7 100644 --- a/test/Analysis/ValueTracking/known-signbit-shift.ll +++ b/test/Analysis/ValueTracking/known-signbit-shift.ll @@ -27,28 +27,22 @@ define i1 @test_shift_negative(i32 %a, i32 %b) { } ; If sign bit is a known zero, it cannot be a known one. -; This test should not crash opt. +; This test should not crash opt. The shift produces poison. define i32 @test_no_sign_bit_conflict1(i1 %b) { ; CHECK-LABEL: @test_no_sign_bit_conflict1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SEL:%.*]] = select i1 %b, i32 -2147221504, i32 -2147483648 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: ret i32 0 ; -entry: %sel = select i1 %b, i32 8193, i32 8192 %mul = shl nsw i32 %sel, 18 ret i32 %mul } ; If sign bit is a known one, it cannot be a known zero. -; This test should not crash opt. +; This test should not crash opt. The shift produces poison. define i32 @test_no_sign_bit_conflict2(i1 %b) { ; CHECK-LABEL: @test_no_sign_bit_conflict2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SEL:%.*]] = select i1 %b, i32 2147221504, i32 2146959360 -; CHECK-NEXT: ret i32 [[SEL]] +; CHECK-NEXT: ret i32 0 ; -entry: %sel = select i1 %b, i32 -8193, i32 -8194 %mul = shl nsw i32 %sel, 18 ret i32 %mul diff --git a/test/Assembler/alloca-addrspace-elems.ll b/test/Assembler/alloca-addrspace-elems.ll new file mode 100644 index 0000000000000..8c02760fe95f4 --- /dev/null +++ b/test/Assembler/alloca-addrspace-elems.ll @@ -0,0 +1,25 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +target datalayout = "A5" +; CHECK: target datalayout = "A5" + + +; CHECK: %alloca_array_no_align = alloca i32, i32 9, addrspace(5) +; CHECK-NEXT: %alloca_array_align4 = alloca i32, i32 9, align 4, addrspace(5) +; CHECK-NEXT: %alloca_array_no_align_metadata = alloca i32, i32 9, addrspace(5), !foo !0 +; CHECK-NEXT: %alloca_array_align4_metadata = alloca i32, i32 9, align 4, addrspace(5), !foo !0 +; CHECK-NEXT: %alloca_inalloca_array_no_align = alloca inalloca i32, i32 9, addrspace(5) +; CHECK-NEXT: %alloca_inalloca_array_align4_metadata = alloca inalloca i32, i32 9, align 4, addrspace(5), !foo !0 + +define void @use_alloca() { + %alloca_array_no_align = alloca i32, i32 9, addrspace(5) + %alloca_array_align4 = alloca i32, i32 9, align 4, addrspace(5) + %alloca_array_no_align_metadata = alloca i32, i32 9, addrspace(5), !foo !0 + %alloca_array_align4_metadata = alloca i32, i32 9, align 4, addrspace(5), !foo !0 + %alloca_inalloca_array_no_align = alloca inalloca i32, i32 9, addrspace(5) + %alloca_inalloca_array_align4_metadata = alloca inalloca i32, i32 9, align 4, addrspace(5), !foo !0 + + ret void +} + +!0 = !{} diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll index 367158d206d5e..7d4167f4cb0b7 100644 --- a/test/Bitcode/compatibility.ll +++ b/test/Bitcode/compatibility.ll @@ -476,6 +476,14 @@ declare cc93 void @f.cc93() ; CHECK: declare amdgpu_hs void @f.cc93() declare amdgpu_hs void @f.amdgpu_hs() ; CHECK: declare amdgpu_hs void @f.amdgpu_hs() +declare 
cc95 void @f.cc95() +; CHECK: declare amdgpu_ls void @f.cc95() +declare amdgpu_ls void @f.amdgpu_ls() +; CHECK: declare amdgpu_ls void @f.amdgpu_ls() +declare cc96 void @f.cc96() +; CHECK: declare amdgpu_es void @f.cc96() +declare amdgpu_es void @f.amdgpu_es() +; CHECK: declare amdgpu_es void @f.amdgpu_es() declare cc1023 void @f.cc1023() ; CHECK: declare cc1023 void @f.cc1023() diff --git a/test/Bitcode/upgrade-section-name.ll b/test/Bitcode/upgrade-section-name.ll new file mode 100644 index 0000000000000..fcc7228f5fd99 --- /dev/null +++ b/test/Bitcode/upgrade-section-name.ll @@ -0,0 +1,31 @@ +; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s + +%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* } +%struct._objc_cache = type opaque +%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* } +%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] } +%struct._objc_method = type { i8*, i8*, i8* } +%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] } +%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32, i8**, i8*, %struct._prop_list_t* } +%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] } +%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 } +%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] } +%struct._prop_t = type { i8*, i8* } +%struct._category_t = type { i8*, %struct._class_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._prop_list_t*, %struct._prop_list_t*, i32 } + +@OBJC_CLASS_NAME_ = private unnamed_addr constant [6 x i8] c"Robot\00", section "__TEXT,__objc_classname,cstring_literals", align 1 +@"OBJC_CLASS_$_I" = external global %struct._class_t +@"\01l_OBJC_$_CATEGORY_I_$_Robot" = private global %struct._category_t { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @OBJC_CLASS_NAME_, i32 0, i32 0), %struct._class_t* @"OBJC_CLASS_$_I", %struct.__method_list_t* null, %struct.__method_list_t* null, %struct._objc_protocol_list* null, %struct._prop_list_t* null, %struct._prop_list_t* null, i32 64 }, section "__DATA, __objc_const", align 8 +@"OBJC_LABEL_CATEGORY_$" = private global [1 x i8*] [i8* bitcast (%struct._category_t* @"\01l_OBJC_$_CATEGORY_I_$_Robot" to i8*)], section "__DATA, __objc_catlist, regular, no_dead_strip", align 8 +@llvm.compiler.used = appending global [3 x i8*] [i8* bitcast (%struct._category_t* @"\01l_OBJC_$_CATEGORY_I_$_Robot" to i8*), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @OBJC_CLASS_NAME_, i32 0, i32 0), i8* bitcast ([1 x i8*]* @"OBJC_LABEL_CATEGORY_$" to i8*)], section "llvm.metadata" + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5} + +!0 = !{i32 1, !"Objective-C Version", i32 2} +!1 = !{i32 1, !"Objective-C Image Info Version", i32 0} +!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"} +!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0} +!4 = !{i32 1, !"Objective-C Class Properties", i32 64} +!5 = !{i32 1, !"PIC Level", i32 2} + +; CHECK: @"OBJC_LABEL_CATEGORY_$" = {{.*}}, section "__DATA,__objc_catlist,regular,no_dead_strip" diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 33e5da0eb1bb2..efb43955d57dc 100644 --- a/test/CMakeLists.txt +++ 
b/test/CMakeLists.txt @@ -8,7 +8,9 @@ llvm_canonicalize_cmake_booleans( HAVE_LIBXAR LLVM_ENABLE_DIA_SDK LLVM_ENABLE_FFI - BUILD_SHARED_LIBS) + BUILD_SHARED_LIBS + LLVM_LINK_LLVM_DYLIB + ) configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll b/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll index a70cee0efcb6c..40f65b3774ed7 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-callingconv-ios.ll @@ -4,14 +4,14 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64-apple-ios9.0" ; CHECK-LABEL: name: test_varargs -; CHECK: [[ANSWER:%[0-9]+]](s32) = G_CONSTANT i32 42 -; CHECK: [[D_ONE:%[0-9]+]](s64) = G_FCONSTANT double 1.000000e+00 -; CHECK: [[TWELVE:%[0-9]+]](s64) = G_CONSTANT i64 12 -; CHECK: [[THREE:%[0-9]+]](s8) = G_CONSTANT i8 3 -; CHECK: [[ONE:%[0-9]+]](s16) = G_CONSTANT i16 1 -; CHECK: [[FOUR:%[0-9]+]](s32) = G_CONSTANT i32 4 -; CHECK: [[F_ONE:%[0-9]+]](s32) = G_FCONSTANT float 1.000000e+00 -; CHECK: [[TWO:%[0-9]+]](s64) = G_FCONSTANT double 2.000000e+00 +; CHECK: [[ANSWER:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 +; CHECK: [[D_ONE:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 +; CHECK: [[TWELVE:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[THREE:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 +; CHECK: [[ONE:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 +; CHECK: [[FOUR:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 +; CHECK: [[F_ONE:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 +; CHECK: [[TWO:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK: %w0 = COPY [[ANSWER]] ; CHECK: %d0 = COPY [[D_ONE]] diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll b/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll index 59b9bb49f0ee0..3888628fd1edd 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-callingconv.ll @@ -4,14 +4,14 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64-linux-gnu" ; CHECK-LABEL: name: args_i32 -; CHECK: %[[ARG0:[0-9]+]](s32) = COPY %w0 -; CHECK: %{{[0-9]+}}(s32) = COPY %w1 -; CHECK: %{{[0-9]+}}(s32) = COPY %w2 -; CHECK: %{{[0-9]+}}(s32) = COPY %w3 -; CHECK: %{{[0-9]+}}(s32) = COPY %w4 -; CHECK: %{{[0-9]+}}(s32) = COPY %w5 -; CHECK: %{{[0-9]+}}(s32) = COPY %w6 -; CHECK: %{{[0-9]+}}(s32) = COPY %w7 +; CHECK: %[[ARG0:[0-9]+]]:_(s32) = COPY %w0 +; CHECK: %{{[0-9]+}}:_(s32) = COPY %w1 +; CHECK: %{{[0-9]+}}:_(s32) = COPY %w2 +; CHECK: %{{[0-9]+}}:_(s32) = COPY %w3 +; CHECK: %{{[0-9]+}}:_(s32) = COPY %w4 +; CHECK: %{{[0-9]+}}:_(s32) = COPY %w5 +; CHECK: %{{[0-9]+}}:_(s32) = COPY %w6 +; CHECK: %{{[0-9]+}}:_(s32) = COPY %w7 ; CHECK: %w0 = COPY %[[ARG0]] define i32 @args_i32(i32 %w0, i32 %w1, i32 %w2, i32 %w3, @@ -20,14 +20,14 @@ define i32 @args_i32(i32 %w0, i32 %w1, i32 %w2, i32 %w3, } ; CHECK-LABEL: name: args_i64 -; CHECK: %[[ARG0:[0-9]+]](s64) = COPY %x0 -; CHECK: %{{[0-9]+}}(s64) = COPY %x1 -; CHECK: %{{[0-9]+}}(s64) = COPY %x2 -; CHECK: %{{[0-9]+}}(s64) = COPY %x3 -; CHECK: %{{[0-9]+}}(s64) = COPY %x4 -; CHECK: %{{[0-9]+}}(s64) = COPY %x5 -; CHECK: %{{[0-9]+}}(s64) = COPY %x6 -; CHECK: %{{[0-9]+}}(s64) = COPY %x7 +; CHECK: %[[ARG0:[0-9]+]]:_(s64) = COPY %x0 +; CHECK: %{{[0-9]+}}:_(s64) = COPY %x1 +; CHECK: %{{[0-9]+}}:_(s64) = COPY %x2 +; CHECK: %{{[0-9]+}}:_(s64) = COPY %x3 +; CHECK: %{{[0-9]+}}:_(s64) = COPY %x4 +; CHECK: %{{[0-9]+}}:_(s64) = COPY %x5 +; CHECK: %{{[0-9]+}}:_(s64) = COPY %x6 +; CHECK: 
%{{[0-9]+}}:_(s64) = COPY %x7 ; CHECK: %x0 = COPY %[[ARG0]] define i64 @args_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7) { @@ -36,14 +36,14 @@ define i64 @args_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3, ; CHECK-LABEL: name: args_ptrs -; CHECK: %[[ARG0:[0-9]+]](p0) = COPY %x0 -; CHECK: %{{[0-9]+}}(p0) = COPY %x1 -; CHECK: %{{[0-9]+}}(p0) = COPY %x2 -; CHECK: %{{[0-9]+}}(p0) = COPY %x3 -; CHECK: %{{[0-9]+}}(p0) = COPY %x4 -; CHECK: %{{[0-9]+}}(p0) = COPY %x5 -; CHECK: %{{[0-9]+}}(p0) = COPY %x6 -; CHECK: %{{[0-9]+}}(p0) = COPY %x7 +; CHECK: %[[ARG0:[0-9]+]]:_(p0) = COPY %x0 +; CHECK: %{{[0-9]+}}:_(p0) = COPY %x1 +; CHECK: %{{[0-9]+}}:_(p0) = COPY %x2 +; CHECK: %{{[0-9]+}}:_(p0) = COPY %x3 +; CHECK: %{{[0-9]+}}:_(p0) = COPY %x4 +; CHECK: %{{[0-9]+}}:_(p0) = COPY %x5 +; CHECK: %{{[0-9]+}}:_(p0) = COPY %x6 +; CHECK: %{{[0-9]+}}:_(p0) = COPY %x7 ; CHECK: %x0 = COPY %[[ARG0]] define i8* @args_ptrs(i8* %x0, i16* %x1, <2 x i8>* %x2, {i8, i16, i32}* %x3, [3 x float]* %x4, double* %x5, i8* %x6, i8* %x7) { @@ -51,27 +51,29 @@ define i8* @args_ptrs(i8* %x0, i16* %x1, <2 x i8>* %x2, {i8, i16, i32}* %x3, } ; CHECK-LABEL: name: args_arr -; CHECK: %[[ARG0:[0-9]+]](s64) = COPY %d0 +; CHECK: %[[ARG0:[0-9]+]]:_(s64) = COPY %d0 ; CHECK: %d0 = COPY %[[ARG0]] define [1 x double] @args_arr([1 x double] %d0) { ret [1 x double] %d0 } ; CHECK-LABEL: name: test_varargs -; CHECK: [[ANSWER:%[0-9]+]](s32) = G_CONSTANT i32 42 -; CHECK: [[D_ONE:%[0-9]+]](s64) = G_FCONSTANT double 1.000000e+00 -; CHECK: [[TWELVE:%[0-9]+]](s64) = G_CONSTANT i64 12 -; CHECK: [[THREE:%[0-9]+]](s8) = G_CONSTANT i8 3 -; CHECK: [[ONE:%[0-9]+]](s16) = G_CONSTANT i16 1 -; CHECK: [[FOUR:%[0-9]+]](s32) = G_CONSTANT i32 4 -; CHECK: [[F_ONE:%[0-9]+]](s32) = G_FCONSTANT float 1.000000e+00 -; CHECK: [[TWO:%[0-9]+]](s64) = G_FCONSTANT double 2.000000e+00 +; CHECK: [[ANSWER:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 +; CHECK: [[D_ONE:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 +; CHECK: [[TWELVE:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[THREE:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 +; CHECK: [[ONE:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 +; CHECK: [[FOUR:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 +; CHECK: [[F_ONE:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 +; CHECK: [[TWO:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 ; CHECK: %w0 = COPY [[ANSWER]] ; CHECK: %d0 = COPY [[D_ONE]] ; CHECK: %x1 = COPY [[TWELVE]] -; CHECK: %w2 = COPY [[THREE]](s8) -; CHECK: %w3 = COPY [[ONE]](s16) +; CHECK: [[THREE_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[THREE]] +; CHECK: %w2 = COPY [[THREE_TMP]](s32) +; CHECK: [[ONE_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[ONE]] +; CHECK: %w3 = COPY [[ONE_TMP]](s32) ; CHECK: %w4 = COPY [[FOUR]](s32) ; CHECK: %s1 = COPY [[F_ONE]](s32) ; CHECK: %d2 = COPY [[TWO]](s64) diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll index 11fa721fc64fd..25c0e78a7b203 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -43,7 +43,7 @@ define [1 x double] @constant() { ; The key problem here is that we may fail to create an MBB referenced by a ; PHI. If so, we cannot complete the G_PHI and mustn't try or bad things ; happen. 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %vreg5, %vreg2; mem:ST4[%addr] GPR:%vreg5,%vreg2 (in function: pending_phis) +; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %vreg6, %vreg2; mem:ST4[%addr] GPR:%vreg6,%vreg2 (in function: pending_phis) ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for pending_phis ; FALLBACK-WITH-REPORT-OUT-LABEL: pending_phis: define i32 @pending_phis(i1 %tst, i32 %val, i32* %addr) { diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll index cd3ea9715e0fd..62abf3d81d580 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-stackprotect.ll @@ -7,8 +7,8 @@ ; CHECK: - { id: 0, name: StackGuardSlot, type: default, offset: 0, size: 8, alignment: 8, ; CHECK-NOT: id: 1 -; CHECK: [[GUARD_SLOT:%[0-9]+]](p0) = G_FRAME_INDEX %stack.0.StackGuardSlot -; CHECK: [[GUARD:%[0-9]+]](p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard) +; CHECK: [[GUARD_SLOT:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.StackGuardSlot +; CHECK: [[GUARD:%[0-9]+]]:gpr64sp(p0) = LOAD_STACK_GUARD :: (dereferenceable invariant load 8 from @__stack_chk_guard) ; CHECK: G_STORE [[GUARD]](p0), [[GUARD_SLOT]](p0) :: (volatile store 8 into %stack.0.StackGuardSlot) declare void @llvm.stackprotector(i8*, i8**) define void @test_stack_guard_remat2() { diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index 10cdadd67ac7d..7c67a22e23c8c 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -7,9 +7,9 @@ target triple = "aarch64--" ; Tests for add.
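All of the GlobalISel test churn in the following hunks tracks a single MIR printer change: a virtual register definition now carries its register class or bank as a suffix. What used to print as "%0(s32) = COPY %w0" now prints as "%0:_(s32) = COPY %w0", where "_" marks a generic vreg with no class or bank assigned yet; a constrained vreg shows its concrete class instead, as in the ":gpr64sp(p0)" on the LOAD_STACK_GUARD line above. The add/or/xor/and/sub tests below check exactly that updated syntax.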
; CHECK-LABEL: name: addi64 -; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1 -; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_ADD [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_ADD [[ARG1]], [[ARG2]] ; CHECK-NEXT: %x0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %x0 define i64 @addi64(i64 %arg1, i64 %arg2) { @@ -18,9 +18,9 @@ define i64 @addi64(i64 %arg1, i64 %arg2) { } ; CHECK-LABEL: name: muli64 -; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1 -; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_MUL [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_MUL [[ARG1]], [[ARG2]] ; CHECK-NEXT: %x0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %x0 define i64 @muli64(i64 %arg1, i64 %arg2) { @@ -32,19 +32,19 @@ define i64 @muli64(i64 %arg1, i64 %arg2) { ; CHECK-LABEL: name: allocai64 ; CHECK: stack: ; CHECK-NEXT: - { id: 0, name: ptr1, type: default, offset: 0, size: 8, alignment: 8, -; CHECK-NEXT: stack-id: 0, callee-saved-register: '', di-variable: '', di-expression: '', -; CHECK-NEXT: di-location: '' } +; CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, +; CHECK-NEXT: di-variable: '', di-expression: '', di-location: '' } ; CHECK-NEXT: - { id: 1, name: ptr2, type: default, offset: 0, size: 8, alignment: 1, -; CHECK-NEXT: stack-id: 0, callee-saved-register: '', di-variable: '', di-expression: '', -; CHECK-NEXT: di-location: '' } +; CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, +; CHECK-NEXT: di-variable: '', di-expression: '', di-location: '' } ; CHECK-NEXT: - { id: 2, name: ptr3, type: default, offset: 0, size: 128, alignment: 8, -; CHECK-NEXT: stack-id: 0, callee-saved-register: '', di-variable: '', di-expression: '', -; CHECK-NEXT: di-location: '' } +; CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, +; CHECK-NEXT: di-variable: '', di-expression: '', di-location: '' } ; CHECK-NEXT: - { id: 3, name: ptr4, type: default, offset: 0, size: 1, alignment: 8, -; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.0.ptr1 -; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.1.ptr2 -; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.2.ptr3 -; CHECK: %{{[0-9]+}}(p0) = G_FRAME_INDEX %stack.3.ptr4 +; CHECK: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %stack.0.ptr1 +; CHECK: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %stack.1.ptr2 +; CHECK: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %stack.2.ptr3 +; CHECK: %{{[0-9]+}}:_(p0) = G_FRAME_INDEX %stack.3.ptr4 define void @allocai64() { %ptr1 = alloca i64 %ptr2 = alloca i64, align 1 @@ -107,10 +107,10 @@ end: ; CHECK-NEXT: successors: %[[TRUE:bb.[0-9]+.true]](0x40000000), ; CHECK: %[[FALSE:bb.[0-9]+.false]](0x40000000) ; -; CHECK: [[ADDR:%.*]](p0) = COPY %x0 +; CHECK: [[ADDR:%.*]]:_(p0) = COPY %x0 ; ; Check that we emit the correct branch. 
-; CHECK: [[TST:%.*]](s1) = G_LOAD [[ADDR]](p0) +; CHECK: [[TST:%.*]]:_(s1) = G_LOAD [[ADDR]](p0) ; CHECK: G_BRCOND [[TST]](s1), %[[TRUE]] ; CHECK: G_BR %[[FALSE]] ; @@ -135,19 +135,19 @@ false: ; ; CHECK: {{bb.[0-9]+.entry}}: ; CHECK-NEXT: successors: %[[BB_CASE100:bb.[0-9]+.case100]](0x40000000), %[[BB_NOTCASE100_CHECKNEXT:bb.[0-9]+.entry]](0x40000000) -; CHECK: %0(s32) = COPY %w0 -; CHECK: %[[reg100:[0-9]+]](s32) = G_CONSTANT i32 100 -; CHECK: %[[reg200:[0-9]+]](s32) = G_CONSTANT i32 200 -; CHECK: %[[reg0:[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: %[[reg1:[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK: %[[reg2:[0-9]+]](s32) = G_CONSTANT i32 2 -; CHECK: %[[regicmp100:[0-9]+]](s1) = G_ICMP intpred(eq), %[[reg100]](s32), %0 +; CHECK: %0:_(s32) = COPY %w0 +; CHECK: %[[reg100:[0-9]+]]:_(s32) = G_CONSTANT i32 100 +; CHECK: %[[reg200:[0-9]+]]:_(s32) = G_CONSTANT i32 200 +; CHECK: %[[reg0:[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: %[[reg1:[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: %[[reg2:[0-9]+]]:_(s32) = G_CONSTANT i32 2 +; CHECK: %[[regicmp100:[0-9]+]]:_(s1) = G_ICMP intpred(eq), %[[reg100]](s32), %0 ; CHECK: G_BRCOND %[[regicmp100]](s1), %[[BB_CASE100]] ; CHECK: G_BR %[[BB_NOTCASE100_CHECKNEXT]] ; ; CHECK: [[BB_NOTCASE100_CHECKNEXT]]: ; CHECK-NEXT: successors: %[[BB_CASE200:bb.[0-9]+.case200]](0x40000000), %[[BB_NOTCASE200_CHECKNEXT:bb.[0-9]+.entry]](0x40000000) -; CHECK: %[[regicmp200:[0-9]+]](s1) = G_ICMP intpred(eq), %[[reg200]](s32), %0 +; CHECK: %[[regicmp200:[0-9]+]]:_(s1) = G_ICMP intpred(eq), %[[reg200]](s32), %0 ; CHECK: G_BRCOND %[[regicmp200]](s1), %[[BB_CASE200]] ; CHECK: G_BR %[[BB_NOTCASE200_CHECKNEXT]] ; @@ -157,20 +157,20 @@ false: ; ; CHECK: [[BB_DEFAULT]]: ; CHECK-NEXT: successors: %[[BB_RET:bb.[0-9]+.return]](0x80000000) -; CHECK: %[[regretdefault:[0-9]+]](s32) = G_ADD %0, %[[reg0]] +; CHECK: %[[regretdefault:[0-9]+]]:_(s32) = G_ADD %0, %[[reg0]] ; CHECK: G_BR %[[BB_RET]] ; ; CHECK: [[BB_CASE100]]: ; CHECK-NEXT: successors: %[[BB_RET:bb.[0-9]+.return]](0x80000000) -; CHECK: %[[regretc100:[0-9]+]](s32) = G_ADD %0, %[[reg1]] +; CHECK: %[[regretc100:[0-9]+]]:_(s32) = G_ADD %0, %[[reg1]] ; CHECK: G_BR %[[BB_RET]] ; ; CHECK: [[BB_CASE200]]: ; CHECK-NEXT: successors: %[[BB_RET]](0x80000000) -; CHECK: %[[regretc200:[0-9]+]](s32) = G_ADD %0, %[[reg2]] +; CHECK: %[[regretc200:[0-9]+]]:_(s32) = G_ADD %0, %[[reg2]] ; ; CHECK: [[BB_RET]]: -; CHECK-NEXT: %[[regret:[0-9]+]](s32) = G_PHI %[[regretdefault]](s32), %[[BB_DEFAULT]], %[[regretc100]](s32), %[[BB_CASE100]] +; CHECK-NEXT: %[[regret:[0-9]+]]:_(s32) = G_PHI %[[regretdefault]](s32), %[[BB_DEFAULT]], %[[regretc100]](s32), %[[BB_CASE100]] ; CHECK: %w0 = COPY %[[regret]](s32) ; CHECK: RET_ReallyLR implicit %w0 ; @@ -289,9 +289,9 @@ L2: ; preds = %L1 ; Tests for or. 
; CHECK-LABEL: name: ori64 -; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1 -; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_OR [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_OR [[ARG1]], [[ARG2]] ; CHECK-NEXT: %x0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %x0 define i64 @ori64(i64 %arg1, i64 %arg2) { @@ -300,9 +300,9 @@ define i64 @ori64(i64 %arg1, i64 %arg2) { } ; CHECK-LABEL: name: ori32 -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_OR [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_OR [[ARG1]], [[ARG2]] ; CHECK-NEXT: %w0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %w0 define i32 @ori32(i32 %arg1, i32 %arg2) { @@ -312,9 +312,9 @@ define i32 @ori32(i32 %arg1, i32 %arg2) { ; Tests for xor. ; CHECK-LABEL: name: xori64 -; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1 -; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_XOR [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_XOR [[ARG1]], [[ARG2]] ; CHECK-NEXT: %x0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %x0 define i64 @xori64(i64 %arg1, i64 %arg2) { @@ -323,9 +323,9 @@ define i64 @xori64(i64 %arg1, i64 %arg2) { } ; CHECK-LABEL: name: xori32 -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_XOR [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_XOR [[ARG1]], [[ARG2]] ; CHECK-NEXT: %w0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %w0 define i32 @xori32(i32 %arg1, i32 %arg2) { @@ -335,9 +335,9 @@ define i32 @xori32(i32 %arg1, i32 %arg2) { ; Tests for and. ; CHECK-LABEL: name: andi64 -; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1 -; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_AND [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_AND [[ARG1]], [[ARG2]] ; CHECK-NEXT: %x0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %x0 define i64 @andi64(i64 %arg1, i64 %arg2) { @@ -346,9 +346,9 @@ define i64 @andi64(i64 %arg1, i64 %arg2) { } ; CHECK-LABEL: name: andi32 -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_AND [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_AND [[ARG1]], [[ARG2]] ; CHECK-NEXT: %w0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %w0 define i32 @andi32(i32 %arg1, i32 %arg2) { @@ -358,9 +358,9 @@ define i32 @andi32(i32 %arg1, i32 %arg2) { ; Tests for sub. 
; CHECK-LABEL: name: subi64 -; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s64) = COPY %x1 -; CHECK-NEXT: [[RES:%[0-9]+]](s64) = G_SUB [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s64) = G_SUB [[ARG1]], [[ARG2]] ; CHECK-NEXT: %x0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %x0 define i64 @subi64(i64 %arg1, i64 %arg2) { @@ -369,9 +369,9 @@ define i64 @subi64(i64 %arg1, i64 %arg2) { } ; CHECK-LABEL: name: subi32 -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_SUB [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_SUB [[ARG1]], [[ARG2]] ; CHECK-NEXT: %w0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %w0 define i32 @subi32(i32 %arg1, i32 %arg2) { @@ -380,8 +380,8 @@ define i32 @subi32(i32 %arg1, i32 %arg2) { } ; CHECK-LABEL: name: ptrtoint -; CHECK: [[ARG1:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[RES:%[0-9]+]](s64) = G_PTRTOINT [[ARG1]] +; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_PTRTOINT [[ARG1]] ; CHECK: %x0 = COPY [[RES]] ; CHECK: RET_ReallyLR implicit %x0 define i64 @ptrtoint(i64* %a) { @@ -390,8 +390,8 @@ define i64 @ptrtoint(i64* %a) { } ; CHECK-LABEL: name: inttoptr -; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0 -; CHECK: [[RES:%[0-9]+]](p0) = G_INTTOPTR [[ARG1]] +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[RES:%[0-9]+]]:_(p0) = G_INTTOPTR [[ARG1]] ; CHECK: %x0 = COPY [[RES]] ; CHECK: RET_ReallyLR implicit %x0 define i64* @inttoptr(i64 %a) { @@ -400,7 +400,7 @@ define i64* @inttoptr(i64 %a) { } ; CHECK-LABEL: name: trivial_bitcast -; CHECK: [[ARG1:%[0-9]+]](p0) = COPY %x0 +; CHECK: [[ARG1:%[0-9]+]]:_(p0) = COPY %x0 ; CHECK: %x0 = COPY [[ARG1]] ; CHECK: RET_ReallyLR implicit %x0 define i64* @trivial_bitcast(i8* %a) { @@ -409,13 +409,13 @@ define i64* @trivial_bitcast(i8* %a) { } ; CHECK-LABEL: name: trivial_bitcast_with_copy -; CHECK: [[A:%[0-9]+]](p0) = COPY %x0 +; CHECK: [[A:%[0-9]+]]:_(p0) = COPY %x0 ; CHECK: G_BR %[[CAST:bb\.[0-9]+.cast]] ; CHECK: [[END:bb\.[0-9]+.end]]: ; CHECK: [[CAST]]: -; CHECK: {{%[0-9]+}}(p0) = COPY [[A]] +; CHECK: {{%[0-9]+}}:_(p0) = COPY [[A]] ; CHECK: G_BR %[[END]] define i64* @trivial_bitcast_with_copy(i8* %a) { br label %cast @@ -429,9 +429,9 @@ cast: } ; CHECK-LABEL: name: bitcast -; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0 -; CHECK: [[RES1:%[0-9]+]](<2 x s32>) = G_BITCAST [[ARG1]] -; CHECK: [[RES2:%[0-9]+]](s64) = G_BITCAST [[RES1]] +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[RES1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[ARG1]] +; CHECK: [[RES2:%[0-9]+]]:_(s64) = G_BITCAST [[RES1]] ; CHECK: %x0 = COPY [[RES2]] ; CHECK: RET_ReallyLR implicit %x0 define i64 @bitcast(i64 %a) { @@ -441,10 +441,10 @@ define i64 @bitcast(i64 %a) { } ; CHECK-LABEL: name: trunc -; CHECK: [[ARG1:%[0-9]+]](s64) = COPY %x0 -; CHECK: [[VEC:%[0-9]+]](<4 x s32>) = G_LOAD -; CHECK: [[RES1:%[0-9]+]](s8) = G_TRUNC [[ARG1]] -; CHECK: [[RES2:%[0-9]+]](<4 x s16>) = G_TRUNC [[VEC]] +; CHECK: [[ARG1:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[VEC:%[0-9]+]]:_(<4 x s32>) = G_LOAD +; CHECK: [[RES1:%[0-9]+]]:_(s8) = G_TRUNC [[ARG1]] +; CHECK: [[RES2:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[VEC]] define void @trunc(i64 %a) { %vecptr = alloca <4 x i32> %vec = load <4 x i32>, <4 x i32>* %vecptr @@ -454,13 +454,13 @@ define void 
@trunc(i64 %a) { } ; CHECK-LABEL: name: load -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[ADDR42:%[0-9]+]](p42) = COPY %x1 -; CHECK: [[VAL1:%[0-9]+]](s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr, align 16) -; CHECK: [[VAL2:%[0-9]+]](s64) = G_LOAD [[ADDR42]](p42) :: (load 8 from %ir.addr42) -; CHECK: [[SUM2:%.*]](s64) = G_ADD [[VAL1]], [[VAL2]] -; CHECK: [[VAL3:%[0-9]+]](s64) = G_LOAD [[ADDR]](p0) :: (volatile load 8 from %ir.addr) -; CHECK: [[SUM3:%[0-9]+]](s64) = G_ADD [[SUM2]], [[VAL3]] +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[ADDR42:%[0-9]+]]:_(p42) = COPY %x1 +; CHECK: [[VAL1:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr, align 16) +; CHECK: [[VAL2:%[0-9]+]]:_(s64) = G_LOAD [[ADDR42]](p42) :: (load 8 from %ir.addr42) +; CHECK: [[SUM2:%.*]]:_(s64) = G_ADD [[VAL1]], [[VAL2]] +; CHECK: [[VAL3:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (volatile load 8 from %ir.addr) +; CHECK: [[SUM3:%[0-9]+]]:_(s64) = G_ADD [[SUM2]], [[VAL3]] ; CHECK: %x0 = COPY [[SUM3]] ; CHECK: RET_ReallyLR implicit %x0 define i64 @load(i64* %addr, i64 addrspace(42)* %addr42) { @@ -475,10 +475,10 @@ define i64 @load(i64* %addr, i64 addrspace(42)* %addr42) { } ; CHECK-LABEL: name: store -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[ADDR42:%[0-9]+]](p42) = COPY %x1 -; CHECK: [[VAL1:%[0-9]+]](s64) = COPY %x2 -; CHECK: [[VAL2:%[0-9]+]](s64) = COPY %x3 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[ADDR42:%[0-9]+]]:_(p42) = COPY %x1 +; CHECK: [[VAL1:%[0-9]+]]:_(s64) = COPY %x2 +; CHECK: [[VAL2:%[0-9]+]]:_(s64) = COPY %x3 ; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr, align 16) ; CHECK: G_STORE [[VAL2]](s64), [[ADDR42]](p42) :: (store 8 into %ir.addr42) ; CHECK: G_STORE [[VAL1]](s64), [[ADDR]](p0) :: (volatile store 8 into %ir.addr) @@ -492,12 +492,12 @@ define void @store(i64* %addr, i64 addrspace(42)* %addr42, i64 %val1, i64 %val2) } ; CHECK-LABEL: name: intrinsics -; CHECK: [[CUR:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[BITS:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[CREG:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[PTR:%[0-9]+]](p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), [[CREG]] -; CHECK: [[PTR_VEC:%[0-9]+]](p0) = G_FRAME_INDEX %stack.0.ptr.vec -; CHECK: [[VEC:%[0-9]+]](<8 x s8>) = G_LOAD [[PTR_VEC]] +; CHECK: [[CUR:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[BITS:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[CREG:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), [[CREG]] +; CHECK: [[PTR_VEC:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr.vec +; CHECK: [[VEC:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_VEC]] ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), [[VEC]](<8 x s8>), [[VEC]](<8 x s8>), [[PTR]](p0) ; CHECK: RET_ReallyLR declare i8* @llvm.returnaddress(i32) @@ -516,12 +516,12 @@ define void @intrinsics(i32 %cur, i32 %bits) { ; CHECK: G_BR %[[FALSE:bb\.[0-9]+.false]] ; CHECK: [[TRUE]]: -; CHECK: [[RES1:%[0-9]+]](s32) = G_LOAD +; CHECK: [[RES1:%[0-9]+]]:_(s32) = G_LOAD ; CHECK: [[FALSE]]: -; CHECK: [[RES2:%[0-9]+]](s32) = G_LOAD +; CHECK: [[RES2:%[0-9]+]]:_(s32) = G_LOAD -; CHECK: [[RES:%[0-9]+]](s32) = G_PHI [[RES1]](s32), %[[TRUE]], [[RES2]](s32), %[[FALSE]] +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_PHI [[RES1]](s32), %[[TRUE]], [[RES2]](s32), %[[FALSE]] ; CHECK: %w0 = COPY [[RES]] define i32 @test_phi(i32* %addr1, i32* %addr2, i1 %tst) { br i1 %tst, label %true, label %false @@ -551,13 +551,13 @@ define void @unreachable(i32 %a) { ; It's important that constants are 
after argument passing, but before the ; rest of the entry block. ; CHECK-LABEL: name: constant_int -; CHECK: [[IN:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[ONE:%[0-9]+]](s32) = G_CONSTANT i32 1 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[ONE:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: {{bb.[0-9]+}}.next: -; CHECK: [[SUM1:%[0-9]+]](s32) = G_ADD [[IN]], [[ONE]] -; CHECK: [[SUM2:%[0-9]+]](s32) = G_ADD [[IN]], [[ONE]] -; CHECK: [[RES:%[0-9]+]](s32) = G_ADD [[SUM1]], [[SUM2]] +; CHECK: [[SUM1:%[0-9]+]]:_(s32) = G_ADD [[IN]], [[ONE]] +; CHECK: [[SUM2:%[0-9]+]]:_(s32) = G_ADD [[IN]], [[ONE]] +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ADD [[SUM1]], [[SUM2]] ; CHECK: %w0 = COPY [[RES]] define i32 @constant_int(i32 %in) { @@ -571,24 +571,24 @@ next: } ; CHECK-LABEL: name: constant_int_start -; CHECK: [[TWO:%[0-9]+]](s32) = G_CONSTANT i32 2 -; CHECK: [[ANSWER:%[0-9]+]](s32) = G_CONSTANT i32 42 -; CHECK: [[RES:%[0-9]+]](s32) = G_ADD [[TWO]], [[ANSWER]] +; CHECK: [[TWO:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 +; CHECK: [[ANSWER:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_ADD [[TWO]], [[ANSWER]] define i32 @constant_int_start() { %res = add i32 2, 42 ret i32 %res } ; CHECK-LABEL: name: test_undef -; CHECK: [[UNDEF:%[0-9]+]](s32) = G_IMPLICIT_DEF +; CHECK: [[UNDEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: %w0 = COPY [[UNDEF]] define i32 @test_undef() { ret i32 undef } ; CHECK-LABEL: name: test_constant_inttoptr -; CHECK: [[ONE:%[0-9]+]](s64) = G_CONSTANT i64 1 -; CHECK: [[PTR:%[0-9]+]](p0) = G_INTTOPTR [[ONE]] +; CHECK: [[ONE:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 +; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ONE]] ; CHECK: %x0 = COPY [[PTR]] define i8* @test_constant_inttoptr() { ret i8* inttoptr(i64 1 to i8*) @@ -597,15 +597,15 @@ define i8* @test_constant_inttoptr() { ; This failed purely because the Constant -> VReg map was kept across ; functions, so reuse the "i64 1" from above. 
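The comment above pins down the bug that the test_reused_constant test below guards against: the translator's Constant-to-vreg cache outlived the function it was built for. A deliberately simplified, hypothetical sketch of the cache-and-reset shape involved (the real IRTranslator keeps a richer mapping keyed on llvm::Value):

#include <unordered_map>

// Hypothetical, simplified stand-in for a Constant -> vreg cache. The bug
// being tested: skipping finalizeFunction() let a second function reuse the
// vreg created for "i64 1" while translating the first one.
class ConstantVRegCache {
  std::unordered_map<const void *, unsigned> ValToVReg;
  unsigned NextVReg = 0;

public:
  unsigned getOrCreateVReg(const void *C) {
    auto It = ValToVReg.find(C);
    if (It != ValToVReg.end())
      return It->second; // cache hit: same constant, same vreg
    unsigned VReg = NextVReg++;
    ValToVReg.emplace(C, VReg);
    return VReg;
  }

  // Must run between functions so cached vregs never leak across them.
  void finalizeFunction() { ValToVReg.clear(); }
};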
; CHECK-LABEL: name: test_reused_constant -; CHECK: [[ONE:%[0-9]+]](s64) = G_CONSTANT i64 1 +; CHECK: [[ONE:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK: %x0 = COPY [[ONE]] define i64 @test_reused_constant() { ret i64 1 } ; CHECK-LABEL: name: test_sext -; CHECK: [[IN:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[RES:%[0-9]+]](s64) = G_SEXT [[IN]] +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_SEXT [[IN]] ; CHECK: %x0 = COPY [[RES]] define i64 @test_sext(i32 %in) { %res = sext i32 %in to i64 @@ -613,8 +613,8 @@ define i64 @test_sext(i32 %in) { } ; CHECK-LABEL: name: test_zext -; CHECK: [[IN:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[RES:%[0-9]+]](s64) = G_ZEXT [[IN]] +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_ZEXT [[IN]] ; CHECK: %x0 = COPY [[RES]] define i64 @test_zext(i32 %in) { %res = zext i32 %in to i64 @@ -622,9 +622,9 @@ define i64 @test_zext(i32 %in) { } ; CHECK-LABEL: name: test_shl -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_SHL [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_SHL [[ARG1]], [[ARG2]] ; CHECK-NEXT: %w0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %w0 define i32 @test_shl(i32 %arg1, i32 %arg2) { @@ -634,9 +634,9 @@ define i32 @test_shl(i32 %arg1, i32 %arg2) { ; CHECK-LABEL: name: test_lshr -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_LSHR [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_LSHR [[ARG1]], [[ARG2]] ; CHECK-NEXT: %w0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %w0 define i32 @test_lshr(i32 %arg1, i32 %arg2) { @@ -645,9 +645,9 @@ define i32 @test_lshr(i32 %arg1, i32 %arg2) { } ; CHECK-LABEL: name: test_ashr -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_ASHR [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_ASHR [[ARG1]], [[ARG2]] ; CHECK-NEXT: %w0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %w0 define i32 @test_ashr(i32 %arg1, i32 %arg2) { @@ -656,9 +656,9 @@ define i32 @test_ashr(i32 %arg1, i32 %arg2) { } ; CHECK-LABEL: name: test_sdiv -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_SDIV [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_SDIV [[ARG1]], [[ARG2]] ; CHECK-NEXT: %w0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %w0 define i32 @test_sdiv(i32 %arg1, i32 %arg2) { @@ -667,9 +667,9 @@ define i32 @test_sdiv(i32 %arg1, i32 %arg2) { } ; CHECK-LABEL: name: test_udiv -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_UDIV [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_UDIV [[ARG1]], [[ARG2]] ; CHECK-NEXT: %w0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %w0 define i32 @test_udiv(i32 %arg1, i32 %arg2) { @@ -678,9 +678,9 @@ define i32 
@test_udiv(i32 %arg1, i32 %arg2) { } ; CHECK-LABEL: name: test_srem -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_SREM [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_SREM [[ARG1]], [[ARG2]] ; CHECK-NEXT: %w0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %w0 define i32 @test_srem(i32 %arg1, i32 %arg2) { @@ -689,9 +689,9 @@ define i32 @test_srem(i32 %arg1, i32 %arg2) { } ; CHECK-LABEL: name: test_urem -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %w1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_UREM [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_UREM [[ARG1]], [[ARG2]] ; CHECK-NEXT: %w0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %w0 define i32 @test_urem(i32 %arg1, i32 %arg2) { @@ -700,15 +700,15 @@ define i32 @test_urem(i32 %arg1, i32 %arg2) { } ; CHECK-LABEL: name: test_constant_null -; CHECK: [[NULL:%[0-9]+]](p0) = G_CONSTANT i64 0 +; CHECK: [[NULL:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; CHECK: %x0 = COPY [[NULL]] define i8* @test_constant_null() { ret i8* null } ; CHECK-LABEL: name: test_struct_memops -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[VAL:%[0-9]+]](s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr, align 4) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[VAL:%[0-9]+]]:_(s64) = G_LOAD [[ADDR]](p0) :: (load 8 from %ir.addr, align 4) ; CHECK: G_STORE [[VAL]](s64), [[ADDR]](p0) :: (store 8 into %ir.addr, align 4) define void @test_struct_memops({ i8, i32 }* %addr) { %val = load { i8, i32 }, { i8, i32 }* %addr @@ -717,8 +717,8 @@ define void @test_struct_memops({ i8, i32 }* %addr) { } ; CHECK-LABEL: name: test_i1_memops -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[VAL:%[0-9]+]](s1) = G_LOAD [[ADDR]](p0) :: (load 1 from %ir.addr) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[VAL:%[0-9]+]]:_(s1) = G_LOAD [[ADDR]](p0) :: (load 1 from %ir.addr) ; CHECK: G_STORE [[VAL]](s1), [[ADDR]](p0) :: (store 1 into %ir.addr) define void @test_i1_memops(i1* %addr) { %val = load i1, i1* %addr @@ -727,10 +727,10 @@ define void @test_i1_memops(i1* %addr) { } ; CHECK-LABEL: name: int_comparison -; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2 -; CHECK: [[TST:%[0-9]+]](s1) = G_ICMP intpred(ne), [[LHS]](s32), [[RHS]] +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[TST:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LHS]](s32), [[RHS]] ; CHECK: G_STORE [[TST]](s1), [[ADDR]](p0) define void @int_comparison(i32 %a, i32 %b, i1* %addr) { %res = icmp ne i32 %a, %b @@ -739,10 +739,10 @@ define void @int_comparison(i32 %a, i32 %b, i1* %addr) { } ; CHECK-LABEL: name: ptr_comparison -; CHECK: [[LHS:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[RHS:%[0-9]+]](p0) = COPY %x1 -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2 -; CHECK: [[TST:%[0-9]+]](s1) = G_ICMP intpred(eq), [[LHS]](p0), [[RHS]] +; CHECK: [[LHS:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[RHS:%[0-9]+]]:_(p0) = COPY %x1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[TST:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LHS]](p0), [[RHS]] ; CHECK: G_STORE [[TST]](s1), [[ADDR]](p0) define void @ptr_comparison(i8* %a, i8* %b, i1* %addr) 
{ %res = icmp eq i8* %a, %b @@ -751,9 +751,9 @@ define void @ptr_comparison(i8* %a, i8* %b, i1* %addr) { } ; CHECK-LABEL: name: test_fadd -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %s0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %s1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_FADD [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_FADD [[ARG1]], [[ARG2]] ; CHECK-NEXT: %s0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %s0 define float @test_fadd(float %arg1, float %arg2) { @@ -762,9 +762,9 @@ define float @test_fadd(float %arg1, float %arg2) { } ; CHECK-LABEL: name: test_fsub -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %s0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %s1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_FSUB [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_FSUB [[ARG1]], [[ARG2]] ; CHECK-NEXT: %s0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %s0 define float @test_fsub(float %arg1, float %arg2) { @@ -773,9 +773,9 @@ define float @test_fsub(float %arg1, float %arg2) { } ; CHECK-LABEL: name: test_fmul -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %s0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %s1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_FMUL [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_FMUL [[ARG1]], [[ARG2]] ; CHECK-NEXT: %s0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %s0 define float @test_fmul(float %arg1, float %arg2) { @@ -784,9 +784,9 @@ define float @test_fmul(float %arg1, float %arg2) { } ; CHECK-LABEL: name: test_fdiv -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %s0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %s1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_FDIV [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_FDIV [[ARG1]], [[ARG2]] ; CHECK-NEXT: %s0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %s0 define float @test_fdiv(float %arg1, float %arg2) { @@ -795,9 +795,9 @@ define float @test_fdiv(float %arg1, float %arg2) { } ; CHECK-LABEL: name: test_frem -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %s0 -; CHECK-NEXT: [[ARG2:%[0-9]+]](s32) = COPY %s1 -; CHECK-NEXT: [[RES:%[0-9]+]](s32) = G_FREM [[ARG1]], [[ARG2]] +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK-NEXT: [[ARG2:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK-NEXT: [[RES:%[0-9]+]]:_(s32) = G_FREM [[ARG1]], [[ARG2]] ; CHECK-NEXT: %s0 = COPY [[RES]] ; CHECK-NEXT: RET_ReallyLR implicit %s0 define float @test_frem(float %arg1, float %arg2) { @@ -806,13 +806,13 @@ define float @test_frem(float %arg1, float %arg2) { } ; CHECK-LABEL: name: test_sadd_overflow -; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2 -; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_SADDO [[LHS]], [[RHS]] -; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SADDO [[LHS]], [[RHS]] +; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; 
CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 ; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { @@ -822,14 +822,14 @@ define void @test_sadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { } ; CHECK-LABEL: name: test_uadd_overflow -; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2 -; CHECK: [[ZERO:%[0-9]+]](s1) = G_CONSTANT i1 false -; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_UADDE [[LHS]], [[RHS]], [[ZERO]] -; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[ZERO:%[0-9]+]]:_(s1) = G_CONSTANT i1 false +; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_UADDE [[LHS]], [[RHS]], [[ZERO]] +; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 ; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { @@ -839,13 +839,13 @@ define void @test_uadd_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { } ; CHECK-LABEL: name: test_ssub_overflow -; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2 -; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_SSUBO [[LHS]], [[RHS]] -; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SSUBO [[LHS]], [[RHS]] +; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 ; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { @@ -855,14 +855,14 @@ define void @test_ssub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { } ; CHECK-LABEL: name: test_usub_overflow -; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2 -; CHECK: [[ZERO:%[0-9]+]](s1) = G_CONSTANT i1 false -; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_USUBE [[LHS]], [[RHS]], [[ZERO]] -; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[ZERO:%[0-9]+]]:_(s1) = G_CONSTANT i1 false +; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = 
G_USUBE [[LHS]], [[RHS]], [[ZERO]] +; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 ; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { @@ -872,13 +872,13 @@ define void @test_usub_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %subr) { } ; CHECK-LABEL: name: test_smul_overflow -; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2 -; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_SMULO [[LHS]], [[RHS]] -; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_SMULO [[LHS]], [[RHS]] +; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 ; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { @@ -888,13 +888,13 @@ define void @test_smul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { } ; CHECK-LABEL: name: test_umul_overflow -; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2 -; CHECK: [[VAL:%[0-9]+]](s32), [[OVERFLOW:%[0-9]+]](s1) = G_UMULO [[LHS]], [[RHS]] -; CHECK: [[TMP:%[0-9]+]](s64) = G_IMPLICIT_DEF -; CHECK: [[TMP1:%[0-9]+]](s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 -; CHECK: [[RES:%[0-9]+]](s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[VAL:%[0-9]+]]:_(s32), [[OVERFLOW:%[0-9]+]]:_(s1) = G_UMULO [[LHS]], [[RHS]] +; CHECK: [[TMP:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF +; CHECK: [[TMP1:%[0-9]+]]:_(s64) = G_INSERT [[TMP]], [[VAL]](s32), 0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_INSERT [[TMP1]], [[OVERFLOW]](s1), 32 ; CHECK: G_STORE [[RES]](s64), [[ADDR]](p0) declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { @@ -904,8 +904,8 @@ define void @test_umul_overflow(i32 %lhs, i32 %rhs, { i32, i1 }* %addr) { } ; CHECK-LABEL: name: test_extractvalue -; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD -; CHECK: [[RES:%[0-9]+]](s32) = G_EXTRACT [[STRUCT]](s128), 64 +; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT [[STRUCT]](s128), 64 ; CHECK: %w0 = COPY [[RES]] %struct.nested = type {i8, { i8, i32 }, i32} define i32 @test_extractvalue(%struct.nested* %addr) { @@ -915,8 +915,8 @@ define i32 @test_extractvalue(%struct.nested* %addr) { } ; CHECK-LABEL: name: test_extractvalue_agg -; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD -; CHECK: [[RES:%[0-9]+]](s64) = G_EXTRACT [[STRUCT]](s128), 32 +; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 32 ; CHECK: G_STORE [[RES]] define void 
@test_extractvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { %struct = load %struct.nested, %struct.nested* %addr @@ -926,9 +926,9 @@ define void @test_extractvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { } ; CHECK-LABEL: name: test_insertvalue -; CHECK: [[VAL:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD -; CHECK: [[NEWSTRUCT:%[0-9]+]](s128) = G_INSERT [[STRUCT]], [[VAL]](s32), 64 +; CHECK: [[VAL:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD +; CHECK: [[NEWSTRUCT:%[0-9]+]]:_(s128) = G_INSERT [[STRUCT]], [[VAL]](s32), 64 ; CHECK: G_STORE [[NEWSTRUCT]](s128), define void @test_insertvalue(%struct.nested* %addr, i32 %val) { %struct = load %struct.nested, %struct.nested* %addr @@ -939,9 +939,9 @@ define void @test_insertvalue(%struct.nested* %addr, i32 %val) { define [1 x i64] @test_trivial_insert([1 x i64] %s, i64 %val) { ; CHECK-LABEL: name: test_trivial_insert -; CHECK: [[STRUCT:%[0-9]+]](s64) = COPY %x0 -; CHECK: [[VAL:%[0-9]+]](s64) = COPY %x1 -; CHECK: [[RES:%[0-9]+]](s64) = COPY [[VAL]](s64) +; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[VAL:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK: [[RES:%[0-9]+]]:_(s64) = COPY [[VAL]](s64) ; CHECK: %x0 = COPY [[RES]] %res = insertvalue [1 x i64] %s, i64 %val, 0 ret [1 x i64] %res @@ -949,18 +949,18 @@ define [1 x i64] @test_trivial_insert([1 x i64] %s, i64 %val) { define [1 x i8*] @test_trivial_insert_ptr([1 x i8*] %s, i8* %val) { ; CHECK-LABEL: name: test_trivial_insert_ptr -; CHECK: [[STRUCT:%[0-9]+]](s64) = COPY %x0 -; CHECK: [[VAL:%[0-9]+]](p0) = COPY %x1 -; CHECK: [[RES:%[0-9]+]](s64) = G_PTRTOINT [[VAL]](p0) +; CHECK: [[STRUCT:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[VAL:%[0-9]+]]:_(p0) = COPY %x1 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_PTRTOINT [[VAL]](p0) ; CHECK: %x0 = COPY [[RES]] %res = insertvalue [1 x i8*] %s, i8* %val, 0 ret [1 x i8*] %res } ; CHECK-LABEL: name: test_insertvalue_agg -; CHECK: [[SMALLSTRUCT:%[0-9]+]](s64) = G_LOAD -; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD -; CHECK: [[RES:%[0-9]+]](s128) = G_INSERT [[STRUCT]], [[SMALLSTRUCT]](s64), 32 +; CHECK: [[SMALLSTRUCT:%[0-9]+]]:_(s64) = G_LOAD +; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD +; CHECK: [[RES:%[0-9]+]]:_(s128) = G_INSERT [[STRUCT]], [[SMALLSTRUCT]](s64), 32 ; CHECK: G_STORE [[RES]](s128) define void @test_insertvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { %smallstruct = load {i8, i32}, {i8, i32}* %addr2 @@ -971,10 +971,11 @@ define void @test_insertvalue_agg(%struct.nested* %addr, {i8, i32}* %addr2) { } ; CHECK-LABEL: name: test_select -; CHECK: [[TST:%[0-9]+]](s1) = COPY %w0 -; CHECK: [[LHS:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[RHS:%[0-9]+]](s32) = COPY %w2 -; CHECK: [[RES:%[0-9]+]](s32) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]] +; CHECK: [[TST_C:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[TST:%[0-9]+]]:_(s1) = G_TRUNC [[TST_C]] +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %w2 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]] ; CHECK: %w0 = COPY [[RES]] define i32 @test_select(i1 %tst, i32 %lhs, i32 %rhs) { %res = select i1 %tst, i32 %lhs, i32 %rhs @@ -982,10 +983,11 @@ define i32 @test_select(i1 %tst, i32 %lhs, i32 %rhs) { } ; CHECK-LABEL: name: test_select_ptr -; CHECK: [[TST:%[0-9]+]](s1) = COPY %w0 -; CHECK: [[LHS:%[0-9]+]](p0) = COPY %x1 -; CHECK: [[RHS:%[0-9]+]](p0) = COPY %x2 -; CHECK: [[RES:%[0-9]+]](p0) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]] +; CHECK: [[TST_C:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[TST:%[0-9]+]]:_(s1) = 
G_TRUNC [[TST_C]] +; CHECK: [[LHS:%[0-9]+]]:_(p0) = COPY %x1 +; CHECK: [[RHS:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[RES:%[0-9]+]]:_(p0) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]] ; CHECK: %x0 = COPY [[RES]] define i8* @test_select_ptr(i1 %tst, i8* %lhs, i8* %rhs) { %res = select i1 %tst, i8* %lhs, i8* %rhs @@ -993,10 +995,11 @@ define i8* @test_select_ptr(i1 %tst, i8* %lhs, i8* %rhs) { } ; CHECK-LABEL: name: test_select_vec -; CHECK: [[TST:%[0-9]+]](s1) = COPY %w0 -; CHECK: [[LHS:%[0-9]+]](<4 x s32>) = COPY %q0 -; CHECK: [[RHS:%[0-9]+]](<4 x s32>) = COPY %q1 -; CHECK: [[RES:%[0-9]+]](<4 x s32>) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]] +; CHECK: [[TST_C:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[TST:%[0-9]+]]:_(s1) = G_TRUNC [[TST_C]] +; CHECK: [[LHS:%[0-9]+]]:_(<4 x s32>) = COPY %q0 +; CHECK: [[RHS:%[0-9]+]]:_(<4 x s32>) = COPY %q1 +; CHECK: [[RES:%[0-9]+]]:_(<4 x s32>) = G_SELECT [[TST]](s1), [[LHS]], [[RHS]] ; CHECK: %q0 = COPY [[RES]] define <4 x i32> @test_select_vec(i1 %tst, <4 x i32> %lhs, <4 x i32> %rhs) { %res = select i1 %tst, <4 x i32> %lhs, <4 x i32> %rhs @@ -1004,11 +1007,11 @@ define <4 x i32> @test_select_vec(i1 %tst, <4 x i32> %lhs, <4 x i32> %rhs) { } ; CHECK-LABEL: name: test_vselect_vec -; CHECK: [[TST32:%[0-9]+]](<4 x s32>) = COPY %q0 -; CHECK: [[LHS:%[0-9]+]](<4 x s32>) = COPY %q1 -; CHECK: [[RHS:%[0-9]+]](<4 x s32>) = COPY %q2 -; CHECK: [[TST:%[0-9]+]](<4 x s1>) = G_TRUNC [[TST32]](<4 x s32>) -; CHECK: [[RES:%[0-9]+]](<4 x s32>) = G_SELECT [[TST]](<4 x s1>), [[LHS]], [[RHS]] +; CHECK: [[TST32:%[0-9]+]]:_(<4 x s32>) = COPY %q0 +; CHECK: [[LHS:%[0-9]+]]:_(<4 x s32>) = COPY %q1 +; CHECK: [[RHS:%[0-9]+]]:_(<4 x s32>) = COPY %q2 +; CHECK: [[TST:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[TST32]](<4 x s32>) +; CHECK: [[RES:%[0-9]+]]:_(<4 x s32>) = G_SELECT [[TST]](<4 x s1>), [[LHS]], [[RHS]] ; CHECK: %q0 = COPY [[RES]] define <4 x i32> @test_vselect_vec(<4 x i32> %tst32, <4 x i32> %lhs, <4 x i32> %rhs) { %tst = trunc <4 x i32> %tst32 to <4 x i1> @@ -1017,9 +1020,9 @@ define <4 x i32> @test_vselect_vec(<4 x i32> %tst32, <4 x i32> %lhs, <4 x i32> % } ; CHECK-LABEL: name: test_fptosi -; CHECK: [[FPADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[FP:%[0-9]+]](s32) = G_LOAD [[FPADDR]](p0) -; CHECK: [[RES:%[0-9]+]](s64) = G_FPTOSI [[FP]](s32) +; CHECK: [[FPADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[FP:%[0-9]+]]:_(s32) = G_LOAD [[FPADDR]](p0) +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FPTOSI [[FP]](s32) ; CHECK: %x0 = COPY [[RES]] define i64 @test_fptosi(float* %fp.addr) { %fp = load float, float* %fp.addr @@ -1028,9 +1031,9 @@ define i64 @test_fptosi(float* %fp.addr) { } ; CHECK-LABEL: name: test_fptoui -; CHECK: [[FPADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[FP:%[0-9]+]](s32) = G_LOAD [[FPADDR]](p0) -; CHECK: [[RES:%[0-9]+]](s64) = G_FPTOUI [[FP]](s32) +; CHECK: [[FPADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[FP:%[0-9]+]]:_(s32) = G_LOAD [[FPADDR]](p0) +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FPTOUI [[FP]](s32) ; CHECK: %x0 = COPY [[RES]] define i64 @test_fptoui(float* %fp.addr) { %fp = load float, float* %fp.addr @@ -1039,9 +1042,9 @@ define i64 @test_fptoui(float* %fp.addr) { } ; CHECK-LABEL: name: test_sitofp -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[IN:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[FP:%[0-9]+]](s64) = G_SITOFP [[IN]](s32) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[FP:%[0-9]+]]:_(s64) = G_SITOFP [[IN]](s32) ; CHECK: G_STORE [[FP]](s64), [[ADDR]](p0) define void @test_sitofp(double* %addr, i32 %in) { %fp = sitofp i32 %in to double @@ 
-1050,9 +1053,9 @@ define void @test_sitofp(double* %addr, i32 %in) { } ; CHECK-LABEL: name: test_uitofp -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[IN:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[FP:%[0-9]+]](s64) = G_UITOFP [[IN]](s32) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[FP:%[0-9]+]]:_(s64) = G_UITOFP [[IN]](s32) ; CHECK: G_STORE [[FP]](s64), [[ADDR]](p0) define void @test_uitofp(double* %addr, i32 %in) { %fp = uitofp i32 %in to double @@ -1061,8 +1064,8 @@ define void @test_uitofp(double* %addr, i32 %in) { } ; CHECK-LABEL: name: test_fpext -; CHECK: [[IN:%[0-9]+]](s32) = COPY %s0 -; CHECK: [[RES:%[0-9]+]](s64) = G_FPEXT [[IN]](s32) +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FPEXT [[IN]](s32) ; CHECK: %d0 = COPY [[RES]] define double @test_fpext(float %in) { %res = fpext float %in to double @@ -1070,8 +1073,8 @@ define double @test_fpext(float %in) { } ; CHECK-LABEL: name: test_fptrunc -; CHECK: [[IN:%[0-9]+]](s64) = COPY %d0 -; CHECK: [[RES:%[0-9]+]](s32) = G_FPTRUNC [[IN]](s64) +; CHECK: [[IN:%[0-9]+]]:_(s64) = COPY %d0 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FPTRUNC [[IN]](s64) ; CHECK: %s0 = COPY [[RES]] define float @test_fptrunc(double %in) { %res = fptrunc double %in to float @@ -1079,8 +1082,8 @@ define float @test_fptrunc(double %in) { } ; CHECK-LABEL: name: test_constant_float -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[TMP:%[0-9]+]](s32) = G_FCONSTANT float 1.500000e+00 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[TMP:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.500000e+00 ; CHECK: G_STORE [[TMP]](s32), [[ADDR]](p0) define void @test_constant_float(float* %addr) { store float 1.5, float* %addr @@ -1088,12 +1091,12 @@ define void @test_constant_float(float* %addr) { } ; CHECK-LABEL: name: float_comparison -; CHECK: [[LHSADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[RHSADDR:%[0-9]+]](p0) = COPY %x1 -; CHECK: [[BOOLADDR:%[0-9]+]](p0) = COPY %x2 -; CHECK: [[LHS:%[0-9]+]](s32) = G_LOAD [[LHSADDR]](p0) -; CHECK: [[RHS:%[0-9]+]](s32) = G_LOAD [[RHSADDR]](p0) -; CHECK: [[TST:%[0-9]+]](s1) = G_FCMP floatpred(oge), [[LHS]](s32), [[RHS]] +; CHECK: [[LHSADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[RHSADDR:%[0-9]+]]:_(p0) = COPY %x1 +; CHECK: [[BOOLADDR:%[0-9]+]]:_(p0) = COPY %x2 +; CHECK: [[LHS:%[0-9]+]]:_(s32) = G_LOAD [[LHSADDR]](p0) +; CHECK: [[RHS:%[0-9]+]]:_(s32) = G_LOAD [[RHSADDR]](p0) +; CHECK: [[TST:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[LHS]](s32), [[RHS]] ; CHECK: G_STORE [[TST]](s1), [[BOOLADDR]](p0) define void @float_comparison(float* %a.addr, float* %b.addr, i1* %bool.addr) { %a = load float, float* %a.addr @@ -1104,10 +1107,10 @@ define void @float_comparison(float* %a.addr, float* %b.addr, i1* %bool.addr) { } ; CHECK-LABEL: name: trivial_float_comparison -; CHECK: [[ENTRY_R1:%[0-9]+]](s1) = G_CONSTANT i1 false -; CHECK: [[ENTRY_R2:%[0-9]+]](s1) = G_CONSTANT i1 true -; CHECK: [[R1:%[0-9]+]](s1) = COPY [[ENTRY_R1]](s1) -; CHECK: [[R2:%[0-9]+]](s1) = COPY [[ENTRY_R2]](s1) +; CHECK: [[ENTRY_R1:%[0-9]+]]:_(s1) = G_CONSTANT i1 false +; CHECK: [[ENTRY_R2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true +; CHECK: [[R1:%[0-9]+]]:_(s1) = COPY [[ENTRY_R1]](s1) +; CHECK: [[R2:%[0-9]+]]:_(s1) = COPY [[ENTRY_R2]](s1) ; CHECK: G_ADD [[R1]], [[R2]] define i1 @trivial_float_comparison(double %a, double %b) { %r1 = fcmp false double %a, %b @@ -1120,7 +1123,7 @@ define i1 @trivial_float_comparison(double %a, double %b) { define i32* @test_global() { ; CHECK-LABEL: name: test_global -; 
CHECK: [[TMP:%[0-9]+]](p0) = G_GLOBAL_VALUE @var{{$}} +; CHECK: [[TMP:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var{{$}} ; CHECK: %x0 = COPY [[TMP]](p0) ret i32* @var @@ -1129,7 +1132,7 @@ define i32* @test_global() { @var1 = addrspace(42) global i32 0 define i32 addrspace(42)* @test_global_addrspace() { ; CHECK-LABEL: name: test_global -; CHECK: [[TMP:%[0-9]+]](p42) = G_GLOBAL_VALUE @var1{{$}} +; CHECK: [[TMP:%[0-9]+]]:_(p42) = G_GLOBAL_VALUE @var1{{$}} ; CHECK: %x0 = COPY [[TMP]](p42) ret i32 addrspace(42)* @var1 @@ -1138,7 +1141,7 @@ define i32 addrspace(42)* @test_global_addrspace() { define void()* @test_global_func() { ; CHECK-LABEL: name: test_global_func -; CHECK: [[TMP:%[0-9]+]](p0) = G_GLOBAL_VALUE @allocai64{{$}} +; CHECK: [[TMP:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @allocai64{{$}} ; CHECK: %x0 = COPY [[TMP]](p0) ret void()* @allocai64 @@ -1147,9 +1150,9 @@ define void()* @test_global_func() { declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile) define void @test_memcpy(i8* %dst, i8* %src, i64 %size) { ; CHECK-LABEL: name: test_memcpy -; CHECK: [[DST:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[SRC:%[0-9]+]](p0) = COPY %x1 -; CHECK: [[SIZE:%[0-9]+]](s64) = COPY %x2 +; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY %x1 +; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY %x2 ; CHECK: %x0 = COPY [[DST]] ; CHECK: %x1 = COPY [[SRC]] ; CHECK: %x2 = COPY [[SIZE]] @@ -1161,9 +1164,9 @@ define void @test_memcpy(i8* %dst, i8* %src, i64 %size) { declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile) define void @test_memmove(i8* %dst, i8* %src, i64 %size) { ; CHECK-LABEL: name: test_memmove -; CHECK: [[DST:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[SRC:%[0-9]+]](p0) = COPY %x1 -; CHECK: [[SIZE:%[0-9]+]](s64) = COPY %x2 +; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[SRC:%[0-9]+]]:_(p0) = COPY %x1 +; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY %x2 ; CHECK: %x0 = COPY [[DST]] ; CHECK: %x1 = COPY [[SRC]] ; CHECK: %x2 = COPY [[SIZE]] @@ -1175,11 +1178,13 @@ define void @test_memmove(i8* %dst, i8* %src, i64 %size) { declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32 %align, i1 %volatile) define void @test_memset(i8* %dst, i8 %val, i64 %size) { ; CHECK-LABEL: name: test_memset -; CHECK: [[DST:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[SRC:%[0-9]+]](s8) = COPY %w1 -; CHECK: [[SIZE:%[0-9]+]](s64) = COPY %x2 +; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[SRC_C:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[SRC:%[0-9]+]]:_(s8) = G_TRUNC [[SRC_C]] +; CHECK: [[SIZE:%[0-9]+]]:_(s64) = COPY %x2 ; CHECK: %x0 = COPY [[DST]] -; CHECK: %w1 = COPY [[SRC]] +; CHECK: [[SRC_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[SRC]] +; CHECK: %w1 = COPY [[SRC_TMP]] ; CHECK: %x2 = COPY [[SIZE]] ; CHECK: BL $memset, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %w1, implicit %x2 call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i32 1, i1 0) @@ -1190,12 +1195,12 @@ declare i64 @llvm.objectsize.i64(i8*, i1) declare i32 @llvm.objectsize.i32(i8*, i1) define void @test_objectsize(i8* %addr0, i8* %addr1) { ; CHECK-LABEL: name: test_objectsize -; CHECK: [[ADDR0:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[ADDR1:%[0-9]+]](p0) = COPY %x1 -; CHECK: {{%[0-9]+}}(s64) = G_CONSTANT i64 -1 -; CHECK: {{%[0-9]+}}(s64) = G_CONSTANT i64 0 -; CHECK: {{%[0-9]+}}(s32) = G_CONSTANT i32 -1 -; CHECK: {{%[0-9]+}}(s32) = G_CONSTANT i32 0 +; CHECK: [[ADDR0:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[ADDR1:%[0-9]+]]:_(p0) = COPY %x1 +; CHECK: {{%[0-9]+}}:_(s64) = G_CONSTANT i64 -1 +; 
CHECK: {{%[0-9]+}}:_(s64) = G_CONSTANT i64 0 +; CHECK: {{%[0-9]+}}:_(s32) = G_CONSTANT i32 -1 +; CHECK: {{%[0-9]+}}:_(s32) = G_CONSTANT i32 0 %size64.0 = call i64 @llvm.objectsize.i64(i8* %addr0, i1 0) %size64.intmin = call i64 @llvm.objectsize.i64(i8* %addr0, i1 1) %size32.0 = call i32 @llvm.objectsize.i32(i8* %addr0, i1 0) @@ -1205,8 +1210,8 @@ define void @test_objectsize(i8* %addr0, i8* %addr1) { define void @test_large_const(i128* %addr) { ; CHECK-LABEL: name: test_large_const -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[VAL:%[0-9]+]](s128) = G_CONSTANT i128 42 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[VAL:%[0-9]+]]:_(s128) = G_CONSTANT i128 42 ; CHECK: G_STORE [[VAL]](s128), [[ADDR]](p0) store i128 42, i128* %addr ret void @@ -1219,8 +1224,8 @@ define void @test_large_const(i128* %addr) { define i8* @test_const_placement() { ; CHECK-LABEL: name: test_const_placement ; CHECK: bb.{{[0-9]+}} (%ir-block.{{[0-9]+}}): -; CHECK: [[VAL_INT:%[0-9]+]](s32) = G_CONSTANT i32 42 -; CHECK: [[VAL:%[0-9]+]](p0) = G_INTTOPTR [[VAL_INT]](s32) +; CHECK: [[VAL_INT:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 +; CHECK: [[VAL:%[0-9]+]]:_(p0) = G_INTTOPTR [[VAL_INT]](s32) ; CHECK: {{bb.[0-9]+}}.next: br label %next @@ -1240,7 +1245,7 @@ define void @test_va_end(i8* %list) { define void @test_va_arg(i8* %list) { ; CHECK-LABEL: test_va_arg -; CHECK: [[LIST:%[0-9]+]](p0) = COPY %x0 +; CHECK: [[LIST:%[0-9]+]]:_(p0) = COPY %x0 ; CHECK: G_VAARG [[LIST]](p0), 8 ; CHECK: G_VAARG [[LIST]](p0), 1 ; CHECK: G_VAARG [[LIST]](p0), 16 @@ -1254,9 +1259,9 @@ define void @test_va_arg(i8* %list) { declare float @llvm.pow.f32(float, float) define float @test_pow_intrin(float %l, float %r) { ; CHECK-LABEL: name: test_pow_intrin -; CHECK: [[LHS:%[0-9]+]](s32) = COPY %s0 -; CHECK: [[RHS:%[0-9]+]](s32) = COPY %s1 -; CHECK: [[RES:%[0-9]+]](s32) = G_FPOW [[LHS]], [[RHS]] +; CHECK: [[LHS:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[RHS:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FPOW [[LHS]], [[RHS]] ; CHECK: %s0 = COPY [[RES]] %res = call float @llvm.pow.f32(float %l, float %r) ret float %res @@ -1265,10 +1270,10 @@ define float @test_pow_intrin(float %l, float %r) { declare float @llvm.fma.f32(float, float, float) define float @test_fma_intrin(float %a, float %b, float %c) { ; CHECK-LABEL: name: test_fma_intrin -; CHECK: [[A:%[0-9]+]](s32) = COPY %s0 -; CHECK: [[B:%[0-9]+]](s32) = COPY %s1 -; CHECK: [[C:%[0-9]+]](s32) = COPY %s2 -; CHECK: [[RES:%[0-9]+]](s32) = G_FMA [[A]], [[B]], [[C]] +; CHECK: [[A:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[B:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK: [[C:%[0-9]+]]:_(s32) = COPY %s2 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FMA [[A]], [[B]], [[C]] ; CHECK: %s0 = COPY [[RES]] %res = call float @llvm.fma.f32(float %a, float %b, float %c) ret float %res @@ -1277,8 +1282,8 @@ define float @test_fma_intrin(float %a, float %b, float %c) { declare float @llvm.exp.f32(float) define float @test_exp_intrin(float %a) { ; CHECK-LABEL: name: test_exp_intrin -; CHECK: [[A:%[0-9]+]](s32) = COPY %s0 -; CHECK: [[RES:%[0-9]+]](s32) = G_FEXP [[A]] +; CHECK: [[A:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FEXP [[A]] ; CHECK: %s0 = COPY [[RES]] %res = call float @llvm.exp.f32(float %a) ret float %res @@ -1287,8 +1292,8 @@ define float @test_exp_intrin(float %a) { declare float @llvm.exp2.f32(float) define float @test_exp2_intrin(float %a) { ; CHECK-LABEL: name: test_exp2_intrin -; CHECK: [[A:%[0-9]+]](s32) = COPY %s0 -; CHECK: [[RES:%[0-9]+]](s32) = G_FEXP2 [[A]] +; CHECK: 
[[A:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FEXP2 [[A]] ; CHECK: %s0 = COPY [[RES]] %res = call float @llvm.exp2.f32(float %a) ret float %res @@ -1297,8 +1302,8 @@ define float @test_exp2_intrin(float %a) { declare float @llvm.log.f32(float) define float @test_log_intrin(float %a) { ; CHECK-LABEL: name: test_log_intrin -; CHECK: [[A:%[0-9]+]](s32) = COPY %s0 -; CHECK: [[RES:%[0-9]+]](s32) = G_FLOG [[A]] +; CHECK: [[A:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FLOG [[A]] ; CHECK: %s0 = COPY [[RES]] %res = call float @llvm.log.f32(float %a) ret float %res @@ -1307,8 +1312,8 @@ define float @test_log_intrin(float %a) { declare float @llvm.log2.f32(float) define float @test_log2_intrin(float %a) { ; CHECK-LABEL: name: test_log2_intrin -; CHECK: [[A:%[0-9]+]](s32) = COPY %s0 -; CHECK: [[RES:%[0-9]+]](s32) = G_FLOG2 [[A]] +; CHECK: [[A:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FLOG2 [[A]] ; CHECK: %s0 = COPY [[RES]] %res = call float @llvm.log2.f32(float %a) ret float %res @@ -1326,12 +1331,12 @@ define void @test_lifetime_intrin() { define void @test_load_store_atomics(i8* %addr) { ; CHECK-LABEL: name: test_load_store_atomics -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[V0:%[0-9]+]](s8) = G_LOAD [[ADDR]](p0) :: (load unordered 1 from %ir.addr) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[V0:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load unordered 1 from %ir.addr) ; CHECK: G_STORE [[V0]](s8), [[ADDR]](p0) :: (store monotonic 1 into %ir.addr) -; CHECK: [[V1:%[0-9]+]](s8) = G_LOAD [[ADDR]](p0) :: (load acquire 1 from %ir.addr) +; CHECK: [[V1:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load acquire 1 from %ir.addr) ; CHECK: G_STORE [[V1]](s8), [[ADDR]](p0) :: (store release 1 into %ir.addr) -; CHECK: [[V2:%[0-9]+]](s8) = G_LOAD [[ADDR]](p0) :: (load syncscope("singlethread") seq_cst 1 from %ir.addr) +; CHECK: [[V2:%[0-9]+]]:_(s8) = G_LOAD [[ADDR]](p0) :: (load syncscope("singlethread") seq_cst 1 from %ir.addr) ; CHECK: G_STORE [[V2]](s8), [[ADDR]](p0) :: (store syncscope("singlethread") monotonic 1 into %ir.addr) %v0 = load atomic i8, i8* %addr unordered, align 1 store atomic i8 %v0, i8* %addr monotonic, align 1 @@ -1347,8 +1352,8 @@ define void @test_load_store_atomics(i8* %addr) { define float @test_fneg_f32(float %x) { ; CHECK-LABEL: name: test_fneg_f32 -; CHECK: [[ARG:%[0-9]+]](s32) = COPY %s0 -; CHECK: [[RES:%[0-9]+]](s32) = G_FNEG [[ARG]] +; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_FNEG [[ARG]] ; CHECK: %s0 = COPY [[RES]](s32) %neg = fsub float -0.000000e+00, %x ret float %neg @@ -1356,8 +1361,8 @@ define float @test_fneg_f32(float %x) { define double @test_fneg_f64(double %x) { ; CHECK-LABEL: name: test_fneg_f64 -; CHECK: [[ARG:%[0-9]+]](s64) = COPY %d0 -; CHECK: [[RES:%[0-9]+]](s64) = G_FNEG [[ARG]] +; CHECK: [[ARG:%[0-9]+]]:_(s64) = COPY %d0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = G_FNEG [[ARG]] ; CHECK: %d0 = COPY [[RES]](s64) %neg = fsub double -0.000000e+00, %x ret double %neg @@ -1374,10 +1379,10 @@ define void @test_trivial_inlineasm() { define <2 x i32> @test_insertelement(<2 x i32> %vec, i32 %elt, i32 %idx){ ; CHECK-LABEL: name: test_insertelement -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = COPY %d0 -; CHECK: [[ELT:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[IDX:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[RES:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[VEC]], [[ELT]](s32), [[IDX]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY %d0 +; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY %w0 +; 
CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[RES:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[VEC]], [[ELT]](s32), [[IDX]](s32) ; CHECK: %d0 = COPY [[RES]](<2 x s32>) %res = insertelement <2 x i32> %vec, i32 %elt, i32 %idx ret <2 x i32> %res @@ -1385,9 +1390,9 @@ define <2 x i32> @test_insertelement(<2 x i32> %vec, i32 %elt, i32 %idx){ define i32 @test_extractelement(<2 x i32> %vec, i32 %idx) { ; CHECK-LABEL: name: test_extractelement -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = COPY %d0 -; CHECK: [[IDX:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[RES:%[0-9]+]](s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = COPY %d0 +; CHECK: [[IDX:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>), [[IDX]](s32) ; CHECK: %w0 = COPY [[RES]](s32) %res = extractelement <2 x i32> %vec, i32 %idx ret i32 %res @@ -1395,7 +1400,7 @@ define i32 @test_extractelement(<2 x i32> %vec, i32 %idx) { define i32 @test_singleelementvector(i32 %elt){ ; CHECK-LABEL: name: test_singleelementvector -; CHECK: [[ELT:%[0-9]+]](s32) = COPY %w0 +; CHECK: [[ELT:%[0-9]+]]:_(s32) = COPY %w0 ; CHECK-NOT: G_INSERT_VECTOR_ELT ; CHECK-NOT: G_EXTRACT_VECTOR_ELT ; CHECK: %w0 = COPY [[ELT]](s32) @@ -1406,24 +1411,24 @@ define i32 @test_singleelementvector(i32 %elt){ define <2 x i32> @test_constantaggzerovector_v2i32() { ; CHECK-LABEL: name: test_constantaggzerovector_v2i32 -; CHECK: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[ZERO]](s32), [[ZERO]](s32) +; CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[ZERO]](s32), [[ZERO]](s32) ; CHECK: %d0 = COPY [[VEC]](<2 x s32>) ret <2 x i32> zeroinitializer } define <2 x float> @test_constantaggzerovector_v2f32() { ; CHECK-LABEL: name: test_constantaggzerovector_v2f32 -; CHECK: [[ZERO:%[0-9]+]](s32) = G_FCONSTANT float 0.000000e+00 -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[ZERO]](s32), [[ZERO]](s32) +; CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[ZERO]](s32), [[ZERO]](s32) ; CHECK: %d0 = COPY [[VEC]](<2 x s32>) ret <2 x float> zeroinitializer } define i32 @test_constantaggzerovector_v3i32() { ; CHECK-LABEL: name: test_constantaggzerovector_v3i32 -; CHECK: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[VEC:%[0-9]+]](<3 x s32>) = G_MERGE_VALUES [[ZERO]](s32), [[ZERO]](s32), [[ZERO]](s32) +; CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[VEC:%[0-9]+]]:_(<3 x s32>) = G_MERGE_VALUES [[ZERO]](s32), [[ZERO]](s32), [[ZERO]](s32) ; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<3 x s32>) %elt = extractelement <3 x i32> zeroinitializer, i32 1 ret i32 %elt @@ -1431,19 +1436,19 @@ define i32 @test_constantaggzerovector_v3i32() { define <2 x i32> @test_constantdatavector_v2i32() { ; CHECK-LABEL: name: test_constantdatavector_v2i32 -; CHECK: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) +; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) ; CHECK: %d0 = COPY [[VEC]](<2 x s32>) ret <2 x i32> } define i32 @test_constantdatavector_v3i32() { ; CHECK-LABEL: name: test_constantdatavector_v3i32 -; CHECK: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK: 
[[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 -; CHECK: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3 -; CHECK: [[VEC:%[0-9]+]](<3 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32), [[C3]](s32) +; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 +; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 +; CHECK: [[VEC:%[0-9]+]]:_(<3 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32), [[C3]](s32) ; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<3 x s32>) %elt = extractelement <3 x i32> , i32 1 ret i32 %elt @@ -1451,28 +1456,28 @@ define i32 @test_constantdatavector_v3i32() { define <4 x i32> @test_constantdatavector_v4i32() { ; CHECK-LABEL: name: test_constantdatavector_v4i32 -; CHECK: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 -; CHECK: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3 -; CHECK: [[C4:%[0-9]+]](s32) = G_CONSTANT i32 4 -; CHECK: [[VEC:%[0-9]+]](<4 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) +; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 +; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 +; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 +; CHECK: [[VEC:%[0-9]+]]:_(<4 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32) ; CHECK: %q0 = COPY [[VEC]](<4 x s32>) ret <4 x i32> } define <2 x double> @test_constantdatavector_v2f64() { ; CHECK-LABEL: name: test_constantdatavector_v2f64 -; CHECK: [[FC1:%[0-9]+]](s64) = G_FCONSTANT double 1.000000e+00 -; CHECK: [[FC2:%[0-9]+]](s64) = G_FCONSTANT double 2.000000e+00 -; CHECK: [[VEC:%[0-9]+]](<2 x s64>) = G_MERGE_VALUES [[FC1]](s64), [[FC2]](s64) +; CHECK: [[FC1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 +; CHECK: [[FC2:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s64>) = G_MERGE_VALUES [[FC1]](s64), [[FC2]](s64) ; CHECK: %q0 = COPY [[VEC]](<2 x s64>) ret <2 x double> } define i32 @test_constantaggzerovector_v1s32(i32 %arg){ ; CHECK-LABEL: name: test_constantaggzerovector_v1s32 -; CHECK: [[ARG:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 +; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NOT: G_MERGE_VALUES ; CHECK: G_ADD [[ARG]], [[C0]] %vec = insertelement <1 x i32> undef, i32 %arg, i32 0 @@ -1483,8 +1488,8 @@ define i32 @test_constantaggzerovector_v1s32(i32 %arg){ define i32 @test_constantdatavector_v1s32(i32 %arg){ ; CHECK-LABEL: name: test_constantdatavector_v1s32 -; CHECK: [[ARG:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 +; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NOT: G_MERGE_VALUES ; CHECK: G_ADD [[ARG]], [[C1]] %vec = insertelement <1 x i32> undef, i32 %arg, i32 0 @@ -1496,7 +1501,7 @@ define i32 @test_constantdatavector_v1s32(i32 %arg){ declare ghccc float @different_call_conv_target(float %x) define float @test_different_call_conv_target(float %x) { ; CHECK-LABEL: name: test_different_call_conv -; CHECK: [[X:%[0-9]+]](s32) = COPY %s0 +; CHECK: [[X:%[0-9]+]]:_(s32) = COPY %s0 ; CHECK: %s8 = COPY [[X]] ; CHECK: BL @different_call_conv_target, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %s8, implicit-def %s0 %res = call ghccc float @different_call_conv_target(float %x) @@ -1505,11 +1510,11 @@ define float @test_different_call_conv_target(float %x) { define <2 x i32> @test_shufflevector_s32_v2s32(i32 %arg) { ; CHECK-LABEL: name: 
test_shufflevector_s32_v2s32 -; CHECK: [[ARG:%[0-9]+]](s32) = COPY %w0 -; CHECK-DAG: [[UNDEF:%[0-9]+]](s32) = G_IMPLICIT_DEF -; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32) -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], [[MASK]](<2 x s32>) +; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF +; CHECK-DAG: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK-DAG: [[MASK:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], [[MASK]](<2 x s32>) ; CHECK: %d0 = COPY [[VEC]](<2 x s32>) %vec = insertelement <1 x i32> undef, i32 %arg, i32 0 %res = shufflevector <1 x i32> %vec, <1 x i32> undef, <2 x i32> zeroinitializer @@ -1518,10 +1523,10 @@ define <2 x i32> @test_shufflevector_s32_v2s32(i32 %arg) { define i32 @test_shufflevector_v2s32_s32(<2 x i32> %arg) { ; CHECK-LABEL: name: test_shufflevector_v2s32_s32 -; CHECK: [[ARG:%[0-9]+]](<2 x s32>) = COPY %d0 -; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF -; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK: [[RES:%[0-9]+]](s32) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], [[C1]](s32) +; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY %d0 +; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF +; CHECK-DAG: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], [[C1]](s32) ; CHECK: %w0 = COPY [[RES]](s32) %vec = shufflevector <2 x i32> %arg, <2 x i32> undef, <1 x i32> %res = extractelement <1 x i32> %vec, i32 0 @@ -1530,12 +1535,12 @@ define i32 @test_shufflevector_v2s32_s32(<2 x i32> %arg) { define <2 x i32> @test_shufflevector_v2s32_v2s32(<2 x i32> %arg) { ; CHECK-LABEL: name: test_shufflevector_v2s32_v2s32 -; CHECK: [[ARG:%[0-9]+]](<2 x s32>) = COPY %d0 -; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF -; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32) -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], [[MASK]](<2 x s32>) +; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY %d0 +; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF +; CHECK-DAG: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK-DAG: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK-DAG: [[MASK:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], [[MASK]](<2 x s32>) ; CHECK: %d0 = COPY [[VEC]](<2 x s32>) %res = shufflevector <2 x i32> %arg, <2 x i32> undef, <2 x i32> ret <2 x i32> %res @@ -1543,12 +1548,12 @@ define <2 x i32> @test_shufflevector_v2s32_v2s32(<2 x i32> %arg) { define i32 @test_shufflevector_v2s32_v3s32(<2 x i32> %arg) { ; CHECK-LABEL: name: test_shufflevector_v2s32_v3s32 -; CHECK: [[ARG:%[0-9]+]](<2 x s32>) = COPY %d0 -; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF -; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK-DAG: [[MASK:%[0-9]+]](<3 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32), [[C1]](s32) -; CHECK: [[VEC:%[0-9]+]](<3 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], [[MASK]](<3 x s32>) +; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY %d0 +; 
CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF +; CHECK-DAG: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK-DAG: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK-DAG: [[MASK:%[0-9]+]]:_(<3 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32), [[C1]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<3 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], [[MASK]](<3 x s32>) ; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<3 x s32>) %vec = shufflevector <2 x i32> %arg, <2 x i32> undef, <3 x i32> %res = extractelement <3 x i32> %vec, i32 0 @@ -1557,14 +1562,14 @@ define i32 @test_shufflevector_v2s32_v3s32(<2 x i32> %arg) { define <4 x i32> @test_shufflevector_v2s32_v4s32(<2 x i32> %arg1, <2 x i32> %arg2) { ; CHECK-LABEL: name: test_shufflevector_v2s32_v4s32 -; CHECK: [[ARG1:%[0-9]+]](<2 x s32>) = COPY %d0 -; CHECK: [[ARG2:%[0-9]+]](<2 x s32>) = COPY %d1 -; CHECK: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 -; CHECK: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3 -; CHECK: [[MASK:%[0-9]+]](<4 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32) -; CHECK: [[VEC:%[0-9]+]](<4 x s32>) = G_SHUFFLE_VECTOR [[ARG1]](<2 x s32>), [[ARG2]], [[MASK]](<4 x s32>) +; CHECK: [[ARG1:%[0-9]+]]:_(<2 x s32>) = COPY %d0 +; CHECK: [[ARG2:%[0-9]+]]:_(<2 x s32>) = COPY %d1 +; CHECK: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 +; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 +; CHECK: [[MASK:%[0-9]+]]:_(<4 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[ARG1]](<2 x s32>), [[ARG2]], [[MASK]](<4 x s32>) ; CHECK: %q0 = COPY [[VEC]](<4 x s32>) %res = shufflevector <2 x i32> %arg1, <2 x i32> %arg2, <4 x i32> ret <4 x i32> %res @@ -1572,12 +1577,12 @@ define <4 x i32> @test_shufflevector_v2s32_v4s32(<2 x i32> %arg1, <2 x i32> %arg define <2 x i32> @test_shufflevector_v4s32_v2s32(<4 x i32> %arg) { ; CHECK-LABEL: name: test_shufflevector_v4s32_v2s32 -; CHECK: [[ARG:%[0-9]+]](<4 x s32>) = COPY %q0 -; CHECK-DAG: [[UNDEF:%[0-9]+]](<4 x s32>) = G_IMPLICIT_DEF -; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK-DAG: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3 -; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C3]](s32) -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<4 x s32>), [[UNDEF]], [[MASK]](<2 x s32>) +; CHECK: [[ARG:%[0-9]+]]:_(<4 x s32>) = COPY %q0 +; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF +; CHECK-DAG: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK-DAG: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 +; CHECK-DAG: [[MASK:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C3]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](<4 x s32>), [[UNDEF]], [[MASK]](<2 x s32>) ; CHECK: %d0 = COPY [[VEC]](<2 x s32>) %res = shufflevector <4 x i32> %arg, <4 x i32> undef, <2 x i32> ret <2 x i32> %res @@ -1586,35 +1591,35 @@ define <2 x i32> @test_shufflevector_v4s32_v2s32(<4 x i32> %arg) { define <16 x i8> @test_shufflevector_v8s8_v16s8(<8 x i8> %arg1, <8 x i8> %arg2) { ; CHECK-LABEL: name: test_shufflevector_v8s8_v16s8 -; CHECK: [[ARG1:%[0-9]+]](<8 x s8>) = COPY %d0 -; CHECK: [[ARG2:%[0-9]+]](<8 x s8>) = COPY %d1 -; CHECK: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[C8:%[0-9]+]](s32) = G_CONSTANT i32 8 -; CHECK: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK: 
[[C9:%[0-9]+]](s32) = G_CONSTANT i32 9 -; CHECK: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 -; CHECK: [[C10:%[0-9]+]](s32) = G_CONSTANT i32 10 -; CHECK: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3 -; CHECK: [[C11:%[0-9]+]](s32) = G_CONSTANT i32 11 -; CHECK: [[C4:%[0-9]+]](s32) = G_CONSTANT i32 4 -; CHECK: [[C12:%[0-9]+]](s32) = G_CONSTANT i32 12 -; CHECK: [[C5:%[0-9]+]](s32) = G_CONSTANT i32 5 -; CHECK: [[C13:%[0-9]+]](s32) = G_CONSTANT i32 13 -; CHECK: [[C6:%[0-9]+]](s32) = G_CONSTANT i32 6 -; CHECK: [[C14:%[0-9]+]](s32) = G_CONSTANT i32 14 -; CHECK: [[C7:%[0-9]+]](s32) = G_CONSTANT i32 7 -; CHECK: [[C15:%[0-9]+]](s32) = G_CONSTANT i32 15 -; CHECK: [[MASK:%[0-9]+]](<16 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C8]](s32), [[C1]](s32), [[C9]](s32), [[C2]](s32), [[C10]](s32), [[C3]](s32), [[C11]](s32), [[C4]](s32), [[C12]](s32), [[C5]](s32), [[C13]](s32), [[C6]](s32), [[C14]](s32), [[C7]](s32), [[C15]](s32) -; CHECK: [[VEC:%[0-9]+]](<16 x s8>) = G_SHUFFLE_VECTOR [[ARG1]](<8 x s8>), [[ARG2]], [[MASK]](<16 x s32>) +; CHECK: [[ARG1:%[0-9]+]]:_(<8 x s8>) = COPY %d0 +; CHECK: [[ARG2:%[0-9]+]]:_(<8 x s8>) = COPY %d1 +; CHECK: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 +; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 +; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 +; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 +; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 +; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 +; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 +; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 +; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 +; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 +; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 +; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 +; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 +; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 +; CHECK: [[MASK:%[0-9]+]]:_(<16 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C8]](s32), [[C1]](s32), [[C9]](s32), [[C2]](s32), [[C10]](s32), [[C3]](s32), [[C11]](s32), [[C4]](s32), [[C12]](s32), [[C5]](s32), [[C13]](s32), [[C6]](s32), [[C14]](s32), [[C7]](s32), [[C15]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[ARG1]](<8 x s8>), [[ARG2]], [[MASK]](<16 x s32>) ; CHECK: %q0 = COPY [[VEC]](<16 x s8>) %res = shufflevector <8 x i8> %arg1, <8 x i8> %arg2, <16 x i32> ret <16 x i8> %res } ; CHECK-LABEL: test_constant_vector -; CHECK: [[UNDEF:%[0-9]+]](s16) = G_IMPLICIT_DEF -; CHECK: [[F:%[0-9]+]](s16) = G_FCONSTANT half 0xH3C00 -; CHECK: [[M:%[0-9]+]](<4 x s16>) = G_MERGE_VALUES [[UNDEF]](s16), [[UNDEF]](s16), [[UNDEF]](s16), [[F]](s16) +; CHECK: [[UNDEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF +; CHECK: [[F:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 +; CHECK: [[M:%[0-9]+]]:_(<4 x s16>) = G_MERGE_VALUES [[UNDEF]](s16), [[UNDEF]](s16), [[UNDEF]](s16), [[F]](s16) ; CHECK: %d0 = COPY [[M]](<4 x s16>) define <4 x half> @test_constant_vector() { ret <4 x half> @@ -1622,8 +1627,8 @@ define <4 x half> @test_constant_vector() { define i32 @test_target_mem_intrinsic(i32* %addr) { ; CHECK-LABEL: name: test_target_mem_intrinsic -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[VAL:%[0-9]+]](s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[ADDR]](p0) :: (volatile load 4 from %ir.addr) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[VAL:%[0-9]+]]:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.ldxr), [[ADDR]](p0) :: (volatile load 4 from %ir.addr) ; 
CHECK: G_TRUNC [[VAL]](s64) %val = call i64 @llvm.aarch64.ldxr.p0i32(i32* %addr) %trunc = trunc i64 %val to i32 diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir index 296f65c041a17..4042047dfc243 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir +++ b/test/CodeGen/AArch64/GlobalISel/arm64-regbankselect.mir @@ -66,6 +66,9 @@ define void @bitcast_s64_fpr() { ret void } define void @bitcast_s64_gpr_fpr() { ret void } define void @bitcast_s64_fpr_gpr() { ret void } + define void @bitcast_s128() { ret void } + define void @copy_s128() { ret void } + define void @copy_s128_from_load() { ret void } define i64 @greedyWithChainOfComputation(i64 %arg1, <2 x i32>* %addr) { %varg1 = bitcast i64 %arg1 to <2 x i32> @@ -96,17 +99,14 @@ # Based on the type i32, this should be gpr. name: defaultMapping legalized: true -# CHECK-LABEL: name: defaultMapping -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0.entry: liveins: %x0 - ; CHECK: %1(s32) = G_ADD %0 + ; CHECK-LABEL: name: defaultMapping + ; CHECK: %1:gpr(s32) = G_ADD %0 %0(s32) = COPY %w0 %1(s32) = G_ADD %0, %0 ... @@ -117,18 +117,15 @@ body: | # FPR is used for both floating point and vector registers. name: defaultMappingVector legalized: true -# CHECK-LABEL: name: defaultMappingVector -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0.entry: liveins: %d0 - ; CHECK: %0(<2 x s32>) = COPY %d0 - ; CHECK: %1(<2 x s32>) = G_ADD %0 + ; CHECK-LABEL: name: defaultMappingVector + ; CHECK: %0:fpr(<2 x s32>) = COPY %d0 + ; CHECK: %1:fpr(<2 x s32>) = G_ADD %0 %0(<2 x s32>) = COPY %d0 %1(<2 x s32>) = G_ADD %0, %0 ... @@ -139,12 +136,6 @@ body: | # in FPR, but at the use, it should be GPR. name: defaultMapping1Repair legalized: true -# CHECK-LABEL: name: defaultMapping1Repair -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -152,10 +143,11 @@ registers: body: | bb.0.entry: liveins: %s0, %x0 - ; CHECK: %0(s32) = COPY %s0 - ; CHECK-NEXT: %1(s32) = COPY %w0 - ; CHECK-NEXT: %3(s32) = COPY %0 - ; CHECK-NEXT: %2(s32) = G_ADD %3, %1 + ; CHECK-LABEL: name: defaultMapping1Repair + ; CHECK: %0:fpr(s32) = COPY %s0 + ; CHECK-NEXT: %1:gpr(s32) = COPY %w0 + ; CHECK-NEXT: %3:gpr(s32) = COPY %0 + ; CHECK-NEXT: %2:gpr(s32) = G_ADD %3, %1 %0(s32) = COPY %s0 %1(s32) = COPY %w0 %2(s32) = G_ADD %0, %1 @@ -164,22 +156,17 @@ body: | # Check that we repair the assignment for %0 differently for both uses. 
name: defaultMapping2Repairs legalized: true -# CHECK-LABEL: name: defaultMapping2Repairs -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0.entry: liveins: %s0, %x0 - ; CHECK: %0(s32) = COPY %s0 - ; CHECK-NEXT: %2(s32) = COPY %0 - ; CHECK-NEXT: %3(s32) = COPY %0 - ; CHECK-NEXT: %1(s32) = G_ADD %2, %3 + ; CHECK-LABEL: name: defaultMapping2Repairs + ; CHECK: %0:fpr(s32) = COPY %s0 + ; CHECK-NEXT: %2:gpr(s32) = COPY %0 + ; CHECK-NEXT: %3:gpr(s32) = COPY %0 + ; CHECK-NEXT: %1:gpr(s32) = G_ADD %2, %3 %0(s32) = COPY %s0 %1(s32) = G_ADD %0, %0 ... @@ -191,20 +178,16 @@ body: | # fixes that. name: defaultMappingDefRepair legalized: true -# CHECK-LABEL: name: defaultMappingDefRepair -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: fpr } body: | bb.0.entry: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK-NEXT: %2(s32) = G_ADD %0, %0 - ; CHECK-NEXT: %1(s32) = COPY %2 + ; CHECK-LABEL: name: defaultMappingDefRepair + ; CHECK: %0:gpr(s32) = COPY %w0 + ; CHECK-NEXT: %2:gpr(s32) = G_ADD %0, %0 + ; CHECK-NEXT: %1:fpr(s32) = COPY %2 %0(s32) = COPY %w0 %1(s32) = G_ADD %0, %0 ... @@ -252,12 +235,6 @@ body: | # Make sure we can repair physical register uses as well. name: defaultMappingUseRepairPhysReg legalized: true -# CHECK-LABEL: name: defaultMappingUseRepairPhysReg -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -265,10 +242,11 @@ registers: body: | bb.0.entry: liveins: %w0, %s0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK-NEXT: %1(s32) = COPY %s0 - ; CHECK-NEXT: %3(s32) = COPY %1 - ; CHECK-NEXT: %2(s32) = G_ADD %0, %3 + ; CHECK-LABEL: name: defaultMappingUseRepairPhysReg + ; CHECK: %0:gpr(s32) = COPY %w0 + ; CHECK-NEXT: %1:fpr(s32) = COPY %s0 + ; CHECK-NEXT: %3:gpr(s32) = COPY %1 + ; CHECK-NEXT: %2:gpr(s32) = G_ADD %0, %3 %0(s32) = COPY %w0 %1(s32) = COPY %s0 %2(s32) = G_ADD %0, %1 @@ -278,18 +256,15 @@ body: | # Make sure we can repair physical register defs. name: defaultMappingDefRepairPhysReg legalized: true -# CHECK-LABEL: name: defaultMappingDefRepairPhysReg -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0.entry: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK-NEXT: %1(s32) = G_ADD %0, %0 + ; CHECK-LABEL: name: defaultMappingDefRepairPhysReg + ; CHECK: %0:gpr(s32) = COPY %w0 + ; CHECK-NEXT: %1:gpr(s32) = G_ADD %0, %0 ; CHECK-NEXT: %s0 = COPY %1 %0(s32) = COPY %w0 %1(s32) = G_ADD %0, %0 @@ -301,21 +276,6 @@ body: | # G_OR instruction from fpr to gpr. 
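# The change applied throughout this file is the new vreg-def syntax: the register class or bank is now printed inline on each definition, so (illustrative sketch, mirroring the hunk below) # old printer: %2(<2 x s32>) = G_OR %0, %1 # new printer: %2:gpr(<2 x s32>) = G_OR %0, %1 # which is also why the separate 'registers:' CHECK blocks are dropped in favor of inline checks.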
name: greedyMappingOr legalized: true -# CHECK-LABEL: name: greedyMappingOr -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } - -# Fast mode maps vector instruction on FPR. -# FAST-NEXT: - { id: 2, class: fpr, preferred-register: '' } -# Fast mode needs two extra copies. -# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' } -# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' } - -# Greedy mode coalesce the computation on the GPR register -# because it is the cheapest. -# GREEDY-NEXT: - { id: 2, class: gpr, preferred-register: '' } - registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -323,20 +283,19 @@ registers: body: | bb.0.entry: liveins: %x0, %x1 - ; CHECK: %0(<2 x s32>) = COPY %x0 - ; CHECK-NEXT: %1(<2 x s32>) = COPY %x1 - + ; CHECK: %0:gpr(<2 x s32>) = COPY %x0 + ; CHECK-NEXT: %1:gpr(<2 x s32>) = COPY %x1 ; Fast mode tries to reuse the source of the copy for the destination. ; Now, the default mapping says that %0 and %1 need to be in FPR. ; The repairing code insert two copies to materialize that. - ; FAST-NEXT: %3(<2 x s32>) = COPY %0 - ; FAST-NEXT: %4(<2 x s32>) = COPY %1 + ; FAST-NEXT: %3:fpr(<2 x s32>) = COPY %0 + ; FAST-NEXT: %4:fpr(<2 x s32>) = COPY %1 ; The mapping of G_OR is on FPR. - ; FAST-NEXT: %2(<2 x s32>) = G_OR %3, %4 + ; FAST-NEXT: %2:fpr(<2 x s32>) = G_OR %3, %4 ; Greedy mode remapped the instruction on the GPR bank. - ; GREEDY-NEXT: %2(<2 x s32>) = G_OR %0, %1 + ; GREEDY-NEXT: %2:gpr(<2 x s32>) = G_OR %0, %1 %0(<2 x s32>) = COPY %x0 %1(<2 x s32>) = COPY %x1 %2(<2 x s32>) = G_OR %0, %1 @@ -348,21 +307,6 @@ body: | # %2 constraint. name: greedyMappingOrWithConstraints legalized: true -# CHECK-LABEL: name: greedyMappingOrWithConstraints -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } - -# Fast mode maps vector instruction on FPR. -# Fast mode needs two extra copies. -# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' } -# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' } - -# Greedy mode coalesce the computation on the GPR register because it -# is the cheapest, but will need one extra copy to materialize %2 into a FPR. -# GREEDY-NEXT: - { id: 3, class: gpr, preferred-register: '' } - registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -370,22 +314,23 @@ registers: body: | bb.0.entry: liveins: %x0, %x1 - ; CHECK: %0(<2 x s32>) = COPY %x0 - ; CHECK-NEXT: %1(<2 x s32>) = COPY %x1 + ; CHECK-LABEL: name: greedyMappingOrWithConstraints + ; CHECK: %0:gpr(<2 x s32>) = COPY %x0 + ; CHECK-NEXT: %1:gpr(<2 x s32>) = COPY %x1 ; Fast mode tries to reuse the source of the copy for the destination. ; Now, the default mapping says that %0 and %1 need to be in FPR. ; The repairing code insert two copies to materialize that. - ; FAST-NEXT: %3(<2 x s32>) = COPY %0 - ; FAST-NEXT: %4(<2 x s32>) = COPY %1 + ; FAST-NEXT: %3:fpr(<2 x s32>) = COPY %0 + ; FAST-NEXT: %4:fpr(<2 x s32>) = COPY %1 ; The mapping of G_OR is on FPR. - ; FAST-NEXT: %2(<2 x s32>) = G_OR %3, %4 + ; FAST-NEXT: %2:fpr(<2 x s32>) = G_OR %3, %4 ; Greedy mode remapped the instruction on the GPR bank. - ; GREEDY-NEXT: %3(<2 x s32>) = G_OR %0, %1 + ; GREEDY-NEXT: %3:gpr(<2 x s32>) = G_OR %0, %1 ; We need to keep %2 into FPR because we do not know anything about it. 
- ; GREEDY-NEXT: %2(<2 x s32>) = COPY %3 + ; GREEDY-NEXT: %2:fpr(<2 x s32>) = COPY %3 %0(<2 x s32>) = COPY %x0 %1(<2 x s32>) = COPY %x1 %2(<2 x s32>) = G_OR %0, %1 @@ -405,8 +350,8 @@ body: | bb.0: liveins: %x0 - ; CHECK: %0 = COPY %x0 - ; CHECK-NEXT: %1 = ADDXrr %0, %0 + ; CHECK: %0:gpr64 = COPY %x0 + ; CHECK-NEXT: %1:gpr64 = ADDXrr %0, %0 ; CHECK-NEXT: %x0 = COPY %1 ; CHECK-NEXT: RET_ReallyLR implicit %x0 @@ -441,8 +386,8 @@ registers: - { id: 1, class: _ } # CHECK: body: -# CHECK: %0(s32) = COPY %w0 -# CHECK: %1(s32) = G_BITCAST %0 +# CHECK: %0:gpr(s32) = COPY %w0 +# CHECK: %1:gpr(s32) = G_BITCAST %0 body: | bb.0: liveins: %w0 @@ -464,8 +409,8 @@ registers: - { id: 1, class: _ } # CHECK: body: -# CHECK: %0(<2 x s16>) = COPY %s0 -# CHECK: %1(<2 x s16>) = G_BITCAST %0 +# CHECK: %0:fpr(<2 x s16>) = COPY %s0 +# CHECK: %1:fpr(<2 x s16>) = G_BITCAST %0 body: | bb.0: liveins: %s0 @@ -488,8 +433,9 @@ registers: - { id: 1, class: _ } # CHECK: body: -# CHECK: %0(s32) = COPY %w0 -# CHECK: %1(<2 x s16>) = G_BITCAST %0 +# CHECK: %0:gpr(s32) = COPY %w0 +# FAST: %1:fpr(<2 x s16>) = G_BITCAST %0 +# GREEDY: %1:gpr(<2 x s16>) = G_BITCAST %0 body: | bb.0: liveins: %w0 @@ -502,18 +448,13 @@ body: | # CHECK-LABEL: name: bitcast_s32_fpr_gpr name: bitcast_s32_fpr_gpr legalized: true - -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } -# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# GREEDY-NEXT: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } - # CHECK: body: -# CHECK: %0(<2 x s16>) = COPY %s0 -# CHECK: %1(s32) = G_BITCAST %0 +# CHECK: %0:fpr(<2 x s16>) = COPY %s0 +# FAST: %1:gpr(s32) = G_BITCAST %0 +# GREEDY: %1:fpr(s32) = G_BITCAST %0 body: | bb.0: liveins: %s0 @@ -526,17 +467,12 @@ body: | # CHECK-LABEL: name: bitcast_s64_gpr name: bitcast_s64_gpr legalized: true - -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } - # CHECK: body: -# CHECK: %0(s64) = COPY %x0 -# CHECK: %1(s64) = G_BITCAST %0 +# CHECK: %0:gpr(s64) = COPY %x0 +# CHECK: %1:gpr(s64) = G_BITCAST %0 body: | bb.0: liveins: %x0 @@ -549,17 +485,12 @@ body: | # CHECK-LABEL: name: bitcast_s64_fpr name: bitcast_s64_fpr legalized: true - -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } - # CHECK: body: -# CHECK: %0(<2 x s32>) = COPY %d0 -# CHECK: %1(<2 x s32>) = G_BITCAST %0 +# CHECK: %0:fpr(<2 x s32>) = COPY %d0 +# CHECK: %1:fpr(<2 x s32>) = G_BITCAST %0 body: | bb.0: liveins: %d0 @@ -572,17 +503,13 @@ body: | # CHECK-LABEL: name: bitcast_s64_gpr_fpr name: bitcast_s64_gpr_fpr legalized: true - -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } -# FAST-NEXT: - { id: 1, class: fpr, preferred-register: '' } -# GREEDY-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } # CHECK: body: -# CHECK: %0(s64) = COPY %x0 -# CHECK: %1(<2 x s32>) = G_BITCAST %0 +# CHECK: %0:gpr(s64) = COPY %x0 +# FAST: %1:fpr(<2 x s32>) = G_BITCAST %0 +# GREEDY: %1:gpr(<2 x s32>) = G_BITCAST %0 body: | bb.0: liveins: %x0 @@ -595,18 +522,13 @@ body: | # CHECK-LABEL: name: bitcast_s64_fpr_gpr name: bitcast_s64_fpr_gpr legalized: true - -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr, 
preferred-register: '' } -# FAST-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# GREEDY-NEXT: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } - # CHECK: body: -# CHECK: %0(<2 x s32>) = COPY %d0 -# CHECK: %1(s64) = G_BITCAST %0 +# CHECK: %0:fpr(<2 x s32>) = COPY %d0 +# FAST: %1:gpr(s64) = G_BITCAST %0 +# GREEDY: %1:fpr(s64) = G_BITCAST %0 body: | bb.0: liveins: %d0 @@ -615,6 +537,90 @@ body: | %1(s64) = G_BITCAST %0 ... +--- +# CHECK-LABEL: name: bitcast_s128 +name: bitcast_s128 +legalized: true +tracksRegLiveness: true +registers: + - { id: 0, class: _} + - { id: 1, class: _} + - { id: 2, class: _} + - { id: 3, class: _} +# CHECK: %3:fpr(s128) = G_MERGE_VALUES +# CHECK: %2:fpr(<2 x s64>) = G_BITCAST %3(s128) +body: | + bb.1: + liveins: %x0, %x1 + %0(s64) = COPY %x0 + %1(s64) = COPY %x1 + %3(s128) = G_MERGE_VALUES %0(s64), %1(s64) + %2(<2 x s64>) = G_BITCAST %3(s128) + %q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit %q0 + +... + +--- +# CHECK-LABEL: name: copy_s128 +# This test checks that we issue the proper mapping +# for copies of size > 64. +# The mapping should be the same as G_BITCAST. +name: copy_s128 +legalized: true +tracksRegLiveness: true +registers: + - { id: 0, class: _} + - { id: 1, class: _} + - { id: 2, class: _} + - { id: 3, class: _} + - { id: 4, class: _} +# CHECK: %3:fpr(s128) = G_MERGE_VALUES +# CHECK: %4:fpr(s128) = COPY %3(s128) +# CHECK-NEXT: %2:fpr(<2 x s64>) = G_BITCAST %4(s128) +body: | + bb.1: + liveins: %x0, %x1 + %0(s64) = COPY %x0 + %1(s64) = COPY %x1 + %3(s128) = G_MERGE_VALUES %0(s64), %1(s64) + %4(s128) = COPY %3(s128) + %2(<2 x s64>) = G_BITCAST %4(s128) + %q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit %q0 + +... + +--- +# CHECK-LABEL: name: copy_s128_from_load +# This test checks that we issue the proper mapping +# for copies of size > 64 when the input is neither +# a physical register nor a generic register. +# This used to crash when we moved to the statically +# computed mapping, because we were assuming non-physregs +# were generic registers and thus had a type, whereas +# that is not necessarily the case. +name: copy_s128_from_load +legalized: true +tracksRegLiveness: true +registers: + - { id: 0, class: fpr128} + - { id: 1, class: _} +# CHECK: registers: +# CHECK: - { id: 0, class: fpr128, preferred-register: '' } +# CHECK: - { id: 1, class: fpr, preferred-register: '' } +# CHECK: %1:fpr(s128) = COPY %0 +body: | + bb.1: + liveins: %x0 + %0 = LDRQui killed %x0, 0 + %1(s128) = COPY %0 + %q0 = COPY %1(s128) + RET_ReallyLR implicit %q0 + +...
+ + --- # Make sure the greedy mode is able to take advantage of the # alternative mappings of G_LOAD to coalesce the whole chain @@ -622,17 +628,6 @@ body: | # CHECK-LABEL: name: greedyWithChainOfComputation name: greedyWithChainOfComputation legalized: true - -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# FAST-NEXT: - { id: 2, class: fpr, preferred-register: '' } -# FAST-NEXT: - { id: 3, class: fpr, preferred-register: '' } -# FAST-NEXT: - { id: 4, class: fpr, preferred-register: '' } -# GREEDY-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# GREEDY-NEXT: - { id: 3, class: gpr, preferred-register: '' } -# GREEDY-NEXT: - { id: 4, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -640,17 +635,18 @@ registers: - { id: 3, class: _ } - { id: 4, class: _ } - { id: 5, class: _ } - # No repairing should be necessary for both modes. -# CHECK: %0(s64) = COPY %x0 -# CHECK-NEXT: %1(p0) = COPY %x1 -# CHECK-NEXT: %2(<2 x s32>) = G_BITCAST %0(s64) -# CHECK-NEXT: %3(<2 x s32>) = G_LOAD %1(p0) :: (load 8 from %ir.addr) -# CHECK-NEXT: %4(<2 x s32>) = G_OR %2, %3 -# CHECK-NEXT: %5(s64) = G_BITCAST %4(<2 x s32>) +# CHECK: %0:gpr(s64) = COPY %x0 +# CHECK-NEXT: %1:gpr(p0) = COPY %x1 +# FAST-NEXT: %2:fpr(<2 x s32>) = G_BITCAST %0(s64) +# FAST-NEXT: %3:fpr(<2 x s32>) = G_LOAD %1(p0) :: (load 8 from %ir.addr) +# FAST-NEXT: %4:fpr(<2 x s32>) = G_OR %2, %3 +# GREEDY-NEXT: %2:gpr(<2 x s32>) = G_BITCAST %0(s64) +# GREEDY-NEXT: %3:gpr(<2 x s32>) = G_LOAD %1(p0) :: (load 8 from %ir.addr) +# GREEDY-NEXT: %4:gpr(<2 x s32>) = G_OR %2, %3 +# CHECK-NEXT: %5:gpr(s64) = G_BITCAST %4(<2 x s32>) # CHECK-NEXT: %x0 = COPY %5(s64) # CHECK-NEXT: RET_ReallyLR implicit %x0 - body: | bb.0: liveins: %x0, %x1 @@ -686,12 +682,12 @@ registers: - { id: 3, class: _ } # No repairing should be necessary for both modes. -# CHECK: %0(s64) = COPY %x0 -# CHECK-NEXT: %1(p0) = COPY %x1 -# CHECK-NEXT: %2(s64) = G_LOAD %1(p0) :: (load 8 from %ir.addr) +# CHECK: %0:gpr(s64) = COPY %x0 +# CHECK-NEXT: %1:gpr(p0) = COPY %x1 +# CHECK-NEXT: %2:fpr(s64) = G_LOAD %1(p0) :: (load 8 from %ir.addr) # %0 has been mapped to GPR, we need to repair to match FPR. -# CHECK-NEXT: %4(s64) = COPY %0 -# CHECK-NEXT: %3(s64) = G_FADD %4, %2 +# CHECK-NEXT: %4:fpr(s64) = COPY %0 +# CHECK-NEXT: %3:fpr(s64) = G_FADD %4, %2 # CHECK-NEXT: %x0 = COPY %3(s64) # CHECK-NEXT: RET_ReallyLR implicit %x0 @@ -726,12 +722,12 @@ registers: - { id: 1, class: _ } - { id: 2, class: _ } -# CHECK: %0(s64) = COPY %x0 -# CHECK-NEXT: %1(p0) = COPY %x1 +# CHECK: %0:gpr(s64) = COPY %x0 +# CHECK-NEXT: %1:gpr(p0) = COPY %x1 # %0 has been mapped to GPR, we need to repair to match FPR. 
-# CHECK-NEXT: %3(s64) = COPY %0 -# CHECK-NEXT: %4(s64) = COPY %0 -# CHECK-NEXT: %2(s64) = G_FADD %3, %4 +# CHECK-NEXT: %3:fpr(s64) = COPY %0 +# CHECK-NEXT: %4:fpr(s64) = COPY %0 +# CHECK-NEXT: %2:fpr(s64) = G_FADD %3, %4 # CHECK-NEXT: G_STORE %2(s64), %1(p0) :: (store 8 into %ir.addr) # CHECK-NEXT: RET_ReallyLR diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll index cdcdb9644627d..4b6fab704da10 100644 --- a/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll +++ b/test/CodeGen/AArch64/GlobalISel/call-translator-ios.ll @@ -5,12 +5,12 @@ ; CHECK: fixedStack: ; CHECK-DAG: - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1, ; CHECK-DAG: - { id: [[STACK8:[0-9]+]], type: default, offset: 1, size: 1, -; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] -; CHECK: [[LHS:%[0-9]+]](s8) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0) -; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] -; CHECK: [[RHS:%[0-9]+]](s8) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0) -; CHECK: [[SUM:%[0-9]+]](s8) = G_ADD [[LHS]], [[RHS]] -; CHECK: [[SUM32:%[0-9]+]](s32) = G_SEXT [[SUM]](s8) +; CHECK: [[LHS_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] +; CHECK: [[LHS:%[0-9]+]]:_(s8) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 0) +; CHECK: [[RHS_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] +; CHECK: [[RHS:%[0-9]+]]:_(s8) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]], align 0) +; CHECK: [[SUM:%[0-9]+]]:_(s8) = G_ADD [[LHS]], [[RHS]] +; CHECK: [[SUM32:%[0-9]+]]:_(s32) = G_SEXT [[SUM]](s8) ; CHECK: %w0 = COPY [[SUM32]](s32) define signext i8 @test_stack_slots([8 x i64], i8 signext %lhs, i8 signext %rhs) { %sum = add i8 %lhs, %rhs @@ -18,15 +18,15 @@ define signext i8 @test_stack_slots([8 x i64], i8 signext %lhs, i8 signext %rhs) } ; CHECK-LABEL: name: test_call_stack -; CHECK: [[C42:%[0-9]+]](s8) = G_CONSTANT i8 42 -; CHECK: [[C12:%[0-9]+]](s8) = G_CONSTANT i8 12 -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[C42_OFFS:%[0-9]+]](s64) = G_CONSTANT i64 0 -; CHECK: [[C42_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C42_OFFS]](s64) +; CHECK: [[C42:%[0-9]+]]:_(s8) = G_CONSTANT i8 42 +; CHECK: [[C12:%[0-9]+]]:_(s8) = G_CONSTANT i8 12 +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[C42_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[C42_LOC:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[C42_OFFS]](s64) ; CHECK: G_STORE [[C42]](s8), [[C42_LOC]](p0) :: (store 1 into stack, align 0) -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[C12_OFFS:%[0-9]+]](s64) = G_CONSTANT i64 1 -; CHECK: [[C12_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C12_OFFS]](s64) +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[C12_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 +; CHECK: [[C12_LOC:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[C12_OFFS]](s64) ; CHECK: G_STORE [[C12]](s8), [[C12_LOC]](p0) :: (store 1 into stack + 1, align 0) ; CHECK: BL @test_stack_slots define void @test_call_stack() { @@ -46,27 +46,27 @@ define void @test_128bit_struct([2 x i64]* %ptr) { } ; CHECK-LABEL: name: take_128bit_struct -; CHECK: {{%.*}}(p0) = COPY %x0 -; CHECK: {{%.*}}(s64) = COPY %x1 -; CHECK: {{%.*}}(s64) = COPY %x2 +; CHECK: {{%.*}}:_(p0) = COPY %x0 +; CHECK: {{%.*}}:_(s64) = COPY %x1 +; CHECK: {{%.*}}:_(s64) = COPY %x2 define void @take_128bit_struct([2 x i64]* %ptr, [2 x i64] %in) { store [2 
x i64] %in, [2 x i64]* %ptr ret void } ; CHECK-LABEL: name: test_split_struct -; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD {{.*}}(p0) -; CHECK: [[LO:%[0-9]+]](s64) = G_EXTRACT [[STRUCT]](s128), 0 -; CHECK: [[HI:%[0-9]+]](s64) = G_EXTRACT [[STRUCT]](s128), 64 +; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD {{.*}}(p0) +; CHECK: [[LO:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 0 +; CHECK: [[HI:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 64 -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF:%[0-9]+]](s64) = G_CONSTANT i64 0 -; CHECK: [[ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF]] +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] ; CHECK: G_STORE [[LO]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0) -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF:%[0-9]+]](s64) = G_CONSTANT i64 8 -; CHECK: [[ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF]] +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] ; CHECK: G_STORE [[HI]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0) define void @test_split_struct([2 x i64]* %ptr) { %struct = load [2 x i64], [2 x i64]* %ptr @@ -81,11 +81,11 @@ define void @test_split_struct([2 x i64]* %ptr) { ; CHECK-DAG: - { id: [[LO_FRAME:[0-9]+]], type: default, offset: 0, size: 8 ; CHECK-DAG: - { id: [[HI_FRAME:[0-9]+]], type: default, offset: 8, size: 8 -; CHECK: [[LOPTR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[LO_FRAME]] -; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD [[LOPTR]](p0) :: (invariant load 8 from %fixed-stack.[[LO_FRAME]], align 0) +; CHECK: [[LOPTR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LO_FRAME]] +; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD [[LOPTR]](p0) :: (invariant load 8 from %fixed-stack.[[LO_FRAME]], align 0) -; CHECK: [[HIPTR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[HI_FRAME]] -; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD [[HIPTR]](p0) :: (invariant load 8 from %fixed-stack.[[HI_FRAME]], align 0) +; CHECK: [[HIPTR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[HI_FRAME]] +; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[HIPTR]](p0) :: (invariant load 8 from %fixed-stack.[[HI_FRAME]], align 0) define void @take_split_struct([2 x i64]* %ptr, i64, i64, i64, i64, i64, i64, [2 x i64] %in) { diff --git a/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/test/CodeGen/AArch64/GlobalISel/call-translator.ll index 004e3fd2a1dca..23a39a336fa3a 100644 --- a/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ b/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -12,7 +12,7 @@ define void @test_trivial_call() { ; CHECK-LABEL: name: test_simple_return ; CHECK: BL @simple_return_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit-def %x0 -; CHECK: [[RES:%[0-9]+]](s64) = COPY %x0 +; CHECK: [[RES:%[0-9]+]]:_(s64) = COPY %x0 ; CHECK: %x0 = COPY [[RES]] ; CHECK: RET_ReallyLR implicit %x0 declare i64 @simple_return_callee() @@ -22,7 +22,7 @@ define i64 @test_simple_return() { } ; CHECK-LABEL: name: test_simple_arg -; CHECK: [[IN:%[0-9]+]](s32) = COPY %w0 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 ; CHECK: %w0 = COPY [[IN]] ; CHECK: BL @simple_arg_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0 ; CHECK: RET_ReallyLR @@ -36,7 +36,7 @@ define void @test_simple_arg(i32 %in) { ; CHECK: registers: ; Make sure the register feeding the indirect call is properly constrained. 
; CHECK: - { id: [[FUNC:[0-9]+]], class: gpr64, preferred-register: '' } -; CHECK: %[[FUNC]](p0) = COPY %x0 +; CHECK: %[[FUNC]]:gpr64(p0) = COPY %x0 ; CHECK: BLR %[[FUNC]](p0), csr_aarch64_aapcs, implicit-def %lr, implicit %sp ; CHECK: RET_ReallyLR define void @test_indirect_call(void()* %func) { @@ -45,8 +45,8 @@ define void @test_indirect_call(void()* %func) { } ; CHECK-LABEL: name: test_multiple_args -; CHECK: [[IN:%[0-9]+]](s64) = COPY %x0 -; CHECK: [[ANSWER:%[0-9]+]](s32) = G_CONSTANT i32 42 +; CHECK: [[IN:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[ANSWER:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK: %w0 = COPY [[ANSWER]] ; CHECK: %x1 = COPY [[IN]] ; CHECK: BL @multiple_args_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0, implicit %x1 @@ -59,16 +59,17 @@ define void @test_multiple_args(i64 %in) { ; CHECK-LABEL: name: test_struct_formal -; CHECK: [[DBL:%[0-9]+]](s64) = COPY %d0 -; CHECK: [[I64:%[0-9]+]](s64) = COPY %x0 -; CHECK: [[I8:%[0-9]+]](s8) = COPY %w1 -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2 +; CHECK: [[DBL:%[0-9]+]]:_(s64) = COPY %d0 +; CHECK: [[I64:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[I8_C:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[I8:%[0-9]+]]:_(s8) = G_TRUNC [[I8_C]] +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x2 -; CHECK: [[UNDEF:%[0-9]+]](s192) = G_IMPLICIT_DEF -; CHECK: [[ARG0:%[0-9]+]](s192) = G_INSERT [[UNDEF]], [[DBL]](s64), 0 -; CHECK: [[ARG1:%[0-9]+]](s192) = G_INSERT [[ARG0]], [[I64]](s64), 64 -; CHECK: [[ARG2:%[0-9]+]](s192) = G_INSERT [[ARG1]], [[I8]](s8), 128 -; CHECK: [[ARG:%[0-9]+]](s192) = COPY [[ARG2]] +; CHECK: [[UNDEF:%[0-9]+]]:_(s192) = G_IMPLICIT_DEF +; CHECK: [[ARG0:%[0-9]+]]:_(s192) = G_INSERT [[UNDEF]], [[DBL]](s64), 0 +; CHECK: [[ARG1:%[0-9]+]]:_(s192) = G_INSERT [[ARG0]], [[I64]](s64), 64 +; CHECK: [[ARG2:%[0-9]+]]:_(s192) = G_INSERT [[ARG1]], [[I8]](s8), 128 +; CHECK: [[ARG:%[0-9]+]]:_(s192) = COPY [[ARG2]] ; CHECK: G_STORE [[ARG]](s192), [[ADDR]](p0) ; CHECK: RET_ReallyLR @@ -79,12 +80,12 @@ define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) ; CHECK-LABEL: name: test_struct_return -; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[VAL:%[0-9]+]](s192) = G_LOAD [[ADDR]](p0) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[VAL:%[0-9]+]]:_(s192) = G_LOAD [[ADDR]](p0) -; CHECK: [[DBL:%[0-9]+]](s64) = G_EXTRACT [[VAL]](s192), 0 -; CHECK: [[I64:%[0-9]+]](s64) = G_EXTRACT [[VAL]](s192), 64 -; CHECK: [[I32:%[0-9]+]](s32) = G_EXTRACT [[VAL]](s192), 128 +; CHECK: [[DBL:%[0-9]+]]:_(s64) = G_EXTRACT [[VAL]](s192), 0 +; CHECK: [[I64:%[0-9]+]]:_(s64) = G_EXTRACT [[VAL]](s192), 64 +; CHECK: [[I32:%[0-9]+]]:_(s32) = G_EXTRACT [[VAL]](s192), 128 ; CHECK: %d0 = COPY [[DBL]](s64) ; CHECK: %x0 = COPY [[I64]](s64) @@ -97,23 +98,23 @@ define {double, i64, i32} @test_struct_return({double, i64, i32}* %addr) { ; CHECK-LABEL: name: test_arr_call ; CHECK: hasCalls: true -; CHECK: [[ARG:%[0-9]+]](s256) = G_LOAD +; CHECK: [[ARG:%[0-9]+]]:_(s256) = G_LOAD -; CHECK: [[E0:%[0-9]+]](s64) = G_EXTRACT [[ARG]](s256), 0 -; CHECK: [[E1:%[0-9]+]](s64) = G_EXTRACT [[ARG]](s256), 64 -; CHECK: [[E2:%[0-9]+]](s64) = G_EXTRACT [[ARG]](s256), 128 -; CHECK: [[E3:%[0-9]+]](s64) = G_EXTRACT [[ARG]](s256), 192 +; CHECK: [[E0:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 0 +; CHECK: [[E1:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 64 +; CHECK: [[E2:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 128 +; CHECK: [[E3:%[0-9]+]]:_(s64) = G_EXTRACT [[ARG]](s256), 192 ; CHECK: %x0 = COPY [[E0]](s64) ; CHECK: %x1 = COPY [[E1]](s64) ; CHECK: %x2 = COPY 
[[E2]](s64) ; CHECK: %x3 = COPY [[E3]](s64) ; CHECK: BL @arr_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2, implicit %x3, implicit-def %x0, implicit-def %x1, implicit-def %x2, implicit-def %x3 -; CHECK: [[E0:%[0-9]+]](s64) = COPY %x0 -; CHECK: [[E1:%[0-9]+]](s64) = COPY %x1 -; CHECK: [[E2:%[0-9]+]](s64) = COPY %x2 -; CHECK: [[E3:%[0-9]+]](s64) = COPY %x3 -; CHECK: [[RES:%[0-9]+]](s256) = G_MERGE_VALUES [[E0]](s64), [[E1]](s64), [[E2]](s64), [[E3]](s64) +; CHECK: [[E0:%[0-9]+]]:_(s64) = COPY %x0 +; CHECK: [[E1:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK: [[E2:%[0-9]+]]:_(s64) = COPY %x2 +; CHECK: [[E3:%[0-9]+]]:_(s64) = COPY %x3 +; CHECK: [[RES:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[E0]](s64), [[E1]](s64), [[E2]](s64), [[E3]](s64) ; CHECK: G_EXTRACT [[RES]](s256), 64 declare [4 x i64] @arr_callee([4 x i64]) define i64 @test_arr_call([4 x i64]* %addr) { @@ -125,13 +126,14 @@ define i64 @test_arr_call([4 x i64]* %addr) { ; CHECK-LABEL: name: test_abi_exts_call -; CHECK: [[VAL:%[0-9]+]](s8) = G_LOAD -; CHECK: %w0 = COPY [[VAL]] +; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_LOAD +; CHECK: [[VAL_TMP:%[0-9]+]]:_(s32) = G_ANYEXT [[VAL]] +; CHECK: %w0 = COPY [[VAL_TMP]] ; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0 -; CHECK: [[SVAL:%[0-9]+]](s32) = G_SEXT [[VAL]](s8) +; CHECK: [[SVAL:%[0-9]+]]:_(s32) = G_SEXT [[VAL]](s8) ; CHECK: %w0 = COPY [[SVAL]](s32) ; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0 -; CHECK: [[ZVAL:%[0-9]+]](s32) = G_ZEXT [[VAL]](s8) +; CHECK: [[ZVAL:%[0-9]+]]:_(s32) = G_ZEXT [[VAL]](s8) ; CHECK: %w0 = COPY [[ZVAL]](s32) ; CHECK: BL @take_char, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0 declare void @take_char(i8) @@ -144,8 +146,8 @@ define void @test_abi_exts_call(i8* %addr) { } ; CHECK-LABEL: name: test_abi_sext_ret -; CHECK: [[VAL:%[0-9]+]](s8) = G_LOAD -; CHECK: [[SVAL:%[0-9]+]](s32) = G_SEXT [[VAL]](s8) +; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_LOAD +; CHECK: [[SVAL:%[0-9]+]]:_(s32) = G_SEXT [[VAL]](s8) ; CHECK: %w0 = COPY [[SVAL]](s32) ; CHECK: RET_ReallyLR implicit %w0 define signext i8 @test_abi_sext_ret(i8* %addr) { @@ -154,8 +156,8 @@ define signext i8 @test_abi_sext_ret(i8* %addr) { } ; CHECK-LABEL: name: test_abi_zext_ret -; CHECK: [[VAL:%[0-9]+]](s8) = G_LOAD -; CHECK: [[SVAL:%[0-9]+]](s32) = G_ZEXT [[VAL]](s8) +; CHECK: [[VAL:%[0-9]+]]:_(s8) = G_LOAD +; CHECK: [[SVAL:%[0-9]+]]:_(s32) = G_ZEXT [[VAL]](s8) ; CHECK: %w0 = COPY [[SVAL]](s32) ; CHECK: RET_ReallyLR implicit %w0 define zeroext i8 @test_abi_zext_ret(i8* %addr) { @@ -168,13 +170,13 @@ define zeroext i8 @test_abi_zext_ret(i8* %addr) { ; CHECK-DAG: - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 8, ; CHECK-DAG: - { id: [[STACK8:[0-9]+]], type: default, offset: 8, size: 8, ; CHECK-DAG: - { id: [[STACK16:[0-9]+]], type: default, offset: 16, size: 8, -; CHECK: [[LHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] -; CHECK: [[LHS:%[0-9]+]](s64) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0) -; CHECK: [[RHS_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] -; CHECK: [[RHS:%[0-9]+]](s64) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0) -; CHECK: [[ADDR_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]] -; CHECK: [[ADDR:%[0-9]+]](p0) = G_LOAD [[ADDR_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK16]], align 0) -; CHECK: [[SUM:%[0-9]+]](s64) = G_ADD [[LHS]], 
[[RHS]] +; CHECK: [[LHS_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]] +; CHECK: [[LHS:%[0-9]+]]:_(s64) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK0]], align 0) +; CHECK: [[RHS_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]] +; CHECK: [[RHS:%[0-9]+]]:_(s64) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK8]], align 0) +; CHECK: [[ADDR_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK16]] +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_LOAD [[ADDR_ADDR]](p0) :: (invariant load 8 from %fixed-stack.[[STACK16]], align 0) +; CHECK: [[SUM:%[0-9]+]]:_(s64) = G_ADD [[LHS]], [[RHS]] ; CHECK: G_STORE [[SUM]](s64), [[ADDR]](p0) define void @test_stack_slots([8 x i64], i64 %lhs, i64 %rhs, i64* %addr) { %sum = add i64 %lhs, %rhs @@ -183,21 +185,21 @@ define void @test_stack_slots([8 x i64], i64 %lhs, i64 %rhs, i64* %addr) { } ; CHECK-LABEL: name: test_call_stack -; CHECK: [[C42:%[0-9]+]](s64) = G_CONSTANT i64 42 -; CHECK: [[C12:%[0-9]+]](s64) = G_CONSTANT i64 12 -; CHECK: [[PTR:%[0-9]+]](p0) = G_CONSTANT i64 0 +; CHECK: [[C42:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 +; CHECK: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 +; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; CHECK: ADJCALLSTACKDOWN 24, 0, implicit-def %sp, implicit %sp -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[C42_OFFS:%[0-9]+]](s64) = G_CONSTANT i64 0 -; CHECK: [[C42_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C42_OFFS]](s64) +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[C42_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[C42_LOC:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[C42_OFFS]](s64) ; CHECK: G_STORE [[C42]](s64), [[C42_LOC]](p0) :: (store 8 into stack, align 0) -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[C12_OFFS:%[0-9]+]](s64) = G_CONSTANT i64 8 -; CHECK: [[C12_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[C12_OFFS]](s64) +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[C12_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[C12_LOC:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[C12_OFFS]](s64) ; CHECK: G_STORE [[C12]](s64), [[C12_LOC]](p0) :: (store 8 into stack + 8, align 0) -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[PTR_OFFS:%[0-9]+]](s64) = G_CONSTANT i64 16 -; CHECK: [[PTR_LOC:%[0-9]+]](p0) = G_GEP [[SP]], [[PTR_OFFS]](s64) +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[PTR_OFFS:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 +; CHECK: [[PTR_LOC:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[PTR_OFFS]](s64) ; CHECK: G_STORE [[PTR]](p0), [[PTR_LOC]](p0) :: (store 8 into stack + 16, align 0) ; CHECK: BL @test_stack_slots ; CHECK: ADJCALLSTACKUP 24, 0, implicit-def %sp, implicit %sp @@ -210,8 +212,8 @@ define void @test_call_stack() { ; CHECK: fixedStack: ; CHECK-NEXT: - { id: [[SLOT:[0-9]+]], type: default, offset: 0, size: 1, alignment: 16, stack-id: 0, ; CHECK-NEXT: isImmutable: true, -; CHECK: [[ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[SLOT]] -; CHECK: {{%[0-9]+}}(s1) = G_LOAD [[ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[SLOT]], align 0) +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[SLOT]] +; CHECK: {{%[0-9]+}}:_(s1) = G_LOAD [[ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[SLOT]], align 0) define void @test_mem_i1([8 x i64], i1 %in) { ret void } @@ -228,27 +230,27 @@ define void @test_128bit_struct([2 x i64]* %ptr) { } ; CHECK-LABEL: name: take_128bit_struct -; CHECK: {{%.*}}(p0) = COPY %x0 -; CHECK: {{%.*}}(s64) = COPY %x1 -; CHECK: {{%.*}}(s64) = COPY %x2 +; CHECK: {{%.*}}:_(p0) = COPY %x0 +; CHECK: {{%.*}}:_(s64) = COPY 
%x1 +; CHECK: {{%.*}}:_(s64) = COPY %x2 define void @take_128bit_struct([2 x i64]* %ptr, [2 x i64] %in) { store [2 x i64] %in, [2 x i64]* %ptr ret void } ; CHECK-LABEL: name: test_split_struct -; CHECK: [[STRUCT:%[0-9]+]](s128) = G_LOAD {{.*}}(p0) -; CHECK: [[LO:%[0-9]+]](s64) = G_EXTRACT [[STRUCT]](s128), 0 -; CHECK: [[HI:%[0-9]+]](s64) = G_EXTRACT [[STRUCT]](s128), 64 +; CHECK: [[STRUCT:%[0-9]+]]:_(s128) = G_LOAD {{.*}}(p0) +; CHECK: [[LO:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 0 +; CHECK: [[HI:%[0-9]+]]:_(s64) = G_EXTRACT [[STRUCT]](s128), 64 -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF:%[0-9]+]](s64) = G_CONSTANT i64 0 -; CHECK: [[ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF]] +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] ; CHECK: G_STORE [[LO]](s64), [[ADDR]](p0) :: (store 8 into stack, align 0) -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF:%[0-9]+]](s64) = G_CONSTANT i64 8 -; CHECK: [[ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF]] +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF]] ; CHECK: G_STORE [[HI]](s64), [[ADDR]](p0) :: (store 8 into stack + 8, align 0) define void @test_split_struct([2 x i64]* %ptr) { %struct = load [2 x i64], [2 x i64]* %ptr @@ -263,11 +265,11 @@ define void @test_split_struct([2 x i64]* %ptr) { ; CHECK-DAG: - { id: [[LO_FRAME:[0-9]+]], type: default, offset: 0, size: 8 ; CHECK-DAG: - { id: [[HI_FRAME:[0-9]+]], type: default, offset: 8, size: 8 -; CHECK: [[LOPTR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[LO_FRAME]] -; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD [[LOPTR]](p0) :: (invariant load 8 from %fixed-stack.[[LO_FRAME]], align 0) +; CHECK: [[LOPTR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LO_FRAME]] +; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD [[LOPTR]](p0) :: (invariant load 8 from %fixed-stack.[[LO_FRAME]], align 0) -; CHECK: [[HIPTR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[HI_FRAME]] -; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD [[HIPTR]](p0) :: (invariant load 8 from %fixed-stack.[[HI_FRAME]], align 0) +; CHECK: [[HIPTR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[HI_FRAME]] +; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD [[HIPTR]](p0) :: (invariant load 8 from %fixed-stack.[[HI_FRAME]], align 0) define void @take_split_struct([2 x i64]* %ptr, i64, i64, i64, i64, i64, i64, [2 x i64] %in) { diff --git a/test/CodeGen/AArch64/GlobalISel/combine-anyext-crash.mir b/test/CodeGen/AArch64/GlobalISel/combine-anyext-crash.mir new file mode 100644 index 0000000000000..339adf51451bc --- /dev/null +++ b/test/CodeGen/AArch64/GlobalISel/combine-anyext-crash.mir @@ -0,0 +1,42 @@ +# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64--" + + define void @test_anyext_crash() { + entry: + br label %block2 + + block2: + %0 = trunc i16 0 to i8 + %1 = uitofp i8 %0 to double + br label %block2 + } + + +... +--- +name: test_anyext_crash +alignment: 2 +legalized: false +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: _, preferred-register: '' } +body: | + bb.1: + ; Check we don't crash due to trying to legalize a dead instruction. 
+ ; CHECK-LABEL: test_anyext_crash + ; CHECK-LABEL: bb.1: + successors: %bb.2 + + %0(s16) = G_CONSTANT i16 0 + + bb.2: + successors: %bb.2 + + %1(s8) = G_TRUNC %0(s16) + %2(s64) = G_UITOFP %1(s8) + G_BR %bb.2 + +... diff --git a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll index cd32cb41c7c0b..eb2d2ec4307c6 100644 --- a/test/CodeGen/AArch64/GlobalISel/debug-insts.ll +++ b/test/CodeGen/AArch64/GlobalISel/debug-insts.ll @@ -3,8 +3,9 @@ ; CHECK-LABEL: name: debug_declare ; CHECK: stack: -; CHECK: - { id: {{.*}}, name: in.addr, type: default, offset: 0, size: {{.*}}, alignment: {{.*}}, -; CHECK-NEXT: callee-saved-register: '', di-variable: '!11', di-expression: '!DIExpression()', +; CHECK: - { id: {{.*}}, name: in.addr, type: default, offset: 0, size: {{.*}}, alignment: {{.*}}, +; CHECK-NEXT: callee-saved-register: '', callee-saved-restored: true, +; CHECK-NEXT: di-variable: '!11', di-expression: '!DIExpression()', ; CHECK: DBG_VALUE debug-use %0(s32), debug-use _, !11, !DIExpression(), debug-location !12 define void @debug_declare(i32 %in) #0 !dbg !7 { entry: @@ -25,7 +26,7 @@ entry: } ; CHECK-LABEL: name: debug_value -; CHECK: [[IN:%[0-9]+]](s32) = COPY %w0 +; CHECK: [[IN:%[0-9]+]]:_(s32) = COPY %w0 define void @debug_value(i32 %in) #0 !dbg !16 { %addr = alloca i32 ; CHECK: DBG_VALUE debug-use [[IN]](s32), debug-use _, !17, !DIExpression(), debug-location !18 diff --git a/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll b/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll index 196910e96ce3e..62aceaa813089 100644 --- a/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll +++ b/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll @@ -1,15 +1,15 @@ ; RUN: llc -mtriple=aarch64 -global-isel %s -o - -stop-after=irtranslator | FileCheck %s ; CHECK-LABEL: name: test_simple_alloca -; CHECK: [[NUMELTS:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 -1 -; CHECK: [[NUMELTS_64:%[0-9]+]](s64) = G_ZEXT [[NUMELTS]](s32) -; CHECK: [[NUMBYTES:%[0-9]+]](s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]] -; CHECK: [[SP_TMP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[ALLOC:%[0-9]+]](p0) = G_GEP [[SP_TMP]], [[NUMBYTES]] -; CHECK: [[ALIGNED_ALLOC:%[0-9]+]](p0) = G_PTR_MASK [[ALLOC]], 4 +; CHECK: [[NUMELTS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[TYPE_SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 +; CHECK: [[NUMELTS_64:%[0-9]+]]:_(s64) = G_ZEXT [[NUMELTS]](s32) +; CHECK: [[NUMBYTES:%[0-9]+]]:_(s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]] +; CHECK: [[SP_TMP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[ALLOC:%[0-9]+]]:_(p0) = G_GEP [[SP_TMP]], [[NUMBYTES]] +; CHECK: [[ALIGNED_ALLOC:%[0-9]+]]:_(p0) = G_PTR_MASK [[ALLOC]], 4 ; CHECK: %sp = COPY [[ALIGNED_ALLOC]] -; CHECK: [[ALLOC:%[0-9]+]](p0) = COPY [[ALIGNED_ALLOC]] +; CHECK: [[ALLOC:%[0-9]+]]:_(p0) = COPY [[ALIGNED_ALLOC]] ; CHECK: %x0 = COPY [[ALLOC]] define i8* @test_simple_alloca(i32 %numelts) { %addr = alloca i8, i32 %numelts @@ -17,15 +17,15 @@ define i8* @test_simple_alloca(i32 %numelts) { } ; CHECK-LABEL: name: test_aligned_alloca -; CHECK: [[NUMELTS:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 -1 -; CHECK: [[NUMELTS_64:%[0-9]+]](s64) = G_ZEXT [[NUMELTS]](s32) -; CHECK: [[NUMBYTES:%[0-9]+]](s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]] -; CHECK: [[SP_TMP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[ALLOC:%[0-9]+]](p0) = G_GEP [[SP_TMP]], [[NUMBYTES]] -; CHECK: [[ALIGNED_ALLOC:%[0-9]+]](p0) = G_PTR_MASK [[ALLOC]], 5 +; CHECK: [[NUMELTS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: 
[[TYPE_SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 +; CHECK: [[NUMELTS_64:%[0-9]+]]:_(s64) = G_ZEXT [[NUMELTS]](s32) +; CHECK: [[NUMBYTES:%[0-9]+]]:_(s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]] +; CHECK: [[SP_TMP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[ALLOC:%[0-9]+]]:_(p0) = G_GEP [[SP_TMP]], [[NUMBYTES]] +; CHECK: [[ALIGNED_ALLOC:%[0-9]+]]:_(p0) = G_PTR_MASK [[ALLOC]], 5 ; CHECK: %sp = COPY [[ALIGNED_ALLOC]] -; CHECK: [[ALLOC:%[0-9]+]](p0) = COPY [[ALIGNED_ALLOC]] +; CHECK: [[ALLOC:%[0-9]+]]:_(p0) = COPY [[ALIGNED_ALLOC]] ; CHECK: %x0 = COPY [[ALLOC]] define i8* @test_aligned_alloca(i32 %numelts) { %addr = alloca i8, i32 %numelts, align 32 @@ -33,14 +33,14 @@ define i8* @test_aligned_alloca(i32 %numelts) { } ; CHECK-LABEL: name: test_natural_alloca -; CHECK: [[NUMELTS:%[0-9]+]](s32) = COPY %w0 -; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 -16 -; CHECK: [[NUMELTS_64:%[0-9]+]](s64) = G_ZEXT [[NUMELTS]](s32) -; CHECK: [[NUMBYTES:%[0-9]+]](s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]] -; CHECK: [[SP_TMP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[ALLOC:%[0-9]+]](p0) = G_GEP [[SP_TMP]], [[NUMBYTES]] +; CHECK: [[NUMELTS:%[0-9]+]]:_(s32) = COPY %w0 +; CHECK: [[TYPE_SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 -16 +; CHECK: [[NUMELTS_64:%[0-9]+]]:_(s64) = G_ZEXT [[NUMELTS]](s32) +; CHECK: [[NUMBYTES:%[0-9]+]]:_(s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]] +; CHECK: [[SP_TMP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[ALLOC:%[0-9]+]]:_(p0) = G_GEP [[SP_TMP]], [[NUMBYTES]] ; CHECK: %sp = COPY [[ALLOC]] -; CHECK: [[ALLOC_TMP:%[0-9]+]](p0) = COPY [[ALLOC]] +; CHECK: [[ALLOC_TMP:%[0-9]+]]:_(p0) = COPY [[ALLOC]] ; CHECK: %x0 = COPY [[ALLOC_TMP]] define i128* @test_natural_alloca(i32 %numelts) { %addr = alloca i128, i32 %numelts diff --git a/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll b/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll index 8d1b02216ea76..70dddeb458599 100644 --- a/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll +++ b/test/CodeGen/AArch64/GlobalISel/irtranslator-bitcast.ll @@ -23,7 +23,7 @@ define i32 @test_bitcast_invalid_vreg() { %tmp15 = add i32 30, 30 ; At this point we mapped 46 values. The 'i32 100' constant will grow the map. 
-; CHECK: %46(s32) = G_CONSTANT i32 100 +; CHECK: %46:_(s32) = G_CONSTANT i32 100 ; CHECK: %w0 = COPY %46(s32) %res = bitcast i32 100 to i32 ret i32 %res diff --git a/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll index d9fec0ec7d46b..0e7fbd32c6fa8 100644 --- a/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll +++ b/test/CodeGen/AArch64/GlobalISel/irtranslator-exceptions.ll @@ -13,26 +13,26 @@ declare i32 @llvm.eh.typeid.for(i8*) ; CHECK: EH_LABEL ; CHECK: %w0 = COPY ; CHECK: BL @foo, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %w0, implicit-def %w0 -; CHECK: {{%[0-9]+}}(s32) = COPY %w0 +; CHECK: {{%[0-9]+}}:_(s32) = COPY %w0 ; CHECK: EH_LABEL ; CHECK: G_BR %[[GOOD]] ; CHECK: [[BAD]] (landing-pad): ; CHECK: EH_LABEL -; CHECK: [[UNDEF:%[0-9]+]](s128) = G_IMPLICIT_DEF -; CHECK: [[PTR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[VAL_WITH_PTR:%[0-9]+]](s128) = G_INSERT [[UNDEF]], [[PTR]](p0), 0 -; CHECK: [[SEL_PTR:%[0-9]+]](p0) = COPY %x1 -; CHECK: [[SEL:%[0-9]+]](s32) = G_PTRTOINT [[SEL_PTR]] -; CHECK: [[PTR_SEL:%[0-9]+]](s128) = G_INSERT [[VAL_WITH_PTR]], [[SEL]](s32), 64 -; CHECK: [[PTR_RET:%[0-9]+]](s64) = G_EXTRACT [[PTR_SEL]](s128), 0 -; CHECK: [[SEL_RET:%[0-9]+]](s32) = G_EXTRACT [[PTR_SEL]](s128), 64 +; CHECK: [[UNDEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF +; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[VAL_WITH_PTR:%[0-9]+]]:_(s128) = G_INSERT [[UNDEF]], [[PTR]](p0), 0 +; CHECK: [[SEL_PTR:%[0-9]+]]:_(p0) = COPY %x1 +; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]] +; CHECK: [[PTR_SEL:%[0-9]+]]:_(s128) = G_INSERT [[VAL_WITH_PTR]], [[SEL]](s32), 64 +; CHECK: [[PTR_RET:%[0-9]+]]:_(s64) = G_EXTRACT [[PTR_SEL]](s128), 0 +; CHECK: [[SEL_RET:%[0-9]+]]:_(s32) = G_EXTRACT [[PTR_SEL]](s128), 64 ; CHECK: %x0 = COPY [[PTR_RET]] ; CHECK: %w1 = COPY [[SEL_RET]] ; CHECK: [[GOOD]]: -; CHECK: [[SEL:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK: {{%[0-9]+}}(s128) = G_INSERT {{%[0-9]+}}, [[SEL]](s32), 64 +; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: {{%[0-9]+}}:_(s128) = G_INSERT {{%[0-9]+}}, [[SEL]](s32), 64 define { i8*, i32 } @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { %res32 = invoke i32 @foo(i32 42) to label %continue unwind label %broken @@ -49,7 +49,7 @@ continue: } ; CHECK-LABEL: name: test_invoke_indirect -; CHECK: [[CALLEE:%[0-9]+]](p0) = COPY %x0 +; CHECK: [[CALLEE:%[0-9]+]]:gpr64(p0) = COPY %x0 ; CHECK: BLR [[CALLEE]] define void @test_invoke_indirect(void()* %callee) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { invoke void %callee() to label %continue unwind label %broken @@ -64,20 +64,20 @@ continue: ; CHECK-LABEL: name: test_invoke_varargs -; CHECK: [[NULL:%[0-9]+]](p0) = G_CONSTANT i64 0 -; CHECK: [[ANSWER:%[0-9]+]](s32) = G_CONSTANT i32 42 -; CHECK: [[ONE:%[0-9]+]](s32) = G_FCONSTANT float 1.0 +; CHECK: [[NULL:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 +; CHECK: [[ANSWER:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 +; CHECK: [[ONE:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.0 ; CHECK: %x0 = COPY [[NULL]] -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFFSET:%[0-9]+]](s64) = G_CONSTANT i64 0 -; CHECK: [[SLOT:%[0-9]+]](p0) = G_GEP [[SP]], [[OFFSET]](s64) +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 +; CHECK: [[SLOT:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFFSET]](s64) ; CHECK: G_STORE [[ANSWER]](s32), [[SLOT]] -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFFSET:%[0-9]+]](s64) = 
G_CONSTANT i64 8 -; CHECK: [[SLOT:%[0-9]+]](p0) = G_GEP [[SP]], [[OFFSET]](s64) +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 +; CHECK: [[SLOT:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFFSET]](s64) ; CHECK: G_STORE [[ONE]](s32), [[SLOT]] ; CHECK: BL @printf diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-add.mir b/test/CodeGen/AArch64/GlobalISel/legalize-add.mir index a94c710e239c2..fa6727da1bb1f 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-add.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-add.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -32,18 +33,18 @@ registers: body: | bb.0.entry: liveins: %x0, %x1, %x2, %x3 - ; CHECK-LABEL: name: test_scalar_add_big - ; CHECK-NOT: G_MERGE_VALUES - ; CHECK-NOT: G_UNMERGE_VALUES - ; CHECK-DAG: [[CARRY0_32:%.*]](s32) = G_CONSTANT i32 0 - ; CHECK-DAG: [[CARRY0:%[0-9]+]](s1) = G_TRUNC [[CARRY0_32]] - ; CHECK: [[RES_LO:%.*]](s64), [[CARRY:%.*]](s1) = G_UADDE %0, %2, [[CARRY0]] - ; CHECK: [[RES_HI:%.*]](s64), {{%.*}}(s1) = G_UADDE %1, %3, [[CARRY]] - ; CHECK-NOT: G_MERGE_VALUES - ; CHECK-NOT: G_UNMERGE_VALUES - ; CHECK: %x0 = COPY [[RES_LO]] - ; CHECK: %x1 = COPY [[RES_HI]] + ; CHECK-LABEL: name: test_scalar_add_big + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY %x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY %x3 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK: [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[COPY]], [[COPY2]], [[TRUNC]] + ; CHECK: [[UADDE2:%[0-9]+]]:_(s64), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[COPY1]], [[COPY3]], [[UADDE1]] + ; CHECK: %x0 = COPY [[UADDE]](s64) + ; CHECK: %x1 = COPY [[UADDE2]](s64) %0(s64) = COPY %x0 %1(s64) = COPY %x1 %2(s64) = COPY %x2 @@ -68,14 +69,16 @@ registers: body: | bb.0.entry: liveins: %x0, %x1, %x2, %x3 - ; CHECK-LABEL: name: test_scalar_add_small - ; CHECK: [[A:%.*]](s64) = COPY %x0 - ; CHECK: [[B:%.*]](s64) = COPY %x1 - ; CHECK: [[OP0:%.*]](s32) = G_TRUNC [[A]] - ; CHECK: [[OP1:%.*]](s32) = G_TRUNC [[B]] - ; CHECK: [[RES32:%.*]](s32) = G_ADD [[OP0]], [[OP1]] - ; CHECK: [[RES:%.*]](s8) = G_TRUNC [[RES32]](s32) + ; CHECK-LABEL: name: test_scalar_add_small + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC]], [[TRUNC1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ADD]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s8) + ; CHECK: %x0 = COPY [[ANYEXT]](s64) %0(s64) = COPY %x0 %1(s64) = COPY %x1 %2(s8) = G_TRUNC %0 @@ -100,16 +103,16 @@ registers: body: | bb.0.entry: liveins: %q0, %q1, %q2, %q3 - ; CHECK-LABEL: name: test_vector_add - ; CHECK-NOT: G_EXTRACT - ; CHECK-NOT: G_SEQUENCE - ; CHECK: [[RES_LO:%.*]](<2 x s64>) = G_ADD %0, %2 - ; CHECK: [[RES_HI:%.*]](<2 x s64>) = G_ADD %1, %3 - ; CHECK-NOT: G_EXTRACT - ; CHECK-NOT: G_SEQUENCE - ; CHECK: %q0 = COPY [[RES_LO]] - ; CHECK: %q1 = COPY [[RES_HI]] + ; CHECK-LABEL: name: test_vector_add + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY %q0 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY %q1 + ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY %q2 + ; CHECK: 
[[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY %q3 + ; CHECK: [[ADD:%[0-9]+]]:_(<2 x s64>) = G_ADD [[COPY]], [[COPY2]] + ; CHECK: [[ADD1:%[0-9]+]]:_(<2 x s64>) = G_ADD [[COPY1]], [[COPY3]] + ; CHECK: %q0 = COPY [[ADD]](<2 x s64>) + ; CHECK: %q1 = COPY [[ADD1]](<2 x s64>) %0(<2 x s64>) = COPY %q0 %1(<2 x s64>) = COPY %q1 %2(<2 x s64>) = COPY %q2 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-and.mir b/test/CodeGen/AArch64/GlobalISel/legalize-and.mir index ec5f90476168e..9646480e42527 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-and.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-and.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -21,14 +22,17 @@ registers: body: | bb.0.entry: liveins: %x0, %x1, %x2, %x3 - ; CHECK-LABEL: name: test_scalar_and_small - ; CHECK: [[A:%.*]](s64) = COPY %x0 - ; CHECK: [[B:%.*]](s64) = COPY %x1 - ; CHECK: [[OP0:%.*]](s32) = G_TRUNC [[A]] - ; CHECK: [[OP1:%.*]](s32) = G_TRUNC [[B]] - ; CHECK: [[RES32:%.*]](s32) = G_AND [[OP0]], [[OP1]] - ; CHECK: [[RES:%.*]](s8) = G_TRUNC [[RES32]](s32) + ; CHECK-LABEL: name: test_scalar_and_small + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC1]], [[TRUNC2]] + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s8) + ; CHECK: %x0 = COPY [[ANYEXT]](s64) %0(s64) = COPY %x0 %1(s64) = COPY %x1 %2(s8) = G_TRUNC %0 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir index 51b1c3890ab75..706ad118be8bf 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir @@ -32,18 +32,18 @@ body: | %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 - ; CHECK: [[CMP1:%[0-9]+]](s32) = G_ICMP intpred(sge), %0(s64), %1 - ; CHECK: [[CMP_T1:%[0-9]+]](s1) = G_TRUNC [[CMP1]] + ; CHECK: [[CMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(sge), %0(s64), %1 + ; CHECK: [[CMP_T1:%[0-9]+]]:_(s1) = G_TRUNC [[CMP1]] %4(s1) = G_ICMP intpred(sge), %0, %1 - ; CHECK: [[CSTMASK1:%[0-9]+]](s32) = G_CONSTANT i32 255 - ; CHECK: [[T1:%[0-9]+]](s32) = G_TRUNC %0(s64) - ; CHECK: [[AND1:%[0-9]+]](s32) = G_AND [[T1]], [[CSTMASK1]] - ; CHECK: [[CSTMASK2:%[0-9]+]](s32) = G_CONSTANT i32 255 - ; CHECK: [[T2:%[0-9]+]](s32) = G_TRUNC %1(s64) - ; CHECK: [[AND2:%[0-9]+]](s32) = G_AND [[T2]], [[CSTMASK2]] - ; CHECK: [[CMP2:%[0-9]+]](s32) = G_ICMP intpred(ult), [[AND1]](s32), [[AND2]] - ; CHECK: [[CMP_T2:%[0-9]+]](s1) = G_TRUNC [[CMP2]] + ; CHECK: [[CSTMASK1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[T1:%[0-9]+]]:_(s32) = G_TRUNC %0(s64) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[T1]], [[CSTMASK1]] + ; CHECK: [[CSTMASK2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[T2:%[0-9]+]]:_(s32) = G_TRUNC %1(s64) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[T2]], [[CSTMASK2]] + ; CHECK: [[CMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND1]](s32), [[AND2]] + ; CHECK: [[CMP_T2:%[0-9]+]]:_(s1) = G_TRUNC [[CMP2]] %8(s1) = G_ICMP intpred(ult), %2, %3 %9(p0) = G_INTTOPTR %0(s64) diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir index 
fbacc28d7434e..eee1a44e547a4 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-combines.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -16,13 +17,15 @@ body: | bb.0: liveins: %w0 + ; Here the types don't match. + ; CHECK-LABEL: name: test_combines_2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ADD]](s32) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[MV]](s64), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[MV]](s64), 0 %0:_(s32) = COPY %w0 - ; Similarly, here the types don't match. - ; CHECK-LABEL: name: test_combines_2 - ; CHECK: %2(s64) = G_MERGE_VALUES %0(s32), %1(s32) - ; CHECK: %3(s1) = G_EXTRACT %2(s64), 0 - ; CHECK: %4(s64) = G_EXTRACT %2(s64), 0 %1:_(s32) = G_ADD %0, %0 %2:_(s64) = G_MERGE_VALUES %0, %1 %3:_(s1) = G_EXTRACT %2, 0 @@ -35,13 +38,12 @@ body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: test_combines_3 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ADD]] %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_combines_3 - ; CHECK: %1(s32) = G_ADD %0, %0 - ; CHECK-NOT: G_SEQUENCE - ; CHECK-NOT: G_EXTRACT - ; CHECK: %5(s32) = G_ADD %0, %1 %1:_(s32) = G_ADD %0, %0 %2:_(s64) = G_MERGE_VALUES %0, %1 %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2 @@ -54,11 +56,12 @@ body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: test_combines_4 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY1]], [[COPY1]] %0:_(s64) = COPY %x0 - ; CHECK-LABEL: name: test_combines_4 - ; CHECK: %2(s64) = COPY %0(s64) - ; CHECK: %3(s64) = G_ADD %2, %2 %1:_(s128) = G_MERGE_VALUES %0, %0 %2:_(s64) = G_EXTRACT %1, 0 %3:_(s64) = G_ADD %2, %2 @@ -70,12 +73,12 @@ body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: test_combines_5 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ADD]] %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_combines_5 - ; CHECK-NOT: G_MERGE_VALUES - ; CHECK-NOT: G_EXTRACT - ; CHECK: %5(s32) = G_ADD %0, %1 %1:_(s32) = G_ADD %0, %0 %2:_(s64) = G_MERGE_VALUES %0, %1 %3:_(s32), %4:_(s32) = G_UNMERGE_VALUES %2 @@ -88,15 +91,13 @@ body: | bb.0: liveins: %w0 + ; Check that we replace all the uses of a G_EXTRACT. ; CHECK-LABEL: name: test_combines_6 - ; CHECK: %0(s32) = COPY %w0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY]], [[COPY]] + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[MUL]] %0:_(s32) = COPY %w0 - ; Check that we replace all the uses of a G_EXTRACT. 
- ; CHECK-NOT: G_MERGE_VALUES - ; CHECK-NOT: G_EXTRACT - ; CHECK: %3(s32) = G_MUL %0, %0 - ; CHECK: %4(s32) = G_ADD %0, %3 %1:_(s32) = G_MERGE_VALUES %0 %2:_(s32) = G_UNMERGE_VALUES %1 %3:_(s32) = G_MUL %2, %2 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir index 16d9e59698fe1..adeee11bfbfad 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -26,17 +27,17 @@ registers: - { id: 5, class: _ } body: | bb.0.entry: - ; CHECK-LABEL: name: test_constant - ; CHECK: [[TMP:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; CHECK: %0(s1) = G_TRUNC [[TMP]] - ; CHECK: [[TMP:%[0-9]+]](s32) = G_CONSTANT i32 42 - ; CHECK: %1(s8) = G_TRUNC [[TMP]] - ; CHECK: [[TMP:%[0-9]+]](s32) = G_CONSTANT i32 -1 - ; CHECK: %2(s16) = G_TRUNC [[TMP]] - ; CHECK: %3(s32) = G_CONSTANT i32 -1 - ; CHECK: %4(s64) = G_CONSTANT i64 1 - ; CHECK: %5(s64) = G_CONSTANT i64 0 + ; CHECK-LABEL: name: test_constant + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[C1]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[C2]](s32) + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 %0(s1) = G_CONSTANT i1 0 %1(s8) = G_CONSTANT i8 42 %2(s16) = G_CONSTANT i16 65535 @@ -53,12 +54,12 @@ registers: - { id: 2, class: _ } body: | bb.0.entry: - ; CHECK-LABEL: name: test_fconstant - ; CHECK: %0(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK: %1(s64) = G_FCONSTANT double 2.000000e+00 - ; CHECK: [[TMP:%[0-9]+]](s32) = G_FCONSTANT half 0xH0000 - ; CHECK: %2(s16) = G_FPTRUNC [[TMP]] + ; CHECK-LABEL: name: test_fconstant + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT half 0xH0000 + ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[C2]](s32) %0(s32) = G_FCONSTANT float 1.0 %1(s64) = G_FCONSTANT double 2.0 %2(s16) = G_FCONSTANT half 0.0 @@ -70,8 +71,8 @@ registers: - { id: 0, class: _ } body: | bb.0: - ; CHECK-LABEL: name: test_global - ; CHECK: %0(p0) = G_GLOBAL_VALUE @var + ; CHECK-LABEL: name: test_global + ; CHECK: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var %0(p0) = G_GLOBAL_VALUE @var ... 
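Note on the legalize-constant checks above: AArch64 has no legal s1/s8/s16 constants, so the legalizer materializes each narrow constant at s32 and truncates. A minimal C sketch of the computed semantics; the helper name is illustrative, not LLVM API:

  #include <stdint.h>

  /* G_CONSTANT i8 42 is legalized to G_CONSTANT i32 42 plus a G_TRUNC */
  static int8_t legalized_const_s8(void) {
      int32_t wide = 42;    /* [[C1]]:_(s32) = G_CONSTANT i32 42        */
      return (int8_t)wide;  /* [[TRUNC1]]:_(s8) = G_TRUNC [[C1]](s32)   */
  }

The same pattern explains the i16 65535 case in the test: it becomes G_CONSTANT i32 -1 followed by a truncation, since the two values agree on the low 16 bits.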
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-div.mir b/test/CodeGen/AArch64/GlobalISel/legalize-div.mir index b869232590bfc..55e3e801023a9 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-div.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-div.mir @@ -26,30 +26,30 @@ body: | %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 - ; CHECK: [[A:%.*]](s64) = COPY %x0 - ; CHECK: [[B:%.*]](s64) = COPY %x1 - ; CHECK: [[C1:%.*]](s32) = G_CONSTANT i32 24 - ; CHECK: [[S1:%.*]](s32) = G_TRUNC [[A]] - ; CHECK: [[SHL1:%.*]](s32) = G_SHL [[S1]], [[C1]] - ; CHECK: [[SEXT1:%.*]](s32) = G_ASHR [[SHL1]], [[C1]] - ; CHECK: [[C2:%.*]](s32) = G_CONSTANT i32 24 - ; CHECK: [[S2:%.*]](s32) = G_TRUNC [[B]] - ; CHECK: [[SHL2:%.*]](s32) = G_SHL [[S2]], [[C2]] - ; CHECK: [[SEXT2:%.*]](s32) = G_ASHR [[SHL2]], [[C2]] - ; CHECK: [[DIV:%.*]](s32) = G_SDIV [[SEXT1]], [[SEXT2]] - ; CHECK: [[RES:%.*]](s8) = G_TRUNC [[DIV]] + ; CHECK: [[A:%.*]]:_(s64) = COPY %x0 + ; CHECK: [[B:%.*]]:_(s64) = COPY %x1 + ; CHECK: [[C1:%.*]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[S1:%.*]]:_(s32) = G_TRUNC [[A]] + ; CHECK: [[SHL1:%.*]]:_(s32) = G_SHL [[S1]], [[C1]] + ; CHECK: [[SEXT1:%.*]]:_(s32) = G_ASHR [[SHL1]], [[C1]] + ; CHECK: [[C2:%.*]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[S2:%.*]]:_(s32) = G_TRUNC [[B]] + ; CHECK: [[SHL2:%.*]]:_(s32) = G_SHL [[S2]], [[C2]] + ; CHECK: [[SEXT2:%.*]]:_(s32) = G_ASHR [[SHL2]], [[C2]] + ; CHECK: [[DIV:%.*]]:_(s32) = G_SDIV [[SEXT1]], [[SEXT2]] + ; CHECK: [[RES:%.*]]:_(s8) = G_TRUNC [[DIV]] %4(s8) = G_SDIV %2, %3 - ; CHECK: [[CMASK1:%.*]](s32) = G_CONSTANT i32 255 - ; CHECK: [[T1:%.*]](s32) = G_TRUNC [[A]] - ; CHECK: [[LHS32:%.*]](s32) = G_AND [[T1]], [[CMASK1]] - ; CHECK: [[CMASK2:%.*]](s32) = G_CONSTANT i32 255 - ; CHECK: [[T2:%.*]](s32) = G_TRUNC [[B]] - ; CHECK: [[RHS32:%.*]](s32) = G_AND [[T2]], [[CMASK2]] - ; CHECK: [[QUOT32:%[0-9]+]](s32) = G_UDIV [[LHS32]], [[RHS32]] - ; CHECK: [[RES:%[0-9]+]](s8) = G_TRUNC [[QUOT32]] + ; CHECK: [[CMASK1:%.*]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[T1:%.*]]:_(s32) = G_TRUNC [[A]] + ; CHECK: [[LHS32:%.*]]:_(s32) = G_AND [[T1]], [[CMASK1]] + ; CHECK: [[CMASK2:%.*]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[T2:%.*]]:_(s32) = G_TRUNC [[B]] + ; CHECK: [[RHS32:%.*]]:_(s32) = G_AND [[T2]], [[CMASK2]] + ; CHECK: [[QUOT32:%[0-9]+]]:_(s32) = G_UDIV [[LHS32]], [[RHS32]] + ; CHECK: [[RES:%[0-9]+]]:_(s8) = G_TRUNC [[QUOT32]] %5(s8) = G_UDIV %2, %3 ... 
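Note on the legalize-div checks above: an 8-bit division is legalized by widening both operands to 32 bits, dividing, and truncating the quotient; the G_SHL/G_ASHR-by-24 pair is a sign-extension, and the G_AND with 255 a zero-extension. A minimal C sketch of the computed semantics (helper names are illustrative, not LLVM API; a non-zero divisor is assumed):

  #include <stdint.h>

  static int8_t sdiv_s8(int8_t a, int8_t b) {
      int32_t sa = (int32_t)a;   /* == G_SHL by 24 then G_ASHR by 24 */
      int32_t sb = (int32_t)b;
      return (int8_t)(sa / sb);  /* G_SDIV at s32, G_TRUNC back to s8 */
  }

  static uint8_t udiv_s8(uint8_t a, uint8_t b) {
      uint32_t za = (uint32_t)a & 255u;  /* G_AND with G_CONSTANT i32 255 */
      uint32_t zb = (uint32_t)b & 255u;
      return (uint8_t)(za / zb);         /* G_UDIV at s32, G_TRUNC back */
  }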
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll b/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll index 42ca367e122bb..da40b274aa620 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll +++ b/test/CodeGen/AArch64/GlobalISel/legalize-exceptions.ll @@ -15,18 +15,18 @@ declare void @_Unwind_Resume(i8*) ; CHECK: [[LP]] (landing-pad): ; CHECK: EH_LABEL -; CHECK: [[PTR:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[STRUCT_PTR:%[0-9]+]](s64) = G_PTRTOINT [[PTR]](p0) +; CHECK: [[PTR:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[STRUCT_PTR:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR]](p0) -; CHECK: [[SEL_PTR:%[0-9]+]](p0) = COPY %x1 -; CHECK: [[SEL:%[0-9]+]](s32) = G_PTRTOINT [[SEL_PTR]] -; CHECK: [[STRUCT_SEL:%[0-9]+]](s64) = G_INSERT {{%[0-9]+}}, [[SEL]](s32), 0 +; CHECK: [[SEL_PTR:%[0-9]+]]:_(p0) = COPY %x1 +; CHECK: [[SEL:%[0-9]+]]:_(s32) = G_PTRTOINT [[SEL_PTR]] +; CHECK: [[STRUCT_SEL:%[0-9]+]]:_(s64) = G_INSERT {{%[0-9]+}}, [[SEL]](s32), 0 -; CHECK: [[PTR:%[0-9]+]](p0) = G_INTTOPTR [[STRUCT_PTR]](s64) +; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[STRUCT_PTR]](s64) ; CHECK: G_STORE [[PTR]](p0), {{%[0-9]+}}(p0) -; CHECK: [[SEL_TMP:%[0-9]+]](s32) = G_EXTRACT [[STRUCT_SEL]](s64), 0 -; CHECK: [[SEL:%[0-9]+]](s32) = COPY [[SEL_TMP]] +; CHECK: [[SEL_TMP:%[0-9]+]]:_(s32) = G_EXTRACT [[STRUCT_SEL]](s64), 0 +; CHECK: [[SEL:%[0-9]+]]:_(s32) = COPY [[SEL_TMP]] ; CHECK: G_STORE [[SEL]](s32), {{%[0-9]+}}(p0) define void @bar() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir b/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir index 70b55e4ebc66d..d352630c16b6a 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-ext.mir @@ -36,44 +36,44 @@ body: | liveins: %x0, %x1, %x2, %x3 %0(s64) = COPY %x0 - ; CHECK: %1(s1) = G_TRUNC %0 - ; CHECK: %2(s8) = G_TRUNC %0 - ; CHECK: %3(s16) = G_TRUNC %0 - ; CHECK: %4(s32) = G_TRUNC %0 + ; CHECK: %1:_(s1) = G_TRUNC %0 + ; CHECK: %2:_(s8) = G_TRUNC %0 + ; CHECK: %3:_(s16) = G_TRUNC %0 + ; CHECK: %4:_(s32) = G_TRUNC %0 %1(s1) = G_TRUNC %0 %2(s8) = G_TRUNC %0 %3(s16) = G_TRUNC %0 %4(s32) = G_TRUNC %0 - ; CHECK: %5(s64) = G_ANYEXT %1 - ; CHECK: %6(s64) = G_ZEXT %2 - ; CHECK: %7(s64) = G_ANYEXT %3 - ; CHECK: %8(s64) = G_SEXT %4 + ; CHECK: %5:_(s64) = G_ANYEXT %1 + ; CHECK: %6:_(s64) = G_ZEXT %2 + ; CHECK: %7:_(s64) = G_ANYEXT %3 + ; CHECK: %8:_(s64) = G_SEXT %4 %5(s64) = G_ANYEXT %1 %6(s64) = G_ZEXT %2 %7(s64) = G_ANYEXT %3 %8(s64) = G_SEXT %4 - ; CHECK: %9(s32) = G_SEXT %1 - ; CHECK: %10(s32) = G_ZEXT %2 - ; CHECK: %11(s32) = G_ANYEXT %3 + ; CHECK: %9:_(s32) = G_SEXT %1 + ; CHECK: %10:_(s32) = G_ZEXT %2 + ; CHECK: %11:_(s32) = G_ANYEXT %3 %9(s32) = G_SEXT %1 %10(s32) = G_ZEXT %2 %11(s32) = G_ANYEXT %3 - ; CHECK: %12(s32) = G_ZEXT %1 - ; CHECK: %13(s32) = G_ANYEXT %2 - ; CHECK: %14(s32) = G_SEXT %3 + ; CHECK: %12:_(s32) = G_ZEXT %1 + ; CHECK: %13:_(s32) = G_ANYEXT %2 + ; CHECK: %14:_(s32) = G_SEXT %3 %12(s32) = G_ZEXT %1 %13(s32) = G_ANYEXT %2 %14(s32) = G_SEXT %3 - ; CHECK: %15(s8) = G_ZEXT %1 - ; CHECK: %16(s16) = G_ANYEXT %2 + ; CHECK: %15:_(s8) = G_ZEXT %1 + ; CHECK: %16:_(s16) = G_ANYEXT %2 %15(s8) = G_ZEXT %1 %16(s16) = G_ANYEXT %2 - ; CHECK: %18(s64) = G_FPEXT %17 + ; CHECK: %18:_(s64) = G_FPEXT %17 %17(s32) = G_TRUNC %0 %18(s64) = G_FPEXT %17 ... 
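
Stepping back to the legalize-exceptions.ll checks earlier in this group: they describe how the {i8*, i32} landing-pad value is assembled field by field, with the exception pointer arriving in x0 and the type selector in x1 (a pointer-width value whose low 32 bits are extracted). A small C++ analogue, purely illustrative and with hypothetical names:

#include <cassert>
#include <cstdint>

struct LandingPadValue {
  void *exn;    // stored via G_STORE of the p0 half
  int32_t sel;  // G_PTRTOINT + G_EXTRACT of the low 32 bits
};

static LandingPadValue build(void *x0, uintptr_t x1) {
  LandingPadValue v;
  v.exn = x0;
  v.sel = (int32_t)x1;
  return v;
}

int main() {
  int token = 0;
  LandingPadValue v = build(&token, 7);
  assert(v.exn == &token && v.sel == 7);
  return 0;
}
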
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir b/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir index dc6b59b24a9ae..3f6c00e20a230 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-extracts.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- @@ -10,10 +11,16 @@ body: | ; value stored is forwarded directly from first load. ; CHECK-LABEL: name: test_extracts_1 - ; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD - ; CHECK: {{%[0-9]+}}(s64) = G_LOAD - ; CHECK: [[VAL:%[0-9]+]](s64) = COPY [[LO]] - ; CHECK: G_STORE [[VAL]] + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY %x2 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 16) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY2]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 16) + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CHECK: G_STORE [[COPY3]](s64), [[COPY2]](p0) :: (store 8) + ; CHECK: RET_ReallyLR %0:_(s64) = COPY %x0 %1:_(s32) = COPY %w1 %2:_(p0) = COPY %x2 @@ -31,13 +38,19 @@ body: | ; Low extraction wipes takes whole low register. High extraction is real. ; CHECK-LABEL: name: test_extracts_2 - ; CHECK: [[LO_TMP:%[0-9]+]](s64) = G_LOAD - ; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD - ; CHECK: [[LO:%[0-9]+]](s64) = COPY [[LO_TMP]] - ; CHECK: [[NEWHI_TMP:%[0-9]+]](s32) = G_EXTRACT [[HI]](s64), 0 - ; CHECK: [[NEWHI:%[0-9]+]](s32) = COPY [[NEWHI_TMP]] - ; CHECK: G_STORE [[LO]] - ; CHECK: G_STORE [[NEWHI]] + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p0) = COPY %x2 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load 16) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY2]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP]](p0) :: (load 16) + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD1]](s64), 0 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32) + ; CHECK: G_STORE [[COPY3]](s64), [[COPY2]](p0) :: (store 8) + ; CHECK: G_STORE [[COPY4]](s32), [[COPY2]](p0) :: (store 4) + ; CHECK: RET_ReallyLR %0:_(s64) = COPY %x0 %1:_(s32) = COPY %w1 %2:_(p0) = COPY %x2 @@ -57,9 +70,12 @@ body: | ; CHECK-LABEL: name: test_extracts_3 - ; CHECK: [[LO:%[0-9]+]](s32) = G_EXTRACT %0(s64), 32 - ; CHECK: [[HI:%[0-9]+]](s32) = G_EXTRACT %1(s64), 0 - ; CHECK: %3(s64) = G_MERGE_VALUES [[LO]](s32), [[HI]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 32 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK: RET_ReallyLR %0:_(s64) = COPY %x0 %1:_(s64) = COPY %x1 %2:_(s128) = G_MERGE_VALUES %0, %1 @@ -75,8 +91,11 @@ body: | ; CHECK-LABEL: name: test_extracts_4 - ; CHECK: [[LO_TMP:%[0-9]+]](s32) = G_EXTRACT %0(s64), 32 - ; CHECK: %3(s32) = COPY [[LO_TMP]] + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s64), 
32 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32) + ; CHECK: RET_ReallyLR %0:_(s64) = COPY %x0 %1:_(s64) = COPY %x1 %2:_(s128) = G_MERGE_VALUES %0, %1 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir b/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir index 64cbd93f46c42..4f57ee5525420 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-fcmp.mir @@ -29,13 +29,13 @@ body: | %2(s32) = G_TRUNC %0 %3(s32) = G_TRUNC %1 - ; CHECK: [[CMP1:%[0-9]+]](s32) = G_FCMP floatpred(oge), %0(s64), %1 - ; CHECK: [[TRUNC1:%[0-9]+]](s1) = G_TRUNC [[CMP1]] + ; CHECK: [[CMP1:%[0-9]+]]:_(s32) = G_FCMP floatpred(oge), %0(s64), %1 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1) = G_TRUNC [[CMP1]] %4(s32) = G_FCMP floatpred(oge), %0, %1 %6(s1) = G_TRUNC %4(s32) - ; CHECK: [[CMP2:%[0-9]+]](s32) = G_FCMP floatpred(uno), %2(s32), %3 - ; CHECK: [[TRUNC2:%[0-9]+]](s1) = G_TRUNC [[CMP2]] + ; CHECK: [[CMP2:%[0-9]+]]:_(s32) = G_FCMP floatpred(uno), %2(s32), %3 + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[CMP2]] %5(s32) = G_FCMP floatpred(uno), %2, %3 %7(s1) = G_TRUNC %5(s32) ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-fneg.mir b/test/CodeGen/AArch64/GlobalISel/legalize-fneg.mir index 8b5cbdfa55e39..e7dc314f034f0 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-fneg.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-fneg.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -21,10 +22,10 @@ body: | bb.1: liveins: %s0 ; CHECK-LABEL: name: test_fneg_f32 - ; CHECK: [[VAR:%[0-9]+]](s32) = COPY %s0 - ; CHECK: [[ZERO:%[0-9]+]](s32) = G_FCONSTANT float -0.000000e+00 - ; CHECK: [[RES:%[0-9]+]](s32) = G_FSUB [[ZERO]], [[VAR]] - ; CHECK: %s0 = COPY [[RES]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %s0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -0.000000e+00 + ; CHECK: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[C]], [[COPY]] + ; CHECK: %s0 = COPY [[FSUB]](s32) %0(s32) = COPY %s0 %1(s32) = G_FNEG %0 %s0 = COPY %1(s32) @@ -38,10 +39,10 @@ body: | bb.1: liveins: %d0 ; CHECK-LABEL: name: test_fneg_f64 - ; CHECK: [[VAR:%[0-9]+]](s64) = COPY %d0 - ; CHECK: [[ZERO:%[0-9]+]](s64) = G_FCONSTANT double -0.000000e+00 - ; CHECK: [[RES:%[0-9]+]](s64) = G_FSUB [[ZERO]], [[VAR]] - ; CHECK: %d0 = COPY [[RES]](s64) + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %d0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -0.000000e+00 + ; CHECK: [[FSUB:%[0-9]+]]:_(s64) = G_FSUB [[C]], [[COPY]] + ; CHECK: %d0 = COPY [[FSUB]](s64) %0(s64) = COPY %d0 %1(s64) = G_FNEG %0 %d0 = COPY %1(s64) diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir b/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir index f79d0382ea7c4..f82d13c71cb5d 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-fptoi.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -29,10 +30,10 @@ name: test_fptosi_s32_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_fptosi_s32_s32 - ; CHECK: %1(s32) = G_FPTOSI %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + %0:_(s32) = COPY %w0 %1:_(s32) = G_FPTOSI %0 ... 
@@ -41,10 +42,10 @@ name: test_fptoui_s32_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_fptoui_s32_s32 - ; CHECK: %1(s32) = G_FPTOUI %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) + %0:_(s32) = COPY %w0 %1:_(s32) = G_FPTOUI %0 ... @@ -53,10 +54,10 @@ name: test_fptosi_s32_s64 body: | bb.0: liveins: %x0 - %0:_(s64) = COPY %x0 - ; CHECK-LABEL: name: test_fptosi_s32_s64 - ; CHECK: %1(s32) = G_FPTOSI %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + %0:_(s64) = COPY %x0 %1:_(s32) = G_FPTOSI %0 ... @@ -65,10 +66,10 @@ name: test_fptoui_s32_s64 body: | bb.0: liveins: %x0 - %0:_(s64) = COPY %x0 - ; CHECK-LABEL: name: test_fptoui_s32_s64 - ; CHECK: %1(s32) = G_FPTOUI %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s64) + %0:_(s64) = COPY %x0 %1:_(s32) = G_FPTOUI %0 ... @@ -77,10 +78,10 @@ name: test_fptosi_s64_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_fptosi_s64_s32 - ; CHECK: %1(s64) = G_FPTOSI %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s64) = G_FPTOSI [[COPY]](s32) + %0:_(s32) = COPY %w0 %1:_(s64) = G_FPTOSI %0 ... @@ -89,10 +90,10 @@ name: test_fptoui_s64_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_fptoui_s64_s32 - ; CHECK: %1(s64) = G_FPTOUI %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s64) = G_FPTOUI [[COPY]](s32) + %0:_(s32) = COPY %w0 %1:_(s64) = G_FPTOUI %0 ... @@ -101,10 +102,10 @@ name: test_fptosi_s64_s64 body: | bb.0: liveins: %x0 - %0:_(s64) = COPY %x0 - ; CHECK-LABEL: name: test_fptosi_s64_s64 - ; CHECK: %1(s64) = G_FPTOSI %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s64) = G_FPTOSI [[COPY]](s64) + %0:_(s64) = COPY %x0 %1:_(s64) = G_FPTOSI %0 ... @@ -113,10 +114,10 @@ name: test_fptoui_s64_s64 body: | bb.0: liveins: %x0 - %0:_(s64) = COPY %x0 - ; CHECK-LABEL: name: test_fptoui_s64_s64 - ; CHECK: %1(s64) = G_FPTOUI %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s64) = G_FPTOUI [[COPY]](s64) + %0:_(s64) = COPY %x0 %1:_(s64) = G_FPTOUI %0 ... @@ -127,11 +128,11 @@ name: test_fptosi_s1_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_fptosi_s1_s32 - ; CHECK: %2(s32) = G_FPTOSI %0 - ; CHECK: %1(s1) = G_TRUNC %2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOSI]](s32) + %0:_(s32) = COPY %w0 %1:_(s1) = G_FPTOSI %0 ... @@ -140,11 +141,11 @@ name: test_fptoui_s1_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_fptoui_s1_s32 - ; CHECK: %2(s32) = G_FPTOUI %0 - ; CHECK: %1(s1) = G_TRUNC %2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[FPTOUI]](s32) + %0:_(s32) = COPY %w0 %1:_(s1) = G_FPTOUI %0 ... 
@@ -153,11 +154,11 @@ name: test_fptosi_s8_s64 body: | bb.0: liveins: %x0 - %0:_(s64) = COPY %x0 - ; CHECK-LABEL: name: test_fptosi_s8_s64 - ; CHECK: %2(s32) = G_FPTOSI %0 - ; CHECK: %1(s8) = G_TRUNC %2 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[FPTOSI]](s32) + %0:_(s64) = COPY %x0 %1:_(s8) = G_FPTOSI %0 ... @@ -166,11 +167,11 @@ name: test_fptoui_s8_s64 body: | bb.0: liveins: %x0 - %0:_(s64) = COPY %x0 - ; CHECK-LABEL: name: test_fptoui_s8_s64 - ; CHECK: %2(s32) = G_FPTOUI %0 - ; CHECK: %1(s8) = G_TRUNC %2 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[FPTOUI]](s32) + %0:_(s64) = COPY %x0 %1:_(s8) = G_FPTOUI %0 ... @@ -179,11 +180,11 @@ name: test_fptosi_s16_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_fptosi_s16_s32 - ; CHECK: %2(s32) = G_FPTOSI %0 - ; CHECK: %1(s16) = G_TRUNC %2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[FPTOSI]](s32) + %0:_(s32) = COPY %w0 %1:_(s16) = G_FPTOSI %0 ... @@ -192,10 +193,10 @@ name: test_fptoui_s16_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_fptoui_s16_s32 - ; CHECK: %2(s32) = G_FPTOUI %0 - ; CHECK: %1(s16) = G_TRUNC %2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[FPTOUI]](s32) + %0:_(s32) = COPY %w0 %1:_(s16) = G_FPTOUI %0 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-gep.mir b/test/CodeGen/AArch64/GlobalISel/legalize-gep.mir index 130ecd2f63823..67310d10336ea 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-gep.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-gep.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -19,16 +20,16 @@ registers: body: | bb.0.entry: liveins: %x0, %x1, %x2, %x3 - ; CHECK-LABEL: name: test_gep_small - ; CHECK: [[A:%.*]](p0) = COPY %x0 - ; CHECK: [[B:%.*]](s64) = COPY %x1 - ; CHECK: [[C:%.*]](s64) = G_CONSTANT i64 56 - ; CHECK: [[SRC:%.*]](s64) = COPY [[B]](s64) - ; CHECK: [[SHL:%.*]](s64) = G_SHL [[SRC]], [[C]] - ; CHECK: [[SEXT:%.*]](s64) = G_ASHR [[SHL]], [[C]] - ; CHECK: G_GEP [[A]], [[SEXT]] - + ; CHECK-LABEL: name: test_gep_small + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 56 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY2]], [[C]] + ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[C]] + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_GEP [[COPY]], [[ASHR]](s64) + ; CHECK: %x0 = COPY [[GEP]](p0) %0(p0) = COPY %x0 %1(s64) = COPY %x1 %2(s8) = G_TRUNC %1 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-ignore-non-generic.mir b/test/CodeGen/AArch64/GlobalISel/legalize-ignore-non-generic.mir index 43aa06ba3d903..b0de3fc8092a9 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-ignore-non-generic.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-ignore-non-generic.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | 
FileCheck %s --- | @@ -14,10 +15,10 @@ registers: body: | bb.0: liveins: %x0 - ; CHECK-LABEL: name: test_copy - ; CHECK: %0(s64) = COPY %x0 - ; CHECK-NEXT: %x0 = COPY %0 + ; CHECK-LABEL: name: test_copy + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: %x0 = COPY [[COPY]](s64) %0(s64) = COPY %x0 %x0 = COPY %0 ... @@ -26,8 +27,8 @@ body: | name: test_targetspecific body: | bb.0: + ; CHECK-LABEL: name: test_targetspecific ; CHECK: RET_ReallyLR - RET_ReallyLR ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir b/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir index 917f181099ec1..7432b6761b73e 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir @@ -21,8 +21,8 @@ body: | ; forwarded to the G_STORE. Hi part is unchanged so (split) G_LOAD gets ; forwarded. ; CHECK-LABEL: name: test_inserts_1 - ; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD - ; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD + ; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD + ; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD ; CHECK: G_STORE %0(s64) ; CHECK: G_STORE [[HI]] %0:_(s64) = COPY %x0 @@ -43,9 +43,9 @@ body: | ; Low insertion wipes out the old register entirely, so %0 gets forwarded ; to the G_STORE again. Second insertion is real. ; CHECK-LABEL: name: test_inserts_2 - ; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD - ; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD - ; CHECK: [[NEWHI:%[0-9]+]](s64) = G_INSERT [[HI]], %1(s32), 0 + ; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD + ; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD + ; CHECK: [[NEWHI:%[0-9]+]]:_(s64) = G_INSERT [[HI]], %1(s32), 0 ; CHECK: G_STORE %0(s64) ; CHECK: G_STORE [[NEWHI]] %0:_(s64) = COPY %x0 @@ -68,9 +68,9 @@ body: | ; certainly better than the alternative of directly forwarding the value ; which would cause a nasty type mismatch. ; CHECK-LABEL: name: test_inserts_3 - ; CHECK: [[LO:%[0-9]+]](s64) = G_LOAD - ; CHECK: [[HI:%[0-9]+]](s64) = G_LOAD - ; CHECK: [[NEWLO:%[0-9]+]](s64) = G_PTRTOINT %0(p0) + ; CHECK: [[LO:%[0-9]+]]:_(s64) = G_LOAD + ; CHECK: [[HI:%[0-9]+]]:_(s64) = G_LOAD + ; CHECK: [[NEWLO:%[0-9]+]]:_(s64) = G_PTRTOINT %0(p0) ; CHECK: G_STORE [[NEWLO]](s64) ; CHECK: G_STORE [[HI]] %0:_(p0) = COPY %x0 @@ -90,11 +90,13 @@ body: | ; A narrow insert gets surrounded by a G_ANYEXT/G_TRUNC pair. 
; CHECK-LABEL: name: test_inserts_4 - ; CHECK: [[VALEXT:%[0-9]+]](s32) = G_ANYEXT %1(s8) - ; CHECK: [[VAL:%[0-9]+]](s32) = G_INSERT [[VALEXT]], %0(s1), 0 - ; CHECK: %3(s8) = G_TRUNC [[VAL]](s32) - %0:_(s1) = COPY %w0 - %1:_(s8) = COPY %w1 + ; CHECK: [[VALEXT:%[0-9]+]]:_(s32) = COPY %2(s32) + ; CHECK: [[VAL:%[0-9]+]]:_(s32) = G_INSERT [[VALEXT]], %1(s1), 0 + ; CHECK: %5:_(s8) = G_TRUNC [[VAL]](s32) + %4:_(s32) = COPY %w0 + %0:_(s1) = G_TRUNC %4 + %5:_(s32) = COPY %w1 + %1:_(s8) = G_TRUNC %5 %2:_(p0) = COPY %x2 %3:_(s8) = G_INSERT %1(s8), %0(s1), 0 G_STORE %3(s8), %2(p0) :: (store 1) @@ -109,11 +111,11 @@ body: | ; CHECK-LABEL: name: test_inserts_5 - ; CHECK: [[INS_LO:%[0-9]+]](s32) = G_EXTRACT %2(s64), 0 - ; CHECK: [[VAL_LO:%[0-9]+]](s64) = G_INSERT %0, [[INS_LO]](s32), 32 - ; CHECK: [[INS_HI:%[0-9]+]](s32) = G_EXTRACT %2(s64), 32 - ; CHECK: [[VAL_HI:%[0-9]+]](s64) = G_INSERT %1, [[INS_HI]](s32), 0 - ; CHECK: %4(s128) = G_MERGE_VALUES [[VAL_LO]](s64), [[VAL_HI]](s64) + ; CHECK: [[INS_LO:%[0-9]+]]:_(s32) = G_EXTRACT %2(s64), 0 + ; CHECK: [[VAL_LO:%[0-9]+]]:_(s64) = G_INSERT %0, [[INS_LO]](s32), 32 + ; CHECK: [[INS_HI:%[0-9]+]]:_(s32) = G_EXTRACT %2(s64), 32 + ; CHECK: [[VAL_HI:%[0-9]+]]:_(s64) = G_INSERT %1, [[INS_HI]](s32), 0 + ; CHECK: %4:_(s128) = G_MERGE_VALUES [[VAL_LO]](s64), [[VAL_HI]](s64) %0:_(s64) = COPY %x0 %1:_(s64) = COPY %x1 %2:_(s64) = COPY %x2 @@ -130,8 +132,8 @@ body: | ; CHECK-LABEL: name: test_inserts_6 - ; CHECK: [[VAL_LO:%[0-9]+]](s64) = G_INSERT %0, %2(s32), 32 - ; CHECK: %4(s128) = G_MERGE_VALUES [[VAL_LO]](s64), %1(s64) + ; CHECK: [[VAL_LO:%[0-9]+]]:_(s64) = G_INSERT %0, %2(s32), 32 + ; CHECK: %4:_(s128) = G_MERGE_VALUES [[VAL_LO]](s64), %1(s64) %0:_(s64) = COPY %x0 %1:_(s64) = COPY %x1 %2:_(s32) = COPY %w2 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir b/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir index 8d782a92e92c6..4ab9bf30914c6 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -29,10 +30,10 @@ name: test_sitofp_s32_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_sitofp_s32_s32 - ; CHECK: %1(s32) = G_SITOFP %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s32) + %0:_(s32) = COPY %w0 %1:_(s32) = G_SITOFP %0 ... @@ -41,10 +42,10 @@ name: test_uitofp_s32_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_uitofp_s32_s32 - ; CHECK: %1(s32) = G_UITOFP %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s32) + %0:_(s32) = COPY %w0 %1:_(s32) = G_UITOFP %0 ... @@ -53,10 +54,10 @@ name: test_sitofp_s32_s64 body: | bb.0: liveins: %x0 - %0:_(s64) = COPY %x0 - ; CHECK-LABEL: name: test_sitofp_s32_s64 - ; CHECK: %1(s32) = G_SITOFP %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[COPY]](s64) + %0:_(s64) = COPY %x0 %1:_(s32) = G_SITOFP %0 ... @@ -65,10 +66,10 @@ name: test_uitofp_s32_s64 body: | bb.0: liveins: %x0 - %0:_(s64) = COPY %x0 - ; CHECK-LABEL: name: test_uitofp_s32_s64 - ; CHECK: %1(s32) = G_UITOFP %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY]](s64) + %0:_(s64) = COPY %x0 %1:_(s32) = G_UITOFP %0 ... 
@@ -77,10 +78,10 @@ name: test_sitofp_s64_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_sitofp_s64_s32 - ; CHECK: %1(s64) = G_SITOFP %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[COPY]](s32) + %0:_(s32) = COPY %w0 %1:_(s64) = G_SITOFP %0 ... @@ -89,10 +90,10 @@ name: test_uitofp_s64_s32 body: | bb.0: liveins: %w0 - %0:_(s32) = COPY %w0 - ; CHECK-LABEL: name: test_uitofp_s64_s32 - ; CHECK: %1(s64) = G_UITOFP %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[COPY]](s32) + %0:_(s32) = COPY %w0 %1:_(s64) = G_UITOFP %0 ... @@ -101,10 +102,10 @@ name: test_sitofp_s64_s64 body: | bb.0: liveins: %x0 - %0:_(s64) = COPY %x0 - ; CHECK-LABEL: name: test_sitofp_s64_s64 - ; CHECK: %1(s64) = G_SITOFP %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[COPY]](s64) + %0:_(s64) = COPY %x0 %1:_(s64) = G_SITOFP %0 ... @@ -113,10 +114,10 @@ name: test_uitofp_s64_s64 body: | bb.0: liveins: %x0 - %0:_(s64) = COPY %x0 - ; CHECK-LABEL: name: test_uitofp_s64_s64 - ; CHECK: %1(s64) = G_UITOFP %0 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[COPY]](s64) + %0:_(s64) = COPY %x0 %1:_(s64) = G_UITOFP %0 ... @@ -126,15 +127,15 @@ name: test_sitofp_s32_s1 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: test_sitofp_s32_s1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]] + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]] + ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) %0:_(s32) = COPY %w0 %1:_(s1) = G_TRUNC %0 - - ; CHECK-LABEL: name: test_sitofp_s32_s1 - ; CHECK: [[C1:%.*]](s32) = G_CONSTANT i32 31 - ; CHECK: [[SRC:%.*]](s32) = COPY %0(s32) - ; CHECK: [[SHL1:%.*]](s32) = G_SHL [[SRC]], [[C1]] - ; CHECK: [[SEXT:%.*]](s32) = G_ASHR [[SHL1]], [[C1]] - ; CHECK: %2(s32) = G_SITOFP [[SEXT]] %2:_(s32) = G_SITOFP %1 ... @@ -143,14 +144,14 @@ name: test_uitofp_s32_s1 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: test_uitofp_s32_s1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) %0:_(s32) = COPY %w0 %1:_(s1) = G_TRUNC %0 - - ; CHECK-LABEL: name: test_uitofp_s32_s1 - ; CHECK: [[C:%.*]](s32) = G_CONSTANT i32 1 - ; CHECK: [[SRC:%.*]](s32) = COPY %0(s32) - ; CHECK: [[ZEXT:%.*]](s32) = G_AND [[SRC]], [[C]] - ; CHECK: [[RES:%.*]](s32) = G_UITOFP [[ZEXT]] %2:_(s32) = G_UITOFP %1 ... 
@@ -159,15 +160,15 @@ name: test_sitofp_s64_s8 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: test_sitofp_s64_s8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]] + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]] + ; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[ASHR]](s32) %0:_(s32) = COPY %w0 %1:_(s8) = G_TRUNC %0 - - ; CHECK-LABEL: name: test_sitofp_s64_s8 - ; CHECK: [[C1:%.*]](s32) = G_CONSTANT i32 24 - ; CHECK: [[SRC:%.*]](s32) = COPY %0(s32) - ; CHECK: [[SHL1:%.*]](s32) = G_SHL [[SRC]], [[C1]] - ; CHECK: [[SEXT:%.*]](s32) = G_ASHR [[SHL1]], [[C1]] - ; CHECK: %2(s64) = G_SITOFP [[SEXT]] %2:_(s64) = G_SITOFP %1 ... @@ -176,14 +177,14 @@ name: test_uitofp_s64_s8 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: test_uitofp_s64_s8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[AND]](s32) %0:_(s32) = COPY %w0 %1:_(s8) = G_TRUNC %0 - - ; CHECK-LABEL: name: test_uitofp_s64_s8 - ; CHECK: [[C:%.*]](s32) = G_CONSTANT i32 255 - ; CHECK: [[SRC:%.*]](s32) = COPY %0(s32) - ; CHECK: [[ZEXT:%.*]](s32) = G_AND [[SRC]], [[C]] - ; CHECK: %2(s64) = G_UITOFP [[ZEXT]] %2:_(s64) = G_UITOFP %1 ... @@ -192,15 +193,15 @@ name: test_sitofp_s32_s16 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: test_sitofp_s32_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]] + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[C]] + ; CHECK: [[SITOFP:%[0-9]+]]:_(s32) = G_SITOFP [[ASHR]](s32) %0:_(s32) = COPY %w0 %1:_(s16) = G_TRUNC %0 - - ; CHECK-LABEL: name: test_sitofp_s32_s16 - ; CHECK: [[C1:%.*]](s32) = G_CONSTANT i32 16 - ; CHECK: [[SRC:%.*]](s32) = COPY %0(s32) - ; CHECK: [[SHL1:%.*]](s32) = G_SHL [[SRC]], [[C1]] - ; CHECK: [[SEXT:%.*]](s32) = G_ASHR [[SHL1]], [[C1]] - ; CHECK: %2(s32) = G_SITOFP [[SEXT]] %2:_(s32) = G_SITOFP %1 ... @@ -209,13 +210,13 @@ name: test_uitofp_s32_s16 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: test_uitofp_s32_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND]](s32) %0:_(s32) = COPY %w0 %1:_(s16) = G_TRUNC %0 - - ; CHECK-LABEL: name: test_uitofp_s32_s16 - ; CHECK: [[C:%.*]](s32) = G_CONSTANT i32 65535 - ; CHECK: [[SRC:%.*]](s32) = COPY %0(s32) - ; CHECK: [[ZEXT:%.*]](s32) = G_AND [[SRC]], [[C]] - ; CHECK: [[RES:%.*]](s32) = G_UITOFP [[ZEXT]] %2:_(s32) = G_UITOFP %1 ... 
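
The itofp checks above all follow one pattern: sources narrower than 32 bits are widened before the conversion, by a G_SHL/G_ASHR pair for G_SITOFP and a G_AND mask for G_UITOFP. Here is a minimal sketch of the s16 case, assuming two's-complement narrowing and an arithmetic right shift (both hold on the AArch64 targets these tests run on):

#include <cassert>
#include <cstdint>

int main() {
  int32_t raw = (int32_t)0xFFFF8000;  // only the low 16 bits matter
  // G_SITOFP path: G_SHL then G_ASHR by 16 recovers the sign.
  int32_t sext = (int32_t)((uint32_t)raw << 16) >> 16;
  // G_UITOFP path: G_AND with 65535 zero-extends instead.
  int32_t zext = raw & 65535;
  assert((float)sext == -32768.0f);  // signed conversion sees -32768
  assert((float)zext == 32768.0f);   // unsigned conversion sees 32768
  return 0;
}
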
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir index 0149043f9e5ce..cda82fb46e7ad 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir @@ -31,33 +31,33 @@ body: | ; CHECK-LABEL: name: test_load %0(p0) = COPY %x0 - ; CHECK: [[BIT8:%[0-9]+]](s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr) - ; CHECK: %1(s1) = G_TRUNC [[BIT8]] + ; CHECK: [[BIT8:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr) + ; CHECK: %1:_(s1) = G_TRUNC [[BIT8]] %1(s1) = G_LOAD %0 :: (load 1 from %ir.addr) - ; CHECK: %2(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr) + ; CHECK: %2:_(s8) = G_LOAD %0(p0) :: (load 1 from %ir.addr) %2(s8) = G_LOAD %0 :: (load 1 from %ir.addr) - ; CHECK: %3(s16) = G_LOAD %0(p0) :: (load 2 from %ir.addr) + ; CHECK: %3:_(s16) = G_LOAD %0(p0) :: (load 2 from %ir.addr) %3(s16) = G_LOAD %0 :: (load 2 from %ir.addr) - ; CHECK: %4(s32) = G_LOAD %0(p0) :: (load 4 from %ir.addr) + ; CHECK: %4:_(s32) = G_LOAD %0(p0) :: (load 4 from %ir.addr) %4(s32) = G_LOAD %0 :: (load 4 from %ir.addr) - ; CHECK: %5(s64) = G_LOAD %0(p0) :: (load 8 from %ir.addr) + ; CHECK: %5:_(s64) = G_LOAD %0(p0) :: (load 8 from %ir.addr) %5(s64) = G_LOAD %0 :: (load 8 from %ir.addr) - ; CHECK: %6(p0) = G_LOAD %0(p0) :: (load 8 from %ir.addr) + ; CHECK: %6:_(p0) = G_LOAD %0(p0) :: (load 8 from %ir.addr) %6(p0) = G_LOAD %0(p0) :: (load 8 from %ir.addr) - ; CHECK: %7(<2 x s32>) = G_LOAD %0(p0) :: (load 8 from %ir.addr) + ; CHECK: %7:_(<2 x s32>) = G_LOAD %0(p0) :: (load 8 from %ir.addr) %7(<2 x s32>) = G_LOAD %0(p0) :: (load 8 from %ir.addr) - ; CHECK: [[LOAD0:%[0-9]+]](s64) = G_LOAD %0(p0) :: (load 16 from %ir.addr) - ; CHECK: [[OFFSET1:%[0-9]+]](s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP1:%[0-9]+]](p0) = G_GEP %0, [[OFFSET1]](s64) - ; CHECK: [[LOAD1:%[0-9]+]](s64) = G_LOAD [[GEP1]](p0) :: (load 16 from %ir.addr) - ; CHECK: %8(s128) = G_MERGE_VALUES [[LOAD0]](s64), [[LOAD1]](s64) + ; CHECK: [[LOAD0:%[0-9]+]]:_(s64) = G_LOAD %0(p0) :: (load 16 from %ir.addr) + ; CHECK: [[OFFSET1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[OFFSET1]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[GEP1]](p0) :: (load 16 from %ir.addr) + ; CHECK: %8:_(s128) = G_MERGE_VALUES [[LOAD0]](s64), [[LOAD1]](s64) %8(s128) = G_LOAD %0(p0) :: (load 16 from %ir.addr) ... 
@@ -80,11 +80,11 @@ body: | %0(p0) = COPY %x0 %1(s32) = COPY %w1 - ; CHECK: [[C1:%.*]](s32) = G_CONSTANT i32 1 - ; CHECK: [[B:%.*]](s32) = COPY %1(s32) - ; CHECK: [[COPY_C1:%.*]](s32) = COPY [[C1]] - ; CHECK: [[AND:%.*]](s32) = G_AND [[B]], [[COPY_C1]] - ; CHECK: [[BIT8:%.*]](s8) = G_TRUNC [[AND]] + ; CHECK: [[C1:%.*]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[B:%.*]]:_(s32) = COPY %1(s32) + ; CHECK: [[COPY_C1:%.*]]:_(s32) = COPY [[C1]] + ; CHECK: [[AND:%.*]]:_(s32) = G_AND [[B]], [[COPY_C1]] + ; CHECK: [[BIT8:%.*]]:_(s8) = G_TRUNC [[AND]] ; CHECK: G_STORE [[BIT8]](s8), %0(p0) :: (store 1 into %ir.addr) @@ -110,8 +110,8 @@ body: | G_STORE %0(p0), %0(p0) :: (store 8 into %ir.addr) ; CHECK: G_STORE %5(s64), %0(p0) :: (store 16 into %ir.addr) - ; CHECK: [[OFFSET1:%[0-9]+]](s64) = G_CONSTANT i64 8 - ; CHECK: [[GEP1:%[0-9]+]](p0) = G_GEP %0, [[OFFSET1]](s64) + ; CHECK: [[OFFSET1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_GEP %0, [[OFFSET1]](s64) ; CHECK: G_STORE %6(s64), [[GEP1]](p0) :: (store 16 into %ir.addr) %6(s64) = G_PTRTOINT %0(p0) %7(s128) = G_MERGE_VALUES %5, %6 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir b/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir index 47f0e1fc33cd2..bbc559eb0e1ca 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-mul.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -22,18 +23,22 @@ registers: body: | bb.0.entry: liveins: %x0, %x1, %x2, %x3 - ; CHECK-LABEL: name: test_scalar_mul_small - ; CHECK: [[OP0:%.*]](s32) = G_TRUNC %0 - ; CHECK: [[OP1:%.*]](s32) = G_TRUNC %1 - ; CHECK: [[RES32:%.*]](s32) = G_MUL [[OP0]], [[OP1]] - ; CHECK: [[RES:%.*]](s8) = G_TRUNC [[RES32]](s32) + ; CHECK-LABEL: name: test_scalar_mul_small + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[TRUNC]], [[TRUNC1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[MUL]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s8) + ; CHECK: %x0 = COPY [[ANYEXT]](s64) %0(s64) = COPY %x0 %1(s64) = COPY %x1 %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 %4(s8) = G_MUL %2, %3 - %5(s64) = G_ANYEXT %2 + %5(s64) = G_ANYEXT %4 %x0 = COPY %5 ... @@ -44,15 +49,16 @@ body: | bb.0: liveins: %x0, %x1, %w2, %w3 + ; CHECK-LABEL: name: test_mul_overflow + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[COPY]], [[COPY1]] + ; CHECK: [[SMULH:%[0-9]+]]:_(s64) = G_SMULH [[COPY]], [[COPY1]] + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SMULH]](s64), [[C]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) %0:_(s64) = COPY %x0 %1:_(s64) = COPY %x1 - - ; CHECK-LABEL: name: test_mul_overflow - ; CHECK: %2(s64) = G_MUL %0, %1 - ; CHECK: [[HI:%[0-9]+]](s64) = G_SMULH %0, %1 - ; CHECK: [[ZERO:%[0-9]+]](s64) = G_CONSTANT i64 0 - ; CHECK: [[CMP:%[0-9]+]](s32) = G_ICMP intpred(ne), [[HI]](s64), [[ZERO]] - ; CHECK: [[TRUNC:%[0-9]+]](s1) = G_TRUNC [[CMP]] %2:_(s64), %3:_(s1) = G_SMULO %0, %1 ... 
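
In test_mul_overflow above, the autogenerated checks make the G_SMULO expansion explicit: one G_MUL for the low 64 bits, one G_SMULH for the high 64 bits, and an icmp ne of the high half against the constant 0. The sketch below models exactly that comparison in C++, using the __int128 GCC/Clang extension to stand in for the 128-bit product; note it mirrors what the checks assert, not a general signed-overflow predicate:

#include <cassert>
#include <cstdint>

static bool smulo_as_checked(int64_t a, int64_t b, int64_t &lo) {
  __int128 full = (__int128)a * (__int128)b;
  lo = (int64_t)full;                  // G_MUL: the low 64 bits
  int64_t hi = (int64_t)(full >> 64);  // G_SMULH: the high 64 bits
  return hi != 0;                      // G_ICMP intpred(ne), hi, 0
}

int main() {
  int64_t lo;
  // (2^20)^2 fits in 64 bits: the high half stays zero.
  assert(!smulo_as_checked(INT64_C(1) << 20, INT64_C(1) << 20, lo));
  // (2^40)^2 does not: the high half becomes nonzero.
  assert(smulo_as_checked(INT64_C(1) << 40, INT64_C(1) << 40, lo));
  return 0;
}
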
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir b/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir index 9928ea54d2c98..b0c7d1324bf61 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-nonpowerof2eltsvec.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -18,10 +19,10 @@ body: | bb.0: liveins: %w0, %w1, %w2 ; CHECK-LABEL: name: test_legalize_merge_v3s32 - ; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %w0 - ; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %w1 - ; CHECK: [[ARG3:%[0-9]+]](s32) = COPY %w2 - ; CHECK: (<3 x s32>) = G_MERGE_VALUES [[ARG1]](s32), [[ARG2]](s32), [[ARG3]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %w2 + ; CHECK: [[MV:%[0-9]+]]:_(<3 x s32>) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) %0(s32) = COPY %w0 %1(s32) = COPY %w1 %2(s32) = COPY %w2 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-or.mir b/test/CodeGen/AArch64/GlobalISel/legalize-or.mir index 4d1a88871c95e..9536e8add2be2 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-or.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-or.mir @@ -1,13 +1,5 @@ -# RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s - ---- | - target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" - target triple = "aarch64--" - define void @test_scalar_or_small() { - entry: - ret void - } -... +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- name: test_scalar_or_small @@ -19,19 +11,65 @@ registers: - { id: 4, class: _ } - { id: 5, class: _ } body: | - bb.0.entry: + bb.0: liveins: %x0, %x1, %x2, %x3 - ; CHECK-LABEL: name: test_scalar_or_small - ; CHECK: [[OP0:%.*]](s32) = G_TRUNC %0 - ; CHECK: [[OP1:%.*]](s32) = G_TRUNC %1 - ; CHECK: [[RES32:%.*]](s32) = G_OR [[OP0]], [[OP1]] - ; CHECK: [[RES:%.*]](s8) = G_TRUNC [[RES32]](s32) + ; CHECK-LABEL: name: test_scalar_or_small + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s8) + ; CHECK: %x0 = COPY [[ANYEXT]](s64) %0(s64) = COPY %x0 %1(s64) = COPY %x1 %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 %4(s8) = G_OR %2, %3 - %5(s64) = G_ANYEXT %2 + %5(s64) = G_ANYEXT %4 %x0 = COPY %5 ... + +--- +name: test_big_scalar_power_of_2 +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: _ } +body: | + bb.0: + liveins: %x0, %x1, %x2, %x3 + ; We have a temporary G_MERGE_VALUES in the legalizer that gets + ; cleaned up with the G_UNMERGE_VALUES, so we end up directly + ; copying the results of the G_OR ops. 
+ + ; CHECK-LABEL: name: test_big_scalar_power_of_2 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY %x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY %x3 + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY2]] + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[COPY1]], [[COPY3]] + ; CHECK: %x0 = COPY [[OR]](s64) + ; CHECK: %x1 = COPY [[OR1]](s64) + ; CHECK: RET_ReallyLR implicit %x0, implicit %x1 + %0(s64) = COPY %x0 + %1(s64) = COPY %x1 + %2(s64) = COPY %x2 + %3(s64) = COPY %x3 + %4(s128) = G_MERGE_VALUES %0, %1 + %5(s128) = G_MERGE_VALUES %2, %3 + %6(s128) = G_OR %4, %5 + %7(s64), %8(s64) = G_UNMERGE_VALUES %6 + %x0 = COPY %7 + %x1 = COPY %8 + RET_ReallyLR implicit %x0, implicit %x1 +... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir b/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir index 7821db4e823be..68a8e6d95378b 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-phi.mir @@ -4,12 +4,17 @@ source_filename = "/tmp/test.ll" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-unknown" - + define i32 @legalize_phi(i32 %argc) { entry: ret i32 0 } + define i64* @legalize_phi_ptr(i64* %a, i64* %b, i1 %cond) { + entry: + ret i64* null + } + define i32 @legalize_phi_empty(i32 %argc) { entry: ret i32 0 @@ -43,7 +48,7 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: _, preferred-register: '' } - { id: 1, class: _, preferred-register: '' } - { id: 2, class: _, preferred-register: '' } @@ -55,26 +60,26 @@ registers: - { id: 8, class: _, preferred-register: '' } - { id: 9, class: _, preferred-register: '' } - { id: 10, class: _, preferred-register: '' } -liveins: +liveins: body: | bb.0: ; Test that we insert legalization artifacts(Truncs here) into the correct BBs ; while legalizing the G_PHI to s16. ; CHECK-LABEL: name: legalize_phi ; CHECK-LABEL: bb.1: - ; CHECK: [[ADD_BB1:%.*]](s32) = G_ADD - ; CHECK: [[RES_BB1:%.*]](s16) = G_TRUNC [[ADD_BB1]] + ; CHECK: [[ADD_BB1:%.*]]:_(s32) = G_ADD + ; CHECK: [[RES_BB1:%.*]]:_(s16) = G_TRUNC [[ADD_BB1]] ; CHECK-LABEL: bb.2: - ; CHECK: [[ADD_BB2:%.*]](s32) = G_ADD - ; CHECK: [[RES_BB2:%.*]](s16) = G_TRUNC [[ADD_BB2]] + ; CHECK: [[ADD_BB2:%.*]]:_(s32) = G_ADD + ; CHECK: [[RES_BB2:%.*]]:_(s16) = G_TRUNC [[ADD_BB2]] ; CHECK-LABEL: bb.3: - ; CHECK: [[RES_PHI:%.*]](s16) = G_PHI [[RES_BB1]](s16), %bb.1, [[RES_BB2]](s16), %bb.2 - ; CHECK: [[RES:%.*]](s1) = G_TRUNC [[RES_PHI]] + ; CHECK: [[RES_PHI:%.*]]:_(s16) = G_PHI [[RES_BB1]](s16), %bb.1, [[RES_BB2]](s16), %bb.2 + ; CHECK: [[RES:%.*]]:_(s1) = G_TRUNC [[RES_PHI]] successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: %w0 - + %0(s32) = COPY %w0 %1(s32) = G_CONSTANT i32 0 %3(s32) = G_CONSTANT i32 1 @@ -82,26 +87,74 @@ body: | %2(s1) = G_ICMP intpred(ugt), %0(s32), %1 G_BRCOND %2(s1), %bb.1 G_BR %bb.2 - + bb.1: successors: %bb.3(0x80000000) - + %4(s32) = G_ADD %0, %3 %5(s1) = G_TRUNC %4(s32) G_BR %bb.3 - + bb.2: successors: %bb.3(0x80000000) - + %7(s32) = G_ADD %0, %6 %8(s1) = G_TRUNC %7(s32) - + bb.3: %9(s1) = G_PHI %5(s1), %bb.1, %8(s1), %bb.2 %10(s32) = G_ZEXT %9(s1) %w0 = COPY %10(s32) RET_ReallyLR implicit %w0 +... 
+--- +name: legalize_phi_ptr +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: _, preferred-register: '' } + - { id: 3, class: _, preferred-register: '' } + - { id: 4, class: _, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } +liveins: +body: | + bb.1: + ; CHECK-LABEL: name: legalize_phi_ptr + ; CHECK-LABEL: bb.0: + ; CHECK: [[A:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[B:%[0-9]+]]:_(p0) = COPY %x1 + ; CHECK: [[CE:%[0-9]+]]:_(s32) = COPY %w2 + ; CHECK: [[C:%[0-9]+]]:_(s1) = G_TRUNC [[CE]] + + ; CHECK-LABEL: bb.1: + ; CHECK-LABEL: bb.2: + ; CHECK: %3:_(p0) = G_PHI [[A]](p0), %bb.0, [[B]](p0), %bb.1 + ; CHECK: %x0 = COPY %3(p0) + successors: %bb.2, %bb.3 + liveins: %w2, %x0, %x1 + + %0(p0) = COPY %x0 + %1(p0) = COPY %x1 + %4(s32) = COPY %w2 + %2(s1) = G_TRUNC %4(s32) + G_BRCOND %2(s1), %bb.2 + G_BR %bb.3 + + bb.2: + successors: %bb.3 + + bb.3: + %3(p0) = G_PHI %0(p0), %bb.1, %1(p0), %bb.2 + %x0 = COPY %3(p0) + RET_ReallyLR implicit %x0 + ... --- name: legalize_phi_empty @@ -131,17 +184,17 @@ body: | ; Test that we properly legalize a phi with a predecessor that's empty ; CHECK-LABEL: name: legalize_phi_empty ; CHECK-LABEL: bb.0: - ; CHECK: [[ENTRY_ADD:%.*]](s32) = G_ADD + ; CHECK: [[ENTRY_ADD:%.*]]:_(s32) = G_ADD ; CHECK-LABEL: bb.1: - ; CHECK: [[ADD_BB1:%.*]](s32) = G_ADD - ; CHECK: [[RES_BB1:%.*]](s16) = G_TRUNC [[ADD_BB1]] + ; CHECK: [[ADD_BB1:%.*]]:_(s32) = G_ADD + ; CHECK: [[RES_BB1:%.*]]:_(s16) = G_TRUNC [[ADD_BB1]] ; CHECK-LABEL: bb.2: - ; CHECK: [[RES_BB2:%.*]](s16) = G_TRUNC [[ENTRY_ADD]] + ; CHECK: [[RES_BB2:%.*]]:_(s16) = G_TRUNC [[ENTRY_ADD]] - ; CHECK: [[RES_PHI:%.*]](s16) = G_PHI [[RES_BB1]](s16), %bb.1, [[RES_BB2]](s16), %bb.2 - ; CHECK: [[RES:%.*]](s1) = G_TRUNC [[RES_PHI]] + ; CHECK: [[RES_PHI:%.*]]:_(s16) = G_PHI [[RES_BB1]](s16), %bb.1, [[RES_BB2]](s16), %bb.2 + ; CHECK: [[RES:%.*]]:_(s1) = G_TRUNC [[RES_PHI]] %0(s32) = COPY %w0 %1(s32) = G_CONSTANT i32 0 @@ -196,13 +249,13 @@ body: | ; Test that we properly legalize a phi that uses a value from the same BB ; CHECK-LABEL: name: legalize_phi_loop ; CHECK-LABEL: bb.0: - ; CHECK: [[C0:%.*]](s32) = G_CONSTANT i32 0 - ; CHECK: [[RES_BB1:%.*]](s16) = G_TRUNC [[C0]] + ; CHECK: [[C0:%.*]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[RES_BB1:%.*]]:_(s16) = G_TRUNC [[C0]] ; CHECK-LABEL: bb.1: - ; CHECK: [[RES_PHI:%.*]](s16) = G_PHI [[RES_BB1]](s16), %bb.0, [[RES_BB2:%.*]](s16), %bb.1 + ; CHECK: [[RES_PHI:%.*]]:_(s16) = G_PHI [[RES_BB1]](s16), %bb.0, [[RES_BB2:%.*]](s16), %bb.1 ; CHECK-NEXT: G_ANYEXT [[RES_PHI]] - ; CHECK: [[RES_BB2]](s16) = G_ANYEXT + ; CHECK: [[RES_BB2]]:_(s16) = G_ANYEXT %0(s32) = COPY %w0 %2(s8) = G_CONSTANT i8 1 %7(s8) = G_CONSTANT i8 0 @@ -244,13 +297,13 @@ body: | ; Test that we properly legalize a phi that uses itself ; CHECK-LABEL: name: legalize_phi_cycle ; CHECK-LABEL: bb.0: - ; CHECK: [[C0:%.*]](s32) = G_CONSTANT i32 0 - ; CHECK: [[RES_BB1:%.*]](s16) = G_TRUNC [[C0]] + ; CHECK: [[C0:%.*]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[RES_BB1:%.*]]:_(s16) = G_TRUNC [[C0]] ; CHECK-LABEL: bb.1: - ; CHECK: [[RES_PHI:%.*]](s16) = G_PHI [[RES_BB1]](s16), %bb.0, [[RES_BB2:%.*]](s16), %bb.1 + ; CHECK: [[RES_PHI:%.*]]:_(s16) = G_PHI [[RES_BB1]](s16), %bb.0, [[RES_BB2:%.*]](s16), %bb.1 ; CHECK-NEXT: G_TRUNC - ; CHECK: [[RES_BB2]](s16) = COPY + ; CHECK: [[RES_BB2]]:_(s16) = COPY %0(s32) = COPY %w0 %4(s8) = 
G_CONSTANT i8 0 @@ -301,21 +354,21 @@ body: | ; correct location (ie make sure G_PHIs are the first insts in the BB). ; CHECK-LABEL: name: legalize_phi_same_bb ; CHECK-LABEL: bb.0: - ; CHECK: [[C42:%.*]](s32) = G_CONSTANT i32 42 - ; CHECK: [[ENTRY_ADD:%.*]](s32) = G_ADD + ; CHECK: [[C42:%.*]]:_(s32) = G_CONSTANT i32 42 + ; CHECK: [[ENTRY_ADD:%.*]]:_(s32) = G_ADD ; CHECK-LABEL: bb.1: - ; CHECK: [[BB1_ADD:%.*]](s32) = G_ADD - ; CHECK: [[RES1_BB1:%.*]](s16) = G_TRUNC [[BB1_ADD]] - ; CHECK: [[RES2_BB1:%.*]](s16) = G_TRUNC [[BB1_ADD]] + ; CHECK: [[BB1_ADD:%.*]]:_(s32) = G_ADD + ; CHECK: [[RES1_BB1:%.*]]:_(s16) = G_TRUNC [[BB1_ADD]] + ; CHECK: [[RES2_BB1:%.*]]:_(s16) = G_TRUNC [[BB1_ADD]] ; CHECK-LABEL: bb.2: - ; CHECK: [[RES1_BB2:%.*]](s16) = G_TRUNC [[ENTRY_ADD]] - ; CHECK: [[RES2_BB2:%.*]](s16) = G_TRUNC [[C42]] + ; CHECK: [[RES1_BB2:%.*]]:_(s16) = G_TRUNC [[ENTRY_ADD]] + ; CHECK: [[RES2_BB2:%.*]]:_(s16) = G_TRUNC [[C42]] ; CHECK-LABEL: bb.3: - ; CHECK: [[RES1_PHI:%.*]](s16) = G_PHI [[RES1_BB1]](s16), %bb.1, [[RES1_BB2]](s16), %bb.2 - ; CHECK-NEXT: [[RES_PHI:%.*]](s16) = G_PHI [[RES2_BB1]](s16), %bb.1, [[RES2_BB2]](s16), %bb.2 + ; CHECK: [[RES1_PHI:%.*]]:_(s16) = G_PHI [[RES1_BB1]](s16), %bb.1, [[RES1_BB2]](s16), %bb.2 + ; CHECK-NEXT: [[RES_PHI:%.*]]:_(s16) = G_PHI [[RES2_BB1]](s16), %bb.1, [[RES2_BB2]](s16), %bb.2 ; CHECK-NEXT: G_TRUNC ; CHECK-NEXT: G_TRUNC @@ -385,19 +438,19 @@ body: | ; in different BBs. ; CHECK-LABEL: name: legalize_phi_diff_bb ; CHECK-LABEL: bb.0: - ; CHECK: [[C44:%.*]](s32) = G_CONSTANT i32 44 - ; CHECK: [[C43:%.*]](s32) = G_CONSTANT i32 43 - ; CHECK: [[ENTRY_ADD:%.*]](s32) = G_ADD - ; CHECK: [[RES_ENTRY:%.*]](s16) = G_TRUNC [[ENTRY_ADD]] - ; CHECK: [[RES_ENTRY1:%.*]](s16) = G_TRUNC [[ENTRY_ADD]] + ; CHECK: [[C44:%.*]]:_(s32) = G_CONSTANT i32 44 + ; CHECK: [[C43:%.*]]:_(s32) = G_CONSTANT i32 43 + ; CHECK: [[ENTRY_ADD:%.*]]:_(s32) = G_ADD + ; CHECK: [[RES_ENTRY:%.*]]:_(s16) = G_TRUNC [[ENTRY_ADD]] + ; CHECK: [[RES_ENTRY1:%.*]]:_(s16) = G_TRUNC [[ENTRY_ADD]] ; CHECK-LABEL: bb.1: - ; CHECK: [[RES1_PHI:%.*]](s16) = G_PHI [[RES_ENTRY]](s16), %bb.0, [[RES_BB1:%.*]](s16), %bb.1 - ; CHECK: [[RES_BB1:%.*]](s16) = G_TRUNC - ; CHECK: [[RES_FOR_BB2:%.*]](s16) = COPY [[RES1_PHI]] + ; CHECK: [[RES1_PHI:%.*]]:_(s16) = G_PHI [[RES_ENTRY]](s16), %bb.0, [[RES_BB1:%.*]](s16), %bb.1 + ; CHECK: [[RES_BB1:%.*]]:_(s16) = G_TRUNC + ; CHECK: [[RES_FOR_BB2:%.*]]:_(s16) = COPY [[RES1_PHI]] ; CHECK-LABEL: bb.2: - ; CHECK: [[RES2_PHI:%.*]](s16) = G_PHI [[RES_FOR_BB2]](s16), %bb.1, [[RES_ENTRY1:%.*]](s16), %bb.0 + ; CHECK: [[RES2_PHI:%.*]]:_(s16) = G_PHI [[RES_FOR_BB2]](s16), %bb.1, [[RES_ENTRY1:%.*]](s16), %bb.0 ; CHECK-NEXT: G_TRUNC %0(s32) = COPY %w0 @@ -430,4 +483,3 @@ body: | RET_ReallyLR implicit %w0 ... - diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir b/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir index 2becc2e134b50..b3bfddccc56c1 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-pow.mir @@ -26,13 +26,13 @@ body: | ; CHECK: %d0 = COPY %0 ; CHECK: %d1 = COPY %1 ; CHECK: BL $pow, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit-def %d0 - ; CHECK: %4(s64) = COPY %d0 + ; CHECK: %4:_(s64) = COPY %d0 %4:_(s64) = G_FPOW %0, %1 ; CHECK: %s0 = COPY %2 ; CHECK: %s1 = COPY %3 ; CHECK: BL $powf, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %s1, implicit-def %s0 - ; CHECK: %5(s32) = COPY %s0 + ; CHECK: %5:_(s32) = COPY %s0 %5:_(s32) = G_FPOW %2, %3 ... 
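
The legalize-pow.mir checks show the libcall story: AArch64 has no instruction for G_FPOW, so the legalizer moves the operands into d0/d1 (or s0/s1 for float), emits BL $pow or BL $powf, and copies the result back out of the return register. Behaviorally that is just a libm call, as this hedged snippet illustrates (the tolerant comparisons avoid assuming an exactly-rounded libm):

#include <cassert>
#include <cmath>

int main() {
  double d = std::pow(2.0, 3.0);   // what the lowered BL $pow computes
  float f = std::pow(2.0f, 3.0f);  // what the lowered BL $powf computes
  assert(d > 7.99 && d < 8.01);
  assert(f > 7.99f && f < 8.01f);
  return 0;
}
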
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir b/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir index ebc1cc270c528..a2bfa81d1b3c6 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-rem.mir @@ -32,9 +32,9 @@ body: | liveins: %x0, %x1, %x2, %x3 ; CHECK-LABEL: name: test_urem_64 - ; CHECK: [[QUOT:%[0-9]+]](s64) = G_UDIV %0, %1 - ; CHECK: [[PROD:%[0-9]+]](s64) = G_MUL [[QUOT]], %1 - ; CHECK: [[RES:%[0-9]+]](s64) = G_SUB %0, [[PROD]] + ; CHECK: [[QUOT:%[0-9]+]]:_(s64) = G_UDIV %0, %1 + ; CHECK: [[PROD:%[0-9]+]]:_(s64) = G_MUL [[QUOT]], %1 + ; CHECK: [[RES:%[0-9]+]]:_(s64) = G_SUB %0, [[PROD]] %0(s64) = COPY %x0 %1(s64) = COPY %x1 %2(s64) = G_UREM %0, %1 @@ -53,11 +53,11 @@ body: | bb.0.entry: liveins: %x0, %x1, %x2, %x3 ; CHECK-LABEL: name: test_srem_32 - ; CHECK: [[T1:%.*]](s32) = G_TRUNC %0(s64) - ; CHECK: [[T2:%.*]](s32) = G_TRUNC %1(s64) - ; CHECK: [[DIV:%.*]](s32) = G_SDIV [[T1]], [[T2]] - ; CHECK: [[MUL:%.*]](s32) = G_MUL [[DIV]], [[T2]] - ; CHECK: [[RES:%.*]](s32) = G_SUB [[T1]], [[MUL]] + ; CHECK: [[T1:%.*]]:_(s32) = G_TRUNC %0(s64) + ; CHECK: [[T2:%.*]]:_(s32) = G_TRUNC %1(s64) + ; CHECK: [[DIV:%.*]]:_(s32) = G_SDIV [[T1]], [[T2]] + ; CHECK: [[MUL:%.*]]:_(s32) = G_MUL [[DIV]], [[T2]] + ; CHECK: [[RES:%.*]]:_(s32) = G_SUB [[T1]], [[MUL]] %0(s64) = COPY %x0 %1(s64) = COPY %x1 @@ -79,22 +79,22 @@ body: | liveins: %x0, %x1, %x2, %x3 ; CHECK-LABEL: name: test_srem_8 - ; CHECK: [[C1:%.*]](s32) = G_CONSTANT i32 24 - ; CHECK: [[SRC1:%.*]](s32) = G_TRUNC %0(s64) - ; CHECK: [[SHL1:%.*]](s32) = G_SHL [[SRC1]], [[C1]] - ; CHECK: [[LHS_SEXT:%.*]](s32) = G_ASHR [[SHL1]], [[C1]] - ; CHECK: [[C2:%.*]](s32) = G_CONSTANT i32 24 - ; CHECK: [[SRC2:%.*]](s32) = G_TRUNC %1(s64) - ; CHECK: [[SHL2:%.*]](s32) = G_SHL [[SRC2]], [[C2]] - ; CHECK: [[RHS_SEXT:%.*]](s32) = G_ASHR [[SHL2]], [[C2]] - ; CHECK: [[SDIV:%.*]](s32) = G_SDIV [[LHS_SEXT]], [[RHS_SEXT]] - ; CHECK: [[A:%.*]](s32) = COPY [[SDIV]] - ; CHECK: [[SRC3:%.*]](s32) = G_TRUNC %1(s64) - ; CHECK: [[MUL:%.*]](s32) = G_MUL [[A]], [[SRC3]] - ; CHECK: [[SRC4:%.*]](s32) = G_TRUNC %0(s64) - ; CHECK: [[SRC5:%.*]](s32) = COPY [[MUL]] - ; CHECK: [[SUB:%.*]](s32) = G_SUB [[SRC4]], [[SRC5]] - ; CHECK: [[RES:%.*]](s8) = G_TRUNC [[SUB]] + ; CHECK: [[C1:%.*]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SRC1:%.*]]:_(s32) = G_TRUNC %0(s64) + ; CHECK: [[SHL1:%.*]]:_(s32) = G_SHL [[SRC1]], [[C1]] + ; CHECK: [[LHS_SEXT:%.*]]:_(s32) = G_ASHR [[SHL1]], [[C1]] + ; CHECK: [[C2:%.*]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SRC2:%.*]]:_(s32) = G_TRUNC %1(s64) + ; CHECK: [[SHL2:%.*]]:_(s32) = G_SHL [[SRC2]], [[C2]] + ; CHECK: [[RHS_SEXT:%.*]]:_(s32) = G_ASHR [[SHL2]], [[C2]] + ; CHECK: [[SDIV:%.*]]:_(s32) = G_SDIV [[LHS_SEXT]], [[RHS_SEXT]] + ; CHECK: [[A:%.*]]:_(s32) = COPY [[SDIV]] + ; CHECK: [[SRC3:%.*]]:_(s32) = G_TRUNC %1(s64) + ; CHECK: [[MUL:%.*]]:_(s32) = G_MUL [[A]], [[SRC3]] + ; CHECK: [[SRC4:%.*]]:_(s32) = G_TRUNC %0(s64) + ; CHECK: [[SRC5:%.*]]:_(s32) = COPY [[MUL]] + ; CHECK: [[SUB:%.*]]:_(s32) = G_SUB [[SRC4]], [[SRC5]] + ; CHECK: [[RES:%.*]]:_(s8) = G_TRUNC [[SUB]] %0(s64) = COPY %x0 %1(s64) = COPY %x1 @@ -119,7 +119,7 @@ body: | ; CHECK: %d0 = COPY %0 ; CHECK: %d1 = COPY %1 ; CHECK: BL $fmod, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit-def %d0 - ; CHECK: [[RES:%.*]](s64) = COPY %d0 + ; CHECK: [[RES:%.*]]:_(s64) = COPY %d0 %0(s64) = COPY %x0 %1(s64) = COPY %x1 %2(s64) = G_FREM %0, %1 @@ -127,7 +127,7 @@ body: | ; CHECK: %s0 = COPY %3 ; CHECK: %s1 = 
COPY %4 ; CHECK: BL $fmodf, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %s1, implicit-def %s0 - ; CHECK: [[RES:%.*]](s32) = COPY %s0 + ; CHECK: [[RES:%.*]]:_(s32) = COPY %s0 %3(s32) = G_TRUNC %0 %4(s32) = G_TRUNC %1 %5(s32) = G_FREM %3, %4 diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir b/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir index b15983471e618..7f8f10b2b27ec 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-shift.mir @@ -27,32 +27,32 @@ body: | %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 - ; CHECK: [[C1:%.*]](s32) = G_CONSTANT i32 24 - ; CHECK: [[SRC:%.*]](s32) = G_TRUNC %0(s64) - ; CHECK: [[SHL1:%.*]](s32) = G_SHL [[SRC]], [[C1]] - ; CHECK: [[SEXT1:%.*]](s32) = G_ASHR [[SHL1]], [[C1]] - ; CHECK: [[C2:%.*]](s32) = G_CONSTANT i32 24 - ; CHECK: [[SRC2:%.*]](s32) = G_TRUNC %1(s64) - ; CHECK: [[SHL2:%.*]](s32) = G_SHL [[SRC2]], [[C2]] - ; CHECK: [[SEXT2:%.*]](s32) = G_ASHR [[SHL2]], [[C2]] - ; CHECK: [[RES32:%[0-9]+]](s32) = G_ASHR [[SEXT1]], [[SEXT2]] - ; CHECK: %4(s8) = G_TRUNC [[RES32]] + ; CHECK: [[C1:%.*]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SRC:%.*]]:_(s32) = G_TRUNC %0(s64) + ; CHECK: [[SHL1:%.*]]:_(s32) = G_SHL [[SRC]], [[C1]] + ; CHECK: [[SEXT1:%.*]]:_(s32) = G_ASHR [[SHL1]], [[C1]] + ; CHECK: [[C2:%.*]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SRC2:%.*]]:_(s32) = G_TRUNC %1(s64) + ; CHECK: [[SHL2:%.*]]:_(s32) = G_SHL [[SRC2]], [[C2]] + ; CHECK: [[SEXT2:%.*]]:_(s32) = G_ASHR [[SHL2]], [[C2]] + ; CHECK: [[RES32:%[0-9]+]]:_(s32) = G_ASHR [[SEXT1]], [[SEXT2]] + ; CHECK: %4:_(s8) = G_TRUNC [[RES32]] %4(s8) = G_ASHR %2, %3 - ; CHECK: [[C1:%.*]](s32) = G_CONSTANT i32 255 - ; CHECK: [[SRC:%.*]](s32) = G_TRUNC %0(s64) - ; CHECK: [[ZEXT:%.*]](s32) = G_AND [[SRC]], [[C1]] - ; CHECK: [[C2:%.*]](s32) = G_CONSTANT i32 255 - ; CHECK: [[SRC2:%.*]](s32) = G_TRUNC %1(s64) - ; CHECK: [[ZEXT2:%.*]](s32) = G_AND [[SRC2]], [[C2]] - ; CHECK: [[RES32:%[0-9]+]](s32) = G_LSHR [[ZEXT]], [[ZEXT2]] - ; CHECK: %5(s8) = G_TRUNC [[RES32]] + ; CHECK: [[C1:%.*]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[SRC:%.*]]:_(s32) = G_TRUNC %0(s64) + ; CHECK: [[ZEXT:%.*]]:_(s32) = G_AND [[SRC]], [[C1]] + ; CHECK: [[C2:%.*]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[SRC2:%.*]]:_(s32) = G_TRUNC %1(s64) + ; CHECK: [[ZEXT2:%.*]]:_(s32) = G_AND [[SRC2]], [[C2]] + ; CHECK: [[RES32:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT]], [[ZEXT2]] + ; CHECK: %5:_(s8) = G_TRUNC [[RES32]] %5(s8) = G_LSHR %2, %3 - ; CHECK: [[OP0:%.*]](s32) = G_TRUNC %0 - ; CHECK: [[OP1:%.*]](s32) = G_TRUNC %1 - ; CHECK: [[RES32:%.*]](s32) = G_SHL [[OP0]], [[OP1]] - ; CHECK: [[RES:%.*]](s8) = G_TRUNC [[RES32]](s32) + ; CHECK: [[OP0:%.*]]:_(s32) = G_TRUNC %0 + ; CHECK: [[OP1:%.*]]:_(s32) = G_TRUNC %1 + ; CHECK: [[RES32:%.*]]:_(s32) = G_SHL [[OP0]], [[OP1]] + ; CHECK: [[RES:%.*]]:_(s8) = G_TRUNC [[RES32]](s32) %6(s8) = G_SHL %2, %3 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir b/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir index d2a02a3f65bc7..0392dcd5cb2d5 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-simple.mir @@ -9,6 +9,9 @@ next: ret void } + define void @bitcast128() { + ret void + } ... 
--- @@ -42,8 +45,8 @@ body: | %4(s32) = G_TRUNC %0 ; CHECK-LABEL: name: test_simple - ; CHECK: %5(p0) = G_INTTOPTR %0 - ; CHECK: %6(s64) = G_PTRTOINT %5 + ; CHECK: %5:_(p0) = G_INTTOPTR %0 + ; CHECK: %6:_(s64) = G_PTRTOINT %5 %5(p0) = G_INTTOPTR %0 %6(s64) = G_PTRTOINT %5 @@ -52,35 +55,58 @@ body: | bb.1.next: - ; CHECK: [[LHS:%[0-9]+]](s32) = G_TRUNC %0 - ; CHECK: [[RHS:%[0-9]+]](s32) = G_TRUNC %0 - ; CHECK: [[RES:%[0-9]+]](s32) = G_SELECT %1(s1), [[LHS]], [[RHS]] - ; CHECK: %7(s1) = G_TRUNC [[RES]](s32) + ; CHECK: [[LHS:%[0-9]+]]:_(s32) = G_TRUNC %0 + ; CHECK: [[RHS:%[0-9]+]]:_(s32) = G_TRUNC %0 + ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SELECT %1(s1), [[LHS]], [[RHS]] + ; CHECK: %7:_(s1) = G_TRUNC [[RES]](s32) %7(s1) = G_SELECT %1, %1, %1 - ; CHECK: [[LHS:%[0-9]+]](s32) = G_TRUNC %0 - ; CHECK: [[RHS:%[0-9]+]](s32) = G_TRUNC %0 - ; CHECK: [[RES:%[0-9]+]](s32) = G_SELECT %1(s1), [[LHS]], [[RHS]] - ; CHECK: %8(s8) = G_TRUNC [[RES]](s32) + ; CHECK: [[LHS:%[0-9]+]]:_(s32) = G_TRUNC %0 + ; CHECK: [[RHS:%[0-9]+]]:_(s32) = G_TRUNC %0 + ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SELECT %1(s1), [[LHS]], [[RHS]] + ; CHECK: %8:_(s8) = G_TRUNC [[RES]](s32) %8(s8) = G_SELECT %1, %2, %2 - ; CHECK: [[LHS:%[0-9]+]](s32) = G_TRUNC %0 - ; CHECK: [[RHS:%[0-9]+]](s32) = G_TRUNC %0 - ; CHECK: [[RES:%[0-9]+]](s32) = G_SELECT %1(s1), [[LHS]], [[RHS]] - ; CHECK: %9(s16) = G_TRUNC [[RES]](s32) + ; CHECK: [[LHS:%[0-9]+]]:_(s32) = G_TRUNC %0 + ; CHECK: [[RHS:%[0-9]+]]:_(s32) = G_TRUNC %0 + ; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SELECT %1(s1), [[LHS]], [[RHS]] + ; CHECK: %9:_(s16) = G_TRUNC [[RES]](s32) %9(s16) = G_SELECT %1, %3, %3 %10(s32) = G_SELECT %1, %4, %4 %11(s64) = G_SELECT %1, %0, %0 - ; CHECK: %12(<2 x s32>) = G_BITCAST %0 - ; CHECK: %13(s64) = G_BITCAST %12 - ; CHECK: %14(s32) = G_BITCAST %10 - ; CHECK: %15(<4 x s8>) = G_BITCAST %0 - ; CHECK: %16(<2 x s16>) = G_BITCAST %0 + ; CHECK: %12:_(<2 x s32>) = G_BITCAST %0 + ; CHECK: %13:_(s64) = G_BITCAST %12 + ; CHECK: %14:_(s32) = G_BITCAST %10 + ; CHECK: %15:_(<4 x s8>) = G_BITCAST %0 + ; CHECK: %16:_(<2 x s16>) = G_BITCAST %0 %12(<2 x s32>) = G_BITCAST %0 %13(s64) = G_BITCAST %12 %14(s32) = G_BITCAST %10 %15(<4 x s8>) = G_BITCAST %0 %16(<2 x s16>) = G_BITCAST %0 ... + +--- +name: bitcast128 +tracksRegLiveness: true +registers: + - { id: 0, class: _} + - { id: 1, class: _} + - { id: 2, class: _} + - { id: 3, class: _} +body: | + bb.1: + liveins: %x0, %x1 + ; CHECK-LABEL: bitcast128 + ; This is legal and shouldn't be changed. + ; CHECK: %2:_(<2 x s64>) = G_BITCAST %3(s128) + %0(s64) = COPY %x0 + %1(s64) = COPY %x1 + %3(s128) = G_MERGE_VALUES %0(s64), %1(s64) + %2(<2 x s64>) = G_BITCAST %3(s128) + %q0 = COPY %2(<2 x s64>) + RET_ReallyLR implicit %q0 + +... 
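
The new bitcast128 test, together with test_big_scalar_power_of_2 added to legalize-or.mir further up, leans on the same representation: an s128 is carried as two s64 halves joined by G_MERGE_VALUES, and operations on it legalize half by half. A struct of two 64-bit fields is a fair C++ stand-in (the type and helper here are illustrative, not LLVM types):

#include <cassert>
#include <cstdint>

struct S128 {
  uint64_t lo, hi;  // the two s64 halves of a G_MERGE_VALUES
};

// The s128 G_OR legalizes into one 64-bit G_OR per half.
static S128 or128(S128 a, S128 b) {
  return {a.lo | b.lo, a.hi | b.hi};
}

int main() {
  S128 r = or128({0xF0F0, 0x1}, {0x0F0F, 0x2});
  assert(r.lo == 0xFFFF && r.hi == 0x3);
  return 0;
}
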
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir b/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir index 8743c3143b72f..b1cf197e1e858 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-sub.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -21,17 +22,21 @@ registers: body: | bb.0.entry: liveins: %x0, %x1, %x2, %x3 - ; CHECK-LABEL: name: test_scalar_sub_small - ; CHECK: [[OP0:%.*]](s32) = G_TRUNC %0 - ; CHECK: [[OP1:%.*]](s32) = G_TRUNC %1 - ; CHECK: [[RES32:%.*]](s32) = G_SUB [[OP0]], [[OP1]] - ; CHECK: [[RES:%.*]](s8) = G_TRUNC [[RES32]](s32) + ; CHECK-LABEL: name: test_scalar_sub_small + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[TRUNC1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[SUB]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s8) + ; CHECK: %x0 = COPY [[ANYEXT]](s64) %0(s64) = COPY %x0 %1(s64) = COPY %x1 %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 %4(s8) = G_SUB %2, %3 - %5(s64) = G_ANYEXT %2 + %5(s64) = G_ANYEXT %4 %x0 = COPY %5 ... diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir b/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir index e7cf59b3394e3..9b59104eb3649 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-undef.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-linux-gnu -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- @@ -6,10 +7,10 @@ registers: body: | bb.0.entry: liveins: - ; CHECK-LABEL: name: test_implicit_def - ; CHECK: [[LO:%[0-9]+]](s64) = G_IMPLICIT_DEF - ; CHECK: [[HI:%[0-9]+]](s64) = G_IMPLICIT_DEF - ; CHECK: %0(s128) = G_MERGE_VALUES [[LO]](s64), [[HI]](s64) + ; CHECK-LABEL: name: test_implicit_def + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[DEF]](s64), [[DEF1]](s64) %0:_(s128) = G_IMPLICIT_DEF ... 
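The legalize-sub hunk above does two things: the checks are regenerated in the new %N:class(type) syntax, and a latent bug in the test input is fixed — the final G_ANYEXT now extends %4 (the G_SUB result) rather than %2 (an input), so the whole trunc/sub/trunc/anyext chain stays live. (The legalize-undef hunk is a pure regeneration: the s128 implicit def is still narrowed to two s64 defs joined by G_MERGE_VALUES.) The widening itself follows the same recipe as the shifts; a small C++ sketch of what the regenerated checks describe:

#include <cstdint>

// s8 G_SUB widened to s32, as in the autogenerated checks: truncate the
// s64 inputs to s32, subtract, truncate to s8, then extend back to s64
// for the return register.
int64_t sub_s8(int64_t x0, int64_t x1) {
  int32_t a = (int32_t)x0;       // G_TRUNC s64 -> s32
  int32_t b = (int32_t)x1;
  int32_t d = a - b;             // s32 G_SUB
  int8_t  r = (int8_t)d;         // G_TRUNC s32 -> s8
  return (int64_t)r;             // G_ANYEXT s8 -> s64 (the high bits are
                                 // unspecified; sign-extension is one
                                 // valid choice)
}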
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir b/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir index 8bda08d0a1d12..30e81ad32288f 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-vaarg.mir @@ -13,27 +13,27 @@ body: | %0:_(p0) = COPY %x0 ; CHECK-LABEL: name: test_vaarg - ; CHECK: [[LIST:%[0-9]+]](p0) = G_LOAD %0(p0) :: (load 8) - ; CHECK: %1(s8) = G_LOAD [[LIST]](p0) :: (load 1, align 8) - ; CHECK: [[SLOTSIZE:%[0-9]+]](s64) = G_CONSTANT i64 8 - ; CHECK: [[NEXT:%[0-9]+]](p0) = G_GEP [[LIST]], [[SLOTSIZE]](s64) + ; CHECK: [[LIST:%[0-9]+]]:_(p0) = G_LOAD %0(p0) :: (load 8) + ; CHECK: %1:_(s8) = G_LOAD [[LIST]](p0) :: (load 1, align 8) + ; CHECK: [[SLOTSIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[NEXT:%[0-9]+]]:_(p0) = G_GEP [[LIST]], [[SLOTSIZE]](s64) ; CHECK: G_STORE [[NEXT]](p0), %0(p0) :: (store 8) %1:_(s8) = G_VAARG %0(p0), 1 - ; CHECK: [[LIST:%[0-9]+]](p0) = G_LOAD %0(p0) :: (load 8) - ; CHECK: %2(s64) = G_LOAD [[LIST]](p0) :: (load 8) - ; CHECK: [[SLOTSIZE:%[0-9]+]](s64) = G_CONSTANT i64 8 - ; CHECK: [[NEXT:%[0-9]+]](p0) = G_GEP [[LIST]], [[SLOTSIZE]](s64) + ; CHECK: [[LIST:%[0-9]+]]:_(p0) = G_LOAD %0(p0) :: (load 8) + ; CHECK: %2:_(s64) = G_LOAD [[LIST]](p0) :: (load 8) + ; CHECK: [[SLOTSIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[NEXT:%[0-9]+]]:_(p0) = G_GEP [[LIST]], [[SLOTSIZE]](s64) ; CHECK: G_STORE [[NEXT]](p0), %0(p0) :: (store 8) %2:_(s64) = G_VAARG %0(p0), 8 - ; CHECK: [[LIST:%[0-9]+]](p0) = G_LOAD %0(p0) :: (load 8) - ; CHECK: [[ALIGNM1:%[0-9]+]](s64) = G_CONSTANT i64 15 - ; CHECK: [[ALIGNTMP:%[0-9]+]](p0) = G_GEP [[LIST]], [[ALIGNM1]](s64) - ; CHECK: [[LIST:%[0-9]+]](p0) = G_PTR_MASK [[ALIGNTMP]], 4 - ; CHECK: %3(s64) = G_LOAD [[LIST]](p0) :: (load 8, align 16) - ; CHECK: [[SLOTSIZE:%[0-9]+]](s64) = G_CONSTANT i64 8 - ; CHECK: [[NEXT:%[0-9]+]](p0) = G_GEP [[LIST]], [[SLOTSIZE]](s64) + ; CHECK: [[LIST:%[0-9]+]]:_(p0) = G_LOAD %0(p0) :: (load 8) + ; CHECK: [[ALIGNM1:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; CHECK: [[ALIGNTMP:%[0-9]+]]:_(p0) = G_GEP [[LIST]], [[ALIGNM1]](s64) + ; CHECK: [[LIST:%[0-9]+]]:_(p0) = G_PTR_MASK [[ALIGNTMP]], 4 + ; CHECK: %3:_(s64) = G_LOAD [[LIST]](p0) :: (load 8, align 16) + ; CHECK: [[SLOTSIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK: [[NEXT:%[0-9]+]]:_(p0) = G_GEP [[LIST]], [[SLOTSIZE]](s64) ; CHECK: G_STORE [[NEXT]](p0), %0(p0) :: (store 8) %3:_(s64) = G_VAARG %0(p0), 16 ... 
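The legalize-vaarg checks above spell out the G_VAARG expansion: load the current va_list pointer, align it up when the requested alignment exceeds the 8-byte slot (the GEP by align-minus-one followed by G_PTR_MASK clearing the low bits), load the value, then advance the pointer by one slot and store it back. A self-contained C++ sketch of that pointer-bumping protocol (the helper name is hypothetical; this is not the AArch64 lowering code itself):

#include <cstdint>
#include <cstring>

// One G_VAARG step: *list is the current argument pointer; returns the
// value and leaves *list pointing at the next 8-byte slot.
template <typename T>
T vaarg_slot(char **list, uintptr_t align) {
  char *p = *list;                              // G_LOAD of the va_list
  if (align > 8) {                              // only for over-aligned types
    uintptr_t v = (uintptr_t)p + (align - 1);   // G_GEP by align-1
    p = (char *)(v & ~(align - 1));             // G_PTR_MASK: clear low bits
  }
  T val;
  std::memcpy(&val, p, sizeof(T));              // G_LOAD of the value itself
  *list = p + 8;                                // G_GEP slot size, G_STORE
  return val;
}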
diff --git a/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir b/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir index 7d3ab5e80bc9f..9f4a6c7880685 100644 --- a/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir +++ b/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -21,17 +22,21 @@ registers: body: | bb.0.entry: liveins: %x0, %x1, %x2, %x3 - ; CHECK-LABEL: name: test_scalar_xor_small - ; CHECK: [[OP0:%.*]](s32) = G_TRUNC %0 - ; CHECK: [[OP1:%.*]](s32) = G_TRUNC %1 - ; CHECK: [[RES32:%.*]](s32) = G_XOR [[OP0]], [[OP1]] - ; CHECK: [[RES:%.*]](s8) = G_TRUNC [[RES32]](s32) + ; CHECK-LABEL: name: test_scalar_xor_small + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %x1 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s8) + ; CHECK: %x0 = COPY [[ANYEXT]](s64) %0(s64) = COPY %x0 %1(s64) = COPY %x1 %2(s8) = G_TRUNC %0 %3(s8) = G_TRUNC %1 %4(s8) = G_XOR %2, %3 - %5(s64) = G_ANYEXT %2 + %5(s64) = G_ANYEXT %4 %x0 = COPY %5 ... diff --git a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir index 28c926b5d062d..997205bc0ef65 100644 --- a/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir +++ b/test/CodeGen/AArch64/GlobalISel/localizer-in-O0-pipeline.mir @@ -9,16 +9,16 @@ --- | target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64-apple-ios" - + define float @foo(float %arg, i1 %cond) { br i1 %cond, label %true, label %false - + true: ; preds = %0 br label %end - + false: ; preds = %0 br label %end - + end: ; preds = %false, %true %val = phi float [ 1.000000e+00, %true ], [ 2.000000e+00, %false ] %res = fadd float %arg, %val @@ -41,52 +41,55 @@ registers: # CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } # CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } # CHECK-NEXT: - { id: 5, class: fpr, preferred-register: '' } +# CHECK-NEXT: - { id: 6, class: gpr, preferred-register: '' } # The localizer will create two new values to materialize the constants. -# OPTNONE-NEXT: - { id: 6, class: fpr, preferred-register: '' } # OPTNONE-NEXT: - { id: 7, class: fpr, preferred-register: '' } +# OPTNONE-NEXT: - { id: 8, class: fpr, preferred-register: '' } - { id: 0, class: fpr } - { id: 1, class: gpr } - { id: 2, class: fpr } - { id: 3, class: fpr } - { id: 4, class: fpr } - { id: 5, class: fpr } + - { id: 6, class: gpr } # First block remains untouched # CHECK: body -# CHECK: %4(s32) = G_FCONSTANT float 1.000000e+00 -# CHECK: %5(s32) = G_FCONSTANT float 2.000000e+00 +# CHECK: %4:fpr(s32) = G_FCONSTANT float 1.000000e+00 +# CHECK: %5:fpr(s32) = G_FCONSTANT float 2.000000e+00 # Second block will get the constant 1.0 when the localizer is enabled. # CHECK: bb.1.true: # OPT-NOT: G_FCONSTANT -# OPTNONE: [[FONE:%[0-9]+]](s32) = G_FCONSTANT float 1.000000e+00 +# OPTNONE: [[FONE:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 1.000000e+00 # CHECK: G_BR %bb.3.end # Third block will get the constant 2.0 when the localizer is enabled.
# CHECK: bb.2.false: # OPT-NOT: G_FCONSTANT -# OPTNONE: [[FTWO:%[0-9]+]](s32) = G_FCONSTANT float 2.000000e+00 +# OPTNONE: [[FTWO:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 2.000000e+00 # CHECK: bb.3.end -# OPTNONE: %2(s32) = PHI [[FONE]](s32), %bb.1.true, [[FTWO]](s32), %bb.2.false -# OPT: %2(s32) = PHI %4(s32), %bb.1.true, %5(s32), %bb.2.false +# OPTNONE: %2:fpr(s32) = PHI [[FONE]](s32), %bb.1.true, [[FTWO]](s32), %bb.2.false +# OPT: %2:fpr(s32) = PHI %4(s32), %bb.1.true, %5(s32), %bb.2.false # CHECK-NEXT: G_FADD %0, %2 body: | bb.0 (%ir-block.0): liveins: %s0, %w0 %0(s32) = COPY %s0 - %1(s1) = COPY %w0 + %6(s32) = COPY %w0 + %1(s1) = G_TRUNC %6 %4(s32) = G_FCONSTANT float 1.000000e+00 %5(s32) = G_FCONSTANT float 2.000000e+00 G_BRCOND %1(s1), %bb.1.true G_BR %bb.2.false - + bb.1.true: G_BR %bb.3.end - + bb.2.false: - + bb.3.end: %2(s32) = PHI %4(s32), %bb.1.true, %5(s32), %bb.2.false %3(s32) = G_FADD %0, %2 diff --git a/test/CodeGen/AArch64/GlobalISel/localizer.mir b/test/CodeGen/AArch64/GlobalISel/localizer.mir index 6a009520e1a8c..5de006a7d3faa 100644 --- a/test/CodeGen/AArch64/GlobalISel/localizer.mir +++ b/test/CodeGen/AArch64/GlobalISel/localizer.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=localizer -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefix=CHECK # Test the localizer. @@ -17,393 +18,286 @@ ... --- -# CHECK-LABEL: name: local_use name: local_use legalized: true regBankSelected: true - -# CHECK: registers: -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - -# CHECK: body: -# CHECK: %0(s32) = G_CONSTANT 1 -# CHECK-NEXT: %1(s32) = G_ADD %0, %0 body: | bb.0: - %0(s32) = G_CONSTANT 1 - %1(s32) = G_ADD %0, %0 + ; CHECK-LABEL: name: local_use + ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT 1 + ; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]] + %0:gpr(s32) = G_CONSTANT 1 + %1:gpr(s32) = G_ADD %0, %0 ... --- -# CHECK-LABEL: name: non_local_1use name: non_local_1use legalized: true regBankSelected: true - -# CHECK: registers: -# Existing registers should be left untouched -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# The newly created reg should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } - -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - -# CHECK: body: -# CHECK: %0(s32) = G_CONSTANT 1 -# CHECK-NEXT: %1(s32) = G_ADD %0, %0 - -# CHECK: bb.1: -# CHECK: %3(s32) = G_CONSTANT 1 -# CHECK-NEXT: %2(s32) = G_ADD %3, %1 body: | + ; CHECK-LABEL: name: non_local_1use + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT 1 + ; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]] + ; CHECK: bb.1: + ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT 1 + ; CHECK: [[ADD1:%[0-9]+]]:gpr(s32) = G_ADD [[C1]], [[ADD]] + + ; Existing registers should be left untouched + ; The newly created reg should be on the same regbank/regclass as its origin. + bb.0: successors: %bb.1 - %0(s32) = G_CONSTANT 1 - %1(s32) = G_ADD %0, %0 + %0:gpr(s32) = G_CONSTANT 1 + %1:gpr(s32) = G_ADD %0, %0 bb.1: - %2(s32) = G_ADD %0, %1 + %2:gpr(s32) = G_ADD %0, %1 ... 
- --- -# CHECK-LABEL: name: non_local_2uses name: non_local_2uses legalized: true regBankSelected: true - -# CHECK: registers: -# Existing registers should be left untouched -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# The newly created reg should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } - -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - -# CHECK: body: -# CHECK: %0(s32) = G_CONSTANT 1 -# CHECK-NEXT: %1(s32) = G_ADD %0, %0 - -# CHECK: bb.1: -# CHECK: %3(s32) = G_CONSTANT 1 -# CHECK-NEXT: %2(s32) = G_ADD %3, %3 body: | + ; CHECK-LABEL: name: non_local_2uses + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT 1 + ; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]] + ; CHECK: bb.1: + ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT 1 + ; CHECK: [[ADD1:%[0-9]+]]:gpr(s32) = G_ADD [[C1]], [[C1]] + + ; Existing registers should be left untouched + ; The newly created reg should be on the same regbank/regclass as its origin. + bb.0: successors: %bb.1 - %0(s32) = G_CONSTANT 1 - %1(s32) = G_ADD %0, %0 + %0:gpr(s32) = G_CONSTANT 1 + %1:gpr(s32) = G_ADD %0, %0 bb.1: - %2(s32) = G_ADD %0, %0 + %2:gpr(s32) = G_ADD %0, %0 ... --- -# CHECK-LABEL: name: non_local_phi_use name: non_local_phi_use legalized: true regBankSelected: true tracksRegLiveness: true - -# CHECK: registers: -# Existing registers should be left untouched -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } -# The newly created reg should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } - -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } - -# CHECK: body: -# CHECK: %0(s32) = G_CONSTANT 1 -# CHECK-NEXT: %1(s32) = G_ADD %0, %0 - -# CHECK: bb.1: -# CHECK: %5(s32) = G_CONSTANT 1 - -# CHECK: bb.2: -# CHECK: %3(s32) = PHI %5(s32), %bb.1 body: | + ; CHECK-LABEL: name: non_local_phi_use + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT 1 + ; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]] + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT 1 + ; CHECK: bb.2: + ; CHECK: [[PHI:%[0-9]+]]:gpr(s32) = PHI [[C1]](s32), %bb.1 + ; CHECK: [[ADD1:%[0-9]+]]:gpr(s32) = G_ADD [[PHI]], [[PHI]] + + ; Existing registers should be left untouched + ; The newly created reg should be on the same regbank/regclass as its origin. + bb.0: successors: %bb.1 - %0(s32) = G_CONSTANT 1 - %1(s32) = G_ADD %0, %0 + %0:gpr(s32) = G_CONSTANT 1 + %1:gpr(s32) = G_ADD %0, %0 bb.1: successors: %bb.2 bb.2: - %3(s32) = PHI %0(s32), %bb.1 - %2(s32) = G_ADD %3, %3 + %3:gpr(s32) = PHI %0(s32), %bb.1 + %2:gpr(s32) = G_ADD %3, %3 ... 
--- -# CHECK-LABEL: name: non_local_phi_use_followed_by_use name: non_local_phi_use_followed_by_use legalized: true regBankSelected: true tracksRegLiveness: true - -# CHECK: registers: -# Existing registers should be left untouched -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } -# The newly created regs should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 6, class: gpr, preferred-register: '' } - -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } - -# CHECK: body: -# CHECK: %0(s32) = G_CONSTANT 1 -# CHECK-NEXT: %1(s32) = G_ADD %0, %0 - -# CHECK: bb.1: -# CHECK: %5(s32) = G_CONSTANT 1 - -# CHECK: bb.2: -# CHECK: %3(s32) = PHI %5(s32), %bb.1 -# CHECK-NEXT: %6(s32) = G_CONSTANT 1 -# CHECK-NEXT: %2(s32) = G_ADD %3, %6 body: | + ; CHECK-LABEL: name: non_local_phi_use_followed_by_use + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT 1 + ; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[C]], [[C]] + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT 1 + ; CHECK: bb.2: + ; CHECK: [[PHI:%[0-9]+]]:gpr(s32) = PHI [[C1]](s32), %bb.1 + ; CHECK: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT 1 + ; CHECK: [[ADD1:%[0-9]+]]:gpr(s32) = G_ADD [[PHI]], [[C2]] + + ; Existing registers should be left untouched + ; The newly created reg should be on the same regbank/regclass as its origin. + bb.0: successors: %bb.1 - %0(s32) = G_CONSTANT 1 - %1(s32) = G_ADD %0, %0 + %0:gpr(s32) = G_CONSTANT 1 + %1:gpr(s32) = G_ADD %0, %0 bb.1: successors: %bb.2 bb.2: - %3(s32) = PHI %0(s32), %bb.1 - %2(s32) = G_ADD %3, %0 + %3:gpr(s32) = PHI %0(s32), %bb.1 + %2:gpr(s32) = G_ADD %3, %0 ... --- -# CHECK-LABEL: name: non_local_phi_use_followed_by_use_fi name: non_local_phi_use_followed_by_use_fi legalized: true regBankSelected: true tracksRegLiveness: true - -# CHECK: registers: -# Existing registers should be left untouched -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } -# The newly created reg should be on the same regbank/regclass as its origin. 
-#CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } -#CHECK-NEXT: - { id: 6, class: gpr, preferred-register: '' } - -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } - -# CHECK: body: -# CHECK: %0(s32) = G_FRAME_INDEX 1 -# CHECK-NEXT: %1(s32) = G_ADD %0, %0 - -# CHECK: bb.1: -# CHECK: %5(s32) = G_FRAME_INDEX 1 - -# CHECK: bb.2: -# CHECK: %3(s32) = PHI %5(s32), %bb.1 -# CHECK-NEXT: %6(s32) = G_FRAME_INDEX 1 -# CHECK-NEXT: %2(s32) = G_ADD %3, %6 body: | + ; CHECK-LABEL: name: non_local_phi_use_followed_by_use_fi + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:gpr(s32) = G_FRAME_INDEX 1 + ; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[FRAME_INDEX]], [[FRAME_INDEX]] + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:gpr(s32) = G_FRAME_INDEX 1 + ; CHECK: bb.2: + ; CHECK: [[PHI:%[0-9]+]]:gpr(s32) = PHI [[FRAME_INDEX1]](s32), %bb.1 + ; CHECK: [[FRAME_INDEX2:%[0-9]+]]:gpr(s32) = G_FRAME_INDEX 1 + ; CHECK: [[ADD1:%[0-9]+]]:gpr(s32) = G_ADD [[PHI]], [[FRAME_INDEX2]] + + ; Existing registers should be left untouched + ; The newly created reg should be on the same regbank/regclass as its origin. + bb.0: successors: %bb.1 - %0(s32) = G_FRAME_INDEX 1 - %1(s32) = G_ADD %0, %0 + %0:gpr(s32) = G_FRAME_INDEX 1 + %1:gpr(s32) = G_ADD %0, %0 bb.1: successors: %bb.2 bb.2: - %3(s32) = PHI %0(s32), %bb.1 - %2(s32) = G_ADD %3, %0 + %3:gpr(s32) = PHI %0(s32), %bb.1 + %2:gpr(s32) = G_ADD %3, %0 ... --- -# CHECK-LABEL: name: float_non_local_phi_use_followed_by_use_fi name: float_non_local_phi_use_followed_by_use_fi legalized: true regBankSelected: true tracksRegLiveness: true - -# CHECK: registers: -# Existing registers should be left untouched -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -#CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } -#CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } -#CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } -#CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } -# The newly created reg should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 5, class: fpr, preferred-register: '' } -#CHECK-NEXT: - { id: 6, class: fpr, preferred-register: '' } - -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: fpr } - - { id: 3, class: fpr } - - { id: 4, class: fpr } - -# CHECK: body: -# CHECK: %0(s32) = G_FCONSTANT float 1.0 -# CHECK-NEXT: %1(s32) = G_FADD %0, %0 - -# CHECK: bb.1: -# CHECK: %5(s32) = G_FCONSTANT float 1.0 - -# CHECK: bb.2: -# CHECK: %3(s32) = PHI %5(s32), %bb.1 -# CHECK-NEXT: %6(s32) = G_FCONSTANT float 1.0 -# CHECK-NEXT: %2(s32) = G_FADD %3, %6 body: | + ; CHECK-LABEL: name: float_non_local_phi_use_followed_by_use_fi + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[C:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: [[FADD:%[0-9]+]]:fpr(s32) = G_FADD [[C]], [[C]] + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[C1:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: bb.2: + ; CHECK: [[PHI:%[0-9]+]]:fpr(s32) = PHI [[C1]](s32), %bb.1 + ; CHECK: [[C2:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: [[FADD1:%[0-9]+]]:fpr(s32) = G_FADD [[PHI]], [[C2]] + + ; Existing registers should be left untouched + ; The newly created reg should be on the same regbank/regclass as its origin. 
+ bb.0: successors: %bb.1 - %0(s32) = G_FCONSTANT float 1.0 - %1(s32) = G_FADD %0, %0 + %0:fpr(s32) = G_FCONSTANT float 1.0 + %1:fpr(s32) = G_FADD %0, %0 bb.1: successors: %bb.2 bb.2: - %3(s32) = PHI %0(s32), %bb.1 - %2(s32) = G_FADD %3, %0 + %3:fpr(s32) = PHI %0(s32), %bb.1 + %2:fpr(s32) = G_FADD %3, %0 ... --- # Make sure we don't insert a constant before PHIs. # This used to happen for loops of one basic block. -# CHECK-LABEL: name: non_local_phi name: non_local_phi legalized: true regBankSelected: true tracksRegLiveness: true - -# CHECK: registers: -# Existing registers should be left untouched -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -#CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } -#CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } -#CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } -# The newly created reg should be on the same regbank/regclass as its origin. -#CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } - -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: fpr } - - { id: 3, class: fpr } - -# CHECK: body: -# CHECK: %0(s32) = G_FCONSTANT float 1.0 -# CHECK-NEXT: %1(s32) = G_FADD %0, %0 - -# CHECK: bb.1: -# CHECK: %3(s32) = PHI %1(s32), %bb.0, %4(s32), %bb.1 -# CHECK: %4(s32) = G_FCONSTANT float 1.0 - -# CHECK-NEXT: %2(s32) = G_FADD %3, %1 body: | + ; CHECK-LABEL: name: non_local_phi + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[C:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: [[FADD:%[0-9]+]]:fpr(s32) = G_FADD [[C]], [[C]] + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: [[PHI:%[0-9]+]]:fpr(s32) = PHI [[FADD]](s32), %bb.0, %4(s32), %bb.1 + ; CHECK: [[C1:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: [[FADD1:%[0-9]+]]:fpr(s32) = G_FADD [[PHI]], [[FADD]] + ; CHECK: G_BR %bb.1 + + ; Existing registers should be left untouched + ; The newly created reg should be on the same regbank/regclass as its origin. + bb.0: successors: %bb.1 - %0(s32) = G_FCONSTANT float 1.0 - %1(s32) = G_FADD %0, %0 + %0:fpr(s32) = G_FCONSTANT float 1.0 + %1:fpr(s32) = G_FADD %0, %0 bb.1: successors: %bb.1 - %3(s32) = PHI %1(s32), %bb.0, %0(s32), %bb.1 - %2(s32) = G_FADD %3, %1 + %3:fpr(s32) = PHI %1(s32), %bb.0, %0(s32), %bb.1 + %2:fpr(s32) = G_FADD %3, %1 G_BR %bb.1 ... --- # Make sure we don't insert a constant before EH_LABELs. -# CHECK-LABEL: name: non_local_label name: non_local_label legalized: true regBankSelected: true tracksRegLiveness: true - -# CHECK: registers: -# Existing registers should be left untouched -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -#CHECK-NEXT: - { id: 1, class: fpr, preferred-register: '' } -#CHECK-NEXT: - { id: 2, class: fpr, preferred-register: '' } -#CHECK-NEXT: - { id: 3, class: fpr, preferred-register: '' } -# The newly created reg should be on the same regbank/regclass as its origin. 
-#CHECK-NEXT: - { id: 4, class: fpr, preferred-register: '' } - -registers: - - { id: 0, class: fpr } - - { id: 1, class: fpr } - - { id: 2, class: fpr } - - { id: 3, class: fpr } - -# CHECK: body: -# CHECK: %1(s32) = G_FCONSTANT float 1.0 - -# CHECK: bb.1: -# CHECK: EH_LABEL -# CHECK: %4(s32) = G_FCONSTANT float 1.0 - -# CHECK-NEXT: %2(s32) = G_FADD %0, %4 body: | + ; CHECK-LABEL: name: non_local_label + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: %s0 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[C:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: bb.1: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: EH_LABEL 1 + ; CHECK: [[C1:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: [[FADD:%[0-9]+]]:fpr(s32) = G_FADD [[COPY]], [[C1]] + ; CHECK: G_BR %bb.1 + + ; Existing registers should be left untouched + ; The newly created reg should be on the same regbank/regclass as its origin. + bb.0: liveins: %s0 successors: %bb.1 - %0(s32) = COPY %s0 - %1(s32) = G_FCONSTANT float 1.0 + %0:fpr(s32) = COPY %s0 + %1:fpr(s32) = G_FCONSTANT float 1.0 bb.1: successors: %bb.1 EH_LABEL 1 - %2(s32) = G_FADD %0, %1 + %2:fpr(s32) = G_FADD %0, %1 G_BR %bb.1 ... diff --git a/test/CodeGen/AArch64/GlobalISel/no-regclass.mir b/test/CodeGen/AArch64/GlobalISel/no-regclass.mir index 741d76b830c16..d4d23142ab9c1 100644 --- a/test/CodeGen/AArch64/GlobalISel/no-regclass.mir +++ b/test/CodeGen/AArch64/GlobalISel/no-regclass.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -mtriple=aarch64-apple-ios -global-isel -start-before=legalizer -stop-after=instruction-select %s -o - | FileCheck %s # We run the legalizer to combine the trivial EXTRACT_SEQ pair, leaving %1 and @@ -10,19 +11,19 @@ define void @unused_reg() { ret void } --- -# CHECK-LABEL: name: unused_reg name: unused_reg legalized: true regBankSelected: true tracksRegLiveness: true -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %w0 = COPY %0 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: unused_reg + ; CHECK: liveins: %w0 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %w0 + ; CHECK: %w0 = COPY [[COPY]] %0:gpr(s32) = COPY %w0 %1:gpr(s32) = G_MERGE_VALUES %0(s32) %2:gpr(s32) = G_UNMERGE_VALUES %1(s32) diff --git a/test/CodeGen/AArch64/GlobalISel/reg-bank-128bit.mir b/test/CodeGen/AArch64/GlobalISel/reg-bank-128bit.mir index 6ea651d38c535..b675389fd5b38 100644 --- a/test/CodeGen/AArch64/GlobalISel/reg-bank-128bit.mir +++ b/test/CodeGen/AArch64/GlobalISel/reg-bank-128bit.mir @@ -17,5 +17,6 @@ body: | %1:_(s64) = COPY %x1 %2:_(p0) = COPY %x2 %3:_(s128) = G_MERGE_VALUES %0, %1 - %d0 = COPY %3 + %4:_(s64) = G_TRUNC %3 + %d0 = COPY %4 ... diff --git a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir index 82fb80c3bbac1..df40a7f659ac0 100644 --- a/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir +++ b/test/CodeGen/AArch64/GlobalISel/regbankselect-default.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -mtriple arm64-- -run-pass=regbankselect -global-isel %s -o - | FileCheck %s # Check the default mappings for various instructions. @@ -66,461 +67,390 @@ define void @test_fptosi_s64_s32() { ret void } define void @test_fptoui_s32_s64() { ret void } + + define void @test_gphi_ptr() { ret void } + ... 
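Before the regbankselect checks below, it is worth summarizing the contract the localizer tests above pin down: a cheap-to-rematerialize definition (G_CONSTANT, G_FCONSTANT, G_FRAME_INDEX) that is used outside its defining block gets one clone per using block, every use in that block is rewritten to the clone, the clone keeps the original's register bank/class, and it is never inserted above PHIs or EH_LABELs. A toy C++ model of the use-rewriting half (invented types; the real pass is LLVM's Localizer):

#include <map>
#include <vector>

// Toy instruction: the vreg it defines, the vregs it uses, its block.
struct Inst {
  int def = -1;
  std::vector<int> uses;
  int block = 0;
};

// Rewrite non-local uses of `cheapDef` (defined in `defBlock`) so that
// each using block gets its own clone; all uses in one block share it.
void localize(std::vector<Inst> &insts, int cheapDef, int defBlock,
              int &nextVReg) {
  std::map<int, int> clonePerBlock;        // block -> re-materialized vreg
  for (Inst &inst : insts) {
    if (inst.block == defBlock)
      continue;                            // local uses stay as they are
    for (int &use : inst.uses) {
      if (use != cheapDef)
        continue;
      auto it = clonePerBlock.find(inst.block);
      if (it == clonePerBlock.end())       // first use in this block
        it = clonePerBlock.emplace(inst.block, nextVReg++).first;
      use = it->second;                    // point the use at the clone
    }
  }
  // Clone insertion (omitted here): each clone is emitted in its block
  // after any PHIs and EH_LABELs, on the same register bank as cheapDef.
}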
--- -# CHECK-LABEL: name: test_add_s32 name: test_add_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_ADD %0, %0 + ; CHECK-LABEL: name: test_add_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[COPY]], [[COPY]] %0(s32) = COPY %w0 %1(s32) = G_ADD %0, %0 ... --- -# CHECK-LABEL: name: test_add_v4s32 name: test_add_v4s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %q0 - ; CHECK: %0(<4 x s32>) = COPY %q0 - ; CHECK: %1(<4 x s32>) = G_ADD %0, %0 + ; CHECK-LABEL: name: test_add_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[ADD:%[0-9]+]]:fpr(<4 x s32>) = G_ADD [[COPY]], [[COPY]] %0(<4 x s32>) = COPY %q0 %1(<4 x s32>) = G_ADD %0, %0 ... --- -# CHECK-LABEL: name: test_sub_s32 name: test_sub_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_SUB %0, %0 + ; CHECK-LABEL: name: test_sub_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[SUB:%[0-9]+]]:gpr(s32) = G_SUB [[COPY]], [[COPY]] %0(s32) = COPY %w0 %1(s32) = G_SUB %0, %0 ... --- -# CHECK-LABEL: name: test_sub_v4s32 name: test_sub_v4s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %q0 - ; CHECK: %0(<4 x s32>) = COPY %q0 - ; CHECK: %1(<4 x s32>) = G_SUB %0, %0 + ; CHECK-LABEL: name: test_sub_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[SUB:%[0-9]+]]:fpr(<4 x s32>) = G_SUB [[COPY]], [[COPY]] %0(<4 x s32>) = COPY %q0 %1(<4 x s32>) = G_SUB %0, %0 ... --- -# CHECK-LABEL: name: test_mul_s32 name: test_mul_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_MUL %0, %0 + ; CHECK-LABEL: name: test_mul_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[MUL:%[0-9]+]]:gpr(s32) = G_MUL [[COPY]], [[COPY]] %0(s32) = COPY %w0 %1(s32) = G_MUL %0, %0 ... --- -# CHECK-LABEL: name: test_mul_v4s32 name: test_mul_v4s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %q0 - ; CHECK: %0(<4 x s32>) = COPY %q0 - ; CHECK: %1(<4 x s32>) = G_MUL %0, %0 + ; CHECK-LABEL: name: test_mul_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[MUL:%[0-9]+]]:fpr(<4 x s32>) = G_MUL [[COPY]], [[COPY]] %0(<4 x s32>) = COPY %q0 %1(<4 x s32>) = G_MUL %0, %0 ... 
--- -# CHECK-LABEL: name: test_and_s32 name: test_and_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_AND %0, %0 + ; CHECK-LABEL: name: test_and_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[AND:%[0-9]+]]:gpr(s32) = G_AND [[COPY]], [[COPY]] %0(s32) = COPY %w0 %1(s32) = G_AND %0, %0 ... --- -# CHECK-LABEL: name: test_and_v4s32 name: test_and_v4s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %q0 - ; CHECK: %0(<4 x s32>) = COPY %q0 - ; CHECK: %1(<4 x s32>) = G_AND %0, %0 + ; CHECK-LABEL: name: test_and_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[AND:%[0-9]+]]:fpr(<4 x s32>) = G_AND [[COPY]], [[COPY]] %0(<4 x s32>) = COPY %q0 %1(<4 x s32>) = G_AND %0, %0 ... --- -# CHECK-LABEL: name: test_or_s32 name: test_or_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_OR %0, %0 + ; CHECK-LABEL: name: test_or_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[OR:%[0-9]+]]:gpr(s32) = G_OR [[COPY]], [[COPY]] %0(s32) = COPY %w0 %1(s32) = G_OR %0, %0 ... --- -# CHECK-LABEL: name: test_or_v4s32 name: test_or_v4s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %q0 - ; CHECK: %0(<4 x s32>) = COPY %q0 - ; CHECK: %1(<4 x s32>) = G_OR %0, %0 + ; CHECK-LABEL: name: test_or_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[OR:%[0-9]+]]:fpr(<4 x s32>) = G_OR [[COPY]], [[COPY]] %0(<4 x s32>) = COPY %q0 %1(<4 x s32>) = G_OR %0, %0 ... --- -# CHECK-LABEL: name: test_xor_s32 name: test_xor_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_XOR %0, %0 + ; CHECK-LABEL: name: test_xor_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[XOR:%[0-9]+]]:gpr(s32) = G_XOR [[COPY]], [[COPY]] %0(s32) = COPY %w0 %1(s32) = G_XOR %0, %0 ... --- -# CHECK-LABEL: name: test_xor_v4s32 name: test_xor_v4s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %q0 - ; CHECK: %0(<4 x s32>) = COPY %q0 - ; CHECK: %1(<4 x s32>) = G_XOR %0, %0 + ; CHECK-LABEL: name: test_xor_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[XOR:%[0-9]+]]:fpr(<4 x s32>) = G_XOR [[COPY]], [[COPY]] %0(<4 x s32>) = COPY %q0 %1(<4 x s32>) = G_XOR %0, %0 ... 
--- -# CHECK-LABEL: name: test_shl_s32 name: test_shl_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_SHL %0, %0 + ; CHECK-LABEL: name: test_shl_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[SHL:%[0-9]+]]:gpr(s32) = G_SHL [[COPY]], [[COPY]] %0(s32) = COPY %w0 %1(s32) = G_SHL %0, %0 ... --- -# CHECK-LABEL: name: test_shl_v4s32 name: test_shl_v4s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %q0 - ; CHECK: %0(<4 x s32>) = COPY %q0 - ; CHECK: %1(<4 x s32>) = G_SHL %0, %0 + ; CHECK-LABEL: name: test_shl_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(<4 x s32>) = COPY %q0 + ; CHECK: [[SHL:%[0-9]+]]:fpr(<4 x s32>) = G_SHL [[COPY]], [[COPY]] %0(<4 x s32>) = COPY %q0 %1(<4 x s32>) = G_SHL %0, %0 ... --- -# CHECK-LABEL: name: test_lshr_s32 name: test_lshr_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_LSHR %0, %0 + ; CHECK-LABEL: name: test_lshr_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[LSHR:%[0-9]+]]:gpr(s32) = G_LSHR [[COPY]], [[COPY]] %0(s32) = COPY %w0 %1(s32) = G_LSHR %0, %0 ... --- -# CHECK-LABEL: name: test_ashr_s32 name: test_ashr_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_ASHR %0, %0 + ; CHECK-LABEL: name: test_ashr_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[ASHR:%[0-9]+]]:gpr(s32) = G_ASHR [[COPY]], [[COPY]] %0(s32) = COPY %w0 %1(s32) = G_ASHR %0, %0 ... --- -# CHECK-LABEL: name: test_sdiv_s32 name: test_sdiv_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_SDIV %0, %0 + ; CHECK-LABEL: name: test_sdiv_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[SDIV:%[0-9]+]]:gpr(s32) = G_SDIV [[COPY]], [[COPY]] %0(s32) = COPY %w0 %1(s32) = G_SDIV %0, %0 ... --- -# CHECK-LABEL: name: test_udiv_s32 name: test_udiv_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_UDIV %0, %0 + ; CHECK-LABEL: name: test_udiv_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[UDIV:%[0-9]+]]:gpr(s32) = G_UDIV [[COPY]], [[COPY]] %0(s32) = COPY %w0 %1(s32) = G_UDIV %0, %0 ... 
--- -# CHECK-LABEL: name: test_anyext_s64_s32 name: test_anyext_s64_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s64) = G_ANYEXT %0 + ; CHECK-LABEL: name: test_anyext_s64_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[ANYEXT:%[0-9]+]]:gpr(s64) = G_ANYEXT [[COPY]](s32) %0(s32) = COPY %w0 %1(s64) = G_ANYEXT %0 ... --- -# CHECK-LABEL: name: test_sext_s64_s32 name: test_sext_s64_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s64) = G_SEXT %0 + ; CHECK-LABEL: name: test_sext_s64_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[SEXT:%[0-9]+]]:gpr(s64) = G_SEXT [[COPY]](s32) %0(s32) = COPY %w0 %1(s64) = G_SEXT %0 ... --- -# CHECK-LABEL: name: test_zext_s64_s32 name: test_zext_s64_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s64) = G_ZEXT %0 + ; CHECK-LABEL: name: test_zext_s64_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[ZEXT:%[0-9]+]]:gpr(s64) = G_ZEXT [[COPY]](s32) %0(s32) = COPY %w0 %1(s64) = G_ZEXT %0 ... --- -# CHECK-LABEL: name: test_trunc_s32_s64 name: test_trunc_s32_s64 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %x0 - ; CHECK: %0(s64) = COPY %x0 - ; CHECK: %1(s32) = G_TRUNC %0 + ; CHECK-LABEL: name: test_trunc_s32_s64 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY %x0 + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s32) = G_TRUNC [[COPY]](s64) %0(s64) = COPY %x0 %1(s32) = G_TRUNC %0 ... --- -# CHECK-LABEL: name: test_constant_s32 name: test_constant_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } body: | bb.0: - ; CHECK: %0(s32) = G_CONSTANT 123 + ; CHECK-LABEL: name: test_constant_s32 + ; CHECK: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT 123 %0(s32) = G_CONSTANT 123 ... --- -# CHECK-LABEL: name: test_constant_p0 name: test_constant_p0 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } body: | bb.0: - ; CHECK: %0(p0) = G_CONSTANT 0 + ; CHECK-LABEL: name: test_constant_p0 + ; CHECK: [[C:%[0-9]+]]:gpr(p0) = G_CONSTANT 0 %0(p0) = G_CONSTANT 0 ... 
--- -# CHECK-LABEL: name: test_icmp_s32 name: test_icmp_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -528,21 +458,18 @@ registers: body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s32) = G_ICMP intpred(ne), %0(s32), %0 - ; CHECK: %2(s1) = G_TRUNC %1(s32) + ; CHECK-LABEL: name: test_icmp_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY]] + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) %0(s32) = COPY %w0 %1(s32) = G_ICMP intpred(ne), %0, %0 %2(s1) = G_TRUNC %1(s32) ... --- -# CHECK-LABEL: name: test_icmp_p0 name: test_icmp_p0 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -550,243 +477,206 @@ registers: body: | bb.0: liveins: %x0 - ; CHECK: %0(p0) = COPY %x0 - ; CHECK: %1(s32) = G_ICMP intpred(ne), %0(p0), %0 - ; CHECK: %2(s1) = G_TRUNC %1(s32) + ; CHECK-LABEL: name: test_icmp_p0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY %x0 + ; CHECK: [[ICMP:%[0-9]+]]:gpr(s32) = G_ICMP intpred(ne), [[COPY]](p0), [[COPY]] + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[ICMP]](s32) %0(p0) = COPY %x0 %1(s32) = G_ICMP intpred(ne), %0, %0 %2(s1) = G_TRUNC %1(s32) ... --- -# CHECK-LABEL: name: test_frame_index_p0 name: test_frame_index_p0 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } stack: - { id: 0, name: ptr0, offset: 0, size: 8, alignment: 8 } body: | bb.0: - ; CHECK: %0(p0) = G_FRAME_INDEX %stack.0.ptr0 + ; CHECK-LABEL: name: test_frame_index_p0 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %stack.0.ptr0 %0(p0) = G_FRAME_INDEX %stack.0.ptr0 ... --- -# CHECK-LABEL: name: test_ptrtoint_s64_p0 name: test_ptrtoint_s64_p0 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %x0 - ; CHECK: %0(p0) = COPY %x0 - ; CHECK: %1(s64) = G_PTRTOINT %0 + ; CHECK-LABEL: name: test_ptrtoint_s64_p0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY %x0 + ; CHECK: [[PTRTOINT:%[0-9]+]]:gpr(s64) = G_PTRTOINT [[COPY]](p0) %0(p0) = COPY %x0 %1(s64) = G_PTRTOINT %0 ... --- -# CHECK-LABEL: name: test_inttoptr_p0_s64 name: test_inttoptr_p0_s64 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %x0 - ; CHECK: %0(s64) = COPY %x0 - ; CHECK: %1(p0) = G_INTTOPTR %0 + ; CHECK-LABEL: name: test_inttoptr_p0_s64 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY %x0 + ; CHECK: [[INTTOPTR:%[0-9]+]]:gpr(p0) = G_INTTOPTR [[COPY]](s64) %0(s64) = COPY %x0 %1(p0) = G_INTTOPTR %0 ... 
--- -# CHECK-LABEL: name: test_load_s32_p0 name: test_load_s32_p0 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %x0 - ; CHECK: %0(p0) = COPY %x0 - ; CHECK: %1(s32) = G_LOAD %0 + ; CHECK-LABEL: name: test_load_s32_p0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY %x0 + ; CHECK: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load 4) %0(p0) = COPY %x0 %1(s32) = G_LOAD %0 :: (load 4) ... --- -# CHECK-LABEL: name: test_store_s32_p0 name: test_store_s32_p0 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %x0, %w1 - ; CHECK: %0(p0) = COPY %x0 - ; CHECK: %1(s32) = COPY %w1 - ; CHECK: G_STORE %1(s32), %0(p0) + ; CHECK-LABEL: name: test_store_s32_p0 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr(s32) = COPY %w1 + ; CHECK: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store 4) %0(p0) = COPY %x0 %1(s32) = COPY %w1 G_STORE %1, %0 :: (store 4) ... --- -# CHECK-LABEL: name: test_fadd_s32 name: test_fadd_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %s0 - ; CHECK: %0(s32) = COPY %s0 - ; CHECK: %1(s32) = G_FADD %0, %0 + ; CHECK-LABEL: name: test_fadd_s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[FADD:%[0-9]+]]:fpr(s32) = G_FADD [[COPY]], [[COPY]] %0(s32) = COPY %s0 %1(s32) = G_FADD %0, %0 ... --- -# CHECK-LABEL: name: test_fsub_s32 name: test_fsub_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %s0 - ; CHECK: %0(s32) = COPY %s0 - ; CHECK: %1(s32) = G_FSUB %0, %0 + ; CHECK-LABEL: name: test_fsub_s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[FSUB:%[0-9]+]]:fpr(s32) = G_FSUB [[COPY]], [[COPY]] %0(s32) = COPY %s0 %1(s32) = G_FSUB %0, %0 ... --- -# CHECK-LABEL: name: test_fmul_s32 name: test_fmul_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %s0 - ; CHECK: %0(s32) = COPY %s0 - ; CHECK: %1(s32) = G_FMUL %0, %0 + ; CHECK-LABEL: name: test_fmul_s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[FMUL:%[0-9]+]]:fpr(s32) = G_FMUL [[COPY]], [[COPY]] %0(s32) = COPY %s0 %1(s32) = G_FMUL %0, %0 ... --- -# CHECK-LABEL: name: test_fdiv_s32 name: test_fdiv_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %s0 - ; CHECK: %0(s32) = COPY %s0 - ; CHECK: %1(s32) = G_FDIV %0, %0 + ; CHECK-LABEL: name: test_fdiv_s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[FDIV:%[0-9]+]]:fpr(s32) = G_FDIV [[COPY]], [[COPY]] %0(s32) = COPY %s0 %1(s32) = G_FDIV %0, %0 ... 
--- -# CHECK-LABEL: name: test_fpext_s64_s32 name: test_fpext_s64_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %s0 - ; CHECK: %0(s32) = COPY %s0 - ; CHECK: %1(s64) = G_FPEXT %0 + ; CHECK-LABEL: name: test_fpext_s64_s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[FPEXT:%[0-9]+]]:fpr(s64) = G_FPEXT [[COPY]](s32) %0(s32) = COPY %s0 %1(s64) = G_FPEXT %0 ... --- -# CHECK-LABEL: name: test_fptrunc_s32_s64 name: test_fptrunc_s32_s64 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %d0 - ; CHECK: %0(s64) = COPY %d0 - ; CHECK: %1(s32) = G_FPTRUNC %0 + ; CHECK-LABEL: name: test_fptrunc_s32_s64 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY %d0 + ; CHECK: [[FPTRUNC:%[0-9]+]]:fpr(s32) = G_FPTRUNC [[COPY]](s64) %0(s64) = COPY %d0 %1(s32) = G_FPTRUNC %0 ... --- -# CHECK-LABEL: name: test_fconstant_s32 name: test_fconstant_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } body: | bb.0: - ; CHECK: %0(s32) = G_FCONSTANT float 1.0 + ; CHECK-LABEL: name: test_fconstant_s32 + ; CHECK: [[C:%[0-9]+]]:fpr(s32) = G_FCONSTANT float 1.000000e+00 %0(s32) = G_FCONSTANT float 1.0 ... --- -# CHECK-LABEL: name: test_fcmp_s32 name: test_fcmp_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -794,86 +684,125 @@ registers: body: | bb.0: liveins: %s0 - ; CHECK: %0(s32) = COPY %s0 - ; CHECK: [[FCMP:%[0-9]+]](s32) = G_FCMP floatpred(olt), %0(s32), %0 - ; CHECK: [[TRUNC:%[0-9]+]](s1) = G_TRUNC [[FCMP]] + ; CHECK-LABEL: name: test_fcmp_s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[FCMP:%[0-9]+]]:gpr(s32) = G_FCMP floatpred(olt), [[COPY]](s32), [[COPY]] + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[FCMP]](s32) %0(s32) = COPY %s0 %1(s32) = G_FCMP floatpred(olt), %0, %0 %2(s1) = G_TRUNC %1(s32) ... --- -# CHECK-LABEL: name: test_sitofp_s64_s32 name: test_sitofp_s64_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %w0 - ; CHECK: %0(s32) = COPY %w0 - ; CHECK: %1(s64) = G_SITOFP %0 + ; CHECK-LABEL: name: test_sitofp_s64_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %w0 + ; CHECK: [[SITOFP:%[0-9]+]]:fpr(s64) = G_SITOFP [[COPY]](s32) %0(s32) = COPY %w0 %1(s64) = G_SITOFP %0 ... --- -# CHECK-LABEL: name: test_uitofp_s32_s64 name: test_uitofp_s32_s64 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: gpr, preferred-register: '' } -# CHECK: - { id: 1, class: fpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %x0 - ; CHECK: %0(s64) = COPY %x0 - ; CHECK: %1(s32) = G_UITOFP %0 + ; CHECK-LABEL: name: test_uitofp_s32_s64 + ; CHECK: [[COPY:%[0-9]+]]:gpr(s64) = COPY %x0 + ; CHECK: [[UITOFP:%[0-9]+]]:fpr(s32) = G_UITOFP [[COPY]](s64) %0(s64) = COPY %x0 %1(s32) = G_UITOFP %0 ... 
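The regbankselect-default checks reduce to one heuristic: scalar integer and pointer operations (including G_ICMP, G_LOAD, and integer constants) map to gpr; floating-point and vector operations map to fpr; and conversions straddle the banks, so G_SITOFP/G_UITOFP read gpr and write fpr, while G_FCMP and the G_FPTOSI/G_FPTOUI tests just below read fpr but produce their result on gpr. A toy sketch of the result-bank rule (not RegBankInfo's actual interface):

#include <string>

enum class Bank { GPR, FPR };

// Rough approximation of the default *result* bank these checks expect:
// vectors and most G_F* results live on fpr; everything integer-like,
// including FP-to-int conversions and the G_FCMP boolean, lives on gpr.
Bank defaultResultBank(const std::string &opc, bool isVector) {
  if (isVector)
    return Bank::FPR;                       // e.g. G_ADD <4 x s32>
  if (opc == "G_FCMP" || opc == "G_FPTOSI" || opc == "G_FPTOUI")
    return Bank::GPR;                       // FP input, integer result
  if (opc == "G_SITOFP" || opc == "G_UITOFP")
    return Bank::FPR;                       // integer input, FP result
  if (opc.compare(0, 3, "G_F") == 0)
    return Bank::FPR;                       // G_FADD, G_FCONSTANT, ...
  return Bank::GPR;                         // G_ADD, G_ICMP, G_LOAD, ...
}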
--- -# CHECK-LABEL: name: test_fptosi_s64_s32 name: test_fptosi_s64_s32 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %s0 - ; CHECK: %0(s32) = COPY %s0 - ; CHECK: %1(s64) = G_FPTOSI %0 + ; CHECK-LABEL: name: test_fptosi_s64_s32 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s32) = COPY %s0 + ; CHECK: [[FPTOSI:%[0-9]+]]:gpr(s64) = G_FPTOSI [[COPY]](s32) %0(s32) = COPY %s0 %1(s64) = G_FPTOSI %0 ... --- -# CHECK-LABEL: name: test_fptoui_s32_s64 name: test_fptoui_s32_s64 legalized: true -# CHECK: registers: -# CHECK: - { id: 0, class: fpr, preferred-register: '' } -# CHECK: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } body: | bb.0: liveins: %d0 - ; CHECK: %0(s64) = COPY %d0 - ; CHECK: %1(s32) = G_FPTOUI %0 + ; CHECK-LABEL: name: test_fptoui_s32_s64 + ; CHECK: [[COPY:%[0-9]+]]:fpr(s64) = COPY %d0 + ; CHECK: [[FPTOUI:%[0-9]+]]:gpr(s32) = G_FPTOUI [[COPY]](s64) %0(s64) = COPY %d0 %1(s32) = G_FPTOUI %0 ... + +--- +name: test_gphi_ptr +legalized: true +tracksRegLiveness: true +registers: + - { id: 0, class: _, preferred-register: '' } + - { id: 1, class: _, preferred-register: '' } + - { id: 2, class: _, preferred-register: '' } + - { id: 3, class: _, preferred-register: '' } + - { id: 4, class: _, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } +body: | + ; CHECK-LABEL: name: test_gphi_ptr + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: %w2, %x0, %x1 + ; CHECK: [[COPY:%[0-9]+]]:gpr(p0) = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr(p0) = COPY %x1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr(s32) = COPY %w2 + ; CHECK: [[TRUNC:%[0-9]+]]:gpr(s1) = G_TRUNC [[COPY2]](s32) + ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.1 + ; CHECK: G_BR %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: bb.2: + ; CHECK: [[PHI:%[0-9]+]]:gpr(p0) = G_PHI [[COPY]](p0), %bb.0, [[COPY1]](p0), %bb.1 + ; CHECK: %x0 = COPY [[PHI]](p0) + ; CHECK: RET_ReallyLR implicit %x0 + bb.0: + successors: %bb.1, %bb.2 + liveins: %w2, %x0, %x1 + + %0(p0) = COPY %x0 + %1(p0) = COPY %x1 + %4(s32) = COPY %w2 + %2(s1) = G_TRUNC %4(s32) + G_BRCOND %2(s1), %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + + bb.2: + %3(p0) = G_PHI %0(p0), %bb.0, %1(p0), %bb.1 + %x0 = COPY %3(p0) + RET_ReallyLR implicit %x0 + +... diff --git a/test/CodeGen/AArch64/GlobalISel/select-binop.mir b/test/CodeGen/AArch64/GlobalISel/select-binop.mir index 70cda516d5f10..1badcf35492d1 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-binop.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-binop.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -58,28 +59,24 @@ --- # Check that we select a 32-bit GPR G_ADD into ADDWrr on GPR32. # Also check that we constrain the register class of the COPY to GPR32. 
-# CHECK-LABEL: name: add_s32_gpr name: add_s32_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %1 = COPY %w1 -# CHECK: %2 = ADDWrr %0, %1 body: | bb.0: liveins: %w0, %w1 + ; CHECK-LABEL: name: add_s32_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; CHECK: %w0 = COPY [[ADDWrr]] %0(s32) = COPY %w0 %1(s32) = COPY %w1 %2(s32) = G_ADD %0, %1 @@ -88,28 +85,24 @@ body: | --- # Same as add_s32_gpr, for 64-bit operations. -# CHECK-LABEL: name: add_s64_gpr name: add_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %x1 -# CHECK: %2 = ADDXrr %0, %1 body: | bb.0: liveins: %x0, %x1 + ; CHECK-LABEL: name: add_s64_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]] + ; CHECK: %x0 = COPY [[ADDXrr]] %0(s64) = COPY %x0 %1(s64) = COPY %x1 %2(s64) = G_ADD %0, %1 @@ -117,27 +110,23 @@ body: | ... --- -# CHECK-LABEL: name: add_imm_s32_gpr name: add_imm_s32_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr32sp, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %2 = ADDWri %0, 1, 0 body: | bb.0: liveins: %w0, %w1 + ; CHECK-LABEL: name: add_imm_s32_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr32sp = COPY %w0 + ; CHECK: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY]], 1, 0 + ; CHECK: %w0 = COPY [[ADDWri]] %0(s32) = COPY %w0 %1(s32) = G_CONSTANT i32 1 %2(s32) = G_ADD %0, %1 @@ -145,27 +134,23 @@ body: | ... --- -# CHECK-LABEL: name: add_imm_s64_gpr name: add_imm_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr64sp, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %2 = ADDXri %0, 1, 0 body: | bb.0: liveins: %x0, %w1 + ; CHECK-LABEL: name: add_imm_s64_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri [[COPY]], 1, 0 + ; CHECK: %x0 = COPY [[ADDXri]] %0(s64) = COPY %x0 %1(s64) = G_CONSTANT i32 1 %2(s64) = G_ADD %0, %1 @@ -173,25 +158,24 @@ body: | ... 
 ---
-# CHECK-LABEL: name: add_imm_s32_gpr_bb
 name: add_imm_s32_gpr_bb
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32sp, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr32sp, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: bb.1:
-# CHECK: %2 = ADDWri %0, 1, 0
 body: |
+  ; CHECK-LABEL: name: add_imm_s32_gpr_bb
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32sp = COPY %w0
+  ; CHECK:   B %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY]], 1, 0
+  ; CHECK:   %w0 = COPY [[ADDWri]]
   bb.0:
     liveins: %w0, %w1
     successors: %bb.1
@@ -207,28 +191,24 @@ body: |
 ---
 # Same as add_s32_gpr, for G_SUB operations.
-# CHECK-LABEL: name: sub_s32_gpr
 name: sub_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %w1
-# CHECK: %2 = SUBSWrr %0, %1, implicit-def %nzcv
 body: |
   bb.0:
     liveins: %w0, %w1
+    ; CHECK-LABEL: name: sub_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1
+    ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY1]], implicit-def %nzcv
+    ; CHECK: %w0 = COPY [[SUBSWrr]]
     %0(s32) = COPY %w0
     %1(s32) = COPY %w1
     %2(s32) = G_SUB %0, %1
@@ -237,28 +217,24 @@ body: |
 ---
 # Same as add_s64_gpr, for G_SUB operations.
-# CHECK-LABEL: name: sub_s64_gpr
 name: sub_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %x1
-# CHECK: %2 = SUBSXrr %0, %1, implicit-def %nzcv
 body: |
   bb.0:
     liveins: %x0, %x1
+    ; CHECK-LABEL: name: sub_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1
+    ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def %nzcv
+    ; CHECK: %x0 = COPY [[SUBSXrr]]
     %0(s64) = COPY %x0
     %1(s64) = COPY %x1
     %2(s64) = G_SUB %0, %1
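# Note that G_SUB is selected to the flag-setting SUBSWrr/SUBSXrr forms
# rather than plain SUBWrr/SUBXrr, which is why the checks above carry the
# `implicit-def %nzcv` operand for the condition-flags register.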
@@ -267,28 +243,24 @@ body: |
 ---
 # Same as add_s32_gpr, for G_OR operations.
-# CHECK-LABEL: name: or_s32_gpr
 name: or_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %w1
-# CHECK: %2 = ORRWrr %0, %1
 body: |
   bb.0:
     liveins: %w0, %w1
+    ; CHECK-LABEL: name: or_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1
+    ; CHECK: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr [[COPY]], [[COPY1]]
+    ; CHECK: %w0 = COPY [[ORRWrr]]
     %0(s32) = COPY %w0
     %1(s32) = COPY %w1
     %2(s32) = G_OR %0, %1
@@ -297,28 +269,24 @@ body: |
 ---
 # Same as add_s64_gpr, for G_OR operations.
-# CHECK-LABEL: name: or_s64_gpr
 name: or_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %x1
-# CHECK: %2 = ORRXrr %0, %1
 body: |
   bb.0:
     liveins: %x0, %x1
+    ; CHECK-LABEL: name: or_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1
+    ; CHECK: [[ORRXrr:%[0-9]+]]:gpr64 = ORRXrr [[COPY]], [[COPY1]]
+    ; CHECK: %x0 = COPY [[ORRXrr]]
     %0(s64) = COPY %x0
     %1(s64) = COPY %x1
     %2(s64) = G_OR %0, %1
@@ -327,30 +295,26 @@ body: |
 ---
 # 64-bit G_OR on vector registers.
-# CHECK-LABEL: name: or_v2s32_fpr
 name: or_v2s32_fpr
 legalized: true
 regBankSelected: true
 #
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
   - { id: 2, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = COPY %d1
 # The actual OR does not matter as long as it is operating
 # on 64-bit width vector.
-# CHECK: %2 = ORRv8i8 %0, %1
 body: |
   bb.0:
     liveins: %d0, %d1
+    ; CHECK-LABEL: name: or_v2s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1
+    ; CHECK: [[ORRv8i8_:%[0-9]+]]:fpr64 = ORRv8i8 [[COPY]], [[COPY1]]
+    ; CHECK: %d0 = COPY [[ORRv8i8_]]
     %0(<2 x s32>) = COPY %d0
     %1(<2 x s32>) = COPY %d1
     %2(<2 x s32>) = G_OR %0, %1
@@ -359,28 +323,24 @@ body: |
 ---
 # Same as add_s32_gpr, for G_AND operations.
-# CHECK-LABEL: name: and_s32_gpr
 name: and_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %w1
-# CHECK: %2 = ANDWrr %0, %1
 body: |
   bb.0:
     liveins: %w0, %w1
+    ; CHECK-LABEL: name: and_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1
+    ; CHECK: [[ANDWrr:%[0-9]+]]:gpr32 = ANDWrr [[COPY]], [[COPY1]]
+    ; CHECK: %w0 = COPY [[ANDWrr]]
     %0(s32) = COPY %w0
     %1(s32) = COPY %w1
     %2(s32) = G_AND %0, %1
@@ -389,28 +349,24 @@ body: |
 ---
 # Same as add_s64_gpr, for G_AND operations.
-# CHECK-LABEL: name: and_s64_gpr
 name: and_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %x1
-# CHECK: %2 = ANDXrr %0, %1
 body: |
   bb.0:
     liveins: %x0, %x1
+    ; CHECK-LABEL: name: and_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1
+    ; CHECK: [[ANDXrr:%[0-9]+]]:gpr64 = ANDXrr [[COPY]], [[COPY1]]
+    ; CHECK: %x0 = COPY [[ANDXrr]]
     %0(s64) = COPY %x0
     %1(s64) = COPY %x1
     %2(s64) = G_AND %0, %1
@@ -419,28 +375,24 @@ body: |
 ---
 # Same as add_s32_gpr, for G_SHL operations.
-# CHECK-LABEL: name: shl_s32_gpr
 name: shl_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %w1
-# CHECK: %2 = LSLVWr %0, %1
 body: |
   bb.0:
     liveins: %w0, %w1
+    ; CHECK-LABEL: name: shl_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1
+    ; CHECK: [[LSLVWr:%[0-9]+]]:gpr32 = LSLVWr [[COPY]], [[COPY1]]
+    ; CHECK: %w0 = COPY [[LSLVWr]]
     %0(s32) = COPY %w0
     %1(s32) = COPY %w1
     %2(s32) = G_SHL %0, %1
@@ -449,28 +401,24 @@ body: |
 ---
 # Same as add_s64_gpr, for G_SHL operations.
-# CHECK-LABEL: name: shl_s64_gpr
 name: shl_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %x1
-# CHECK: %2 = LSLVXr %0, %1
 body: |
   bb.0:
     liveins: %x0, %x1
+    ; CHECK-LABEL: name: shl_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1
+    ; CHECK: [[LSLVXr:%[0-9]+]]:gpr64 = LSLVXr [[COPY]], [[COPY1]]
+    ; CHECK: %x0 = COPY [[LSLVXr]]
     %0(s64) = COPY %x0
     %1(s64) = COPY %x1
     %2(s64) = G_SHL %0, %1
@@ -479,28 +427,24 @@ body: |
 ---
 # Same as add_s32_gpr, for G_LSHR operations.
-# CHECK-LABEL: name: lshr_s32_gpr
 name: lshr_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %w1
-# CHECK: %2 = LSRVWr %0, %1
 body: |
   bb.0:
     liveins: %w0, %w1
+    ; CHECK-LABEL: name: lshr_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1
+    ; CHECK: [[LSRVWr:%[0-9]+]]:gpr32 = LSRVWr [[COPY]], [[COPY1]]
+    ; CHECK: %w0 = COPY [[LSRVWr]]
     %0(s32) = COPY %w0
     %1(s32) = COPY %w1
     %2(s32) = G_LSHR %0, %1
@@ -509,28 +453,24 @@ body: |
 ---
 # Same as add_s64_gpr, for G_LSHR operations.
-# CHECK-LABEL: name: lshr_s64_gpr
 name: lshr_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %x1
-# CHECK: %2 = LSRVXr %0, %1
 body: |
   bb.0:
     liveins: %x0, %x1
+    ; CHECK-LABEL: name: lshr_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1
+    ; CHECK: [[LSRVXr:%[0-9]+]]:gpr64 = LSRVXr [[COPY]], [[COPY1]]
+    ; CHECK: %x0 = COPY [[LSRVXr]]
     %0(s64) = COPY %x0
     %1(s64) = COPY %x1
     %2(s64) = G_LSHR %0, %1
@@ -539,28 +479,24 @@ body: |
 ---
 # Same as add_s32_gpr, for G_ASHR operations.
-# CHECK-LABEL: name: ashr_s32_gpr
 name: ashr_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %w1
-# CHECK: %2 = ASRVWr %0, %1
 body: |
   bb.0:
     liveins: %w0, %w1
+    ; CHECK-LABEL: name: ashr_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1
+    ; CHECK: [[ASRVWr:%[0-9]+]]:gpr32 = ASRVWr [[COPY]], [[COPY1]]
+    ; CHECK: %w0 = COPY [[ASRVWr]]
     %0(s32) = COPY %w0
     %1(s32) = COPY %w1
     %2(s32) = G_ASHR %0, %1
@@ -569,28 +505,24 @@ body: |
 ---
 # Same as add_s64_gpr, for G_ASHR operations.
-# CHECK-LABEL: name: ashr_s64_gpr
 name: ashr_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %x1
-# CHECK: %2 = ASRVXr %0, %1
 body: |
   bb.0:
     liveins: %x0, %x1
+    ; CHECK-LABEL: name: ashr_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1
+    ; CHECK: [[ASRVXr:%[0-9]+]]:gpr64 = ASRVXr [[COPY]], [[COPY1]]
+    ; CHECK: %x0 = COPY [[ASRVXr]]
     %0(s64) = COPY %x0
     %1(s64) = COPY %x1
     %2(s64) = G_ASHR %0, %1
@@ -600,28 +532,24 @@ body: |
 ---
 # Check that we select s32 GPR G_MUL. This is trickier than other binops because
 # there is only MADDWrrr, and we have to use the WZR physreg.
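# (Background for the check below: base AArch64 has no plain
# register-register multiply; MUL is an alias of MADD with the zero register
# as the addend, so the selector emits MADDWrrr %a, %b, %wzr here, and the
# analogous MADDXrrr with %xzr in the 64-bit test.)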
-# CHECK-LABEL: name: mul_s32_gpr
 name: mul_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %w1
-# CHECK: %2 = MADDWrrr %0, %1, %wzr
 body: |
   bb.0:
     liveins: %w0, %w1
+    ; CHECK-LABEL: name: mul_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1
+    ; CHECK: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY]], [[COPY1]], %wzr
+    ; CHECK: %w0 = COPY [[MADDWrrr]]
     %0(s32) = COPY %w0
     %1(s32) = COPY %w1
     %2(s32) = G_MUL %0, %1
@@ -630,28 +558,24 @@ body: |
 ---
 # Same as mul_s32_gpr for the s64 type.
-# CHECK-LABEL: name: mul_s64_gpr
 name: mul_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %x1
-# CHECK: %2 = MADDXrrr %0, %1, %xzr
 body: |
   bb.0:
     liveins: %x0, %x1
+    ; CHECK-LABEL: name: mul_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1
+    ; CHECK: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY]], [[COPY1]], %xzr
+    ; CHECK: %x0 = COPY [[MADDXrrr]]
     %0(s64) = COPY %x0
     %1(s64) = COPY %x1
     %2(s64) = G_MUL %0, %1
@@ -660,26 +584,22 @@ body: |
 ---
 # Same as mul_s32_gpr for the s64 type.
-# CHECK-LABEL: name: mulh_s64_gpr
 name: mulh_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 3, class: gpr64, preferred-register: '' }
-
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %x1
-# CHECK: %2 = SMULHrr %0, %1
-# CHECK: %3 = UMULHrr %0, %1
+
 body: |
   bb.0:
     liveins: %x0, %x1
+    ; CHECK-LABEL: name: mulh_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1
+    ; CHECK: [[SMULHrr:%[0-9]+]]:gpr64 = SMULHrr [[COPY]], [[COPY1]]
+    ; CHECK: [[UMULHrr:%[0-9]+]]:gpr64 = UMULHrr [[COPY]], [[COPY1]]
+    ; CHECK: %x0 = COPY [[SMULHrr]]
+    ; CHECK: %x0 = COPY [[UMULHrr]]
     %0:gpr(s64) = COPY %x0
     %1:gpr(s64) = COPY %x1
     %2:gpr(s64) = G_SMULH %0, %1
@@ -690,28 +610,24 @@ body: |
 ---
 # Same as add_s32_gpr, for G_SDIV operations.
-# CHECK-LABEL: name: sdiv_s32_gpr
 name: sdiv_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %w1
-# CHECK: %2 = SDIVWr %0, %1
 body: |
   bb.0:
     liveins: %w0, %w1
+    ; CHECK-LABEL: name: sdiv_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1
+    ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[COPY]], [[COPY1]]
+    ; CHECK: %w0 = COPY [[SDIVWr]]
     %0(s32) = COPY %w0
     %1(s32) = COPY %w1
     %2(s32) = G_SDIV %0, %1
@@ -720,28 +636,24 @@ body: |
 ---
 # Same as add_s64_gpr, for G_SDIV operations.
-# CHECK-LABEL: name: sdiv_s64_gpr
 name: sdiv_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %x1
-# CHECK: %2 = SDIVXr %0, %1
 body: |
   bb.0:
     liveins: %x0, %x1
+    ; CHECK-LABEL: name: sdiv_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1
+    ; CHECK: [[SDIVXr:%[0-9]+]]:gpr64 = SDIVXr [[COPY]], [[COPY1]]
+    ; CHECK: %x0 = COPY [[SDIVXr]]
     %0(s64) = COPY %x0
     %1(s64) = COPY %x1
     %2(s64) = G_SDIV %0, %1
@@ -750,28 +662,24 @@ body: |
 ---
 # Same as add_s32_gpr, for G_UDIV operations.
-# CHECK-LABEL: name: udiv_s32_gpr
 name: udiv_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %w1
-# CHECK: %2 = UDIVWr %0, %1
 body: |
   bb.0:
     liveins: %w0, %w1
+    ; CHECK-LABEL: name: udiv_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1
+    ; CHECK: [[UDIVWr:%[0-9]+]]:gpr32 = UDIVWr [[COPY]], [[COPY1]]
+    ; CHECK: %w0 = COPY [[UDIVWr]]
     %0(s32) = COPY %w0
     %1(s32) = COPY %w1
     %2(s32) = G_UDIV %0, %1
@@ -780,28 +688,24 @@ body: |
 ---
 # Same as add_s64_gpr, for G_UDIV operations.
-# CHECK-LABEL: name: udiv_s64_gpr
 name: udiv_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
   - { id: 2, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %x1
-# CHECK: %2 = UDIVXr %0, %1
 body: |
   bb.0:
     liveins: %x0, %x1
+    ; CHECK-LABEL: name: udiv_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1
+    ; CHECK: [[UDIVXr:%[0-9]+]]:gpr64 = UDIVXr [[COPY]], [[COPY1]]
+    ; CHECK: %x0 = COPY [[UDIVXr]]
     %0(s64) = COPY %x0
     %1(s64) = COPY %x1
     %2(s64) = G_UDIV %0, %1
@@ -810,28 +714,24 @@ body: |
 ---
 # Check that we select a s32 FPR G_FADD into FADDSrr.
-# CHECK-LABEL: name: fadd_s32_fpr
 name: fadd_s32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
   - { id: 2, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = COPY %s1
-# CHECK: %2 = FADDSrr %0, %1
 body: |
   bb.0:
     liveins: %s0, %s1
+    ; CHECK-LABEL: name: fadd_s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1
+    ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[COPY]], [[COPY1]]
+    ; CHECK: %s0 = COPY [[FADDSrr]]
     %0(s32) = COPY %s0
     %1(s32) = COPY %s1
     %2(s32) = G_FADD %0, %1
@@ -839,28 +739,24 @@ body: |
 ...
 ---
-# CHECK-LABEL: name: fadd_s64_fpr
 name: fadd_s64_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
   - { id: 2, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = COPY %d1
-# CHECK: %2 = FADDDrr %0, %1
 body: |
   bb.0:
     liveins: %d0, %d1
+    ; CHECK-LABEL: name: fadd_s64_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1
+    ; CHECK: [[FADDDrr:%[0-9]+]]:fpr64 = FADDDrr [[COPY]], [[COPY1]]
+    ; CHECK: %d0 = COPY [[FADDDrr]]
     %0(s64) = COPY %d0
     %1(s64) = COPY %d1
     %2(s64) = G_FADD %0, %1
@@ -868,28 +764,24 @@ body: |
 ...
 ---
-# CHECK-LABEL: name: fsub_s32_fpr
 name: fsub_s32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
   - { id: 2, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = COPY %s1
-# CHECK: %2 = FSUBSrr %0, %1
 body: |
   bb.0:
     liveins: %s0, %s1
+    ; CHECK-LABEL: name: fsub_s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1
+    ; CHECK: [[FSUBSrr:%[0-9]+]]:fpr32 = FSUBSrr [[COPY]], [[COPY1]]
+    ; CHECK: %s0 = COPY [[FSUBSrr]]
     %0(s32) = COPY %s0
     %1(s32) = COPY %s1
     %2(s32) = G_FSUB %0, %1
@@ -897,28 +789,24 @@ body: |
 ...
 ---
-# CHECK-LABEL: name: fsub_s64_fpr
 name: fsub_s64_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
   - { id: 2, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = COPY %d1
-# CHECK: %2 = FSUBDrr %0, %1
 body: |
   bb.0:
     liveins: %d0, %d1
+    ; CHECK-LABEL: name: fsub_s64_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1
+    ; CHECK: [[FSUBDrr:%[0-9]+]]:fpr64 = FSUBDrr [[COPY]], [[COPY1]]
+    ; CHECK: %d0 = COPY [[FSUBDrr]]
     %0(s64) = COPY %d0
     %1(s64) = COPY %d1
     %2(s64) = G_FSUB %0, %1
@@ -926,28 +814,24 @@ body: |
 ...
 ---
-# CHECK-LABEL: name: fmul_s32_fpr
 name: fmul_s32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
   - { id: 2, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = COPY %s1
-# CHECK: %2 = FMULSrr %0, %1
 body: |
   bb.0:
     liveins: %s0, %s1
+    ; CHECK-LABEL: name: fmul_s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1
+    ; CHECK: [[FMULSrr:%[0-9]+]]:fpr32 = FMULSrr [[COPY]], [[COPY1]]
+    ; CHECK: %s0 = COPY [[FMULSrr]]
     %0(s32) = COPY %s0
     %1(s32) = COPY %s1
     %2(s32) = G_FMUL %0, %1
@@ -955,28 +839,24 @@ body: |
 ...
 ---
-# CHECK-LABEL: name: fmul_s64_fpr
 name: fmul_s64_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
   - { id: 2, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = COPY %d1
-# CHECK: %2 = FMULDrr %0, %1
 body: |
   bb.0:
     liveins: %d0, %d1
+    ; CHECK-LABEL: name: fmul_s64_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1
+    ; CHECK: [[FMULDrr:%[0-9]+]]:fpr64 = FMULDrr [[COPY]], [[COPY1]]
+    ; CHECK: %d0 = COPY [[FMULDrr]]
     %0(s64) = COPY %d0
     %1(s64) = COPY %d1
     %2(s64) = G_FMUL %0, %1
@@ -984,28 +864,24 @@ body: |
 ...
 ---
-# CHECK-LABEL: name: fdiv_s32_fpr
 name: fdiv_s32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
   - { id: 2, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = COPY %s1
-# CHECK: %2 = FDIVSrr %0, %1
 body: |
   bb.0:
     liveins: %s0, %s1
+    ; CHECK-LABEL: name: fdiv_s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1
+    ; CHECK: [[FDIVSrr:%[0-9]+]]:fpr32 = FDIVSrr [[COPY]], [[COPY1]]
+    ; CHECK: %s0 = COPY [[FDIVSrr]]
     %0(s32) = COPY %s0
     %1(s32) = COPY %s1
     %2(s32) = G_FDIV %0, %1
@@ -1013,28 +889,24 @@ body: |
 ...
 ---
-# CHECK-LABEL: name: fdiv_s64_fpr
 name: fdiv_s64_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
   - { id: 2, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = COPY %d1
-# CHECK: %2 = FDIVDrr %0, %1
 body: |
   bb.0:
     liveins: %d0, %d1
+    ; CHECK-LABEL: name: fdiv_s64_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1
+    ; CHECK: [[FDIVDrr:%[0-9]+]]:fpr64 = FDIVDrr [[COPY]], [[COPY1]]
+    ; CHECK: %d0 = COPY [[FDIVDrr]]
     %0(s64) = COPY %d0
     %1(s64) = COPY %d1
     %2(s64) = G_FDIV %0, %1
diff --git a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir
index fe077a25f7cbe..e323aa310d5c3 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-bitcast.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
 
 --- |
@@ -16,248 +17,218 @@
 ...
 ---
-# CHECK-LABEL: name: bitcast_s32_gpr
 name: bitcast_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32all, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %0
 body: |
   bb.0:
     liveins: %w0
+    ; CHECK-LABEL: name: bitcast_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]]
+    ; CHECK: %w0 = COPY [[COPY1]]
     %0(s32) = COPY %w0
     %1(s32) = G_BITCAST %0
     %w0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: bitcast_s32_fpr
 name: bitcast_s32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = COPY %0
 body: |
   bb.0:
     liveins: %s0
+    ; CHECK-LABEL: name: bitcast_s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]]
+    ; CHECK: %s0 = COPY [[COPY1]]
     %0(s32) = COPY %s0
     %1(s32) = G_BITCAST %0
     %s0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: bitcast_s32_gpr_fpr
 name: bitcast_s32_gpr_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %0
 body: |
   bb.0:
     liveins: %w0
+    ; CHECK-LABEL: name: bitcast_s32_gpr_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[COPY]]
+    ; CHECK: %s0 = COPY [[COPY1]]
     %0(s32) = COPY %w0
     %1(s32) = G_BITCAST %0
     %s0 = COPY %1(s32)
 ...
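# In these bitcast tests G_BITCAST is selected to a plain COPY: same-bank
# casts keep their register class, and in the mixed-bank cases (gpr_fpr
# above, fpr_gpr next) the COPY itself is what performs the move between
# register banks, so only the classes on the two sides differ.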
 ---
-# CHECK-LABEL: name: bitcast_s32_fpr_gpr
 name: bitcast_s32_fpr_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = COPY %0
 body: |
   bb.0:
     liveins: %s0
+    ; CHECK-LABEL: name: bitcast_s32_fpr_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; CHECK: %w0 = COPY [[COPY1]]
     %0(s32) = COPY %s0
     %1(s32) = G_BITCAST %0
     %w0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: bitcast_s64_gpr
 name: bitcast_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %0
 body: |
   bb.0:
     liveins: %x0
+    ; CHECK-LABEL: name: bitcast_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY [[COPY]]
+    ; CHECK: %x0 = COPY [[COPY1]]
     %0(s64) = COPY %x0
     %1(s64) = G_BITCAST %0
     %x0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: bitcast_s64_fpr
 name: bitcast_s64_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = COPY %0
 body: |
   bb.0:
     liveins: %d0
+    ; CHECK-LABEL: name: bitcast_s64_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]]
+    ; CHECK: %d0 = COPY [[COPY1]]
     %0(s64) = COPY %d0
     %1(s64) = G_BITCAST %0
     %d0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: bitcast_s64_gpr_fpr
 name: bitcast_s64_gpr_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = COPY %0
 body: |
   bb.0:
     liveins: %x0
+    ; CHECK-LABEL: name: bitcast_s64_gpr_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %x0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]]
+    ; CHECK: %d0 = COPY [[COPY1]]
     %0(s64) = COPY %x0
     %1(s64) = G_BITCAST %0
     %d0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: bitcast_s64_fpr_gpr
 name: bitcast_s64_fpr_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = COPY %0
 body: |
   bb.0:
     liveins: %d0
+    ; CHECK-LABEL: name: bitcast_s64_fpr_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]]
+    ; CHECK: %x0 = COPY [[COPY1]]
     %0(s64) = COPY %d0
     %1(s64) = G_BITCAST %0
     %x0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: bitcast_s64_v2f32_fpr
 name: bitcast_s64_v2f32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = COPY %0
 body: |
   bb.0:
     liveins: %d0
+    ; CHECK-LABEL: name: bitcast_s64_v2f32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]]
+    ; CHECK: %x0 = COPY [[COPY1]]
     %0(s64) = COPY %d0
     %1(<2 x s32>) = G_BITCAST %0
     %x0 = COPY %1(<2 x s32>)
 ...
 ---
-# CHECK-LABEL: name: bitcast_s64_v8i8_fpr
 name: bitcast_s64_v8i8_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = COPY %0
 body: |
   bb.0:
     liveins: %d0
+    ; CHECK-LABEL: name: bitcast_s64_v8i8_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY [[COPY]]
+    ; CHECK: %x0 = COPY [[COPY1]]
     %0(s64) = COPY %d0
     %1(<8 x s8>) = G_BITCAST %0
     %x0 = COPY %1(<8 x s8>)
diff --git a/test/CodeGen/AArch64/GlobalISel/select-br.mir b/test/CodeGen/AArch64/GlobalISel/select-br.mir
index f46f190260f64..0d6108fe322d4 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-br.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-br.mir
@@ -33,6 +33,7 @@ regBankSelected: true
 registers:
   - { id: 0, class: gpr }
+  - { id: 1, class: gpr }
 
 # CHECK: body:
 # CHECK: bb.0:
@@ -41,7 +42,8 @@ registers:
 body: |
   bb.0:
     successors: %bb.0, %bb.1
-    %0(s1) = COPY %w0
+    %1(s32) = COPY %w0
+    %0(s1) = G_TRUNC %1
     G_BRCOND %0(s1), %bb.1
     G_BR %bb.0
 
@@ -59,7 +61,7 @@ registers:
 
 # CHECK: body:
 # CHECK: bb.0:
-# CHECK: %0 = COPY %x0
+# CHECK: %0:gpr64 = COPY %x0
 # CHECK: BR %0
 body: |
   bb.0:
diff --git a/test/CodeGen/AArch64/GlobalISel/select-bswap.mir b/test/CodeGen/AArch64/GlobalISel/select-bswap.mir
index 56a964f106c41..17394fe86d2c1 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-bswap.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-bswap.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
 
 --- |
@@ -8,52 +9,44 @@
 ...
 ---
-# CHECK-LABEL: name: bswap_s32
 name: bswap_s32
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = REVWr %0
-# CHECK: %w0 = COPY %1
 body: |
   bb.0:
     liveins: %w0
+    ; CHECK-LABEL: name: bswap_s32
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[REVWr:%[0-9]+]]:gpr32 = REVWr [[COPY]]
+    ; CHECK: %w0 = COPY [[REVWr]]
     %0(s32) = COPY %w0
     %1(s32) = G_BSWAP %0
     %w0 = COPY %1
 ...
 ---
-# CHECK-LABEL: name: bswap_s64
 name: bswap_s64
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = REVXr %0
-# CHECK: %x0 = COPY %1
 body: |
   bb.0:
     liveins: %x0
+    ; CHECK-LABEL: name: bswap_s64
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[REVXr:%[0-9]+]]:gpr64 = REVXr [[COPY]]
+    ; CHECK: %x0 = COPY [[REVXr]]
    %0(s64) = COPY %x0
     %1(s64) = G_BSWAP %0
     %x0 = COPY %1
diff --git a/test/CodeGen/AArch64/GlobalISel/select-cbz.mir b/test/CodeGen/AArch64/GlobalISel/select-cbz.mir
index e13fa1e021d52..f8f0126bdc3da 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-cbz.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-cbz.mir
@@ -15,7 +15,7 @@ regBankSelected: true
 
 # CHECK: body:
 # CHECK: bb.0:
-# CHECK: %0 = COPY %w0
+# CHECK: %0:gpr32 = COPY %w0
 # CHECK: CBZW %0, %bb.1
 # CHECK: B %bb.0
 body: |
@@ -41,7 +41,7 @@ regBankSelected: true
 
 # CHECK: body:
 # CHECK: bb.0:
-# CHECK: %0 = COPY %x0
+# CHECK: %0:gpr64 = COPY %x0
 # CHECK: CBZX %0, %bb.1
 # CHECK: B %bb.0
 body: |
@@ -67,7 +67,7 @@ regBankSelected: true
 
 # CHECK: body:
 # CHECK: bb.0:
-# CHECK: %0 = COPY %w0
+# CHECK: %0:gpr32 = COPY %w0
 # CHECK: CBNZW %0, %bb.1
 # CHECK: B %bb.0
 body: |
@@ -93,7 +93,7 @@ regBankSelected: true
 
 # CHECK: body:
 # CHECK: bb.0:
-# CHECK: %0 = COPY %x0
+# CHECK: %0:gpr64 = COPY %x0
 # CHECK: CBNZX %0, %bb.1
 # CHECK: B %bb.0
 body: |
diff --git a/test/CodeGen/AArch64/GlobalISel/select-constant.mir b/test/CodeGen/AArch64/GlobalISel/select-constant.mir
index 1a5bac9fb7d6f..fbe2ef1f2c8a9 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-constant.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-constant.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
 
 --- |
@@ -8,70 +9,104 @@
   define i32 @fconst_s32() { ret i32 42 }
   define i64 @fconst_s64() { ret i64 1234567890123 }
+  define float @fconst_s32_0() { ret float 0.0 }
+  define double @fconst_s64_0() { ret double 0.0 }
 ...
 ---
-# CHECK-LABEL: name: const_s32
 name: const_s32
 legalized: true
 regBankSelected: true
 registers:
   - { id: 0, class: gpr }
-# CHECK: body:
-# CHECK: %0 = MOVi32imm 42
 body: |
   bb.0:
+    ; CHECK-LABEL: name: const_s32
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 42
+    ; CHECK: %w0 = COPY [[MOVi32imm]]
     %0(s32) = G_CONSTANT i32 42
     %w0 = COPY %0(s32)
 ...
 ---
-# CHECK-LABEL: name: const_s64
 name: const_s64
 legalized: true
 regBankSelected: true
 registers:
   - { id: 0, class: gpr }
-# CHECK: body:
-# CHECK: %0 = MOVi64imm 1234567890123
 body: |
   bb.0:
+    ; CHECK-LABEL: name: const_s64
+    ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 1234567890123
+    ; CHECK: %x0 = COPY [[MOVi64imm]]
     %0(s64) = G_CONSTANT i64 1234567890123
     %x0 = COPY %0(s64)
 ...
 ---
-# CHECK-LABEL: name: fconst_s32
 name: fconst_s32
 legalized: true
 regBankSelected: true
 registers:
   - { id: 0, class: fpr }
-# CHECK: body:
-# CHECK: [[TMP:%[0-9]+]] = MOVi32imm 1080033280
-# CHECK: %0 = COPY [[TMP]]
 body: |
   bb.0:
+    ; CHECK-LABEL: name: fconst_s32
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1080033280
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+    ; CHECK: %s0 = COPY [[COPY]]
     %0(s32) = G_FCONSTANT float 3.5
     %s0 = COPY %0(s32)
 ...
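# (1080033280 above is 0x40600000, the IEEE-754 single-precision bit pattern
# of 3.5: non-zero FP constants are built with a MOV immediate on the GPR
# side and then copied across to the FPR bank, as the fconst checks show.)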
 ---
-# CHECK-LABEL: name: fconst_s64
 name: fconst_s64
 legalized: true
 regBankSelected: true
 registers:
   - { id: 0, class: fpr }
-# CHECK: body:
-# CHECK: [[TMP:%[0-9]+]] = MOVi64imm 4607182418800017408
-# CHECK: %0 = COPY [[TMP]]
 body: |
   bb.0:
+    ; CHECK-LABEL: name: fconst_s64
+    ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 4607182418800017408
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY [[MOVi64imm]]
+    ; CHECK: %d0 = COPY [[COPY]]
     %0(s64) = G_FCONSTANT double 1.0
     %d0 = COPY %0(s64)
 ...
+
+---
+name: fconst_s32_0
+legalized: true
+regBankSelected: true
+registers:
+  - { id: 0, class: fpr }
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fconst_s32_0
+    ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+    ; CHECK: %s0 = COPY [[FMOVS0_]]
+    %0(s32) = G_FCONSTANT float 0.0
+    %s0 = COPY %0(s32)
+...
+
+---
+name: fconst_s64_0
+legalized: true
+regBankSelected: true
+registers:
+  - { id: 0, class: fpr }
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: fconst_s64_0
+    ; CHECK: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
+    ; CHECK: %x0 = COPY [[FMOVD0_]]
+    %0(s64) = G_FCONSTANT double 0.0
+    %x0 = COPY %0(s64)
+...
diff --git a/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir b/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir
index 96245e3ec625d..af83be5c075ed 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-dbg-value.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -O0 -mtriple arm64-- -run-pass=instruction-select -global-isel %s -o - | FileCheck %s
 
 --- |
@@ -35,37 +36,33 @@
 ...
 ---
-# CHECK-LABEL: name: test_dbg_value
 name: test_dbg_value
 legalized: true
 regBankSelected: true
 body: |
   bb.0:
     liveins: %w0
+    ; CHECK-LABEL: name: test_dbg_value
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY]]
+    ; CHECK: %w0 = COPY [[ADDWrr]]
+    ; CHECK: DBG_VALUE debug-use [[ADDWrr]], debug-use _, !7, !DIExpression(), debug-location !9
     %0:gpr(s32) = COPY %w0
     %1:gpr(s32) = G_ADD %0, %0
     %w0 = COPY %1(s32)
-
-    ; CHECK: %0 = COPY %w0
-    ; CHECK-NEXT: %1 = ADDWrr %0, %0
-    ; CHECK-NEXT: %w0 = COPY %1
-    ; CHECK-NEXT: DBG_VALUE debug-use %1, debug-use _, !7, !DIExpression(), debug-location !9
-
     DBG_VALUE debug-use %1(s32), debug-use _, !7, !DIExpression(), debug-location !9
 ...
 ---
-# CHECK-LABEL: name: test_dbg_value_dead
 name: test_dbg_value_dead
 legalized: true
 regBankSelected: true
 body: |
   bb.0:
     liveins: %w0
-    %0:gpr(s32) = COPY %w0
-
+    ; CHECK-LABEL: name: test_dbg_value_dead
     ; CHECK-NOT: COPY
     ; CHECK: DBG_VALUE debug-use _, debug-use _, !7, !DIExpression(), debug-location !9
-
+    %0:gpr(s32) = COPY %w0
     DBG_VALUE debug-use %0(s32), debug-use _, !7, !DIExpression(), debug-location !9
 ...
diff --git a/test/CodeGen/AArch64/GlobalISel/select-fma.mir b/test/CodeGen/AArch64/GlobalISel/select-fma.mir
index 3b2f3746b5877..3e8743c3ce80e 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-fma.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-fma.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
 
 --- |
@@ -7,35 +8,29 @@
 ...
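# G_FMA(%a, %b, %c) computes the fused %a * %b + %c, which maps one-to-one
# onto the three-source FMADDSrrr checked in the test below.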
 ---
-# CHECK-LABEL: name: FMADDSrrr_fpr
 name: FMADDSrrr_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 3, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
   - { id: 2, class: fpr }
   - { id: 3, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = COPY %w1
-# CHECK: %2 = COPY %w2
-# CHECK: %3 = FMADDSrrr %0, %1, %2
 body: |
   bb.0:
     liveins: %w0, %w1, %w2
+    ; CHECK-LABEL: name: FMADDSrrr_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %w0
+    ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %w1
+    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY %w2
+    ; CHECK: [[FMADDSrrr:%[0-9]+]]:fpr32 = FMADDSrrr [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK: %w0 = COPY [[FMADDSrrr]]
     %0(s32) = COPY %w0
     %1(s32) = COPY %w1
     %2(s32) = COPY %w2
     %3(s32) = G_FMA %0, %1, %2
-    %x0 = COPY %3
+    %w0 = COPY %3
 ...
-
diff --git a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
index 3c34319355778..a163ba1db3289 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-fp-casts.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
 
 --- |
@@ -33,550 +34,484 @@
 ...
 ---
-# CHECK-LABEL: name: fptrunc_s16_s32_fpr
 name: fptrunc_s16_s32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK: - { id: 1, class: fpr16, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = FCVTHSr %0
 body: |
   bb.0:
     liveins: %s0
+    ; CHECK-LABEL: name: fptrunc_s16_s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
+    ; CHECK: %h0 = COPY [[FCVTHSr]]
     %0(s32) = COPY %s0
     %1(s16) = G_FPTRUNC %0
     %h0 = COPY %1(s16)
 ...
 ---
-# CHECK-LABEL: name: fptrunc_s16_s64_fpr
 name: fptrunc_s16_s64_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK: - { id: 1, class: fpr16, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = FCVTHDr %0
 body: |
   bb.0:
     liveins: %d0
+    ; CHECK-LABEL: name: fptrunc_s16_s64_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[FCVTHDr:%[0-9]+]]:fpr16 = FCVTHDr [[COPY]]
+    ; CHECK: %h0 = COPY [[FCVTHDr]]
     %0(s64) = COPY %d0
     %1(s16) = G_FPTRUNC %0
     %h0 = COPY %1(s16)
 ...
 ---
-# CHECK-LABEL: name: fptrunc_s32_s64_fpr
 name: fptrunc_s32_s64_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK: - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = FCVTSDr %0
 body: |
   bb.0:
     liveins: %d0
+    ; CHECK-LABEL: name: fptrunc_s32_s64_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[FCVTSDr:%[0-9]+]]:fpr32 = FCVTSDr [[COPY]]
+    ; CHECK: %s0 = COPY [[FCVTSDr]]
     %0(s64) = COPY %d0
     %1(s32) = G_FPTRUNC %0
     %s0 = COPY %1(s32)
 ...
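# (The FCVT mnemonics name destination precision before source precision,
# with H = half, S = single, D = double: FCVTHSr and FCVTHDr above truncate
# to half from single/double, and the fpext tests below use the widening
# counterparts FCVTSHr, FCVTDHr, and FCVTDSr.)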
 ---
-# CHECK-LABEL: name: fpext_s32_s16_fpr
 name: fpext_s32_s16_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK: - { id: 0, class: fpr16, preferred-register: '' }
-# CHECK: - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %h0
-# CHECK: %1 = FCVTSHr %0
 body: |
   bb.0:
     liveins: %h0
+    ; CHECK-LABEL: name: fpext_s32_s16_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY %h0
+    ; CHECK: [[FCVTSHr:%[0-9]+]]:fpr32 = FCVTSHr [[COPY]]
+    ; CHECK: %s0 = COPY [[FCVTSHr]]
     %0(s16) = COPY %h0
     %1(s32) = G_FPEXT %0
     %s0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: fpext_s64_s16_fpr
 name: fpext_s64_s16_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK: - { id: 0, class: fpr16, preferred-register: '' }
-# CHECK: - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %h0
-# CHECK: %1 = FCVTDHr %0
 body: |
   bb.0:
     liveins: %h0
+    ; CHECK-LABEL: name: fpext_s64_s16_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr16 = COPY %h0
+    ; CHECK: [[FCVTDHr:%[0-9]+]]:fpr64 = FCVTDHr [[COPY]]
+    ; CHECK: %d0 = COPY [[FCVTDHr]]
     %0(s16) = COPY %h0
     %1(s64) = G_FPEXT %0
     %d0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: fpext_s64_s32_fpr
 name: fpext_s64_s32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK: - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = FCVTDSr %0
 body: |
   bb.0:
     liveins: %d0
+    ; CHECK-LABEL: name: fpext_s64_s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[FCVTDSr:%[0-9]+]]:fpr64 = FCVTDSr [[COPY]]
+    ; CHECK: %d0 = COPY [[FCVTDSr]]
     %0(s32) = COPY %s0
     %1(s64) = G_FPEXT %0
     %d0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: sitofp_s32_s32_fpr
 name: sitofp_s32_s32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = SCVTFUWSri %0
 body: |
   bb.0:
     liveins: %w0
+    ; CHECK-LABEL: name: sitofp_s32_s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[SCVTFUWSri:%[0-9]+]]:fpr32 = SCVTFUWSri [[COPY]]
+    ; CHECK: %s0 = COPY [[SCVTFUWSri]]
     %0(s32) = COPY %w0
     %1(s32) = G_SITOFP %0
     %s0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: sitofp_s32_s64_fpr
 name: sitofp_s32_s64_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = SCVTFUXSri %0
 body: |
   bb.0:
     liveins: %x0
+    ; CHECK-LABEL: name: sitofp_s32_s64_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[SCVTFUXSri:%[0-9]+]]:fpr32 = SCVTFUXSri [[COPY]]
+    ; CHECK: %s0 = COPY [[SCVTFUXSri]]
     %0(s64) = COPY %x0
     %1(s32) = G_SITOFP %0
     %s0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: sitofp_s64_s32_fpr
 name: sitofp_s64_s32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = SCVTFUWDri %0
 body: |
   bb.0:
     liveins: %w0
+    ; CHECK-LABEL: name: sitofp_s64_s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[SCVTFUWDri:%[0-9]+]]:fpr64 = SCVTFUWDri [[COPY]]
+    ; CHECK: %d0 = COPY [[SCVTFUWDri]]
     %0(s32) = COPY %w0
     %1(s64) = G_SITOFP %0
     %d0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: sitofp_s64_s64_fpr
 name: sitofp_s64_s64_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = SCVTFUXDri %0
 body: |
   bb.0:
     liveins: %x0
+    ; CHECK-LABEL: name: sitofp_s64_s64_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[SCVTFUXDri:%[0-9]+]]:fpr64 = SCVTFUXDri [[COPY]]
+    ; CHECK: %d0 = COPY [[SCVTFUXDri]]
     %0(s64) = COPY %x0
     %1(s64) = G_SITOFP %0
     %d0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: uitofp_s32_s32_fpr
 name: uitofp_s32_s32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = UCVTFUWSri %0
 body: |
   bb.0:
     liveins: %w0
+    ; CHECK-LABEL: name: uitofp_s32_s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[UCVTFUWSri:%[0-9]+]]:fpr32 = UCVTFUWSri [[COPY]]
+    ; CHECK: %s0 = COPY [[UCVTFUWSri]]
     %0(s32) = COPY %w0
     %1(s32) = G_UITOFP %0
     %s0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: uitofp_s32_s64_fpr
 name: uitofp_s32_s64_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = UCVTFUXSri %0
 body: |
   bb.0:
     liveins: %x0
+    ; CHECK-LABEL: name: uitofp_s32_s64_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[UCVTFUXSri:%[0-9]+]]:fpr32 = UCVTFUXSri [[COPY]]
+    ; CHECK: %s0 = COPY [[UCVTFUXSri]]
     %0(s64) = COPY %x0
     %1(s32) = G_UITOFP %0
     %s0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: uitofp_s64_s32_fpr
 name: uitofp_s64_s32_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %w0
-# CHECK: %1 = UCVTFUWDri %0
 body: |
   bb.0:
     liveins: %w0
+    ; CHECK-LABEL: name: uitofp_s64_s32_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0
+    ; CHECK: [[UCVTFUWDri:%[0-9]+]]:fpr64 = UCVTFUWDri [[COPY]]
+    ; CHECK: %d0 = COPY [[UCVTFUWDri]]
     %0(s32) = COPY %w0
     %1(s64) = G_UITOFP %0
     %d0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: uitofp_s64_s64_fpr
 name: uitofp_s64_s64_fpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' }
 registers:
   - { id: 0, class: gpr }
   - { id: 1, class: fpr }
-# CHECK: body:
-# CHECK: %0 = COPY %x0
-# CHECK: %1 = UCVTFUXDri %0
 body: |
   bb.0:
     liveins: %x0
+    ; CHECK-LABEL: name: uitofp_s64_s64_fpr
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0
+    ; CHECK: [[UCVTFUXDri:%[0-9]+]]:fpr64 = UCVTFUXDri [[COPY]]
+    ; CHECK: %d0 = COPY [[UCVTFUXDri]]
     %0(s64) = COPY %x0
     %1(s64) = G_UITOFP %0
     %d0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: fptosi_s32_s32_gpr
 name: fptosi_s32_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = FCVTZSUWSr %0
 body: |
   bb.0:
     liveins: %s0
+    ; CHECK-LABEL: name: fptosi_s32_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[FCVTZSUWSr:%[0-9]+]]:gpr32 = FCVTZSUWSr [[COPY]]
+    ; CHECK: %w0 = COPY [[FCVTZSUWSr]]
     %0(s32) = COPY %s0
     %1(s32) = G_FPTOSI %0
     %w0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: fptosi_s32_s64_gpr
 name: fptosi_s32_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = FCVTZSUWDr %0
 body: |
   bb.0:
     liveins: %d0
+    ; CHECK-LABEL: name: fptosi_s32_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[FCVTZSUWDr:%[0-9]+]]:gpr32 = FCVTZSUWDr [[COPY]]
+    ; CHECK: %w0 = COPY [[FCVTZSUWDr]]
     %0(s64) = COPY %d0
     %1(s32) = G_FPTOSI %0
     %w0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: fptosi_s64_s32_gpr
 name: fptosi_s64_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = FCVTZSUXSr %0
 body: |
   bb.0:
     liveins: %s0
+    ; CHECK-LABEL: name: fptosi_s64_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[FCVTZSUXSr:%[0-9]+]]:gpr64 = FCVTZSUXSr [[COPY]]
+    ; CHECK: %x0 = COPY [[FCVTZSUXSr]]
     %0(s32) = COPY %s0
     %1(s64) = G_FPTOSI %0
     %x0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: fptosi_s64_s64_gpr
 name: fptosi_s64_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = FCVTZSUXDr %0
 body: |
   bb.0:
     liveins: %d0
+    ; CHECK-LABEL: name: fptosi_s64_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[FCVTZSUXDr:%[0-9]+]]:gpr64 = FCVTZSUXDr [[COPY]]
+    ; CHECK: %x0 = COPY [[FCVTZSUXDr]]
     %0(s64) = COPY %d0
     %1(s64) = G_FPTOSI %0
     %x0 = COPY %1(s64)
 ...
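# G_FPTOSI selects to the FCVTZS family above (convert to signed integer,
# rounding toward zero); the G_FPTOUI tests that follow use the unsigned
# FCVTZU counterparts, with the integer result always landing on the GPR
# bank.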
 ---
-# CHECK-LABEL: name: fptoui_s32_s32_gpr
 name: fptoui_s32_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = FCVTZUUWSr %0
 body: |
   bb.0:
     liveins: %s0
+    ; CHECK-LABEL: name: fptoui_s32_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[FCVTZUUWSr:%[0-9]+]]:gpr32 = FCVTZUUWSr [[COPY]]
+    ; CHECK: %w0 = COPY [[FCVTZUUWSr]]
     %0(s32) = COPY %s0
     %1(s32) = G_FPTOUI %0
     %w0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: fptoui_s32_s64_gpr
 name: fptoui_s32_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = FCVTZUUWDr %0
 body: |
   bb.0:
     liveins: %d0
+    ; CHECK-LABEL: name: fptoui_s32_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[FCVTZUUWDr:%[0-9]+]]:gpr32 = FCVTZUUWDr [[COPY]]
+    ; CHECK: %w0 = COPY [[FCVTZUUWDr]]
     %0(s64) = COPY %d0
     %1(s32) = G_FPTOUI %0
     %w0 = COPY %1(s32)
 ...
 ---
-# CHECK-LABEL: name: fptoui_s64_s32_gpr
 name: fptoui_s64_s32_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %s0
-# CHECK: %1 = FCVTZUUXSr %0
 body: |
   bb.0:
     liveins: %s0
+    ; CHECK-LABEL: name: fptoui_s64_s32_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY %s0
+    ; CHECK: [[FCVTZUUXSr:%[0-9]+]]:gpr64 = FCVTZUUXSr [[COPY]]
+    ; CHECK: %x0 = COPY [[FCVTZUUXSr]]
     %0(s32) = COPY %s0
     %1(s64) = G_FPTOUI %0
     %x0 = COPY %1(s64)
 ...
 ---
-# CHECK-LABEL: name: fptoui_s64_s64_gpr
 name: fptoui_s64_s64_gpr
 legalized: true
 regBankSelected: true
-# CHECK: registers:
-# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' }
-# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' }
 registers:
   - { id: 0, class: fpr }
   - { id: 1, class: gpr }
-# CHECK: body:
-# CHECK: %0 = COPY %d0
-# CHECK: %1 = FCVTZUUXDr %0
 body: |
   bb.0:
     liveins: %d0
+    ; CHECK-LABEL: name: fptoui_s64_s64_gpr
+    ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0
+    ; CHECK: [[FCVTZUUXDr:%[0-9]+]]:gpr64 = FCVTZUUXDr [[COPY]]
+    ; CHECK: %x0 = COPY [[FCVTZUUXDr]]
     %0(s64) = COPY %d0
     %1(s64) = G_FPTOUI %0
     %x0 = COPY %1(s64)
diff --git a/test/CodeGen/AArch64/GlobalISel/select-imm.mir b/test/CodeGen/AArch64/GlobalISel/select-imm.mir
index 1fc20ff98f7fa..28fb4b396531b 100644
--- a/test/CodeGen/AArch64/GlobalISel/select-imm.mir
+++ b/test/CodeGen/AArch64/GlobalISel/select-imm.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s
 
 --- |
@@ -10,44 +11,40 @@
 
 ---
 # Check that we select a 32-bit immediate into a MOVi32imm.
-# CHECK-LABEL: name: imm_s32_gpr name: imm_s32_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } -# CHECK: body: -# CHECK: %0 = MOVi32imm -1234 body: | bb.0: liveins: %w0, %w1 + ; CHECK-LABEL: name: imm_s32_gpr + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm -1234 + ; CHECK: %w0 = COPY [[MOVi32imm]] %0(s32) = G_CONSTANT i32 -1234 %w0 = COPY %0(s32) ... --- # Check that we select a 64-bit immediate into a MOVi64imm. -# CHECK-LABEL: name: imm_s64_gpr name: imm_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } -# CHECK: body: -# CHECK: %0 = MOVi64imm 1234 body: | bb.0: liveins: %w0, %w1 + ; CHECK-LABEL: name: imm_s64_gpr + ; CHECK: [[MOVi64imm:%[0-9]+]]:gpr64 = MOVi64imm 1234 + ; CHECK: %x0 = COPY [[MOVi64imm]] %0(s64) = G_CONSTANT i64 1234 - %w0 = COPY %0(s64) + %x0 = COPY %0(s64) ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir b/test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir index 8604b2769ba30..7b65fe3bf7dc9 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-implicit-def.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -7,23 +8,19 @@ ... --- -# CHECK-LABEL: name: implicit_def name: implicit_def legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: [[DEF:%[0-9]+]] = IMPLICIT_DEF -# CHECK: [[ADD:%[0-9]+]] = ADDWrr [[DEF]], [[DEF]] -# CHECK: %w0 = COPY [[ADD]] body: | bb.0: + ; CHECK-LABEL: name: implicit_def + ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF + ; CHECK: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[DEF]], [[DEF]] + ; CHECK: %w0 = COPY [[ADDWrr]] %0(s32) = G_IMPLICIT_DEF %1(s32) = G_ADD %0, %0 %w0 = COPY %1(s32) diff --git a/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir b/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir index e88e151bd2483..c7b7ec9b6fe6d 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir @@ -15,12 +15,12 @@ body: | %1:gpr(s64) = G_IMPLICIT_DEF ; CHECK: body: - ; CHECK: [[TMP:%[0-9]+]] = SUBREG_TO_REG 0, %0, 15 - ; CHECK: %2 = BFMXri %1, [[TMP]], 0, 31 + ; CHECK: [[TMP:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %0, 15 + ; CHECK: %2:gpr64 = BFMXri %1, [[TMP]], 0, 31 %2:gpr(s64) = G_INSERT %1, %0, 0 - ; CHECK: [[TMP:%[0-9]+]] = SUBREG_TO_REG 0, %0, 15 - ; CHECK: %3 = BFMXri %1, [[TMP]], 51, 31 + ; CHECK: [[TMP:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %0, 15 + ; CHECK: %3:gpr64 = BFMXri %1, [[TMP]], 51, 31 %3:gpr(s64) = G_INSERT %1, %0, 13 %x0 = COPY %2 @@ -41,12 +41,12 @@ body: | %0:gpr(s64) = COPY %x0 ; CHECK: body: - ; CHECK: [[TMP:%[0-9]+]] = UBFMXri %0, 0, 31 - ; CHECK: %1 = COPY [[TMP]].sub_32 + ; CHECK: [[TMP:%[0-9]+]]:gpr64 = UBFMXri %0, 0, 31 + ; CHECK: %1:gpr32 = COPY [[TMP]].sub_32 %1:gpr(s32) = G_EXTRACT %0, 0 - ; CHECK: [[TMP:%[0-9]+]] = UBFMXri %0, 13, 44 - ; CHECK: %2 = COPY [[TMP]].sub_32 + ; CHECK: [[TMP:%[0-9]+]]:gpr64 = UBFMXri %0, 13, 44 + ; CHECK: 
%2:gpr32 = COPY [[TMP]].sub_32 %2:gpr(s32) = G_EXTRACT %0, 13 %w0 = COPY %1 diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir index 5f29f8b62fab1..2c2e475a87a86 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-int-ext.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -18,257 +19,242 @@ ... --- -# CHECK-LABEL: name: anyext_s64_from_s32 name: anyext_s64_from_s32 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr64all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %2 = SUBREG_TO_REG 0, %0, 15 -# CHECK: %1 = COPY %2 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: anyext_s64_from_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %w0 + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[COPY]], 15 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY [[SUBREG_TO_REG]] + ; CHECK: %x0 = COPY [[COPY1]] %0(s32) = COPY %w0 %1(s64) = G_ANYEXT %0 %x0 = COPY %1(s64) ... --- -# CHECK-LABEL: name: anyext_s32_from_s8 name: anyext_s32_from_s8 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32all, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %1 = COPY %0 body: | bb.0: liveins: %w0 - %0(s8) = COPY %w0 + ; CHECK-LABEL: name: anyext_s32_from_s8 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] + ; CHECK: %w0 = COPY [[COPY2]] + %2:gpr(s32) = COPY %w0 + %0(s8) = G_TRUNC %2 %1(s32) = G_ANYEXT %0 %w0 = COPY %1(s32) ... --- -# CHECK-LABEL: name: zext_s64_from_s32 name: zext_s64_from_s32 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %2 = SUBREG_TO_REG 0, %0, 15 -# CHECK: %1 = UBFMXri %2, 0, 31 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: zext_s64_from_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], 15 + ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31 + ; CHECK: %x0 = COPY [[UBFMXri]] %0(s32) = COPY %w0 %1(s64) = G_ZEXT %0 %x0 = COPY %1(s64) ... 
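(Per the NOTE lines added at the top of each file, these checks come from utils/update_mir_test_checks.py rather than being written by hand, so they can be regenerated wholesale whenever the MIR printer or the selector changes. A sketch of that regeneration step, assuming a built llc is on PATH and that the script takes test paths as positional arguments -- the exact flags vary by revision:)

    import subprocess, sys

    # Hypothetical helper: rerun the updater over one of the tests touched here.
    test = "test/CodeGen/AArch64/GlobalISel/select-int-ext.mir"
    subprocess.run([sys.executable, "utils/update_mir_test_checks.py", test],
                   check=True)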
--- -# CHECK-LABEL: name: zext_s32_from_s16 name: zext_s32_from_s16 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %1 = UBFMWri %0, 0, 15 body: | bb.0: liveins: %w0 - %0(s16) = COPY %w0 + ; CHECK-LABEL: name: zext_s32_from_s16 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 15 + ; CHECK: %w0 = COPY [[UBFMWri]] + %2:gpr(s32) = COPY %w0 + %0(s16) = G_TRUNC %2 %1(s32) = G_ZEXT %0 %w0 = COPY %1 ... --- -# CHECK-LABEL: name: zext_s32_from_s8 name: zext_s32_from_s8 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %1 = UBFMWri %0, 0, 7 body: | bb.0: liveins: %w0 - %0(s8) = COPY %w0 + ; CHECK-LABEL: name: zext_s32_from_s8 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 15 + ; CHECK: %w0 = COPY [[UBFMWri]] + %2:gpr(s32) = COPY %w0 + %0(s16) = G_TRUNC %2 %1(s32) = G_ZEXT %0 %w0 = COPY %1(s32) ... --- -# CHECK-LABEL: name: zext_s16_from_s8 name: zext_s16_from_s8 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %1 = UBFMWri %0, 0, 7 body: | bb.0: liveins: %w0 - %0(s8) = COPY %w0 + ; CHECK-LABEL: name: zext_s16_from_s8 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[COPY1]], 0, 7 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[UBFMWri]] + ; CHECK: %w0 = COPY [[COPY2]] + %2:gpr(s32) = COPY %w0 + %0(s8) = G_TRUNC %2 %1(s16) = G_ZEXT %0 - %w0 = COPY %1(s16) + %3:gpr(s32) = G_ANYEXT %1 + %w0 = COPY %3(s32) ... --- -# CHECK-LABEL: name: sext_s64_from_s32 name: sext_s64_from_s32 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %2 = SUBREG_TO_REG 0, %0, 15 -# CHECK: %1 = SBFMXri %2, 0, 31 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: sext_s64_from_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[COPY]], 15 + ; CHECK: [[SBFMXri:%[0-9]+]]:gpr64 = SBFMXri [[SUBREG_TO_REG]], 0, 31 + ; CHECK: %x0 = COPY [[SBFMXri]] %0(s32) = COPY %w0 %1(s64) = G_SEXT %0 %x0 = COPY %1(s64) ... 
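(The extension tests above all bottom out in AArch64's bitfield-move instructions: UBFM zero-extends and SBFM sign-extends when the rotate immediate is 0 and the width immediate is the source width minus one, so UBFMWri _, 0, 15 is a 16-bit zext and SBFMXri _, 0, 31 a 32-to-64-bit sext. A small model of that immr = 0 form, using Python's unbounded integers; function names are mine:)

    def ubfm_zext(x, width):
        # UBFM dst, src, 0, width-1: zero-extend the low `width` bits of src.
        return x & ((1 << width) - 1)

    def sbfm_sext(x, width):
        # SBFM dst, src, 0, width-1: sign-extend the low `width` bits of src.
        low = x & ((1 << width) - 1)
        return low - (1 << width) if low >> (width - 1) else low

    assert ubfm_zext(0xFFFF8001, 16) == 0x8001   # UBFMWri _, 0, 15
    assert sbfm_sext(0x80, 8) == -128            # SBFMWri _, 0, 7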
--- -# CHECK-LABEL: name: sext_s32_from_s16 name: sext_s32_from_s16 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %1 = SBFMWri %0, 0, 15 body: | bb.0: liveins: %w0 - %0(s16) = COPY %w0 + ; CHECK-LABEL: name: sext_s32_from_s16 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[SBFMWri:%[0-9]+]]:gpr32 = SBFMWri [[COPY1]], 0, 15 + ; CHECK: %w0 = COPY [[SBFMWri]] + %2:gpr(s32) = COPY %w0 + %0(s16) = G_TRUNC %2 %1(s32) = G_SEXT %0 %w0 = COPY %1 ... --- -# CHECK-LABEL: name: sext_s32_from_s8 name: sext_s32_from_s8 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %1 = SBFMWri %0, 0, 7 body: | bb.0: liveins: %w0 - %0(s8) = COPY %w0 + ; CHECK-LABEL: name: sext_s32_from_s8 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[SBFMWri:%[0-9]+]]:gpr32 = SBFMWri [[COPY1]], 0, 7 + ; CHECK: %w0 = COPY [[SBFMWri]] + %2:gpr(s32) = COPY %w0 + %0(s8) = G_TRUNC %2 %1(s32) = G_SEXT %0 %w0 = COPY %1(s32) ... --- -# CHECK-LABEL: name: sext_s16_from_s8 name: sext_s16_from_s8 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %1 = SBFMWri %0, 0, 7 body: | bb.0: liveins: %w0 - %0(s8) = COPY %w0 + ; CHECK-LABEL: name: sext_s16_from_s8 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[SBFMWri:%[0-9]+]]:gpr32 = SBFMWri [[COPY1]], 0, 7 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[SBFMWri]] + ; CHECK: %w0 = COPY [[COPY2]] + %2:gpr(s32) = COPY %w0 + %0(s8) = G_TRUNC %2 %1(s16) = G_SEXT %0 - %w0 = COPY %1(s16) + %3:gpr(s32) = G_ANYEXT %1 + %w0 = COPY %3(s32) ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir index b71a9a3d731eb..405634a00aa72 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-int-ptr-casts.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -12,139 +13,127 @@ ... 
--- -# CHECK-LABEL: name: inttoptr_p0_s64 name: inttoptr_p0_s64 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64all, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64all, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %0 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: inttoptr_p0_s64 + ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64all = COPY [[COPY]] + ; CHECK: %x0 = COPY [[COPY1]] %0(s64) = COPY %x0 %1(p0) = G_INTTOPTR %0 %x0 = COPY %1(p0) ... --- -# CHECK-LABEL: name: ptrtoint_s64_p0 name: ptrtoint_s64_p0 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %0 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: ptrtoint_s64_p0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY [[COPY]] + ; CHECK: %x0 = COPY [[COPY1]] %0(p0) = COPY %x0 %1(s64) = G_PTRTOINT %0 %x0 = COPY %1(s64) ... --- -# CHECK-LABEL: name: ptrtoint_s32_p0 name: ptrtoint_s32_p0 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %0.sub_32 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: ptrtoint_s32_p0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32 + ; CHECK: %w0 = COPY [[COPY1]] %0(p0) = COPY %x0 %1(s32) = G_PTRTOINT %0 %w0 = COPY %1(s32) ... --- -# CHECK-LABEL: name: ptrtoint_s16_p0 name: ptrtoint_s16_p0 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %0.sub_32 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: ptrtoint_s16_p0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] + ; CHECK: %w0 = COPY [[COPY2]] %0(p0) = COPY %x0 %1(s16) = G_PTRTOINT %0 - %w0 = COPY %1(s16) + %2:gpr(s32) = G_ANYEXT %1 + %w0 = COPY %2(s32) ... --- -# CHECK-LABEL: name: ptrtoint_s8_p0 name: ptrtoint_s8_p0 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %0.sub_32 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: ptrtoint_s8_p0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] + ; CHECK: %w0 = COPY [[COPY2]] %0(p0) = COPY %x0 %1(s8) = G_PTRTOINT %0 - %w0 = COPY %1(s8) + %2:gpr(s32) = G_ANYEXT %1 + %w0 = COPY %2(s32) ... 
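(ptrtoint to s32 needs no arithmetic: COPY [[COPY]].sub_32 simply reads the low 32 bits of the 64-bit pointer register, and the even narrower results -- s16, s8, and s1 below -- are G_ANYEXTed back to s32 so that the final copy into the 32-bit %w0 is size-matched. The subregister read, modeled on plain integers:)

    def sub_32(x64):
        # COPY %x.sub_32: the low half of a 64-bit register.
        return x64 & 0xFFFFFFFF

    assert sub_32(0x00007FFFDEADBEEF) == 0xDEADBEEF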
--- -# CHECK-LABEL: name: ptrtoint_s1_p0 name: ptrtoint_s1_p0 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %0.sub_32 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: ptrtoint_s1_p0 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] + ; CHECK: %w0 = COPY [[COPY2]] %0(p0) = COPY %x0 %1(s1) = G_PTRTOINT %0 - %w0 = COPY %1(s1) + %2:gpr(s32) = G_ANYEXT %1 + %w0 = COPY %2(s32) ... diff --git a/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-sdiv.mir b/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-sdiv.mir index 43e682c6b6ca5..0387d7ab8ba47 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-sdiv.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-intrinsic-aarch64-sdiv.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -9,28 +10,24 @@ --- # Check that we select a 32-bit GPR sdiv intrinsic into SDIVWrr for GPR32. # Also check that we constrain the register class of the COPY to GPR32. -# CHECK-LABEL: name: sdiv_s32_gpr name: sdiv_s32_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %1 = COPY %w1 -# CHECK: %2 = SDIVWr %0, %1 body: | bb.0: liveins: %w0, %w1 + ; CHECK-LABEL: name: sdiv_s32_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[SDIVWr:%[0-9]+]]:gpr32 = SDIVWr [[COPY]], [[COPY1]] + ; CHECK: %w0 = COPY [[SDIVWr]] %0(s32) = COPY %w0 %1(s32) = COPY %w1 %2(s32) = G_INTRINSIC intrinsic(@llvm.aarch64.sdiv.i32), %0, %1 diff --git a/test/CodeGen/AArch64/GlobalISel/select-load.mir b/test/CodeGen/AArch64/GlobalISel/select-load.mir index d00b98d148be5..00f6c9418b7b2 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-load.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-load.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -28,117 +29,106 @@ define void @load_gep_64_s16_fpr(i16* %addr) { ret void } define void @load_gep_32_s8_fpr(i8* %addr) { ret void } + define void @load_v2s32(i64 *%addr) { ret void } ... 
--- -# CHECK-LABEL: name: load_s64_gpr name: load_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = LDRXui %0, 0 :: (load 8 from %ir.addr) body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_s64_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 0 :: (load 8 from %ir.addr) + ; CHECK: %x0 = COPY [[LDRXui]] %0(p0) = COPY %x0 %1(s64) = G_LOAD %0 :: (load 8 from %ir.addr) %x0 = COPY %1(s64) ... --- -# CHECK-LABEL: name: load_s32_gpr name: load_s32_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = LDRWui %0, 0 :: (load 4 from %ir.addr) body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_s32_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 0 :: (load 4 from %ir.addr) + ; CHECK: %w0 = COPY [[LDRWui]] %0(p0) = COPY %x0 %1(s32) = G_LOAD %0 :: (load 4 from %ir.addr) %w0 = COPY %1(s32) ... --- -# CHECK-LABEL: name: load_s16_gpr name: load_s16_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = LDRHHui %0, 0 :: (load 2 from %ir.addr) body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_s16_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]] + ; CHECK: %w0 = COPY [[COPY1]] %0(p0) = COPY %x0 %1(s16) = G_LOAD %0 :: (load 2 from %ir.addr) - %w0 = COPY %1(s16) + %2:gpr(s32) = G_ANYEXT %1 + %w0 = COPY %2(s32) ... --- -# CHECK-LABEL: name: load_s8_gpr name: load_s8_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = LDRBBui %0, 0 :: (load 1 from %ir.addr) body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_s8_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 0 :: (load 1 from %ir.addr) + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]] + ; CHECK: %w0 = COPY [[COPY1]] %0(p0) = COPY %x0 %1(s8) = G_LOAD %0 :: (load 1 from %ir.addr) - %w0 = COPY %1(s8) + %2:gpr(s32) = G_ANYEXT %1 + %w0 = COPY %2(s32) ... 
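(For the narrow gpr loads, LDRHHui and LDRBBui load a halfword or byte and zero-extend it into a full W register, which is why their results live in gpr32 and then only need a class-adjusting COPY, plus the G_ANYEXT rewrite, to reach %w0. Their memory side, sketched for the little-endian targets these tests use:)

    def ldrhh(mem, off):
        # LDRHHui: halfword load, zero-extended into a 32-bit W register.
        return int.from_bytes(mem[off:off + 2], "little")

    def ldrbb(mem, off):
        # LDRBBui: byte load, zero-extended likewise.
        return mem[off]

    assert ldrhh(b"\x34\x12\x00\x00", 0) == 0x1234
    assert ldrbb(b"\xff", 0) == 0xFF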
--- -# CHECK-LABEL: name: load_fi_s64_gpr name: load_fi_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -146,43 +136,37 @@ registers: stack: - { id: 0, name: ptr0, offset: 0, size: 8, alignment: 8 } -# CHECK: body: -# CHECK: %1 = LDRXui %stack.0.ptr0, 0 :: (load 8) -# CHECK: %x0 = COPY %1 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_fi_s64_gpr + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui %stack.0.ptr0, 0 :: (load 8) + ; CHECK: %x0 = COPY [[LDRXui]] %0(p0) = G_FRAME_INDEX %stack.0.ptr0 %1(s64) = G_LOAD %0 :: (load 8) %x0 = COPY %1(s64) ... --- -# CHECK-LABEL: name: load_gep_128_s64_gpr name: load_gep_128_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %3 = LDRXui %0, 16 :: (load 8 from %ir.addr) -# CHECK: %x0 = COPY %3 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_gep_128_s64_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[COPY]], 16 :: (load 8 from %ir.addr) + ; CHECK: %x0 = COPY [[LDRXui]] %0(p0) = COPY %x0 %1(s64) = G_CONSTANT i64 128 %2(p0) = G_GEP %0, %1 @@ -191,30 +175,24 @@ body: | ... --- -# CHECK-LABEL: name: load_gep_512_s32_gpr name: load_gep_512_s32_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %3 = LDRWui %0, 128 :: (load 4 from %ir.addr) -# CHECK: %w0 = COPY %3 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_gep_512_s32_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRWui:%[0-9]+]]:gpr32 = LDRWui [[COPY]], 128 :: (load 4 from %ir.addr) + ; CHECK: %w0 = COPY [[LDRWui]] %0(p0) = COPY %x0 %1(s64) = G_CONSTANT i64 512 %2(p0) = G_GEP %0, %1 @@ -223,194 +201,168 @@ body: | ... 
--- -# CHECK-LABEL: name: load_gep_64_s16_gpr name: load_gep_64_s16_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %3 = LDRHHui %0, 32 :: (load 2 from %ir.addr) -# CHECK: %w0 = COPY %3 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_gep_64_s16_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 32 :: (load 2 from %ir.addr) + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRHHui]] + ; CHECK: %w0 = COPY [[COPY1]] %0(p0) = COPY %x0 %1(s64) = G_CONSTANT i64 64 %2(p0) = G_GEP %0, %1 %3(s16) = G_LOAD %2 :: (load 2 from %ir.addr) - %w0 = COPY %3 + %4:gpr(s32) = G_ANYEXT %3 + %w0 = COPY %4 ... --- -# CHECK-LABEL: name: load_gep_1_s8_gpr name: load_gep_1_s8_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %3 = LDRBBui %0, 1 :: (load 1 from %ir.addr) -# CHECK: %w0 = COPY %3 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_gep_1_s8_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRBBui:%[0-9]+]]:gpr32 = LDRBBui [[COPY]], 1 :: (load 1 from %ir.addr) + ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[LDRBBui]] + ; CHECK: %w0 = COPY [[COPY1]] %0(p0) = COPY %x0 %1(s64) = G_CONSTANT i64 1 %2(p0) = G_GEP %0, %1 %3(s8) = G_LOAD %2 :: (load 1 from %ir.addr) - %w0 = COPY %3 + %4:gpr(s32) = G_ANYEXT %3 + %w0 = COPY %4 ... --- -# CHECK-LABEL: name: load_s64_fpr name: load_s64_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = LDRDui %0, 0 :: (load 8 from %ir.addr) body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_s64_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load 8 from %ir.addr) + ; CHECK: %d0 = COPY [[LDRDui]] %0(p0) = COPY %x0 %1(s64) = G_LOAD %0 :: (load 8 from %ir.addr) %d0 = COPY %1(s64) ... 
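(The G_GEP tests above also document the unsigned-offset addressing mode: the immediate in LDRXui/LDRWui/LDRHHui is scaled by the access size, so a byte offset of 128 on an 8-byte load encodes as 16, 512 on a 4-byte load as 128, and 64 on a 2-byte load as 32. The scaling rule, with the values taken from the tests:)

    def uimm12_offset(byte_offset, access_size):
        # LDR*ui/STR*ui immediates are in units of the access size.
        assert byte_offset % access_size == 0
        return byte_offset // access_size

    assert uimm12_offset(128, 8) == 16    # LDRXui ..., 16
    assert uimm12_offset(512, 4) == 128   # LDRWui ..., 128
    assert uimm12_offset(64, 2) == 32     # LDRHHui ..., 32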
--- -# CHECK-LABEL: name: load_s32_fpr name: load_s32_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = LDRSui %0, 0 :: (load 4 from %ir.addr) body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_s32_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 0 :: (load 4 from %ir.addr) + ; CHECK: %s0 = COPY [[LDRSui]] %0(p0) = COPY %x0 %1(s32) = G_LOAD %0 :: (load 4 from %ir.addr) %s0 = COPY %1(s32) ... --- -# CHECK-LABEL: name: load_s16_fpr name: load_s16_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: fpr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = LDRHui %0, 0 :: (load 2 from %ir.addr) body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_s16_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 0 :: (load 2 from %ir.addr) + ; CHECK: %h0 = COPY [[LDRHui]] %0(p0) = COPY %x0 %1(s16) = G_LOAD %0 :: (load 2 from %ir.addr) %h0 = COPY %1(s16) ... --- -# CHECK-LABEL: name: load_s8_fpr name: load_s8_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: fpr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = LDRBui %0, 0 :: (load 1 from %ir.addr) body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_s8_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 0 :: (load 1 from %ir.addr) + ; CHECK: %b0 = COPY [[LDRBui]] %0(p0) = COPY %x0 %1(s8) = G_LOAD %0 :: (load 1 from %ir.addr) %b0 = COPY %1(s8) ... --- -# CHECK-LABEL: name: load_gep_8_s64_fpr name: load_gep_8_s64_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: fpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %3 = LDRDui %0, 1 :: (load 8 from %ir.addr) -# CHECK: %d0 = COPY %3 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_gep_8_s64_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 1 :: (load 8 from %ir.addr) + ; CHECK: %d0 = COPY [[LDRDui]] %0(p0) = COPY %x0 %1(s64) = G_CONSTANT i64 8 %2(p0) = G_GEP %0, %1 @@ -419,30 +371,24 @@ body: | ... 
--- -# CHECK-LABEL: name: load_gep_16_s32_fpr name: load_gep_16_s32_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: fpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %3 = LDRSui %0, 4 :: (load 4 from %ir.addr) -# CHECK: %s0 = COPY %3 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_gep_16_s32_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRSui:%[0-9]+]]:fpr32 = LDRSui [[COPY]], 4 :: (load 4 from %ir.addr) + ; CHECK: %s0 = COPY [[LDRSui]] %0(p0) = COPY %x0 %1(s64) = G_CONSTANT i64 16 %2(p0) = G_GEP %0, %1 @@ -451,30 +397,24 @@ body: | ... --- -# CHECK-LABEL: name: load_gep_64_s16_fpr name: load_gep_64_s16_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: fpr16, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: fpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %3 = LDRHui %0, 32 :: (load 2 from %ir.addr) -# CHECK: %h0 = COPY %3 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_gep_64_s16_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRHui:%[0-9]+]]:fpr16 = LDRHui [[COPY]], 32 :: (load 2 from %ir.addr) + ; CHECK: %h0 = COPY [[LDRHui]] %0(p0) = COPY %x0 %1(s64) = G_CONSTANT i64 64 %2(p0) = G_GEP %0, %1 @@ -483,33 +423,48 @@ body: | ... --- -# CHECK-LABEL: name: load_gep_32_s8_fpr name: load_gep_32_s8_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: fpr8, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: fpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %3 = LDRBui %0, 32 :: (load 1 from %ir.addr) -# CHECK: %b0 = COPY %3 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: load_gep_32_s8_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRBui:%[0-9]+]]:fpr8 = LDRBui [[COPY]], 32 :: (load 1 from %ir.addr) + ; CHECK: %b0 = COPY [[LDRBui]] %0(p0) = COPY %x0 %1(s64) = G_CONSTANT i64 32 %2(p0) = G_GEP %0, %1 %3(s8) = G_LOAD %2 :: (load 1 from %ir.addr) %b0 = COPY %3 ... +--- +name: load_v2s32 +legalized: true +regBankSelected: true + +registers: + - { id: 0, class: gpr } + - { id: 1, class: fpr } + +body: | + bb.0: + liveins: %x0 + + ; CHECK-LABEL: name: load_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[COPY]], 0 :: (load 8 from %ir.addr) + ; CHECK: %d0 = COPY [[LDRDui]] + %0(p0) = COPY %x0 + %1(<2 x s32>) = G_LOAD %0 :: (load 8 from %ir.addr) + %d0 = COPY %1(<2 x s32>) +... 
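(The new load_v2s32 test pins down that a <2 x s32> load selects to exactly the same fpr64 LDRDui as a scalar s64 fpr load: both are 8-byte accesses into a D register, so no separate vector opcode is needed for this case. The size identity it relies on:)

    import struct

    # Two 32-bit lanes occupy the same 8 bytes as one 64-bit scalar.
    assert struct.calcsize("<2i") == struct.calcsize("<q") == 8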
diff --git a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir index cd7a79f17d952..0771504032c5e 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-muladd.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-muladd.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -7,19 +8,10 @@ ... --- -# CHECK-LABEL: name: SMADDLrrr_gpr name: SMADDLrrr_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 4, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 5, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 6, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -29,15 +21,16 @@ registers: - { id: 5, class: gpr } - { id: 6, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %w1 -# CHECK: %2 = COPY %w2 -# CHECK: %6 = SMADDLrrr %1, %2, %0 body: | bb.0: liveins: %x0, %w1, %w2 + ; CHECK-LABEL: name: SMADDLrrr_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY %w2 + ; CHECK: [[SMADDLrrr:%[0-9]+]]:gpr64 = SMADDLrrr [[COPY1]], [[COPY2]], [[COPY]] + ; CHECK: %x0 = COPY [[SMADDLrrr]] %0(s64) = COPY %x0 %1(s32) = COPY %w1 %2(s32) = COPY %w2 diff --git a/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir b/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir index a7a33acab2597..def06daae0b42 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-neon-vcvtfxu2fp.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -mattr=+neon,+fullfp16 -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -8,28 +9,23 @@ --- # Check that we select a 64-bit FPR vcvtfxu2fp intrinsic into UCVTFd for FPR64. 
-# CHECK-LABEL: name: vcvtfxu2fp_s64_fpr name: vcvtfxu2fp_s64_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: fpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } registers: - { id: 0, class: fpr } - { id: 1, class: gpr } - { id: 2, class: fpr } -# CHECK: body: -# CHECK: %0 = COPY %d0 -# CHECK: %2 = UCVTFd %0, 12 -# CHECK: %d1 = COPY %2 body: | bb.0: liveins: %d0 + ; CHECK-LABEL: name: vcvtfxu2fp_s64_fpr + ; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY %d0 + ; CHECK: [[UCVTFd:%[0-9]+]]:fpr64 = UCVTFd [[COPY]], 12 + ; CHECK: %d1 = COPY [[UCVTFd]] %0(s64) = COPY %d0 %1(s32) = G_CONSTANT i32 12 %2(s64) = G_INTRINSIC intrinsic(@llvm.aarch64.neon.vcvtfxu2fp.f64), %0, %1 diff --git a/test/CodeGen/AArch64/GlobalISel/select-phi.mir b/test/CodeGen/AArch64/GlobalISel/select-phi.mir index 5a56e79c46e0b..3454ffadcce0a 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-phi.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-phi.mir @@ -4,25 +4,30 @@ source_filename = "/tmp/test.ll" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-unknown" - + define i32 @test_phi(i32 %argc) { entry: %cmp = icmp ugt i32 %argc, 0 br i1 %cmp, label %case1, label %case2 - + case1: ; preds = %entry %tmp1 = add i32 %argc, 1 br label %return - + case2: ; preds = %entry %tmp2 = add i32 %argc, 2 br label %return - + return: ; preds = %case2, %case1 %res = phi i32 [ %tmp1, %case1 ], [ %tmp2, %case2 ] ret i32 %res } + define i64* @test_phi_ptr(i64* %a, i64* %b, i1 %cond) { + entry: + ret i64* null + } + ... --- name: test_phi @@ -32,7 +37,7 @@ legalized: true regBankSelected: true selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr, preferred-register: '' } - { id: 1, class: gpr, preferred-register: '' } - { id: 2, class: gpr, preferred-register: '' } @@ -42,14 +47,14 @@ registers: - { id: 6, class: gpr, preferred-register: '' } - { id: 7, class: gpr, preferred-register: '' } - { id: 8, class: gpr, preferred-register: '' } -liveins: +liveins: body: | bb.1.entry: successors: %bb.2.case1(0x40000000), %bb.3.case2(0x40000000) liveins: %w0 ; CHECK-LABEL: name: test_phi - ; CHECK: [[RES:%.*]] = PHI - + ; CHECK: [[RES:%.*]]:gpr32 = PHI + %0(s32) = COPY %w0 %1(s32) = G_CONSTANT i32 0 %3(s32) = G_CONSTANT i32 1 @@ -58,21 +63,62 @@ body: | %2(s1) = G_TRUNC %8(s32) G_BRCOND %2(s1), %bb.2.case1 G_BR %bb.3.case2 - + bb.2.case1: successors: %bb.4.return(0x80000000) - + %4(s32) = G_ADD %0, %3 G_BR %bb.4.return - + bb.3.case2: successors: %bb.4.return(0x80000000) - + %6(s32) = G_ADD %0, %5 - + bb.4.return: %7(s32) = G_PHI %4(s32), %bb.2.case1, %6(s32), %bb.3.case2 %w0 = COPY %7(s32) RET_ReallyLR implicit %w0 ... 
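(test_phi checks that the G_PHI survives selection as a target-independent PHI, now constrained to gpr32, and the new test_phi_ptr extends the same coverage to p0 values on gpr64. For reference, the control flow the phi merges, rendered in Python -- the icmp is unsigned, so this rendering assumes a non-negative argc:)

    def test_phi(argc):
        # entry: %cmp = icmp ugt i32 %argc, 0
        if argc > 0:
            res = argc + 1   # case1: %tmp1 = add i32 %argc, 1
        else:
            res = argc + 2   # case2: %tmp2 = add i32 %argc, 2
        return res           # return: %res = phi i32 [%tmp1, ...], [%tmp2, ...]

    assert test_phi(0) == 2 and test_phi(3) == 4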
+ +--- +name: test_phi_ptr +alignment: 2 +exposesReturnsTwice: false +legalized: true +regBankSelected: true +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr, preferred-register: '' } + - { id: 1, class: gpr, preferred-register: '' } + - { id: 2, class: gpr, preferred-register: '' } + - { id: 3, class: gpr, preferred-register: '' } + - { id: 4, class: _, preferred-register: '' } + - { id: 5, class: _, preferred-register: '' } +liveins: +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %w2, %x0, %x1 + ; CHECK-LABEL: name: test_phi_ptr + + %0(p0) = COPY %x0 + %1(p0) = COPY %x1 + %6:gpr(s32) = COPY %w2 + %2(s1) = G_TRUNC %6 + G_BRCOND %2(s1), %bb.1 + G_BR %bb.2 + + bb.1: + successors: %bb.2 + + + bb.2: + ; CHECK: %{{[0-9]+}}:gpr64 = PHI %{{[0-9]+}}, %bb.0, %{{[0-9]+}}, %bb.1 + %3(p0) = G_PHI %0(p0), %bb.0, %1(p0), %bb.1 + %x0 = COPY %3(p0) + RET_ReallyLR implicit %x0 + +... diff --git a/test/CodeGen/AArch64/GlobalISel/select-pr32733.mir b/test/CodeGen/AArch64/GlobalISel/select-pr32733.mir index c35d1719f84c8..5e0ead2dbdb37 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-pr32733.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-pr32733.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -5,11 +6,10 @@ entry: ret i32 0 } - + declare i32 @printf(i8*, ...) ... --- -# CHECK-LABEL: name: main name: main alignment: 2 exposesReturnsTwice: false @@ -17,7 +17,7 @@ legalized: true regBankSelected: true selected: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } @@ -34,7 +34,7 @@ registers: - { id: 13, class: gpr } - { id: 14, class: gpr } - { id: 15, class: gpr } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -48,13 +48,15 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false -# CHECK: body: -# CHECK: %1 = COPY %w0 -# CHECK-NOT: %2 = ORNWrr %wzr, %1 -# CHECK: %4 = EONWrr %1, %3 body: | bb.1.entry: liveins: %w0 + ; CHECK-LABEL: name: main + ; CHECK: liveins: %w0 + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[EONWrr:%[0-9]+]]:gpr32 = EONWrr [[COPY]], [[MOVi32imm]] + ; CHECK: %w0 = COPY [[EONWrr]] %0(s32) = G_CONSTANT i32 -1 %3(s32) = G_CONSTANT i32 1 %1(s32) = COPY %w0 diff --git a/test/CodeGen/AArch64/GlobalISel/select-store.mir b/test/CodeGen/AArch64/GlobalISel/select-store.mir index 536e236c27387..11710031e21c9 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-store.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-store.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -26,29 +27,27 @@ define void @store_gep_8_s64_fpr(i64* %addr) { ret void } define void @store_gep_8_s32_fpr(i32* %addr) { ret void } + + define void @store_v2s32(i64 *%addr) { ret void } ... 
--- -# CHECK-LABEL: name: store_s64_gpr name: store_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %x1 -# CHECK: STRXui %1, %0, 0 :: (store 8 into %ir.addr) body: | bb.0: liveins: %x0, %x1 + ; CHECK-LABEL: name: store_s64_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: STRXui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) %0(p0) = COPY %x0 %1(s64) = COPY %x1 G_STORE %1, %0 :: (store 8 into %ir.addr) @@ -56,26 +55,22 @@ body: | ... --- -# CHECK-LABEL: name: store_s32_gpr name: store_s32_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %w1 -# CHECK: STRWui %1, %0, 0 :: (store 4 into %ir.addr) body: | bb.0: liveins: %x0, %w1 + ; CHECK-LABEL: name: store_s32_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: STRWui [[COPY1]], [[COPY]], 0 :: (store 4 into %ir.addr) %0(p0) = COPY %x0 %1(s32) = COPY %w1 G_STORE %1, %0 :: (store 4 into %ir.addr) @@ -83,79 +78,71 @@ body: | ... --- -# CHECK-LABEL: name: store_s16_gpr name: store_s16_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %w1 -# CHECK: STRHHui %1, %0, 0 :: (store 2 into %ir.addr) body: | bb.0: liveins: %x0, %w1 + ; CHECK-LABEL: name: store_s16_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] + ; CHECK: STRHHui [[COPY2]], [[COPY]], 0 :: (store 2 into %ir.addr) %0(p0) = COPY %x0 - %1(s16) = COPY %w1 + %2:gpr(s32) = COPY %w1 + %1(s16) = G_TRUNC %2 G_STORE %1, %0 :: (store 2 into %ir.addr) ... --- -# CHECK-LABEL: name: store_s8_gpr name: store_s8_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %w1 -# CHECK: STRBBui %1, %0, 0 :: (store 1 into %ir.addr) body: | bb.0: liveins: %x0, %w1 + ; CHECK-LABEL: name: store_s8_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] + ; CHECK: STRBBui [[COPY2]], [[COPY]], 0 :: (store 1 into %ir.addr) %0(p0) = COPY %x0 - %1(s8) = COPY %w1 + %2:gpr(s32) = COPY %w1 + %1(s8) = G_TRUNC %2 G_STORE %1, %0 :: (store 1 into %ir.addr) ... 
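(The narrow stores mirror the narrow loads: STRHHui and STRBBui take a full W register and store only its low two bytes or one byte, which is why the s16/s8 tests can feed them a G_TRUNC of a 32-bit copy without any masking instruction. The memory effect, for the little-endian target:)

    import struct

    def strhh(mem, off, w_reg):
        # STRHHui: store the low halfword of a W register.
        mem[off:off + 2] = struct.pack("<H", w_reg & 0xFFFF)

    mem = bytearray(4)
    strhh(mem, 0, 0x12345678)
    assert mem[:2] == b"\x78\x56"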
--- -# CHECK-LABEL: name: store_zero_s64_gpr name: store_zero_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: STRXui %xzr, %0, 0 :: (store 8 into %ir.addr) body: | bb.0: liveins: %x0, %x1 + ; CHECK-LABEL: name: store_zero_s64_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: STRXui %xzr, [[COPY]], 0 :: (store 8 into %ir.addr) %0(p0) = COPY %x0 %1(s64) = G_CONSTANT i64 0 G_STORE %1, %0 :: (store 8 into %ir.addr) @@ -163,25 +150,21 @@ body: | ... --- -# CHECK-LABEL: name: store_zero_s32_gpr name: store_zero_s32_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: STRWui %wzr, %0, 0 :: (store 4 into %ir.addr) body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: store_zero_s32_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: STRWui %wzr, [[COPY]], 0 :: (store 4 into %ir.addr) %0(p0) = COPY %x0 %1(s32) = G_CONSTANT i32 0 G_STORE %1, %0 :: (store 4 into %ir.addr) @@ -189,14 +172,10 @@ body: | ... --- -# CHECK-LABEL: name: store_fi_s64_gpr name: store_fi_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } @@ -204,43 +183,37 @@ registers: stack: - { id: 0, name: ptr0, offset: 0, size: 8, alignment: 8 } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: STRXui %0, %stack.0.ptr0, 0 :: (store 8) body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: store_fi_s64_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: STRXui [[COPY]], %stack.0.ptr0, 0 :: (store 8) %0(p0) = COPY %x0 %1(p0) = G_FRAME_INDEX %stack.0.ptr0 G_STORE %0, %1 :: (store 8) ... --- -# CHECK-LABEL: name: store_gep_128_s64_gpr name: store_gep_128_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %x1 -# CHECK: STRXui %1, %0, 16 :: (store 8 into %ir.addr) body: | bb.0: liveins: %x0, %x1 + ; CHECK-LABEL: name: store_gep_128_s64_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: STRXui [[COPY1]], [[COPY]], 16 :: (store 8 into %ir.addr) %0(p0) = COPY %x0 %1(s64) = COPY %x1 %2(s64) = G_CONSTANT i64 128 @@ -249,30 +222,24 @@ body: | ... 
--- -# CHECK-LABEL: name: store_gep_512_s32_gpr name: store_gep_512_s32_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %w1 -# CHECK: STRWui %1, %0, 128 :: (store 4 into %ir.addr) body: | bb.0: liveins: %x0, %w1 + ; CHECK-LABEL: name: store_gep_512_s32_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: STRWui [[COPY1]], [[COPY]], 128 :: (store 4 into %ir.addr) %0(p0) = COPY %x0 %1(s32) = COPY %w1 %2(s64) = G_CONSTANT i64 512 @@ -281,90 +248,78 @@ body: | ... --- -# CHECK-LABEL: name: store_gep_64_s16_gpr name: store_gep_64_s16_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %w1 -# CHECK: STRHHui %1, %0, 32 :: (store 2 into %ir.addr) body: | bb.0: liveins: %x0, %w1 + ; CHECK-LABEL: name: store_gep_64_s16_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] + ; CHECK: STRHHui [[COPY2]], [[COPY]], 32 :: (store 2 into %ir.addr) %0(p0) = COPY %x0 - %1(s16) = COPY %w1 + %4:gpr(s32) = COPY %w1 + %1(s16) = G_TRUNC %4 %2(s64) = G_CONSTANT i64 64 %3(p0) = G_GEP %0, %2 G_STORE %1, %3 :: (store 2 into %ir.addr) ... --- -# CHECK-LABEL: name: store_gep_1_s8_gpr name: store_gep_1_s8_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %w1 -# CHECK: STRBBui %1, %0, 1 :: (store 1 into %ir.addr) body: | bb.0: liveins: %x0, %w1 + ; CHECK-LABEL: name: store_gep_1_s8_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]] + ; CHECK: STRBBui [[COPY2]], [[COPY]], 1 :: (store 1 into %ir.addr) %0(p0) = COPY %x0 - %1(s8) = COPY %w1 + %4:gpr(s32) = COPY %w1 + %1(s8) = G_TRUNC %4 %2(s64) = G_CONSTANT i64 1 %3(p0) = G_GEP %0, %2 G_STORE %1, %3 :: (store 1 into %ir.addr) ... 
--- -# CHECK-LABEL: name: store_s64_fpr name: store_s64_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %d1 -# CHECK: STRDui %1, %0, 0 :: (store 8 into %ir.addr) body: | bb.0: liveins: %x0, %d1 + ; CHECK-LABEL: name: store_s64_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1 + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) %0(p0) = COPY %x0 %1(s64) = COPY %d1 G_STORE %1, %0 :: (store 8 into %ir.addr) @@ -372,26 +327,22 @@ body: | ... --- -# CHECK-LABEL: name: store_s32_fpr name: store_s32_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %s1 -# CHECK: STRSui %1, %0, 0 :: (store 4 into %ir.addr) body: | bb.0: liveins: %x0, %s1 + ; CHECK-LABEL: name: store_s32_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1 + ; CHECK: STRSui [[COPY1]], [[COPY]], 0 :: (store 4 into %ir.addr) %0(p0) = COPY %x0 %1(s32) = COPY %s1 G_STORE %1, %0 :: (store 4 into %ir.addr) @@ -399,30 +350,24 @@ body: | ... --- -# CHECK-LABEL: name: store_gep_8_s64_fpr name: store_gep_8_s64_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: fpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } - { id: 2, class: gpr } - { id: 3, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %d1 -# CHECK: STRDui %1, %0, 1 :: (store 8 into %ir.addr) body: | bb.0: liveins: %x0, %d1 + ; CHECK-LABEL: name: store_gep_8_s64_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1 + ; CHECK: STRDui [[COPY1]], [[COPY]], 1 :: (store 8 into %ir.addr) %0(p0) = COPY %x0 %1(s64) = COPY %d1 %2(s64) = G_CONSTANT i64 8 @@ -431,33 +376,49 @@ body: | ... --- -# CHECK-LABEL: name: store_gep_8_s32_fpr name: store_gep_8_s32_fpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: fpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 3, class: gpr, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: fpr } - { id: 2, class: gpr } - { id: 3, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %s1 -# CHECK: STRSui %1, %0, 2 :: (store 4 into %ir.addr) body: | bb.0: liveins: %x0, %s1 + ; CHECK-LABEL: name: store_gep_8_s32_fpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY %s1 + ; CHECK: STRSui [[COPY1]], [[COPY]], 2 :: (store 4 into %ir.addr) %0(p0) = COPY %x0 %1(s32) = COPY %s1 %2(s64) = G_CONSTANT i64 8 %3(p0) = G_GEP %0, %2 G_STORE %1, %3 :: (store 4 into %ir.addr) ... 
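(Taken together, the store tests enumerate how the opcode follows from the source's register bank and the store size. A lookup-table summary of the pairs exercised above -- a deliberate simplification of the selector's real logic:)

    # (register bank, size in bytes) -> selected store opcode, as seen above.
    STORE_OPC = {
        ("gpr", 8): "STRXui", ("gpr", 4): "STRWui",
        ("gpr", 2): "STRHHui", ("gpr", 1): "STRBBui",
        ("fpr", 8): "STRDui", ("fpr", 4): "STRSui",
    }
    assert STORE_OPC[("fpr", 4)] == "STRSui"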
+--- +name: store_v2s32 +legalized: true +regBankSelected: true + +registers: + - { id: 0, class: gpr } + - { id: 1, class: fpr } + +body: | + bb.0: + liveins: %x0, %d1 + + ; CHECK-LABEL: name: store_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:fpr64 = COPY %d1 + ; CHECK: STRDui [[COPY1]], [[COPY]], 0 :: (store 8 into %ir.addr) + %0(p0) = COPY %x0 + %1(<2 x s32>) = COPY %d1 + G_STORE %1, %0 :: (store 8 into %ir.addr) + +... diff --git a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir index f43a9ab34ffd2..421a676f7a433 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-trunc.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-trunc.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -9,73 +10,71 @@ ... --- -# CHECK-LABEL: name: trunc_s32_s64 name: trunc_s32_s64 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32sp, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %1 = COPY %0.sub_32 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: trunc_s32_s64 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32 + ; CHECK: %w0 = COPY [[COPY1]] %0(s64) = COPY %x0 %1(s32) = G_TRUNC %0 %w0 = COPY %1(s32) ... --- -# CHECK-LABEL: name: trunc_s8_s64 name: trunc_s8_s64 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %1 = COPY %0.sub_32 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: trunc_s8_s64 + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32 + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] + ; CHECK: %w0 = COPY [[COPY2]] %0(s64) = COPY %x0 %1(s8) = G_TRUNC %0 - %w0 = COPY %1(s8) + %2:gpr(s32) = G_ANYEXT %1 + %w0 = COPY %2(s32) ... --- -# CHECK-LABEL: name: trunc_s1_s32 name: trunc_s1_s32 legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } -# CHECK: body: -# CHECK: %1 = COPY %0 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: trunc_s1_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]] + ; CHECK: [[COPY2:%[0-9]+]]:gpr32all = COPY [[COPY1]] + ; CHECK: %w0 = COPY [[COPY2]] %0(s32) = COPY %w0 %1(s1) = G_TRUNC %0 - %w0 = COPY %1(s1) + %2:gpr(s32) = G_ANYEXT %1 + %w0 = COPY %2(s32) ... 
diff --git a/test/CodeGen/AArch64/GlobalISel/select-xor.mir b/test/CodeGen/AArch64/GlobalISel/select-xor.mir index 7190fda15b8ee..8f0b0dccca6e6 100644 --- a/test/CodeGen/AArch64/GlobalISel/select-xor.mir +++ b/test/CodeGen/AArch64/GlobalISel/select-xor.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s --- | @@ -14,28 +15,24 @@ --- # Check that we select a 32-bit GPR G_XOR into EORWrr on GPR32. # Also check that we constrain the register class of the COPY to GPR32. -# CHECK-LABEL: name: xor_s32_gpr name: xor_s32_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %1 = COPY %w1 -# CHECK: %2 = EORWrr %0, %1 body: | bb.0: liveins: %w0, %w1 + ; CHECK-LABEL: name: xor_s32_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY %w1 + ; CHECK: [[EORWrr:%[0-9]+]]:gpr32 = EORWrr [[COPY]], [[COPY1]] + ; CHECK: %w0 = COPY [[EORWrr]] %0(s32) = COPY %w0 %1(s32) = COPY %w1 %2(s32) = G_XOR %0, %1 @@ -44,28 +41,24 @@ body: | --- # Same as xor_s32_gpr, for 64-bit operations. -# CHECK-LABEL: name: xor_s64_gpr name: xor_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %1 = COPY %x1 -# CHECK: %2 = EORXrr %0, %1 body: | bb.0: liveins: %x0, %x1 + ; CHECK-LABEL: name: xor_s64_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY %x1 + ; CHECK: [[EORXrr:%[0-9]+]]:gpr64 = EORXrr [[COPY]], [[COPY1]] + ; CHECK: %x0 = COPY [[EORXrr]] %0(s64) = COPY %x0 %1(s64) = COPY %x1 %2(s64) = G_XOR %0, %1 @@ -75,27 +68,23 @@ body: | --- # Check that we select a 32-bit GPR G_XOR into EORWrr on GPR32. # Also check that we constrain the register class of the COPY to GPR32. -# CHECK-LABEL: name: xor_constant_n1_s32_gpr name: xor_constant_n1_s32_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %w0 -# CHECK: %2 = ORNWrr %wzr, %0 body: | bb.0: liveins: %w0 + ; CHECK-LABEL: name: xor_constant_n1_s32_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr %wzr, [[COPY]] + ; CHECK: %w0 = COPY [[ORNWrr]] %0(s32) = COPY %w0 %1(s32) = G_CONSTANT i32 -1 %2(s32) = G_XOR %0, %1 @@ -104,27 +93,23 @@ body: | --- # Same as xor_constant_n1_s32_gpr, for 64-bit operations.
-# CHECK-LABEL: name: xor_constant_n1_s64_gpr name: xor_constant_n1_s64_gpr legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr64, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: body: -# CHECK: %0 = COPY %x0 -# CHECK: %2 = ORNXrr %xzr, %0 body: | bb.0: liveins: %x0 + ; CHECK-LABEL: name: xor_constant_n1_s64_gpr + ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[ORNXrr:%[0-9]+]]:gpr64 = ORNXrr %xzr, [[COPY]] + ; CHECK: %x0 = COPY [[ORNXrr]] %0(s64) = COPY %x0 %1(s64) = G_CONSTANT i64 -1 %2(s64) = G_XOR %0, %1 @@ -133,26 +118,25 @@ body: | --- # Check that we can obtain constants from other basic blocks. -# CHECK-LABEL: name: xor_constant_n1_s32_gpr_2bb name: xor_constant_n1_s32_gpr_2bb legalized: true regBankSelected: true -# CHECK: registers: -# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } -# CHECK-NEXT: - { id: 1, class: gpr, preferred-register: '' } -# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } -# CHECK: body: -# CHECK: B %bb.1 -# CHECK: %0 = COPY %w0 -# CHECK: %2 = ORNWrr %wzr, %0 body: | + ; CHECK-LABEL: name: xor_constant_n1_s32_gpr_2bb + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %w0 + ; CHECK: [[ORNWrr:%[0-9]+]]:gpr32 = ORNWrr %wzr, [[COPY]] + ; CHECK: %w0 = COPY [[ORNWrr]] bb.0: liveins: %w0, %w1 successors: %bb.1 diff --git a/test/CodeGen/AArch64/GlobalISel/select.mir b/test/CodeGen/AArch64/GlobalISel/select.mir index dfd81337844a8..c13b27adbb182 100644 --- a/test/CodeGen/AArch64/GlobalISel/select.mir +++ b/test/CodeGen/AArch64/GlobalISel/select.mir @@ -43,7 +43,7 @@ stack: - { id: 0, name: ptr0, offset: 0, size: 8, alignment: 8 } # CHECK: body: -# CHECK: %0 = ADDXri %stack.0.ptr0, 0, 0 +# CHECK: %0:gpr64sp = ADDXri %stack.0.ptr0, 0, 0 body: | bb.0: %0(p0) = G_FRAME_INDEX %stack.0.ptr0 @@ -61,8 +61,8 @@ registers: - { id: 2, class: gpr } # CHECK: body: -# CHECK: %1 = MOVi64imm 42 -# CHECK: %2 = ADDXrr %0, %1 +# CHECK: %1:gpr64 = MOVi64imm 42 +# CHECK: %2:gpr64 = ADDXrr %0, %1 body: | bb.0: liveins: %x0 @@ -79,7 +79,7 @@ legalized: true regBankSelected: true # CHECK: body: -# CHECK: %1 = ANDXri %0, 8060 +# CHECK: %1:gpr64sp = ANDXri %0, 8060 body: | bb.0: liveins: %x0 @@ -98,9 +98,9 @@ registers: - { id: 0, class: gpr } # CHECK: body: -# IOS: %0 = MOVaddr target-flags(aarch64-page) @var_local, target-flags(aarch64-pageoff, aarch64-nc) @var_local -# LINUX-DEFAULT: %0 = MOVaddr target-flags(aarch64-page) @var_local, target-flags(aarch64-pageoff, aarch64-nc) @var_local -# LINUX-PIC: %0 = LOADgot target-flags(aarch64-got) @var_local +# IOS: %0:gpr64 = MOVaddr target-flags(aarch64-page) @var_local, target-flags(aarch64-pageoff, aarch64-nc) @var_local +# LINUX-DEFAULT: %0:gpr64 = MOVaddr target-flags(aarch64-page) @var_local, target-flags(aarch64-pageoff, aarch64-nc) @var_local +# LINUX-PIC: %0:gpr64 = LOADgot target-flags(aarch64-got) @var_local body: | bb.0: %0(p0) = G_GLOBAL_VALUE @var_local @@ -116,9 +116,9 @@ registers: - { id: 0, class: gpr } # CHECK: body: -# IOS: %0 = LOADgot target-flags(aarch64-got) @var_got -# LINUX-DEFAULT: %0 = MOVaddr target-flags(aarch64-page) @var_got, target-flags(aarch64-pageoff, aarch64-nc) @var_got -# 
LINUX-PIC: %0 = LOADgot target-flags(aarch64-got) @var_got +# IOS: %0:gpr64 = LOADgot target-flags(aarch64-got) @var_got +# LINUX-DEFAULT: %0:gpr64 = MOVaddr target-flags(aarch64-page) @var_got, target-flags(aarch64-pageoff, aarch64-nc) @var_got +# LINUX-PIC: %0:gpr64 = LOADgot target-flags(aarch64-got) @var_got body: | bb.0: %0(p0) = G_GLOBAL_VALUE @var_got @@ -148,16 +148,19 @@ registers: - { id: 6, class: gpr } - { id: 7, class: gpr } - { id: 8, class: gpr } + - { id: 9, class: gpr } + - { id: 10, class: gpr } + - { id: 11, class: gpr } # CHECK: body: # CHECK: %wzr = SUBSWrr %0, %0, implicit-def %nzcv -# CHECK: %1 = CSINCWr %wzr, %wzr, 1, implicit %nzcv +# CHECK: %1:gpr32 = CSINCWr %wzr, %wzr, 1, implicit %nzcv # CHECK: %xzr = SUBSXrr %2, %2, implicit-def %nzcv -# CHECK: %3 = CSINCWr %wzr, %wzr, 3, implicit %nzcv +# CHECK: %3:gpr32 = CSINCWr %wzr, %wzr, 3, implicit %nzcv # CHECK: %xzr = SUBSXrr %4, %4, implicit-def %nzcv -# CHECK: %5 = CSINCWr %wzr, %wzr, 0, implicit %nzcv +# CHECK: %5:gpr32 = CSINCWr %wzr, %wzr, 0, implicit %nzcv body: | bb.0: @@ -166,17 +169,20 @@ body: | %0(s32) = COPY %w0 %1(s32) = G_ICMP intpred(eq), %0, %0 %6(s1) = G_TRUNC %1(s32) - %w0 = COPY %6(s1) + %9(s32) = G_ANYEXT %6 + %w0 = COPY %9(s32) %2(s64) = COPY %x0 %3(s32) = G_ICMP intpred(uge), %2, %2 %7(s1) = G_TRUNC %3(s32) - %w0 = COPY %7(s1) + %10(s32) = G_ANYEXT %7 + %w0 = COPY %10(s32) %4(p0) = COPY %x0 %5(s32) = G_ICMP intpred(ne), %4, %4 %8(s1) = G_TRUNC %5(s32) - %w0 = COPY %8(s1) + %11(s32) = G_ANYEXT %8 + %w0 = COPY %11(s32) ... --- @@ -199,15 +205,17 @@ registers: - { id: 3, class: gpr } - { id: 4, class: gpr } - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } # CHECK: body: # CHECK: FCMPSrr %0, %0, implicit-def %nzcv -# CHECK: [[TST_MI:%[0-9]+]] = CSINCWr %wzr, %wzr, 5, implicit %nzcv -# CHECK: [[TST_GT:%[0-9]+]] = CSINCWr %wzr, %wzr, 13, implicit %nzcv -# CHECK: %1 = ORRWrr [[TST_MI]], [[TST_GT]] +# CHECK: [[TST_MI:%[0-9]+]]:gpr32 = CSINCWr %wzr, %wzr, 5, implicit %nzcv +# CHECK: [[TST_GT:%[0-9]+]]:gpr32 = CSINCWr %wzr, %wzr, 13, implicit %nzcv +# CHECK: %1:gpr32 = ORRWrr [[TST_MI]], [[TST_GT]] # CHECK: FCMPDrr %2, %2, implicit-def %nzcv -# CHECK: %3 = CSINCWr %wzr, %wzr, 4, implicit %nzcv +# CHECK: %3:gpr32 = CSINCWr %wzr, %wzr, 4, implicit %nzcv body: | bb.0: @@ -216,12 +224,14 @@ body: | %0(s32) = COPY %s0 %1(s32) = G_FCMP floatpred(one), %0, %0 %4(s1) = G_TRUNC %1(s32) - %w0 = COPY %4(s1) + %6(s32) = G_ANYEXT %4 + %w0 = COPY %6(s32) %2(s64) = COPY %d0 %3(s32) = G_FCMP floatpred(uge), %2, %2 %5(s1) = G_TRUNC %3(s32) - %w0 = COPY %5(s1) + %7(s32) = G_ANYEXT %5 + %w0 = COPY %7(s32) ... 
@@ -243,14 +253,15 @@ registers: # CHECK: body: # CHECK: bb.1: -# CHECK: %2 = PHI %0, %bb.0, %2, %bb.1 +# CHECK: %2:fpr32 = PHI %0, %bb.0, %2, %bb.1 body: | bb.0: liveins: %s0, %w0 successors: %bb.1 %0(s32) = COPY %s0 - %1(s1) = COPY %w0 + %3:gpr(s32) = COPY %w0 + %1(s1) = G_TRUNC %3 bb.1: successors: %bb.1, %bb.2 @@ -294,15 +305,16 @@ registers: # CHECK: body: # CHECK: %wzr = ANDSWri %0, 0, implicit-def %nzcv -# CHECK: %3 = CSELWr %1, %2, 1, implicit %nzcv +# CHECK: %3:gpr32 = CSELWr %1, %2, 1, implicit %nzcv # CHECK: %wzr = ANDSWri %0, 0, implicit-def %nzcv -# CHECK: %6 = CSELXr %4, %5, 1, implicit %nzcv +# CHECK: %6:gpr64 = CSELXr %4, %5, 1, implicit %nzcv # CHECK: %wzr = ANDSWri %0, 0, implicit-def %nzcv -# CHECK: %9 = CSELXr %7, %8, 1, implicit %nzcv +# CHECK: %9:gpr64 = CSELXr %7, %8, 1, implicit %nzcv body: | bb.0: liveins: %w0, %w1, %w2 - %0(s1) = COPY %w0 + %10:gpr(s32) = COPY %w0 + %0(s1) = G_TRUNC %10 %1(s32) = COPY %w1 %2(s32) = COPY %w2 diff --git a/test/CodeGen/AArch64/GlobalISel/translate-gep.ll b/test/CodeGen/AArch64/GlobalISel/translate-gep.ll index e4c18757418d0..865315bbe0a31 100644 --- a/test/CodeGen/AArch64/GlobalISel/translate-gep.ll +++ b/test/CodeGen/AArch64/GlobalISel/translate-gep.ll @@ -4,9 +4,9 @@ define %type* @first_offset_const(%type* %addr) { ; CHECK-LABEL: name: first_offset_const -; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[OFFSET:%[0-9]+]](s64) = G_CONSTANT i64 32 -; CHECK: [[RES:%[0-9]+]](p0) = G_GEP [[BASE]], [[OFFSET]](s64) +; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 +; CHECK: [[RES:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[OFFSET]](s64) ; CHECK: %x0 = COPY [[RES]](p0) %res = getelementptr %type, %type* %addr, i32 1 @@ -15,8 +15,8 @@ define %type* @first_offset_const(%type* %addr) { define %type* @first_offset_trivial(%type* %addr) { ; CHECK-LABEL: name: first_offset_trivial -; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[TRIVIAL:%[0-9]+]](p0) = COPY [[BASE]](p0) +; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[TRIVIAL:%[0-9]+]]:_(p0) = COPY [[BASE]](p0) ; CHECK: %x0 = COPY [[TRIVIAL]](p0) %res = getelementptr %type, %type* %addr, i32 0 @@ -25,12 +25,12 @@ define %type* @first_offset_trivial(%type* %addr) { define %type* @first_offset_variable(%type* %addr, i64 %idx) { ; CHECK-LABEL: name: first_offset_variable -; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[IDX:%[0-9]+]](s64) = COPY %x1 -; CHECK: [[SIZE:%[0-9]+]](s64) = G_CONSTANT i64 32 -; CHECK: [[OFFSET:%[0-9]+]](s64) = G_MUL [[SIZE]], [[IDX]] -; CHECK: [[STEP0:%[0-9]+]](p0) = G_GEP [[BASE]], [[OFFSET]](s64) -; CHECK: [[RES:%[0-9]+]](p0) = COPY [[STEP0]](p0) +; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[IDX:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK: [[SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 +; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_MUL [[SIZE]], [[IDX]] +; CHECK: [[STEP0:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[OFFSET]](s64) +; CHECK: [[RES:%[0-9]+]]:_(p0) = COPY [[STEP0]](p0) ; CHECK: %x0 = COPY [[RES]](p0) %res = getelementptr %type, %type* %addr, i64 %idx @@ -39,13 +39,13 @@ define %type* @first_offset_variable(%type* %addr, i64 %idx) { define %type* @first_offset_ext(%type* %addr, i32 %idx) { ; CHECK-LABEL: name: first_offset_ext -; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[IDX32:%[0-9]+]](s32) = COPY %w1 -; CHECK: [[SIZE:%[0-9]+]](s64) = G_CONSTANT i64 32 -; CHECK: [[IDX64:%[0-9]+]](s64) = G_SEXT [[IDX32]](s32) -; CHECK: [[OFFSET:%[0-9]+]](s64) = G_MUL [[SIZE]], [[IDX64]] -; CHECK: [[STEP0:%[0-9]+]](p0) = 
G_GEP [[BASE]], [[OFFSET]](s64) -; CHECK: [[RES:%[0-9]+]](p0) = COPY [[STEP0]](p0) +; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[IDX32:%[0-9]+]]:_(s32) = COPY %w1 +; CHECK: [[SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 +; CHECK: [[IDX64:%[0-9]+]]:_(s64) = G_SEXT [[IDX32]](s32) +; CHECK: [[OFFSET:%[0-9]+]]:_(s64) = G_MUL [[SIZE]], [[IDX64]] +; CHECK: [[STEP0:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[OFFSET]](s64) +; CHECK: [[RES:%[0-9]+]]:_(p0) = COPY [[STEP0]](p0) ; CHECK: %x0 = COPY [[RES]](p0) %res = getelementptr %type, %type* %addr, i32 %idx @@ -55,14 +55,14 @@ define %type* @first_offset_ext(%type* %addr, i32 %idx) { %type1 = type [4 x [4 x i32]] define i32* @const_then_var(%type1* %addr, i64 %idx) { ; CHECK-LABEL: name: const_then_var -; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[IDX:%[0-9]+]](s64) = COPY %x1 -; CHECK: [[OFFSET1:%[0-9]+]](s64) = G_CONSTANT i64 272 -; CHECK: [[SIZE:%[0-9]+]](s64) = G_CONSTANT i64 4 -; CHECK: [[BASE1:%[0-9]+]](p0) = G_GEP [[BASE]], [[OFFSET1]](s64) -; CHECK: [[OFFSET2:%[0-9]+]](s64) = G_MUL [[SIZE]], [[IDX]] -; CHECK: [[BASE2:%[0-9]+]](p0) = G_GEP [[BASE1]], [[OFFSET2]](s64) -; CHECK: [[RES:%[0-9]+]](p0) = COPY [[BASE2]](p0) +; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[IDX:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK: [[OFFSET1:%[0-9]+]]:_(s64) = G_CONSTANT i64 272 +; CHECK: [[SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +; CHECK: [[BASE1:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[OFFSET1]](s64) +; CHECK: [[OFFSET2:%[0-9]+]]:_(s64) = G_MUL [[SIZE]], [[IDX]] +; CHECK: [[BASE2:%[0-9]+]]:_(p0) = G_GEP [[BASE1]], [[OFFSET2]](s64) +; CHECK: [[RES:%[0-9]+]]:_(p0) = COPY [[BASE2]](p0) ; CHECK: %x0 = COPY [[RES]](p0) %res = getelementptr %type1, %type1* %addr, i32 4, i32 1, i64 %idx @@ -71,13 +71,13 @@ define i32* @const_then_var(%type1* %addr, i64 %idx) { define i32* @var_then_const(%type1* %addr, i64 %idx) { ; CHECK-LABEL: name: var_then_const -; CHECK: [[BASE:%[0-9]+]](p0) = COPY %x0 -; CHECK: [[IDX:%[0-9]+]](s64) = COPY %x1 -; CHECK: [[SIZE:%[0-9]+]](s64) = G_CONSTANT i64 64 -; CHECK: [[OFFSET2:%[0-9]+]](s64) = G_CONSTANT i64 40 -; CHECK: [[OFFSET1:%[0-9]+]](s64) = G_MUL [[SIZE]], [[IDX]] -; CHECK: [[BASE1:%[0-9]+]](p0) = G_GEP [[BASE]], [[OFFSET1]](s64) -; CHECK: [[BASE2:%[0-9]+]](p0) = G_GEP [[BASE1]], [[OFFSET2]](s64) +; CHECK: [[BASE:%[0-9]+]]:_(p0) = COPY %x0 +; CHECK: [[IDX:%[0-9]+]]:_(s64) = COPY %x1 +; CHECK: [[SIZE:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 +; CHECK: [[OFFSET2:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 +; CHECK: [[OFFSET1:%[0-9]+]]:_(s64) = G_MUL [[SIZE]], [[IDX]] +; CHECK: [[BASE1:%[0-9]+]]:_(p0) = G_GEP [[BASE]], [[OFFSET1]](s64) +; CHECK: [[BASE2:%[0-9]+]]:_(p0) = G_GEP [[BASE1]], [[OFFSET2]](s64) ; CHECK: %x0 = COPY [[BASE2]](p0) %res = getelementptr %type1, %type1* %addr, i64 %idx, i32 2, i32 2 diff --git a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll index af0ab57b0b9fe..f92a5721a4eed 100644 --- a/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll +++ b/test/CodeGen/AArch64/GlobalISel/varargs-ios-translator.ll @@ -6,8 +6,8 @@ define void @test_varargs_sentinel(i8* %list, i64, i64, i64, i64, i64, i64, i64, ; CHECK: fixedStack: ; CHECK: - { id: [[VARARGS_SLOT:[0-9]+]], type: default, offset: 8 ; CHECK: body: -; CHECK: [[LIST:%[0-9]+]] = COPY %x0 -; CHECK: [[VARARGS_AREA:%[0-9]+]] = ADDXri %fixed-stack.[[VARARGS_SLOT]], 0, 0 +; CHECK: [[LIST:%[0-9]+]]:gpr64sp = COPY %x0 +; CHECK: [[VARARGS_AREA:%[0-9]+]]:gpr64common = ADDXri %fixed-stack.[[VARARGS_SLOT]], 0, 
0 ; CHECK: STRXui [[VARARGS_AREA]], [[LIST]], 0 :: (store 8 into %ir.list, align 0) call void @llvm.va_start(i8* %list) ret void diff --git a/test/CodeGen/AArch64/GlobalISel/vastart.ll b/test/CodeGen/AArch64/GlobalISel/vastart.ll index ae44e8fc5dea2..1fb3eb55e677e 100644 --- a/test/CodeGen/AArch64/GlobalISel/vastart.ll +++ b/test/CodeGen/AArch64/GlobalISel/vastart.ll @@ -5,7 +5,7 @@ declare void @llvm.va_start(i8*) define void @test_va_start(i8* %list) { ; CHECK-LABEL: name: test_va_start -; CHECK: [[LIST:%[0-9]+]](p0) = COPY %x0 +; CHECK: [[LIST:%[0-9]+]]:_(p0) = COPY %x0 ; CHECK-IOS: G_VASTART [[LIST]](p0) :: (store 8 into %ir.list, align 0) ; CHECK-LINUX: G_VASTART [[LIST]](p0) :: (store 32 into %ir.list, align 0) call void @llvm.va_start(i8* %list) diff --git a/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll index 2b4e438a13aa2..1b2ed4b89521b 100644 --- a/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll +++ b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -19,9 +19,9 @@ entry: do.body.i: ; CHECK-LABEL: do.body.i: -; CHECK: %uglygep1 = getelementptr i8, i8* %uglygep, i64 %3 -; CHECK-NEXT: %4 = bitcast i8* %uglygep1 to i32* -; CHECK-NOT: %uglygep1 = getelementptr i8, i8* %uglygep, i64 1032 +; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* +; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032 %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] diff --git a/test/CodeGen/AArch64/arm64-jumptable.ll b/test/CodeGen/AArch64/arm64-jumptable.ll index c7f213fa8464a..f5c2ee6da0bfc 100644 --- a/test/CodeGen/AArch64/arm64-jumptable.ll +++ b/test/CodeGen/AArch64/arm64-jumptable.ll @@ -21,7 +21,7 @@ bb3: store i32 3, i32* %to br label %exit bb4: - store i32 4, i32* %to + store i32 5, i32* %to br label %exit exit: ret void diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll index 2585676e1bd29..1a1a20b3d1f65 100644 --- a/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -140,7 +140,7 @@ define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) { define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) { ; CHECK-LABEL: ins1f2: -; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] +; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d %tmp3 = extractelement <1 x double> %tmp1, i32 0 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 ret <2 x double> %tmp4 diff --git a/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll b/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll index 7efb4bf6d5963..f61f98a4d5119 100644 --- a/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll +++ b/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll @@ -36,7 +36,9 @@ ; HOTNESS-NOT: Executing Pass ; HOTNESS: block-frequency: empty_func ; HOTNESS-NOT: Executing Pass -; HOTNESS: Executing Pass 'AArch64 Assembly Printer' +; HOTNESS: Executing Pass 'MachineDominator Tree Construction' +; HOTNESS-NEXT: Executing Pass 'Machine Natural Loop Construction' +; HOTNESS-NEXT: Executing Pass 'AArch64 Assembly Printer' ; HOTNESS: arm64-summary-remarks.ll:5:0: 1 instructions in function (hotness: 33) @@ -45,6 +47,8 @@ ; NO_HOTNESS-NEXT: Freeing Pass 'Implement the 'patchable-function' attribute' ; NO_HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis' ; NO_HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter' +; NO_HOTNESS-NEXT: Executing Pass 'MachineDominator Tree Construction' +; NO_HOTNESS-NEXT: 
Executing Pass 'Machine Natural Loop Construction' ; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Assembly Printer' ; NO_HOTNESS: arm64-summary-remarks.ll:5:0: 1 instructions in function{{$}} diff --git a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll index f68a9debd5f21..ccd12cdf67449 100644 --- a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll +++ b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll @@ -13,7 +13,6 @@ define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { entry: ; CHECK-LABEL: jscall_patchpoint_codegen: -; CHECK: Lcfi ; CHECK: str x{{.+}}, [sp] ; CHECK-NEXT: mov x0, x{{.+}} ; CHECK: Ltmp @@ -22,7 +21,6 @@ entry: ; CHECK: movk x16, #48879 ; CHECK-NEXT: blr x16 ; FAST-LABEL: jscall_patchpoint_codegen: -; FAST: Lcfi ; FAST: str x{{.+}}, [sp] ; FAST: Ltmp ; FAST-NEXT: mov x16, #281470681743360 @@ -40,7 +38,6 @@ entry: define i64 @jscall_patchpoint_codegen2(i64 %callee) { entry: ; CHECK-LABEL: jscall_patchpoint_codegen2: -; CHECK: Lcfi ; CHECK: orr w[[REG:[0-9]+]], wzr, #0x6 ; CHECK-NEXT: str x[[REG]], [sp, #24] ; CHECK-NEXT: orr w[[REG:[0-9]+]], wzr, #0x4 @@ -53,7 +50,6 @@ entry: ; CHECK-NEXT: movk x16, #48879 ; CHECK-NEXT: blr x16 ; FAST-LABEL: jscall_patchpoint_codegen2: -; FAST: Lcfi ; FAST: orr [[REG1:x[0-9]+]], xzr, #0x2 ; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 ; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 @@ -74,7 +70,6 @@ entry: define i64 @jscall_patchpoint_codegen3(i64 %callee) { entry: ; CHECK-LABEL: jscall_patchpoint_codegen3: -; CHECK: Lcfi ; CHECK: mov w[[REG:[0-9]+]], #10 ; CHECK-NEXT: str x[[REG]], [sp, #48] ; CHECK-NEXT: orr w[[REG:[0-9]+]], wzr, #0x8 @@ -91,7 +86,6 @@ entry: ; CHECK-NEXT: movk x16, #48879 ; CHECK-NEXT: blr x16 ; FAST-LABEL: jscall_patchpoint_codegen3: -; FAST: Lcfi ; FAST: orr [[REG1:x[0-9]+]], xzr, #0x2 ; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 ; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 diff --git a/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir b/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir index 9ad47c721c3a0..43d20394be454 100644 --- a/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir +++ b/test/CodeGen/AArch64/arm64-regress-opt-cmp.mir @@ -1,5 +1,5 @@ # RUN: llc -mtriple=aarch64-linux-gnu -run-pass peephole-opt -o - %s | FileCheck %s -# CHECK: %1 = ANDWri {{.*}} +# CHECK: %1:gpr32common = ANDWri {{.*}} # CHECK-NEXT: %wzr = SUBSWri {{.*}} --- | define i32 @test01() nounwind { diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll index 8b212aa6c1dab..fc167d2f34d42 100644 --- a/test/CodeGen/AArch64/arm64-xaluo.ll +++ b/test/CodeGen/AArch64/arm64-xaluo.ll @@ -282,6 +282,17 @@ entry: ret i32 %ret } +define i1 @saddo.not.i32(i32 %v1, i32 %v2) { +entry: +; CHECK-LABEL: saddo.not.i32 +; CHECK: cmn w0, w1 +; CHECK-NEXT: cset w0, vc + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) + %obit = extractvalue {i32, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + define i64 @saddo.select.i64(i64 %v1, i64 %v2) { entry: ; CHECK-LABEL: saddo.select.i64 @@ -293,6 +304,17 @@ entry: ret i64 %ret } +define i1 @saddo.not.i64(i64 %v1, i64 %v2) { +entry: +; CHECK-LABEL: saddo.not.i64 +; CHECK: cmn x0, x1 +; CHECK-NEXT: cset w0, vc + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2) + %obit = extractvalue {i64, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + define i32 @uaddo.select.i32(i32 %v1, i32 %v2) { entry: ; CHECK-LABEL: uaddo.select.i32 @@ -304,6 +326,17 @@ entry: ret i32 %ret } 
+define i1 @uaddo.not.i32(i32 %v1, i32 %v2) { +entry: +; CHECK-LABEL: uaddo.not.i32 +; CHECK: cmn w0, w1 +; CHECK-NEXT: cset w0, lo + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) + %obit = extractvalue {i32, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + define i64 @uaddo.select.i64(i64 %v1, i64 %v2) { entry: ; CHECK-LABEL: uaddo.select.i64 @@ -315,6 +348,17 @@ entry: ret i64 %ret } +define i1 @uaddo.not.i64(i64 %v1, i64 %v2) { +entry: +; CHECK-LABEL: uaddo.not.i64 +; CHECK: cmn x0, x1 +; CHECK-NEXT: cset w0, lo + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2) + %obit = extractvalue {i64, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + define i32 @ssubo.select.i32(i32 %v1, i32 %v2) { entry: ; CHECK-LABEL: ssubo.select.i32 @@ -326,6 +370,17 @@ entry: ret i32 %ret } +define i1 @ssubo.not.i32(i32 %v1, i32 %v2) { +entry: +; CHECK-LABEL: ssubo.not.i32 +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, vc + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2) + %obit = extractvalue {i32, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + define i64 @ssubo.select.i64(i64 %v1, i64 %v2) { entry: ; CHECK-LABEL: ssubo.select.i64 @@ -337,6 +392,17 @@ entry: ret i64 %ret } +define i1 @ssub.not.i64(i64 %v1, i64 %v2) { +entry: +; CHECK-LABEL: ssub.not.i64 +; CHECK: cmp x0, x1 +; CHECK-NEXT: cset w0, vc + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2) + %obit = extractvalue {i64, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + define i32 @usubo.select.i32(i32 %v1, i32 %v2) { entry: ; CHECK-LABEL: usubo.select.i32 @@ -348,6 +414,17 @@ entry: ret i32 %ret } +define i1 @usubo.not.i32(i32 %v1, i32 %v2) { +entry: +; CHECK-LABEL: usubo.not.i32 +; CHECK: cmp w0, w1 +; CHECK-NEXT: cset w0, hs + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2) + %obit = extractvalue {i32, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + define i64 @usubo.select.i64(i64 %v1, i64 %v2) { entry: ; CHECK-LABEL: usubo.select.i64 @@ -359,6 +436,17 @@ entry: ret i64 %ret } +define i1 @usubo.not.i64(i64 %v1, i64 %v2) { +entry: +; CHECK-LABEL: usubo.not.i64 +; CHECK: cmp x0, x1 +; CHECK-NEXT: cset w0, hs + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2) + %obit = extractvalue {i64, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + define i32 @smulo.select.i32(i32 %v1, i32 %v2) { entry: ; CHECK-LABEL: smulo.select.i32 @@ -372,6 +460,19 @@ entry: ret i32 %ret } +define i1 @smulo.not.i32(i32 %v1, i32 %v2) { +entry: +; CHECK-LABEL: smulo.not.i32 +; CHECK: smull x[[MREG:[0-9]+]], w0, w1 +; CHECK-NEXT: lsr x[[SREG:[0-9]+]], x[[MREG]], #32 +; CHECK-NEXT: cmp w[[SREG]], w[[MREG]], asr #31 +; CHECK-NEXT: cset w0, eq + %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) + %obit = extractvalue {i32, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + define i64 @smulo.select.i64(i64 %v1, i64 %v2) { entry: ; CHECK-LABEL: smulo.select.i64 @@ -385,6 +486,19 @@ entry: ret i64 %ret } +define i1 @smulo.not.i64(i64 %v1, i64 %v2) { +entry: +; CHECK-LABEL: smulo.not.i64 +; CHECK: mul [[MREG:x[0-9]+]], x0, x1 +; CHECK-NEXT: smulh [[HREG:x[0-9]+]], x0, x1 +; CHECK-NEXT: cmp [[HREG]], [[MREG]], asr #63 +; CHECK-NEXT: cset w0, eq + %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) + %obit = extractvalue {i64, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + define i32 @umulo.select.i32(i32 %v1, i32 %v2) { entry: ; CHECK-LABEL: umulo.select.i32 @@ -397,6 +511,18 @@ 
entry: ret i32 %ret } +define i1 @umulo.not.i32(i32 %v1, i32 %v2) { +entry: +; CHECK-LABEL: umulo.not.i32 +; CHECK: umull [[MREG:x[0-9]+]], w0, w1 +; CHECK-NEXT: cmp xzr, [[MREG]], lsr #32 +; CHECK-NEXT: cset w0, eq + %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) + %obit = extractvalue {i32, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + define i64 @umulo.select.i64(i64 %v1, i64 %v2) { entry: ; CHECK-LABEL: umulo.select.i64 @@ -409,6 +535,18 @@ entry: ret i64 %ret } +define i1 @umulo.not.i64(i64 %v1, i64 %v2) { +entry: +; CHECK-LABEL: umulo.not.i64 +; CHECK: umulh [[MREG:x[0-9]+]], x0, x1 +; CHECK-NEXT: cmp xzr, [[MREG]] +; CHECK-NEXT: cset w0, eq + %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2) + %obit = extractvalue {i64, i1} %t, 1 + %ret = xor i1 %obit, true + ret i1 %ret +} + ; ; Check the use of the overflow bit in combination with a branch instruction. diff --git a/test/CodeGen/AArch64/cmp-frameindex.ll b/test/CodeGen/AArch64/cmp-frameindex.ll new file mode 100644 index 0000000000000..2d01b76e186c4 --- /dev/null +++ b/test/CodeGen/AArch64/cmp-frameindex.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s + +; CHECK: test_frameindex_cmp: +; CHECK: cmn sp, #{{[0-9]+}} +define void @test_frameindex_cmp() { + %stack = alloca i8 + %stack.int = ptrtoint i8* %stack to i64 + %cmp = icmp ne i64 %stack.int, 0 + br i1 %cmp, label %bb1, label %bb2 + +bb1: + call void @bar() + ret void + +bb2: + ret void +} + +declare void @bar() diff --git a/test/CodeGen/AArch64/cmpxchg-idioms.ll b/test/CodeGen/AArch64/cmpxchg-idioms.ll index 0c008c2697942..cae09b289797a 100644 --- a/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ b/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -91,3 +91,63 @@ end: declare void @bar() declare void @baz() + +define i1 @test_conditional2(i32 %a, i32 %b, i32* %c) { +; CHECK-LABEL: test_conditional2: +; CHECK: [[LOOP:LBB[0-9]+_[0-9]+]]: +; CHECK: ldaxr [[LOADED:w[0-9]+]], [x19] +; CHECK: cmp [[LOADED]], w21 +; CHECK: b.ne [[FAILED:LBB[0-9]+_[0-9]+]] + +; CHECK: stlxr [[STATUS:w[0-9]+]], w20, [x19] +; CHECK: cbnz [[STATUS]], [[LOOP]] +; CHECK: orr [[STATUS]], wzr, #0x1 +; CHECK: b [[PH:LBB[0-9]+_[0-9]+]] + +; CHECK: [[FAILED]]: +; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}} + +; verify the preheader is simplified by latesimplifycfg. 
+; CHECK: [[PH]]: +; CHECK: orr w22, wzr, #0x2 +; CHECK-NOT: orr w22, wzr, #0x4 +; CHECK-NOT: cmn w22, #4 +; CHECK: b [[LOOP2:LBB[0-9]+_[0-9]+]] +; CHECK-NOT: b.ne [[LOOP2]] +; CHECK-NOT: b {{LBB[0-9]+_[0-9]+}} +; CHECK: bl _foo +entry: + %pair = cmpxchg i32* %c, i32 %a, i32 %b seq_cst seq_cst + %success = extractvalue { i32, i1 } %pair, 1 + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %i.0 = phi i32 [ 2, %entry ], [ %dec, %if.end ] + %changed.0.off0 = phi i1 [ %success, %entry ], [ %changed.1.off0, %if.end ] + %dec = add nsw i32 %i.0, -1 + %tobool = icmp eq i32 %i.0, 0 + br i1 %tobool, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.cond + %changed.0.off0.lcssa = phi i1 [ %changed.0.off0, %for.cond ] + ret i1 %changed.0.off0.lcssa + +for.body: ; preds = %for.cond + %or = or i32 %a, %b + %idxprom = sext i32 %dec to i64 + %arrayidx = getelementptr inbounds i32, i32* %c, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %cmp = icmp eq i32 %or, %0 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %for.body + store i32 %or, i32* %arrayidx, align 4 + tail call void @foo() + br label %if.end + +if.end: ; preds = %for.body, %if.then + %changed.1.off0 = phi i1 [ false, %if.then ], [ %changed.0.off0, %for.body ] + br label %for.cond +} + +declare void @foo() diff --git a/test/CodeGen/AArch64/dllimport.ll b/test/CodeGen/AArch64/dllimport.ll new file mode 100644 index 0000000000000..fad049a54cd22 --- /dev/null +++ b/test/CodeGen/AArch64/dllimport.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s + +@var = external dllimport global i32 +@ext = external global i32 +declare dllimport i32 @external() +declare i32 @internal() + +define i32 @get_var() { + %1 = load i32, i32* @var, align 4 + ret i32 %1 +} + +; CHECK-LABEL: get_var +; CHECK: adrp x8, __imp_var +; CHECK: ldr x8, [x8, __imp_var] +; CHECK: ldr w0, [x8] +; CHECK: ret + +define i32 @get_ext() { + %1 = load i32, i32* @ext, align 4 + ret i32 %1 +} + +; CHECK-LABEL: get_ext +; CHECK: adrp x8, ext +; CHECK: ldr w0, [x8, ext] +; CHECK: ret + +define i32* @get_var_pointer() { + ret i32* @var +} + +; CHECK-LABEL: get_var_pointer +; CHECK: adrp x0, __imp_var +; CHECK: ldr x0, [x0, __imp_var] +; CHECK: ret + +define i32 @call_external() { + %call = tail call i32 @external() + ret i32 %call +} + +; CHECK-LABEL: call_external +; CHECK: adrp x0, __imp_external +; CHECK: ldr x0, [x0, __imp_external] +; CHECK: br x0 + +define i32 @call_internal() { + %call = tail call i32 @internal() + ret i32 %call +} + +; CHECK-LABEL: call_internal +; CHECK: b internal diff --git a/test/CodeGen/AArch64/fp16-v16-instructions.ll b/test/CodeGen/AArch64/fp16-v16-instructions.ll index 1af2bd10912f4..d21a150b88cc6 100644 --- a/test/CodeGen/AArch64/fp16-v16-instructions.ll +++ b/test/CodeGen/AArch64/fp16-v16-instructions.ll @@ -11,7 +11,7 @@ define <16 x half> @sitofp_i32(<16 x i32> %a) #0 { ; CHECK-DAG: fcvtn v1.4h, [[S2]] ; CHECK-DAG: v[[R1:[0-9]+]].4h, [[S1]] ; CHECK-DAG: v[[R3:[0-9]+]].4h, [[S3]] -; CHECK-DAg: ins v0.d[1], v[[R1]].d[0] +; CHECK-DAG: ins v0.d[1], v[[R1]].d[0] ; CHECK-DAG: ins v1.d[1], v[[R3]].d[0] %1 = sitofp <16 x i32> %a to <16 x half> @@ -62,7 +62,7 @@ define <16 x half> @uitofp_i32(<16 x i32> %a) #0 { ; CHECK-DAG: fcvtn v1.4h, [[S2]] ; CHECK-DAG: v[[R1:[0-9]+]].4h, [[S1]] ; CHECK-DAG: v[[R3:[0-9]+]].4h, [[S3]] -; CHECK-DAg: ins v0.d[1], v[[R1]].d[0] +; CHECK-DAG: ins v0.d[1], v[[R1]].d[0] ; CHECK-DAG: ins v1.d[1], v[[R3]].d[0] %1 = 
uitofp <16 x i32> %a to <16 x half> diff --git a/test/CodeGen/AArch64/machine-combiner.mir b/test/CodeGen/AArch64/machine-combiner.mir new file mode 100644 index 0000000000000..0f90ef70e4afe --- /dev/null +++ b/test/CodeGen/AArch64/machine-combiner.mir @@ -0,0 +1,48 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 -enable-unsafe-fp-math \ +# RUN: -run-pass machine-combiner -machine-combiner-inc-threshold=0 \ +# RUN: -verify-machineinstrs -o - %s | FileCheck %s +--- +# Test incremental depth updates succeed when triggered after the removal of +# the first instruction in a basic block. + +# CHECK-LABEL: name: inc_update_iterator_test +name: inc_update_iterator_test +registers: + - { id: 0, class: fpr64 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32 } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32 } + - { id: 6, class: gpr32 } + - { id: 7, class: fpr64 } + - { id: 8, class: fpr64 } + - { id: 9, class: fpr64 } +body: | + bb.0: + successors: %bb.1, %bb.2 + + %3 = COPY %w2 + %2 = COPY %w1 + %1 = COPY %w0 + %0 = COPY %d0 + %4 = SUBSWrr %1, %2, implicit-def %nzcv + Bcc 13, %bb.2, implicit %nzcv + B %bb.1 + + bb.1: + ; CHECK: MADDWrrr %1, %2, %3 + %5 = MADDWrrr %1, %2, %wzr + %6 = ADDWrr %3, killed %5 + %7 = SCVTFUWDri killed %6 + ; CHECK: FMADDDrrr %7, %7, %0 + %8 = FMULDrr %7, %7 + %9 = FADDDrr %0, killed %8 + %d0 = COPY %9 + RET_ReallyLR implicit %d0 + + bb.2: + %d0 = COPY %0 + RET_ReallyLR implicit %d0 + +... diff --git a/test/CodeGen/AArch64/machine-outliner-remarks.ll b/test/CodeGen/AArch64/machine-outliner-remarks.ll index 7f3a4f4d49423..1a237a2403ea5 100644 --- a/test/CodeGen/AArch64/machine-outliner-remarks.ll +++ b/test/CodeGen/AArch64/machine-outliner-remarks.ll @@ -1,9 +1,12 @@ -; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -pass-remarks-missed=machine-outliner -o /dev/null 2>&1 | FileCheck %s +; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -pass-remarks=machine-outliner -pass-remarks-missed=machine-outliner -o /dev/null 2>&1 | FileCheck %s ; CHECK: machine-outliner-remarks.ll:5:9: ; CHECK-SAME: Did not outline 2 instructions from 2 locations. -; CHECK-SAME: Instructions from outlining all occurrences (7) >= +; CHECK-SAME: Instructions from outlining all occurrences (9) >= ; CHECK-SAME: Unoutlined instruction count (4) ; CHECK-SAME: (Also found at: machine-outliner-remarks.ll:13:9) +; CHECK: remark: :0:0: Saved 5 instructions by outlining 7 instructions +; CHECK-SAME: from 2 locations. (Found at: machine-outliner-remarks.ll:27:9, +; CHECK-SAME: machine-outliner-remarks.ll:36:1) ; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -o /dev/null -pass-remarks-missed=machine-outliner -pass-remarks-output=%t.yaml ; RUN: cat %t.yaml | FileCheck %s -check-prefix=YAML ; YAML: --- !Missed @@ -19,7 +22,7 @@ ; YAML-NEXT: - NumOccurrences: '2' ; YAML-NEXT: - String: ' locations.' 
; YAML-NEXT: - String: ' Instructions from outlining all occurrences (' -; YAML-NEXT: - OutliningCost: '7' +; YAML-NEXT: - OutliningCost: '9' ; YAML-NEXT: - String: ')' ; YAML-NEXT: - String: ' >= Unoutlined instruction count (' ; YAML-NEXT: - NotOutliningCost: '4' @@ -28,23 +31,68 @@ ; YAML-NEXT: - OtherStartLoc1: 'machine-outliner-remarks.ll:13:9' ; YAML-NEXT: DebugLoc: { File: machine-outliner-remarks.ll, Line: 13, Column: 9 } ; YAML-NEXT: - String: ')' +; YAML: --- !Passed +; YAML-NEXT: Pass: machine-outliner +; YAML-NEXT: Name: OutlinedFunction +; YAML-NEXT: Function: OUTLINED_FUNCTION_0 +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'Saved ' +; YAML-NEXT: - OutliningBenefit: '5' +; YAML-NEXT: - String: ' instructions by ' +; YAML-NEXT: - String: 'outlining ' +; YAML-NEXT: - Length: '7' +; YAML-NEXT: - String: ' instructions ' +; YAML-NEXT: - String: 'from ' +; YAML-NEXT: - NumOccurrences: '2' +; YAML-NEXT: - String: ' locations. ' +; YAML-NEXT: - String: '(Found at: ' +; YAML-NEXT: - StartLoc0: 'machine-outliner-remarks.ll:27:9' +; YAML-NEXT: DebugLoc: { File: machine-outliner-remarks.ll, Line: 27, Column: 9 } +; YAML-NEXT: - String: ', ' +; YAML-NEXT: - StartLoc1: 'machine-outliner-remarks.ll:36:1' +; YAML-NEXT: DebugLoc: { File: machine-outliner-remarks.ll, Line: 36, Column: 1 } +; YAML-NEXT: - String: ')' define void @dog() #0 !dbg !8 { entry: %x = alloca i32, align 4 %y = alloca i32, align 4 - store i32 0, i32* %x, align 4, !dbg !11 + store i32 0, i32* %x, align 4 store i32 1, i32* %y, align 4, !dbg !12 - ret void, !dbg !13 + ret void } define void @cat() #0 !dbg !14 { entry: %x = alloca i32, align 4 %y = alloca i32, align 4 - store i32 0, i32* %x, align 4, !dbg !15 + store i32 0, i32* %x, align 4 store i32 1, i32* %y, align 4, !dbg !16 - ret void, !dbg !17 + ret void +} + +define void @foo() #0 !dbg !18 { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + store i32 1, i32* %2, align 4, !dbg !24 + store i32 2, i32* %3, align 4 + store i32 3, i32* %4, align 4, !dbg !26 + ret void +} + +define void @bar() #0 !dbg !27 { + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + store i32 1, i32* %2, align 4, !dbg !33 + store i32 2, i32* %3, align 4 + store i32 3, i32* %4, align 4, !dbg !35 + ret void } attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="false" "target-cpu"="cyclone" } @@ -64,10 +112,12 @@ attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="false" !8 = distinct !DISubprogram(name: "dog", scope: !1, file: !1, line: 2, type: !9, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) !9 = !DISubroutineType(types: !10) !10 = !{null} -!11 = !DILocation(line: 4, column: 9, scope: !8) !12 = !DILocation(line: 5, column: 9, scope: !8) -!13 = !DILocation(line: 6, column: 1, scope: !8) !14 = distinct !DISubprogram(name: "cat", scope: !1, file: !1, line: 10, type: !9, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) -!15 = !DILocation(line: 12, column: 9, scope: !14) !16 = !DILocation(line: 13, column: 9, scope: !14) -!17 = !DILocation(line: 14, column: 1, scope: !14) +!18 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 26, type: !9, isLocal: false, isDefinition: true, scopeLine: 26, flags: DIFlagPrototyped, 
isOptimized: false, unit: !0, variables: !2) +!24 = !DILocation(line: 27, column: 9, scope: !18) +!26 = !DILocation(line: 29, column: 9, scope: !18) +!27 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 35, type: !9, isLocal: false, isDefinition: true, scopeLine: 35, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!33 = !DILocation(line: 36, column: 1, scope: !27) +!35 = !DILocation(line: 38, column: 1, scope: !27) \ No newline at end of file diff --git a/test/CodeGen/AArch64/machine-outliner.ll b/test/CodeGen/AArch64/machine-outliner.ll index b5094fe47508b..9b6254fb3cc1a 100644 --- a/test/CodeGen/AArch64/machine-outliner.ll +++ b/test/CodeGen/AArch64/machine-outliner.ll @@ -1,9 +1,31 @@ -; RUN: llc -enable-machine-outliner -mtriple=aarch64-apple-darwin < %s | FileCheck %s +; RUN: llc -enable-machine-outliner -mtriple=aarch64-apple-darwin < %s | FileCheck %s -check-prefix=NoODR +; RUN: llc -enable-machine-outliner -enable-linkonceodr-outlining -mtriple=aarch64-apple-darwin < %s | FileCheck %s -check-prefix=ODR + +define linkonce_odr void @fish() #0 { + ; CHECK-LABEL: _fish: + ; NoODR: orr w8, wzr, #0x1 + ; NoODR-NEXT: stp w8, wzr, [sp, #8] + ; NoODR-NEXT: orr w8, wzr, #0x2 + ; NoODR-NEXT: str w8, [sp, #4] + ; NoODR-NEXT: orr w8, wzr, #0x3 + ; NoODR-NEXT: str w8, [sp], #16 + ; NoODR-NEXT: ret + ; ODR: b l_OUTLINED_FUNCTION_0 + %1 = alloca i32, align 4 + %2 = alloca i32, align 4 + %3 = alloca i32, align 4 + %4 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + store i32 1, i32* %2, align 4 + store i32 2, i32* %3, align 4 + store i32 3, i32* %4, align 4 + ret void +} define void @cat() #0 { -; CHECK-LABEL: _cat: -; CHECK: b l_OUTLINED_FUNCTION_0 -; CHECK-NOT: ret + ; CHECK-LABEL: _cat: + ; CHECK: b l_OUTLINED_FUNCTION_0 + ; CHECK-NOT: ret %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -16,9 +38,9 @@ define void @cat() #0 { } define void @dog() #0 { -; CHECK-LABEL: _dog: -; CHECK: b l_OUTLINED_FUNCTION_0 -; CHECK-NOT: ret + ; CHECK-LABEL: _dog: + ; CHECK: b l_OUTLINED_FUNCTION_0 + ; CHECK-NOT: ret %1 = alloca i32, align 4 %2 = alloca i32, align 4 %3 = alloca i32, align 4 @@ -39,5 +61,4 @@ define void @dog() #0 { ; CHECK-NEXT: str w8, [sp], #16 ; CHECK-NEXT: ret - attributes #0 = { noredzone nounwind ssp uwtable "no-frame-pointer-elim"="false" "target-cpu"="cyclone" } diff --git a/test/CodeGen/AArch64/no-fp-asm-clobbers-crash.ll b/test/CodeGen/AArch64/no-fp-asm-clobbers-crash.ll new file mode 100644 index 0000000000000..5cd8dc57f9adf --- /dev/null +++ b/test/CodeGen/AArch64/no-fp-asm-clobbers-crash.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s | FileCheck %s +; +; Be sure that we ignore clobbers of unallocatable registers, rather than +; crashing. + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +; CHECK-LABEL: foo: +; CHECK: ret +define void @foo() #0 { +entry: + call void asm sideeffect "", "~{v0}"() + call void asm sideeffect "", "~{s0}"() + ret void +} + +attributes #0 = { nounwind "target-features"="-crypto,-fp-armv8,-neon" } diff --git a/test/CodeGen/AArch64/regcoal-physreg.mir b/test/CodeGen/AArch64/regcoal-physreg.mir index f88b7482acacf..095e8a4973ce4 100644 --- a/test/CodeGen/AArch64/regcoal-physreg.mir +++ b/test/CodeGen/AArch64/regcoal-physreg.mir @@ -13,7 +13,7 @@ name: func0 body: | bb.0: ; We usually should not coalesce copies from allocatable physregs. 
- ; CHECK: %0 = COPY %w7 + ; CHECK: %0:gpr32 = COPY %w7 ; CHECK: STRWui %0, %x1, 0 %0 : gpr32 = COPY %w7 STRWui %0, %x1, 0 @@ -26,7 +26,7 @@ body: | ; It is not fine to coalesce copies from reserved physregs when they are ; clobbered. - ; CHECK: %2 = COPY %fp + ; CHECK: %2:gpr64 = COPY %fp ; CHECK: STRXui %2, %x1, 0 %2 : gpr64 = COPY %fp %fp = SUBXri %fp, 4, 0 @@ -56,14 +56,14 @@ body: | ; Only coalesce when the source register is reserved as a whole (this is ; a limitation of the current code which cannot update liveness information ; of the non-reserved part). - ; CHECK: %6 = COPY %x28_fp + ; CHECK: %6:xseqpairsclass = COPY %x28_fp ; CHECK: HINT 0, implicit %6 %6 : xseqpairsclass = COPY %x28_fp HINT 0, implicit %6 ; It is not fine to coalesce copies from reserved physregs when they are ; clobbered by the regmask on a call. - ; CHECK: %7 = COPY %x18 + ; CHECK: %7:gpr64 = COPY %x18 ; CHECK: BL @f2, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit-def %sp ; CHECK: STRXui %7, %x1, 0 @@ -80,7 +80,7 @@ body: | ; Cannot coalesce when there are reads of the physreg. ; CHECK-NOT: %fp = SUBXri %fp, 8, 0 - ; CHECK: %9 = SUBXri %fp, 8, 0 + ; CHECK: %9:gpr64sp = SUBXri %fp, 8, 0 ; CHECK: STRXui %fp, %fp, 0 ; CHECK: %fp = COPY %9 %9 : gpr64sp = SUBXri %fp, 8, 0 @@ -96,7 +96,7 @@ body: | ; Cannot coalesce physreg because we have reads on other CFG paths (we ; currently abort for any control flow) ; CHECK-NOT: %fp = SUBXri - ; CHECK: %0 = SUBXri %fp, 12, 0 + ; CHECK: %0:gpr64sp = SUBXri %fp, 12, 0 ; CHECK: CBZX undef %x0, %bb.1 ; CHECK: B %bb.2 %0 : gpr64sp = SUBXri %fp, 12, 0 diff --git a/test/CodeGen/AArch64/spill-undef.mir b/test/CodeGen/AArch64/spill-undef.mir index 4294df286bd30..c4f589b5cc49e 100644 --- a/test/CodeGen/AArch64/spill-undef.mir +++ b/test/CodeGen/AArch64/spill-undef.mir @@ -5,19 +5,19 @@ --- | ; ModuleID = 'stuff.ll' target triple = "aarch64--" - + @g = external global i32 - + define void @foobar() { ret void } - + ... --- name: foobar alignment: 2 tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr32 } - { id: 1, class: gpr32 } - { id: 2, class: gpr32all } @@ -37,25 +37,25 @@ body: | ; But on that path, we don't care about its value. ; Emit a simple KILL instruction instead of an ; actual spill. - ; CHECK: [[UNDEF:%[0-9]+]] = IMPLICIT_DEF + ; CHECK: [[UNDEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF ; CHECK-NEXT: KILL [[UNDEF]] %8 = IMPLICIT_DEF ; %9 is going to be spilled. ; But it is only partially undef.
; Make sure we spill it properly - ; CHECK: [[NINE:%[0-9]+]] = COPY %x0 - ; CHECK: [[NINE]].sub_32 = IMPLICIT_DEF + ; CHECK: [[NINE:%[0-9]+]]:gpr64 = COPY %x0 + ; CHECK: [[NINE]].sub_32:gpr64 = IMPLICIT_DEF ; CHECK-NEXT: STRXui [[NINE]] %9 = COPY %x0 %9.sub_32 = IMPLICIT_DEF CBNZW %wzr, %bb.2 B %bb.1 - + bb.1: %4 = ADRP target-flags(aarch64-page) @g %8 = LDRWui %4, target-flags(aarch64-pageoff, aarch64-nc) @g :: (volatile dereferenceable load 4 from @g) INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr - + bb.2: INLINEASM $nop, 1, 12, implicit-def dead early-clobber %x0, 12, implicit-def dead early-clobber %x1, 12, implicit-def dead early-clobber %x2, 12, implicit-def dead early-clobber %x3, 12, implicit-def dead early-clobber %x4, 12, implicit-def dead early-clobber %x5, 12, implicit-def dead early-clobber %x6, 12, implicit-def dead early-clobber %x7, 12, implicit-def dead early-clobber %x8, 12, implicit-def dead early-clobber %x9, 12, implicit-def dead early-clobber %x10, 12, implicit-def dead early-clobber %x11, 12, implicit-def dead early-clobber %x12, 12, implicit-def dead early-clobber %x13, 12, implicit-def dead early-clobber %x14, 12, implicit-def dead early-clobber %x15, 12, implicit-def dead early-clobber %x16, 12, implicit-def dead early-clobber %x17, 12, implicit-def dead early-clobber %x18, 12, implicit-def dead early-clobber %x19, 12, implicit-def dead early-clobber %x20, 12, implicit-def dead early-clobber %x21, 12, implicit-def dead early-clobber %x22, 12, implicit-def dead early-clobber %x23, 12, implicit-def dead early-clobber %x24, 12, implicit-def dead early-clobber %x25, 12, implicit-def dead early-clobber %x26, 12, implicit-def dead early-clobber %x27, 12, implicit-def dead early-clobber %x28, 12, implicit-def dead early-clobber %fp, 12, implicit-def dead early-clobber %lr %6 = ADRP target-flags(aarch64-page) @g diff --git a/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll b/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll index cdfb667c26bd7..8f83feac51d86 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll +++ b/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll @@ -5,7 +5,7 @@ ; Tests for add. 
; CHECK: name: addi32 -; CHECK: {{%[0-9]+}}(s32) = G_ADD +; CHECK: {{%[0-9]+}}:_(s32) = G_ADD define amdgpu_kernel void @addi32(i32 %arg1, i32 %arg2) { %res = add i32 %arg1, %arg2 store i32 %res, i32 addrspace(1)* undef diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir index 56a9e7022db9c..9b53b029691eb 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -13,7 +13,7 @@ legalized: true regBankSelected: true # GCN: global_addrspace -# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1 +# GCN: [[PTR:%[0-9]+]]:vreg_64 = COPY %vgpr0_vgpr1 # GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0 body: | diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir index ea2ad2ba83a52..4c05383615a68 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir @@ -14,83 +14,83 @@ legalized: true regBankSelected: true # GCN: body: -# GCN: [[PTR:%[0-9]+]] = COPY %sgpr0_sgpr1 +# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY %sgpr0_sgpr1 # Immediate offset: # SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0 -# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0 +# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0 # Max immediate offset for SI # SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0 # VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0 # Immediate overflow for SI -# SI: [[K1024:%[0-9]+]] = S_MOV_B32 1024 +# SI: [[K1024:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 # SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0 # VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0 # Max immediate offset for VI -# SI: [[K1048572:%[0-9]+]] = S_MOV_B32 1048572 +# SI: [[K1048572:%[0-9]+]]:sreg_32 = S_MOV_B32 1048572 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143 # VI: S_LOAD_DWORD_IMM [[PTR]], 1048572 # # Immediate overflow for VI -# SIVI: [[K1048576:%[0-9]+]] = S_MOV_B32 1048576 +# SIVI: [[K1048576:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 # SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0 # Max immediate for CI -# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 4294967292 -# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 3 -# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 -# SIVI-DAG: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0 -# SIVI-DAG: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0 -# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# SIVI-DAG: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1 -# SIVI-DAG: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1 -# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] -# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 +# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292 +# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3 +# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 +# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 +# SIVI: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1 +# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] +# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0 # Immediate overflow for CI -# GCN: [[K_LO:%[0-9]+]] = S_MOV_B32 0 
-# GCN: [[K_HI:%[0-9]+]] = S_MOV_B32 4 -# GCN: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 -# GCN-DAG: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0 -# GCN-DAG: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0 -# GCN: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# GCN-DAG: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1 -# GCN-DAG: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1 -# GCN: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] -# GCN: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 +# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +# GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4 +# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 +# GCN-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 +# GCN-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 +# GCN: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# GCN-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1 +# GCN-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1 +# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] +# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 # GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 # Max 32-bit byte offset -# SIVI: [[K4294967292:%[0-9]+]] = S_MOV_B32 4294967292 +# SIVI: [[K4294967292:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292 # SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0 # Overflow 32-bit byte offset -# SIVI: [[K_LO:%[0-9]+]] = S_MOV_B32 0 -# SIVI: [[K_HI:%[0-9]+]] = S_MOV_B32 1 -# SIVI: [[K:%[0-9]+]] = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 -# SIVI-DAG: [[K_SUB0:%[0-9]+]] = COPY [[K]].sub0 -# SIVI-DAG: [[PTR_LO:%[0-9]+]] = COPY [[PTR]].sub0 -# SIVI: [[ADD_PTR_LO:%[0-9]+]] = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] -# SIVI-DAG: [[K_SUB1:%[0-9]+]] = COPY [[K]].sub1 -# SIVI-DAG: [[PTR_HI:%[0-9]+]] = COPY [[PTR]].sub1 -# SIVI: [[ADD_PTR_HI:%[0-9]+]] = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] -# SIVI: [[ADD_PTR:%[0-9]+]] = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 +# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1 +# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], 1, [[K_HI]], 2 +# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0 +# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0 +# SIVI: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]] +# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1 +# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1 +# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]] +# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], 1, [[ADD_PTR_HI]], 2 # SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0 # CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0 body: | bb.0: liveins: %sgpr0_sgpr1 - + %0:sgpr(p2) = COPY %sgpr0_sgpr1 %1:sgpr(s64) = G_CONSTANT i64 4 diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir index ea435725bf25d..0b8092778bd49 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -13,8 +13,8 @@ legalized: true regBankSelected: true # GCN: global_addrspace -# GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1 -# GCN: [[VAL:%[0-9]+]] = COPY %vgpr2 +# GCN: [[PTR:%[0-9]+]]:vreg_64 = COPY %vgpr0_vgpr1 +# GCN: [[VAL:%[0-9]+]]:vgpr_32 = COPY %vgpr2 # GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0 body: | diff --git 
a/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll index 6c3563a9c333d..ebcdac39274a4 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -2,7 +2,7 @@ ; CHECK-LABEL: name: test_f32_inreg -; CHECK: [[S0:%[0-9]+]](s32) = COPY %sgpr0 +; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY %sgpr0 ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]] define amdgpu_vs void @test_f32_inreg(float inreg %arg0) { call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 @@ -10,7 +10,7 @@ define amdgpu_vs void @test_f32_inreg(float inreg %arg0) { } ; CHECK-LABEL: name: test_f32 -; CHECK: [[V0:%[0-9]+]](s32) = COPY %vgpr0 +; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY %vgpr0 ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]] define amdgpu_vs void @test_f32(float %arg0) { call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 @@ -18,7 +18,7 @@ define amdgpu_vs void @test_f32(float %arg0) { } ; CHECK-LABEL: name: test_ptr2_byval -; CHECK: [[S01:%[0-9]+]](p2) = COPY %sgpr0_sgpr1 +; CHECK: [[S01:%[0-9]+]]:_(p2) = COPY %sgpr0_sgpr1 ; CHECK: G_LOAD [[S01]] define amdgpu_vs void @test_ptr2_byval(i32 addrspace(2)* byval %arg0) { %tmp0 = load volatile i32, i32 addrspace(2)* %arg0 @@ -26,7 +26,7 @@ define amdgpu_vs void @test_ptr2_byval(i32 addrspace(2)* byval %arg0) { } ; CHECK-LABEL: name: test_ptr2_inreg -; CHECK: [[S01:%[0-9]+]](p2) = COPY %sgpr0_sgpr1 +; CHECK: [[S01:%[0-9]+]]:_(p2) = COPY %sgpr0_sgpr1 ; CHECK: G_LOAD [[S01]] define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(2)* inreg %arg0) { %tmp0 = load volatile i32, i32 addrspace(2)* %arg0 @@ -34,8 +34,8 @@ define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(2)* inreg %arg0) { } ; CHECK-LABEL: name: test_sgpr_alignment0 -; CHECK: [[S0:%[0-9]+]](s32) = COPY %sgpr0 -; CHECK: [[S23:%[0-9]+]](p2) = COPY %sgpr2_sgpr3 +; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY %sgpr0 +; CHECK: [[S23:%[0-9]+]]:_(p2) = COPY %sgpr2_sgpr3 ; CHECK: G_LOAD [[S23]] ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]] define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(2)* inreg %arg1) { @@ -45,11 +45,11 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(2)* } ; CHECK-LABEL: name: test_order -; CHECK: [[S0:%[0-9]+\(s32\)]] = COPY %sgpr0 -; CHECK: [[S1:%[0-9]+\(s32\)]] = COPY %sgpr1 -; CHECK: [[V0:%[0-9]+\(s32\)]] = COPY %vgpr0 -; CHECK: [[V1:%[0-9]+\(s32\)]] = COPY %vgpr1 -; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]], [[S0]], [[V1]], [[S1]] +; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY %sgpr0 +; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY %sgpr1 +; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY %vgpr0 +; CHECK: [[V1:%[0-9]+]]:_(s32) = COPY %vgpr1 +; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]](s32), [[S0]](s32), [[V1]](s32), [[S1]](s32) define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %arg2, float %arg3) { call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0 ret void diff --git 
a/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir index f10c896a7af66..60cb6a8244cd4 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -13,9 +14,11 @@ registers: body: | bb.0: liveins: %vgpr0, %vgpr1 - ; CHECK-LABEL: name: test_add - ; CHECK: %2(s32) = G_ADD %0, %1 + ; CHECK-LABEL: name: test_add + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %vgpr1 + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] %0(s32) = COPY %vgpr0 %1(s32) = COPY %vgpr1 %2(s32) = G_ADD %0, %1 diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir index 50ef150510d22..a0f163d573c33 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -13,9 +14,11 @@ registers: body: | bb.0: liveins: %vgpr0, %vgpr1 - ; CHECK-LABEL: name: test_and - ; CHECK: %2(s32) = G_AND %0, %1 + ; CHECK-LABEL: name: test_and + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %vgpr1 + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] %0(s32) = COPY %vgpr0 %1(s32) = COPY %vgpr1 %2(s32) = G_AND %0, %1 diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir index e27c313b8ec0f..f8b5c99418fa9 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -13,10 +14,11 @@ registers: body: | bb.0: liveins: %vgpr0 - ; CHECK-LABEL: name: test_bitcast - ; CHECK: %1(<2 x s16>) = G_BITCAST %0 - ; CHECK: %2(s32) = G_BITCAST %1 + ; CHECK-LABEL: name: test_bitcast + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %vgpr0 + ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) %0(s32) = COPY %vgpr0 %1(<2 x s16>) = G_BITCAST %0 %2(s32) = G_BITCAST %1 diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir index b3e41c7751c53..6e2065207f9b0 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -24,10 +25,11 @@ registers: - { id: 1, class: _ } body: | bb.0.entry: - ; CHECK-LABEL: name: test_constant - ; CHECK: %0(s32) = G_CONSTANT i32 5 - ; CHECK: %1(s1) = G_CONSTANT i1 false + ; CHECK-LABEL: name: test_constant + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS 
intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C1]](s1), [[C1]](s1) %0(s32) = G_CONSTANT i32 5 %1(s1) = G_CONSTANT i1 0 G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %0, %0, %0, %0, %0, %1, %1; @@ -40,10 +42,10 @@ registers: - { id: 1, class: _ } body: | bb.0.entry: - ; CHECK-LABEL: name: test_fconstant - ; CHECK: %0(s32) = G_FCONSTANT float 1.000000e+00 - ; CHECK: %1(s32) = G_FCONSTANT float 7.5 + ; CHECK-LABEL: name: test_fconstant + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 7.500000e+00 %0(s32) = G_FCONSTANT float 1.0 %1(s32) = G_FCONSTANT float 7.5 ... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir index 2191ff4356c76..7db9c36b75041 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -13,9 +14,11 @@ registers: body: | bb.0: liveins: %vgpr0, %vgpr1 - ; CHECK-LABEL: name: test_fmul - ; CHECK: %2(s32) = G_FMUL %0, %1 + ; CHECK-LABEL: name: test_fmul + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %vgpr1 + ; CHECK: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] %0(s32) = COPY %vgpr0 %1(s32) = COPY %vgpr1 %2(s32) = G_FMUL %0, %1 diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir index ebd473d769b37..8508f2706fa68 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-icmp.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -16,9 +17,11 @@ registers: body: | bb.0.entry: liveins: %vgpr0 + ; CHECK-LABEL: name: test_icmp + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %vgpr0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] %0(s32) = G_CONSTANT i32 0 %1(s32) = COPY %vgpr0 - - ; CHECK: %2(s1) = G_ICMP intpred(ne), %0(s32), %1 %2(s1) = G_ICMP intpred(ne), %0, %1 ... 
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir index 4057e414697b9..879cd47f8c6ba 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -12,9 +13,11 @@ registers: body: | bb.0: liveins: %vgpr0, %vgpr1 - ; CHECK-LABEL: name: test_or - ; CHECK: %2(s32) = G_OR %0, %1 + ; CHECK-LABEL: name: test_or + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %vgpr1 + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] %0(s32) = COPY %vgpr0 %1(s32) = COPY %vgpr1 %2(s32) = G_OR %0, %1 diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir index d11130936bd9b..09f00936a6348 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -march=amdgcn -mcpu=fiji -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- | @@ -16,13 +17,19 @@ registers: body: | bb.0: liveins: %vgpr0 + ; CHECK-LABEL: name: test_select + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %vgpr0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[C]](s32), [[COPY]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] %0(s32) = G_CONSTANT i32 0 %1(s32) = COPY %vgpr0 %2(s1) = G_ICMP intpred(ne), %0, %1 %3(s32) = G_CONSTANT i32 1 %4(s32) = G_CONSTANT i32 2 - ; CHECK: %5(s32) = G_SELECT %2(s1), %3, %4 %5(s32) = G_SELECT %2, %3, %4 ... 
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir index 3d5251d102072..feecb7728cf0c 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s --- @@ -9,9 +10,11 @@ registers: body: | bb.0.entry: liveins: %vgpr0, %vgpr1 - ; CHECK-LABEL: name: test_shl - ; CHECK: %2(s32) = G_SHL %0, %1 + ; CHECK-LABEL: name: test_shl + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %vgpr1 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]] %0(s32) = COPY %vgpr0 %1(s32) = COPY %vgpr1 %2(s32) = G_SHL %0, %1 diff --git a/test/CodeGen/AMDGPU/InlineAsmCrash.ll b/test/CodeGen/AMDGPU/InlineAsmCrash.ll new file mode 100644 index 0000000000000..8ad1cbb9a32f5 --- /dev/null +++ b/test/CodeGen/AMDGPU/InlineAsmCrash.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +; CHECK: ;;#ASMSTART +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: ;;#ASMEND + +define void @foo(i32* %ptr) { + %tmp = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm "s_nop 0", "=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65"(i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2) + %tmp2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %tmp, 0 + store i32 %tmp2, i32* %ptr, align 4 + ret void +} diff --git a/test/CodeGen/AMDGPU/amdpal-cs.ll b/test/CodeGen/AMDGPU/amdpal-cs.ll new file mode 100644 index 0000000000000..6ffca4c4565e6 --- /dev/null +++ b/test/CodeGen/AMDGPU/amdpal-cs.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -enable-var-scope %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; amdpal compute shader: 
check for 0x2e12 (COMPUTE_PGM_RSRC1) in pal metadata +; GCN-LABEL: {{^}}cs_amdpal: +; GCN: .amd_amdgpu_pal_metadata{{.*}}0x2e12, +define amdgpu_cs half @cs_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + diff --git a/test/CodeGen/AMDGPU/amdpal-es.ll b/test/CodeGen/AMDGPU/amdpal-es.ll new file mode 100644 index 0000000000000..049e9ae2564e3 --- /dev/null +++ b/test/CodeGen/AMDGPU/amdpal-es.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; amdpal evaluation shader: check for 0x2cca (SPI_SHADER_PGM_RSRC1_ES) in pal metadata +; GCN-LABEL: {{^}}es_amdpal: +; GCN: .amd_amdgpu_pal_metadata{{.*}}0x2cca, +define amdgpu_es half @es_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + + diff --git a/test/CodeGen/AMDGPU/amdpal-gs.ll b/test/CodeGen/AMDGPU/amdpal-gs.ll new file mode 100644 index 0000000000000..89e7cf92c1f65 --- /dev/null +++ b/test/CodeGen/AMDGPU/amdpal-gs.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; amdpal geometry shader: check for 0x2c8a (SPI_SHADER_PGM_RSRC1_GS) in pal metadata +; GCN-LABEL: {{^}}gs_amdpal: +; GCN: .amd_amdgpu_pal_metadata{{.*}}0x2c8a, +define amdgpu_gs half @gs_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + + diff --git a/test/CodeGen/AMDGPU/amdpal-hs.ll b/test/CodeGen/AMDGPU/amdpal-hs.ll new file mode 100644 index 0000000000000..b867717ee2377 --- /dev/null +++ b/test/CodeGen/AMDGPU/amdpal-hs.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; amdpal hull shader: check for 0x2d0a (SPI_SHADER_PGM_RSRC1_HS) in pal metadata +; GCN-LABEL: {{^}}hs_amdpal: +; GCN: .amd_amdgpu_pal_metadata{{.*}}0x2d0a, +define amdgpu_hs half @hs_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + + diff --git a/test/CodeGen/AMDGPU/amdpal-ls.ll b/test/CodeGen/AMDGPU/amdpal-ls.ll new file mode 100644 index 0000000000000..7168a3c77b87b --- /dev/null +++ b/test/CodeGen/AMDGPU/amdpal-ls.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; amdpal load shader: check for 0x2d4a (SPI_SHADER_PGM_RSRC1_LS) in pal metadata +; GCN-LABEL: {{^}}ls_amdpal: +; GCN: .amd_amdgpu_pal_metadata{{.*}}0x2d4a, +define amdgpu_ls half @ls_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + + diff --git a/test/CodeGen/AMDGPU/amdpal-ps.ll b/test/CodeGen/AMDGPU/amdpal-ps.ll new file mode 100644 index 0000000000000..5e19c774a761e --- /dev/null +++ b/test/CodeGen/AMDGPU/amdpal-ps.ll @@ -0,0 
+1,17 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; amdpal pixel shader: check for 0x2c0a (SPI_SHADER_PGM_RSRC1_PS) in pal +; metadata. Check for 0x2c0b (SPI_SHADER_PGM_RSRC2_PS) in pal metadata, and +; that its value starts with 0x42, as it is set to 0x42000000 in the metadata +; below. Also check that key 0x10000000 value 0x12345678 is propagated. The +; operands of !0 below are these values in decimal: 11275 = 0x2c0b, +; 1107296256 = 0x42000000, 268435456 = 0x10000000, 305419896 = 0x12345678. +; GCN-LABEL: {{^}}ps_amdpal: +; GCN: .amd_amdgpu_pal_metadata{{.*0x2c0a,[^,]*,0x2c0b,0x42.*,0x10000000,0x12345678}} +define amdgpu_ps half @ps_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +!amdgpu.pal.metadata = !{!0} +!0 = !{i32 11275, i32 1107296256, i32 268435456, i32 305419896} diff --git a/test/CodeGen/AMDGPU/amdpal-psenable.ll b/test/CodeGen/AMDGPU/amdpal-psenable.ll new file mode 100644 index 0000000000000..c1494d0d4c43b --- /dev/null +++ b/test/CodeGen/AMDGPU/amdpal-psenable.ll @@ -0,0 +1,21 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; This pixel shader does not use the result of its interpolation, so it would +; end up with an interpolation mode set in PSAddr but not PSEnable. This test checks +; the workaround that ensures that an interpolation mode is also set in PSEnable. 
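+; The register/value pairs checked below are, assuming the usual SI register
+; map, 0xa1b3 = SPI_PS_INPUT_ENA and 0xa1b4 = SPI_PS_INPUT_ADDR; the 0x2
+; written to both is PERSP_CENTER_ENA, matching the "InitialPSInputAddr"="2"
+; attribute at the end of this file.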
+; GCN-LABEL: {{^}}amdpal_psenable: +; GCN: .amd_amdgpu_pal_metadata{{.*}}0xa1b3,0x2,0xa1b4,0x2, +define amdgpu_ps void @amdpal_psenable(i32 inreg, i32 inreg, i32 inreg, i32 inreg %m0, <2 x float> %pos) #6 { + %inst23 = extractelement <2 x float> %pos, i32 0 + %inst24 = extractelement <2 x float> %pos, i32 1 + %inst25 = tail call float @llvm.amdgcn.interp.p1(float %inst23, i32 0, i32 0, i32 %m0) + %inst26 = tail call float @llvm.amdgcn.interp.p2(float %inst25, float %inst24, i32 0, i32 0, i32 %m0) + ret void +} + +declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2 +declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2 + +attributes #6 = { nounwind "InitialPSInputAddr"="2" } diff --git a/test/CodeGen/AMDGPU/amdpal-vs.ll b/test/CodeGen/AMDGPU/amdpal-vs.ll new file mode 100644 index 0000000000000..a13205dabd045 --- /dev/null +++ b/test/CodeGen/AMDGPU/amdpal-vs.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -enable-var-scope %s + +; amdpal vertex shader: check for 0x2c4a (SPI_SHADER_PGM_RSRC1_VS) in pal metadata +; GCN-LABEL: {{^}}vs_amdpal: +; GCN: .amd_amdgpu_pal_metadata{{.*}}0x2c4a, +define amdgpu_vs half @vs_amdpal(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + + diff --git a/test/CodeGen/AMDGPU/amdpal.ll b/test/CodeGen/AMDGPU/amdpal.ll new file mode 100644 index 0000000000000..3c8a490b40e6b --- /dev/null +++ b/test/CodeGen/AMDGPU/amdpal.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -mtriple=amdgcn--amdpal -mcpu=tahiti | FileCheck --check-prefix=PAL --enable-var-scope %s + +; PAL: .AMDGPU.config + +define amdgpu_kernel void @simple(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} + +; Check code sequence for amdpal use of scratch for alloca. This is the case +; where the high half of the address comes from s_getpc. + +; PAL-LABEL: {{^}}scratch: +; PAL: s_getpc_b64 s{{\[}}[[GITPTR:[0-9]+]]: +; PAL: s_mov_b32 s[[GITPTR]], s0 +; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]: +; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]: + +define amdgpu_kernel void @scratch(<2 x i32> %in, i32 %idx, i32* %out) { +entry: + %v = alloca [2 x i32] + %vv = bitcast [2 x i32]* %v to <2 x i32>* + store <2 x i32> %in, <2 x i32>* %vv + %e = getelementptr [2 x i32], [2 x i32]* %v, i32 0, i32 %idx + %x = load i32, i32* %e + store i32 %x, i32* %out + ret void +} + +; Check code sequence for amdpal use of scratch for alloca. This is the case +; where the amdgpu-git-ptr-high function attribute gives the high half of the +; address to use. +; You can't do arithmetic on a FileCheck variable, so we can't test that the +; s_movk_i32 writes the register one more than the one written by the +; following +; s_mov_b32. 
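+; Under that attribute the expected sequence is roughly (register numbers
+; illustrative only, not checked):
+;   s_movk_i32 s5, 0x1234  ; high half from amdgpu-git-ptr-high
+;   s_mov_b32 s4, s0       ; low half copied from the incoming GIT pointer
+; so the test below only matches the two instructions individually.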
+ +; PAL-LABEL: {{^}}scratch2: +; PAL: s_movk_i32 s{{[0-9]+}}, 0x1234 +; PAL: s_mov_b32 s[[GITPTR:[0-9]+]], s0 +; PAL: s_load_dwordx4 s{{\[}}[[SCRATCHDESC:[0-9]+]]:{{[0-9]+]}}, s{{\[}}[[GITPTR]]: +; PAL: buffer_store{{.*}}, s{{\[}}[[SCRATCHDESC]]: + +define amdgpu_kernel void @scratch2(<2 x i32> %in, i32 %idx, i32* %out) #0 { +entry: + %v = alloca [2 x i32] + %vv = bitcast [2 x i32]* %v to <2 x i32>* + store <2 x i32> %in, <2 x i32>* %vv + %e = getelementptr [2 x i32], [2 x i32]* %v, i32 0, i32 %idx + %x = load i32, i32* %e + store i32 %x, i32* %out + ret void +} + +attributes #0 = { nounwind "amdgpu-git-ptr-high"="0x1234" } diff --git a/test/CodeGen/AMDGPU/anyext.ll b/test/CodeGen/AMDGPU/anyext.ll index 3f220c4084129..cabf0578aaf0c 100644 --- a/test/CodeGen/AMDGPU/anyext.ll +++ b/test/CodeGen/AMDGPU/anyext.ll @@ -1,12 +1,13 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,GFX89 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX89 %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone ; GCN-LABEL: {{^}}anyext_i1_i32: ; GCN: v_cndmask_b32_e64 -define amdgpu_kernel void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) { +define amdgpu_kernel void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) #0 { entry: %tmp = icmp eq i32 %cond, 0 %tmp1 = zext i1 %tmp to i8 @@ -18,11 +19,11 @@ entry: } ; GCN-LABEL: {{^}}s_anyext_i16_i32: -; VI: v_add_u16_e32 [[ADD:v[0-9]+]], -; VI: v_xor_b32_e32 [[XOR:v[0-9]+]], -1, [[ADD]] -; VI: v_and_b32_e32 [[AND:v[0-9]+]], 1, [[XOR]] -; VI: buffer_store_dword [[AND]] -define amdgpu_kernel void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %a, i16 addrspace(1)* %b) { +; GFX89: v_add_u16_e32 [[ADD:v[0-9]+]], +; GFX89: v_xor_b32_e32 [[XOR:v[0-9]+]], -1, [[ADD]] +; GFX89: v_and_b32_e32 [[AND:v[0-9]+]], 1, [[XOR]] +; GFX89: buffer_store_dword [[AND]] +define amdgpu_kernel void @s_anyext_i16_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %a, i16 addrspace(1)* %b) #0 { entry: %tid.x = call i32 @llvm.amdgcn.workitem.id.x() %tid.y = call i32 @llvm.amdgcn.workitem.id.y() @@ -38,3 +39,27 @@ entry: store i32 %tmp4, i32 addrspace(1)* %out ret void } + +; GCN-LABEL: {{^}}anyext_v2i16_to_v2i32: +; GFX9: global_load_short_d16_hi +; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0x80008000 +; GFX9: v_bfi_b32 v{{[0-9]+}}, v{{[0-9]+}}, 0, v{{[0-9]+}} +; GFX9: v_cmp_eq_f32_e32 +; GFX9: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc +define amdgpu_kernel void @anyext_v2i16_to_v2i32() #0 { +bb: + %tmp = load i16, i16 addrspace(1)* undef, align 2 + %tmp2 = insertelement <2 x i16> undef, i16 %tmp, i32 1 + %tmp4 = and <2 x i16> %tmp2, + %tmp5 = zext <2 x i16> %tmp4 to <2 x i32> + %tmp6 = shl nuw <2 x i32> %tmp5, + %tmp7 = or <2 x i32> zeroinitializer, %tmp6 + %tmp8 = bitcast <2 x i32> %tmp7 to <2 x float> + %tmp10 = fcmp oeq <2 x float> %tmp8, zeroinitializer + %tmp11 = zext <2 x i1> %tmp10 to <2 x i8> + %tmp12 = extractelement <2 x i8> %tmp11, i32 1 + store i8 %tmp12, i8 addrspace(1)* undef, align 1 + ret void +} + 
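+; Note on the GFX9 checks above: the insertelement into lane 1 is what allows
+; selecting global_load_short_d16_hi, which loads 16 bits into the high half
+; of a 32-bit VGPR while leaving the low half intact, so no separate shift is
+; needed to place the value.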
+attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll b/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll index a0694fb1e3c91..7fe5604c3ec72 100644 --- a/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll +++ b/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=HSAMD %s ; CHECK-LABEL: {{^}}min_64_max_64: ; CHECK: SGPRBlocks: 0 @@ -127,3 +128,15 @@ define amdgpu_kernel void @min_1024_max_2048() #3 { ret void } attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"} + +; HSAMD: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) +; HSAMD: Version: [ 1, 0 ] +; HSAMD: Kernels: +; HSAMD: - Name: min_64_max_64 +; HSAMD: MaxFlatWorkGroupSize: 64 +; HSAMD: - Name: min_64_max_128 +; HSAMD: MaxFlatWorkGroupSize: 128 +; HSAMD: - Name: min_128_max_128 +; HSAMD: MaxFlatWorkGroupSize: 128 +; HSAMD: - Name: min_1024_max_2048 +; HSAMD: MaxFlatWorkGroupSize: 2048 diff --git a/test/CodeGen/AMDGPU/branch-relax-bundle.ll b/test/CodeGen/AMDGPU/branch-relax-bundle.ll new file mode 100644 index 0000000000000..9f0b8d3553ee0 --- /dev/null +++ b/test/CodeGen/AMDGPU/branch-relax-bundle.ll @@ -0,0 +1,53 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -amdgpu-s-branch-bits=5 < %s | FileCheck -check-prefix=GCN %s + +; Restrict maximum branch to between +15 and -16 dwords + +; Instructions inside a bundle were collectively counted as +; 0-bytes. Make sure this is accounted for when estimating branch +; distances + +; Bundle used for address in call sequence: 20 bytes +; s_getpc_b64 +; s_add_u32 +; s_addc_u32 + +; plus additional overhead +; s_setpc_b64 +; and some register copies + +declare void @func() #0 + +; GCN-LABEL: {{^}}bundle_size: +; GCN: s_cbranch_scc0 [[BB_EXPANSION:BB[0-9]+_[0-9]+]] +; GCN: s_getpc_b64 +; GCN-NEXT: s_add_u32 +; GCN-NEXT: s_addc_u32 +; GCN-NEXT: s_setpc_b64 + +; GCN: {{^}}[[BB_EXPANSION]]: +; GCN: s_getpc_b64 +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, func@ +; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, func@ +; GCN: s_swappc_b64 +define amdgpu_kernel void @bundle_size(i32 addrspace(1)* %arg, i32 %cnd) #0 { +bb: + %cmp = icmp eq i32 %cnd, 0 + br i1 %cmp, label %bb3, label %bb2 ; +8 dword branch + +bb2: + call void @func() + call void asm sideeffect + "v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64 + v_nop_e64", ""() #0 + br label %bb3 + +bb3: + store volatile i32 %cnd, i32 addrspace(1)* %arg + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/calling-conventions.ll b/test/CodeGen/AMDGPU/calling-conventions.ll index 9bf94a1f2da06..5a6fe9ac190b6 100644 --- a/test/CodeGen/AMDGPU/calling-conventions.ll +++ b/test/CodeGen/AMDGPU/calling-conventions.ll @@ -76,4 +76,49 @@ define amdgpu_kernel void @call_fastcc() #0 { ret void } -attributes #0 = { nounwind noinline } \ No newline at end of file +; Mesa compute shader: check for 47176 (COMPUTE_PGM_RSRC1) in .AMDGPU.config +; GCN-LABEL: .AMDGPU.config +; GCN: .long 47176 +; GCN-LABEL: {{^}}cs_mesa: +define amdgpu_cs half @cs_mesa(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; Mesa pixel shader: check for 45096 (SPI_SHADER_PGM_RSRC1_PS) in .AMDGPU.config +; 
GCN-LABEL: .AMDGPU.config +; GCN: .long 45096 +; GCN-LABEL: {{^}}ps_mesa: +define amdgpu_ps half @ps_mesa(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; Mesa vertex shader: check for 45352 (SPI_SHADER_PGM_RSRC1_VS) in .AMDGPU.config +; GCN-LABEL: .AMDGPU.config +; GCN: .long 45352 +; GCN-LABEL: {{^}}vs_mesa: +define amdgpu_vs half @vs_mesa(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; Mesa geometry shader: check for 45608 (SPI_SHADER_PGM_RSRC1_GS) in .AMDGPU.config +; GCN-LABEL: .AMDGPU.config +; GCN: .long 45608 +; GCN-LABEL: {{^}}gs_mesa: +define amdgpu_gs half @gs_mesa(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +; Mesa hull shader: check for 46120 (SPI_SHADER_PGM_RSRC1_HS) in .AMDGPU.config +; GCN-LABEL: .AMDGPU.config +; GCN: .long 46120 +; GCN-LABEL: {{^}}hs_mesa: +define amdgpu_hs half @hs_mesa(half %arg0) { + %add = fadd half %arg0, 1.0 + ret half %add +} + +attributes #0 = { nounwind noinline } diff --git a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir index 90fba03420901..8ab99c6d2969d 100644 --- a/test/CodeGen/AMDGPU/clamp-omod-special-case.mir +++ b/test/CodeGen/AMDGPU/clamp-omod-special-case.mir @@ -1,8 +1,8 @@ # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s --- # GCN-LABEL: name: v_max_self_clamp_not_set_f32 -# GCN: %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec -# GCN-NEXT: %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit %exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec +# GCN-NEXT: %21:vgpr_32 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit %exec name: v_max_self_clamp_not_set_f32 tracksRegLiveness: true @@ -64,8 +64,8 @@ body: | ... --- # GCN-LABEL: name: v_clamp_omod_already_set_f32 -# GCN: %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec -# GCN: %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit %exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec +# GCN: %21:vgpr_32 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit %exec name: v_clamp_omod_already_set_f32 tracksRegLiveness: true registers: @@ -127,8 +127,8 @@ body: | # Don't fold a mul that looks like an omod if itself has omod set # GCN-LABEL: name: v_omod_mul_omod_already_set_f32 -# GCN: %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec -# GCN-NEXT: %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit %exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec +# GCN-NEXT: %21:vgpr_32 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit %exec name: v_omod_mul_omod_already_set_f32 tracksRegLiveness: true registers: @@ -191,8 +191,8 @@ body: | # Don't fold a mul that looks like an omod if itself has clamp set # This might be OK, but would require folding the clamp at the same time. 
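# As a reading aid (operand layout assumed from the VOP3 encoding used by
# these _e64 instructions: src0_modifiers, src0, src1_modifiers, src1, clamp,
# omod), the trailing "1, 0" in the V_MUL_F32_e64 below is clamp=1/omod=0,
# and 1056964608 is 0x3f000000, i.e. 0.5f.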
# GCN-LABEL: name: v_omod_mul_clamp_already_set_f32 -# GCN: %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec -# GCN-NEXT: %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit %exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec +# GCN-NEXT: %21:vgpr_32 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit %exec name: v_omod_mul_clamp_already_set_f32 tracksRegLiveness: true @@ -269,8 +269,8 @@ body: | # Don't fold a mul that looks like an omod if itself has omod set # GCN-LABEL: name: v_omod_add_omod_already_set_f32 -# GCN: %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec -# GCN-NEXT: %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit %exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec +# GCN-NEXT: %21:vgpr_32 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit %exec name: v_omod_add_omod_already_set_f32 tracksRegLiveness: true registers: @@ -333,8 +333,8 @@ body: | # Don't fold a mul that looks like an omod if itself has clamp set # This might be OK, but would require folding the clamp at the same time. # GCN-LABEL: name: v_omod_add_clamp_already_set_f32 -# GCN: %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec -# GCN-NEXT: %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit %exec +# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit %exec +# GCN-NEXT: %21:vgpr_32 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit %exec name: v_omod_add_clamp_already_set_f32 tracksRegLiveness: true diff --git a/test/CodeGen/AMDGPU/clamp.ll b/test/CodeGen/AMDGPU/clamp.ll index 2d6b4f3c0c574..216ecf7634566 100644 --- a/test/CodeGen/AMDGPU/clamp.ll +++ b/test/CodeGen/AMDGPU/clamp.ll @@ -663,6 +663,28 @@ define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out, ret void } +; GCN-LABEL: {{^}}v_clamp_diff_source_f32: +; GCN: v_add_f32_e32 [[A:v[0-9]+]] +; GCN: v_add_f32_e32 [[B:v[0-9]+]] +; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[B]] clamp{{$}} +define amdgpu_kernel void @v_clamp_diff_source_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 +{ + %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 0 + %gep1 = getelementptr float, float addrspace(1)* %aptr, i32 1 + %gep2 = getelementptr float, float addrspace(1)* %aptr, i32 2 + %l0 = load float, float addrspace(1)* %gep0 + %l1 = load float, float addrspace(1)* %gep1 + %l2 = load float, float addrspace(1)* %gep2 + %a = fadd nsz float %l0, %l1 + %b = fadd nsz float %l0, %l2 + %res = call nsz float @llvm.maxnum.f32(float %a, float %b) + %max = call nsz float @llvm.maxnum.f32(float %res, float 0.0) + %min = call nsz float @llvm.minnum.f32(float %max, float 1.0) + %out.gep = getelementptr float, float addrspace(1)* %out, i32 3 + store float %min, float addrspace(1)* %out.gep + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 declare float @llvm.fabs.f32(float) #1 declare float @llvm.minnum.f32(float, float) #1 diff --git a/test/CodeGen/AMDGPU/code-object-metadata-kernel-code-props.ll b/test/CodeGen/AMDGPU/code-object-metadata-kernel-code-props.ll deleted file mode 100644 index 3b232e40cf25c..0000000000000 --- a/test/CodeGen/AMDGPU/code-object-metadata-kernel-code-props.ll +++ /dev/null @@ -1,32 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK 
--check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s - -; CHECK: --- -; CHECK: Version: [ 1, 0 ] - -; CHECK: Kernels: -; CHECK: - Name: test -; CHECK: CodeProps: -; CHECK: KernargSegmentSize: 24 -; GFX700: WavefrontNumSGPRs: 6 -; GFX800: WavefrontNumSGPRs: 96 -; GFX900: WavefrontNumSGPRs: 6 -; GFX700: WorkitemNumVGPRs: 4 -; GFX800: WorkitemNumVGPRs: 6 -; GFX900: WorkitemNumVGPRs: 6 -; CHECK: KernargSegmentAlign: 4 -; CHECK: GroupSegmentAlign: 4 -; CHECK: PrivateSegmentAlign: 4 -; CHECK: WavefrontSize: 6 -define amdgpu_kernel void @test( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b) { -entry: - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b - %r.val = fadd half %a.val, %b.val - store half %r.val, half addrspace(1)* %r - ret void -} diff --git a/test/CodeGen/AMDGPU/combine-ftrunc.ll b/test/CodeGen/AMDGPU/combine-ftrunc.ll new file mode 100644 index 0000000000000..77ab7c3a948c3 --- /dev/null +++ b/test/CodeGen/AMDGPU/combine-ftrunc.ll @@ -0,0 +1,92 @@ +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}combine_ftrunc_frint_f64: +; GCN: v_rndne_f64_e32 [[RND:v\[[0-9:]+\]]], +; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], [[RND]] +define amdgpu_kernel void @combine_ftrunc_frint_f64(double addrspace(1)* %p) { + %v = load double, double addrspace(1)* %p, align 8 + %round = tail call double @llvm.rint.f64(double %v) + %trunc = tail call double @llvm.trunc.f64(double %round) + store double %trunc, double addrspace(1)* %p, align 8 + ret void +} + +; GCN-LABEL: {{^}}combine_ftrunc_frint_f32: +; GCN: v_rndne_f32_e32 [[RND:v[0-9]+]], +; GCN: flat_store_dword v[{{[0-9:]+}}], [[RND]] +define amdgpu_kernel void @combine_ftrunc_frint_f32(float addrspace(1)* %p) { + %v = load float, float addrspace(1)* %p, align 4 + %round = tail call float @llvm.rint.f32(float %v) + %trunc = tail call float @llvm.trunc.f32(float %round) + store float %trunc, float addrspace(1)* %p, align 4 + ret void +} + +; GCN-LABEL: {{^}}combine_ftrunc_frint_v2f32: +; GCN: s_load_dwordx2 +; GCN: s_load_dwordx2 s{{\[}}[[SRC1:[0-9]+]]:[[SRC2:[0-9]+]]{{\]}} +; GCN-DAG: v_rndne_f32_e32 v[[RND1:[0-9]+]], s[[SRC1]] +; GCN-DAG: v_rndne_f32_e32 v[[RND2:[0-9]+]], s[[SRC2]] +; GCN: flat_store_dwordx2 v[{{[0-9:]+}}], v{{\[}}[[RND1]]:[[RND2]]{{\]}} +define amdgpu_kernel void @combine_ftrunc_frint_v2f32(<2 x float> addrspace(1)* %p) { + %v = load <2 x float>, <2 x float> addrspace(1)* %p, align 8 + %round = tail call <2 x float> @llvm.rint.v2f32(<2 x float> %v) + %trunc = tail call <2 x float> @llvm.trunc.v2f32(<2 x float> %round) + store <2 x float> %trunc, <2 x float> addrspace(1)* %p, align 8 + ret void +} + +; GCN-LABEL: {{^}}combine_ftrunc_fceil_f32: +; GCN: v_ceil_f32_e32 [[RND:v[0-9]+]], +; GCN: flat_store_dword v[{{[0-9:]+}}], [[RND]] +define amdgpu_kernel void @combine_ftrunc_fceil_f32(float addrspace(1)* %p) { + %v = load float, float addrspace(1)* %p, align 4 + %round = tail call float @llvm.ceil.f32(float %v) + %trunc = tail call float @llvm.trunc.f32(float %round) + 
store float %trunc, float addrspace(1)* %p, align 4 + ret void +} + +; GCN-LABEL: {{^}}combine_ftrunc_ffloor_f32: +; GCN: v_floor_f32_e32 [[RND:v[0-9]+]], +; GCN: flat_store_dword v[{{[0-9:]+}}], [[RND]] +define amdgpu_kernel void @combine_ftrunc_ffloor_f32(float addrspace(1)* %p) { + %v = load float, float addrspace(1)* %p, align 4 + %round = tail call float @llvm.floor.f32(float %v) + %trunc = tail call float @llvm.trunc.f32(float %round) + store float %trunc, float addrspace(1)* %p, align 4 + ret void +} + +; GCN-LABEL: {{^}}combine_ftrunc_fnearbyint_f32: +; GCN: v_rndne_f32_e32 [[RND:v[0-9]+]], +; GCN: flat_store_dword v[{{[0-9:]+}}], [[RND]] +define amdgpu_kernel void @combine_ftrunc_fnearbyint_f32(float addrspace(1)* %p) { + %v = load float, float addrspace(1)* %p, align 4 + %round = tail call float @llvm.nearbyint.f32(float %v) + %trunc = tail call float @llvm.trunc.f32(float %round) + store float %trunc, float addrspace(1)* %p, align 4 + ret void +} + +; GCN-LABEL: {{^}}combine_ftrunc_ftrunc_f32: +; GCN: s_load_dword [[SRC:s[0-9]+]], +; GCN: v_trunc_f32_e32 [[RND:v[0-9]+]], [[SRC]] +; GCN: flat_store_dword v[{{[0-9:]+}}], [[RND]] +define amdgpu_kernel void @combine_ftrunc_ftrunc_f32(float addrspace(1)* %p) { + %v = load float, float addrspace(1)* %p, align 4 + %round = tail call float @llvm.trunc.f32(float %v) + %trunc = tail call float @llvm.trunc.f32(float %round) + store float %trunc, float addrspace(1)* %p, align 4 + ret void +} + +declare double @llvm.trunc.f64(double) +declare float @llvm.trunc.f32(float) +declare <2 x float> @llvm.trunc.v2f32(<2 x float>) +declare double @llvm.rint.f64(double) +declare float @llvm.rint.f32(float) +declare <2 x float> @llvm.rint.v2f32(<2 x float>) +declare float @llvm.ceil.f32(float) +declare float @llvm.floor.f32(float) +declare float @llvm.nearbyint.f32(float) diff --git a/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir index 0401f7b07e218..d29c6afe7d4d6 100644 --- a/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir +++ b/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir @@ -2,7 +2,7 @@ ... 
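# In the checks that follow, the fold pass under test is expected to have
# folded a bitwise or shift op whose inputs are both known immediates into a
# single V_MOV_B32 of the precomputed value, which is then stored directly.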
# GCN-LABEL: name: s_fold_and_imm_regimm_32{{$}} -# GCN: %10 = V_MOV_B32_e32 1543, implicit %exec +# GCN: %10:vgpr_32 = V_MOV_B32_e32 1543, implicit %exec # GCN: BUFFER_STORE_DWORD_OFFSET killed %10, name: s_fold_and_imm_regimm_32 alignment: 0 @@ -62,19 +62,19 @@ body: | # GCN-LABEL: name: v_fold_and_imm_regimm_32{{$}} -# GCN: %9 = V_MOV_B32_e32 646, implicit %exec +# GCN: %9:vgpr_32 = V_MOV_B32_e32 646, implicit %exec # GCN: FLAT_STORE_DWORD %19, %9, -# GCN: %10 = V_MOV_B32_e32 646, implicit %exec +# GCN: %10:vgpr_32 = V_MOV_B32_e32 646, implicit %exec # GCN: FLAT_STORE_DWORD %19, %10 -# GCN: %11 = V_MOV_B32_e32 646, implicit %exec +# GCN: %11:vgpr_32 = V_MOV_B32_e32 646, implicit %exec # GCN: FLAT_STORE_DWORD %19, %11, -# GCN: %12 = V_MOV_B32_e32 1234567, implicit %exec +# GCN: %12:vgpr_32 = V_MOV_B32_e32 1234567, implicit %exec # GCN: FLAT_STORE_DWORD %19, %12, -# GCN: %13 = V_MOV_B32_e32 63, implicit %exec +# GCN: %13:vgpr_32 = V_MOV_B32_e32 63, implicit %exec # GCN: FLAT_STORE_DWORD %19, %13, name: v_fold_and_imm_regimm_32 @@ -226,34 +226,34 @@ body: | --- # GCN-LABEL: name: v_fold_shl_imm_regimm_32{{$}} -# GCN: %11 = V_MOV_B32_e32 40955904, implicit %exec +# GCN: %11:vgpr_32 = V_MOV_B32_e32 40955904, implicit %exec # GCN: FLAT_STORE_DWORD %20, %11, -# GCN: %12 = V_MOV_B32_e32 24, implicit %exec +# GCN: %12:vgpr_32 = V_MOV_B32_e32 24, implicit %exec # GCN: FLAT_STORE_DWORD %20, %12, -# GCN: %13 = V_MOV_B32_e32 4096, implicit %exec +# GCN: %13:vgpr_32 = V_MOV_B32_e32 4096, implicit %exec # GCN: FLAT_STORE_DWORD %20, %13, -# GCN: %14 = V_MOV_B32_e32 24, implicit %exec +# GCN: %14:vgpr_32 = V_MOV_B32_e32 24, implicit %exec # GCN: FLAT_STORE_DWORD %20, %14, -# GCN: %15 = V_MOV_B32_e32 0, implicit %exec +# GCN: %15:vgpr_32 = V_MOV_B32_e32 0, implicit %exec # GCN: FLAT_STORE_DWORD %20, %15, -# GCN: %22 = V_MOV_B32_e32 4096, implicit %exec +# GCN: %22:vgpr_32 = V_MOV_B32_e32 4096, implicit %exec # GCN: FLAT_STORE_DWORD %20, %22, -# GCN: %23 = V_MOV_B32_e32 1, implicit %exec +# GCN: %23:vgpr_32 = V_MOV_B32_e32 1, implicit %exec # GCN: FLAT_STORE_DWORD %20, %23, -# GCN: %25 = V_MOV_B32_e32 2, implicit %exec +# GCN: %25:vgpr_32 = V_MOV_B32_e32 2, implicit %exec # GCN: FLAT_STORE_DWORD %20, %25, -# GCN: %26 = V_MOV_B32_e32 7927808, implicit %exec +# GCN: %26:vgpr_32 = V_MOV_B32_e32 7927808, implicit %exec # GCN: FLAT_STORE_DWORD %20, %26, -# GCN: %28 = V_MOV_B32_e32 -8, implicit %exec +# GCN: %28:vgpr_32 = V_MOV_B32_e32 -8, implicit %exec # GCN: FLAT_STORE_DWORD %20, %28, name: v_fold_shl_imm_regimm_32 @@ -367,7 +367,7 @@ body: | --- # GCN-LABEL: name: s_fold_ashr_imm_regimm_32{{$}} -# GCN: %11 = V_MOV_B32_e32 243, implicit %exec +# GCN: %11:vgpr_32 = V_MOV_B32_e32 243, implicit %exec # GCN: BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, name: s_fold_ashr_imm_regimm_32 alignment: 0 @@ -425,34 +425,34 @@ body: | ... 
# GCN-LABEL: name: v_fold_ashr_imm_regimm_32{{$}} -# GCN: %11 = V_MOV_B32_e32 3903258, implicit %exec +# GCN: %11:vgpr_32 = V_MOV_B32_e32 3903258, implicit %exec # GCN: FLAT_STORE_DWORD %20, %11, -# GCN: %12 = V_MOV_B32_e32 62452139, implicit %exec +# GCN: %12:vgpr_32 = V_MOV_B32_e32 62452139, implicit %exec # GCN: FLAT_STORE_DWORD %20, %12, -# GCN: %13 = V_MOV_B32_e32 1678031, implicit %exec +# GCN: %13:vgpr_32 = V_MOV_B32_e32 1678031, implicit %exec # GCN: FLAT_STORE_DWORD %20, %13, -# GCN: %14 = V_MOV_B32_e32 3, implicit %exec +# GCN: %14:vgpr_32 = V_MOV_B32_e32 3, implicit %exec # GCN: FLAT_STORE_DWORD %20, %14, -# GCN: %15 = V_MOV_B32_e32 -1, implicit %exec +# GCN: %15:vgpr_32 = V_MOV_B32_e32 -1, implicit %exec # GCN: FLAT_STORE_DWORD %20, %15, -# GCN: %22 = V_MOV_B32_e32 62500, implicit %exec +# GCN: %22:vgpr_32 = V_MOV_B32_e32 62500, implicit %exec # GCN: FLAT_STORE_DWORD %20, %22, -# GCN: %23 = V_MOV_B32_e32 500000, implicit %exec +# GCN: %23:vgpr_32 = V_MOV_B32_e32 500000, implicit %exec # GCN: FLAT_STORE_DWORD %20, %23, -# GCN: %25 = V_MOV_B32_e32 1920, implicit %exec +# GCN: %25:vgpr_32 = V_MOV_B32_e32 1920, implicit %exec # GCN: FLAT_STORE_DWORD %20, %25, -# GCN: %26 = V_MOV_B32_e32 487907, implicit %exec +# GCN: %26:vgpr_32 = V_MOV_B32_e32 487907, implicit %exec # GCN: FLAT_STORE_DWORD %20, %26, -# GCN: %28 = V_MOV_B32_e32 -1, implicit %exec +# GCN: %28:vgpr_32 = V_MOV_B32_e32 -1, implicit %exec # GCN: FLAT_STORE_DWORD %20, %28, name: v_fold_ashr_imm_regimm_32 @@ -575,7 +575,7 @@ body: | --- # GCN-LABEL: name: s_fold_lshr_imm_regimm_32{{$}} -# GCN: %11 = V_MOV_B32_e32 1048332, implicit %exec +# GCN: %11:vgpr_32 = V_MOV_B32_e32 1048332, implicit %exec # GCN: BUFFER_STORE_DWORD_OFFSET killed %11, killed %8, name: s_fold_lshr_imm_regimm_32 alignment: 0 @@ -634,34 +634,34 @@ body: | --- # GCN-LABEL: name: v_fold_lshr_imm_regimm_32{{$}} -# GCN: %11 = V_MOV_B32_e32 3903258, implicit %exec +# GCN: %11:vgpr_32 = V_MOV_B32_e32 3903258, implicit %exec # GCN: FLAT_STORE_DWORD %20, %11, -# GCN: %12 = V_MOV_B32_e32 62452139, implicit %exec +# GCN: %12:vgpr_32 = V_MOV_B32_e32 62452139, implicit %exec # GCN: FLAT_STORE_DWORD %20, %12, -# GCN: %13 = V_MOV_B32_e32 1678031, implicit %exec +# GCN: %13:vgpr_32 = V_MOV_B32_e32 1678031, implicit %exec # GCN: FLAT_STORE_DWORD %20, %13, -# GCN: %14 = V_MOV_B32_e32 3, implicit %exec +# GCN: %14:vgpr_32 = V_MOV_B32_e32 3, implicit %exec # GCN: FLAT_STORE_DWORD %20, %14, -# GCN: %15 = V_MOV_B32_e32 1, implicit %exec +# GCN: %15:vgpr_32 = V_MOV_B32_e32 1, implicit %exec # GCN: FLAT_STORE_DWORD %20, %15, -# GCN: %22 = V_MOV_B32_e32 62500, implicit %exec +# GCN: %22:vgpr_32 = V_MOV_B32_e32 62500, implicit %exec # GCN: FLAT_STORE_DWORD %20, %22, -# GCN: %23 = V_MOV_B32_e32 500000, implicit %exec +# GCN: %23:vgpr_32 = V_MOV_B32_e32 500000, implicit %exec # GCN: FLAT_STORE_DWORD %20, %23, -# GCN: %25 = V_MOV_B32_e32 1920, implicit %exec +# GCN: %25:vgpr_32 = V_MOV_B32_e32 1920, implicit %exec # GCN: FLAT_STORE_DWORD %20, %25, -# GCN: %26 = V_MOV_B32_e32 487907, implicit %exec +# GCN: %26:vgpr_32 = V_MOV_B32_e32 487907, implicit %exec # GCN: FLAT_STORE_DWORD %20, %26, -# GCN: %28 = V_MOV_B32_e32 1073741823, implicit %exec +# GCN: %28:vgpr_32 = V_MOV_B32_e32 1073741823, implicit %exec # GCN: FLAT_STORE_DWORD %20, %28, name: v_fold_lshr_imm_regimm_32 @@ -787,7 +787,7 @@ body: | # GCN-LABEL: name: undefined_vreg_operand{{$}} # GCN: bb.0 -# GCN-NEXT: FLAT_STORE_DWORD undef %3, undef %1, +# GCN-NEXT: FLAT_STORE_DWORD undef %3:vreg_64, undef %1:vgpr_32, # GCN-NEXT: 
S_ENDPGM name: undefined_vreg_operand tracksRegLiveness: true diff --git a/test/CodeGen/AMDGPU/control-flow-optnone.ll b/test/CodeGen/AMDGPU/control-flow-optnone.ll new file mode 100644 index 0000000000000..2122af62735fe --- /dev/null +++ b/test/CodeGen/AMDGPU/control-flow-optnone.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; optnone disables AMDGPUAnnotateUniformValues, so no branch is known +; to be uniform during instruction selection. The custom selection for +; brcond was not checking if the branch was uniform, relying on the +; selection pattern to check that. That would fail, so then the branch +; would fail to select. + +; GCN-LABEL: {{^}}copytoreg_divergent_brcond: +; GCN: s_branch + +; GCN-DAG: v_cmp_lt_i32 +; GCN-DAG: v_cmp_gt_i32 +; GCN: s_and_b64 +; GCN: s_mov_b64 exec + +; GCN: s_or_b64 exec, exec +; GCN: v_cmp_eq_u32 +; GCN: s_cbranch_vccnz +; GCN-NEXT: s_branch +define amdgpu_kernel void @copytoreg_divergent_brcond(i32 %arg, i32 %arg1, i32 %arg2) #0 { +bb: + %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() + %tmp3 = zext i32 %tmp to i64 + %tmp5 = add i64 %tmp3, undef + %tmp6 = trunc i64 %tmp5 to i32 + %tmp7 = mul nsw i32 %tmp6, %arg2 + br label %bb8 + +bb8.loopexit: ; preds = %bb14 + br label %bb8 + +bb8: ; preds = %bb8.loopexit, %bb + br label %bb9 + +bb9: ; preds = %bb14, %bb8 + %tmp10 = icmp slt i32 %tmp7, %arg1 + %tmp11 = icmp sgt i32 %arg, 0 + %tmp12 = and i1 %tmp10, %tmp11 + br i1 %tmp12, label %bb13, label %bb14 + +bb13: ; preds = %bb9 + store volatile i32 0, i32 addrspace(1)* undef, align 4 + br label %bb14 + +bb14: ; preds = %bb13, %bb9 + %tmp15 = icmp eq i32 %arg2, 1 + br i1 %tmp15, label %bb8.loopexit, label %bb9 +} + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +attributes #0 = { nounwind optnone noinline } +attributes #1 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/test/CodeGen/AMDGPU/cttz_zero_undef.ll index 1bfd38d94bfdf..71b5fd9ba7ae6 100644 --- a/test/CodeGen/AMDGPU/cttz_zero_undef.ll +++ b/test/CodeGen/AMDGPU/cttz_zero_undef.ll @@ -1,8 +1,12 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-NOSDWA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-SDWA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s +declare i7 @llvm.cttz.i7(i7, i1) nounwind readnone +declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone +declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone +declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone declare i32 @llvm.r600.read.tidig.x() nounwind readnone @@ -76,3 +80,190 @@ define amdgpu_kernel void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noali store <4 x i32> %cttz, <4 x i32> 
addrspace(1)* %out, align 16 ret void } + +; FUNC-LABEL: {{^}}s_cttz_zero_undef_i8_with_select: +; SI: s_ff1_i32_b32 s{{[0-9]+}}, s{{[0-9]+}} +; EG: MEM_RAT MSKOR +; EG: FFBL_INT +define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noalias %out, i8 %val) nounwind { + %cttz = tail call i8 @llvm.cttz.i8(i8 %val, i1 true) nounwind readnone + %cttz_ret = icmp ne i8 %val, 0 + %ret = select i1 %cttz_ret, i8 %cttz, i8 32 + store i8 %cttz, i8 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_cttz_zero_undef_i16_with_select: +; SI: s_ff1_i32_b32 s{{[0-9]+}}, s{{[0-9]+}} +; EG: MEM_RAT MSKOR +; EG: FFBL_INT +define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(i16 addrspace(1)* noalias %out, i16 %val) nounwind { + %cttz = tail call i16 @llvm.cttz.i16(i16 %val, i1 true) nounwind readnone + %cttz_ret = icmp ne i16 %val, 0 + %ret = select i1 %cttz_ret, i16 %cttz, i16 32 + store i16 %cttz, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32_with_select: +; SI: s_ff1_i32_b32 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] +; EG: FFBL_INT {{\*? *}}[[RESULT]] +define amdgpu_kernel void @s_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %cttz = tail call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone + %cttz_ret = icmp ne i32 %val, 0 + %ret = select i1 %cttz_ret, i32 %cttz, i32 32 + store i32 %cttz, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}s_cttz_zero_undef_i64_with_select: +; SI: s_ff1_i32_b32 s{{[0-9]+}}, s{{[0-9]+}} +; SI: s_ff1_i32_b32 s{{[0-9]+}}, s{{[0-9]+}} +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] +define amdgpu_kernel void @s_cttz_zero_undef_i64_with_select(i64 addrspace(1)* noalias %out, i64 %val) nounwind { + %cttz = tail call i64 @llvm.cttz.i64(i64 %val, i1 true) nounwind readnone + %cttz_ret = icmp ne i64 %val, 0 + %ret = select i1 %cttz_ret, i64 %cttz, i64 32 + store i64 %cttz, i64 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_cttz_zero_undef_i8_with_select: +; SI-NOSDWA: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; SI-SDWA: v_ffbl_b32_sdwa +; EG: MEM_RAT MSKOR +define amdgpu_kernel void @v_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noalias %out, i8 addrspace(1)* nocapture readonly %arrayidx) nounwind { + %val = load i8, i8 addrspace(1)* %arrayidx, align 1 + %cttz = tail call i8 @llvm.cttz.i8(i8 %val, i1 true) nounwind readnone + %cttz_ret = icmp ne i8 %val, 0 + %ret = select i1 %cttz_ret, i8 %cttz, i8 32 + store i8 %ret, i8 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_cttz_zero_undef_i16_with_select: +; SI-NOSDWA: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; SI-SDWA: v_ffbl_b32_sdwa +; EG: MEM_RAT MSKOR +define amdgpu_kernel void @v_cttz_zero_undef_i16_with_select(i16 addrspace(1)* noalias %out, i16 addrspace(1)* nocapture readonly %arrayidx) nounwind { + %val = load i16, i16 addrspace(1)* %arrayidx, align 1 + %cttz = tail call i16 @llvm.cttz.i16(i16 %val, i1 true) nounwind readnone + %cttz_ret = icmp ne i16 %val, 0 + %ret = select i1 %cttz_ret, i16 %cttz, i16 32 + store i16 %ret, i16 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32_with_select: +; SI: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_cmp_ne_u32_e32 vcc, 0 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] +define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) 
nounwind { + %val = load i32, i32 addrspace(1)* %arrayidx, align 1 + %cttz = tail call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone + %cttz_ret = icmp ne i32 %val, 0 + %ret = select i1 %cttz_ret, i32 %cttz, i32 32 + store i32 %ret, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_cttz_zero_undef_i64_with_select: +; SI-NOSDWA: v_or_b32_e32 +; SI-NOSDWA: v_or_b32_e32 +; SI-NOSDWA: v_or_b32_e32 +; SI-SDWA: v_or_b32_sdwa +; SI-NOSDWA: v_or_b32_e32 +; SI-SDWA: v_or_b32_sdwa +; SI: v_or_b32_e32 [[VAL1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_or_b32_e32 [[VAL2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} +; SI-DAG: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL1]] +; SI-DAG: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL2]] +; SI: v_cmp_eq_u32_e32 vcc, 0 +; SI: v_cmp_ne_u64_e32 vcc, 0 +; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] +define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(i64 addrspace(1)* noalias %out, i64 addrspace(1)* nocapture readonly %arrayidx) nounwind { + %val = load i64, i64 addrspace(1)* %arrayidx, align 1 + %cttz = tail call i64 @llvm.cttz.i64(i64 %val, i1 true) nounwind readnone + %cttz_ret = icmp ne i64 %val, 0 + %ret = select i1 %cttz_ret, i64 %cttz, i64 32 + store i64 %ret, i64 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_cttz_i32_sel_eq_neg1: +; SI: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL:v[0-9]+]] +; SI: v_cmp_ne_u32_e32 vcc, 0, [[VAL]] +; SI: s_endpgm +; EG: MEM_RAT_CACHELESS STORE_RAW +; EG: FFBL_INT +define amdgpu_kernel void @v_cttz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind { + %val = load i32, i32 addrspace(1)* %arrayidx, align 1 + %ctlz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone + %cmp = icmp eq i32 %val, 0 + %sel = select i1 %cmp, i32 -1, i32 %ctlz + store i32 %sel, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_cttz_i32_sel_ne_neg1: +; SI: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL:v[0-9]+]] +; SI: v_cmp_ne_u32_e32 vcc, 0, [[VAL]] +; SI: s_endpgm +; EG: MEM_RAT_CACHELESS STORE_RAW +; EG: FFBL_INT +define amdgpu_kernel void @v_cttz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind { + %val = load i32, i32 addrspace(1)* %arrayidx, align 1 + %ctlz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone + %cmp = icmp ne i32 %val, 0 + %sel = select i1 %cmp, i32 %ctlz, i32 -1 + store i32 %sel, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_cttz_i32_sel_ne_bitwidth: +; SI: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_cmp +; SI: v_cndmask +; SI: s_endpgm +; EG: MEM_RAT_CACHELESS STORE_RAW +; EG: FFBL_INT +define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind { + %val = load i32, i32 addrspace(1)* %arrayidx, align 1 + %ctlz = call i32 @llvm.cttz.i32(i32 %val, i1 false) nounwind readnone + %cmp = icmp ne i32 %ctlz, 32 + %sel = select i1 %cmp, i32 %ctlz, i32 -1 + store i32 %sel, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_cttz_i8_sel_eq_neg1: +; SI: {{buffer|flat}}_load_ubyte +; SI: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; EG: MEM_RAT MSKOR +; EG: FFBL_INT + define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* nocapture readonly %arrayidx) nounwind { + %val = load i8, i8 addrspace(1)* %arrayidx, align 1 + %ctlz = call i8 @llvm.cttz.i8(i8 %val, i1 false) nounwind readnone + %cmp = icmp eq i8 %val, 0 + %sel = select i1 
%cmp, i8 -1, i8 %ctlz + store i8 %sel, i8 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_cttz_i16_sel_eq_neg1: +; SI: {{buffer|flat}}_load_ubyte +; SI: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; SI: buffer_store_short +; EG: MEM_RAT MSKOR +; EG: FFBL_INT + define amdgpu_kernel void @v_cttz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* nocapture readonly %arrayidx) nounwind { + %val = load i16, i16 addrspace(1)* %arrayidx, align 1 + %ctlz = call i16 @llvm.cttz.i16(i16 %val, i1 false) nounwind readnone + %cmp = icmp eq i16 %val, 0 + %sel = select i1 %cmp, i16 -1, i16 %ctlz + store i16 %sel, i16 addrspace(1)* %out + ret void +} + + diff --git a/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll b/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll new file mode 100644 index 0000000000000..e7e7b9f907ca4 --- /dev/null +++ b/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll @@ -0,0 +1,32 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; We are only checking that instruction selection can succeed in this case. This +; cut down test results in no instructions, but that's fine. +; +; See https://llvm.org/PR33743 for details of the bug being addressed +; +; Checking that shufflevector with 3-vec mask is handled in +; combineShuffleToVectorExtend +; +; GCN: s_endpgm + +define amdgpu_ps void @main(i32 %in1) local_unnamed_addr { +.entry: + br i1 undef, label %bb12, label %bb + +bb: + %__llpc_global_proxy_r5.12.vec.insert = insertelement <4 x i32> undef, i32 %in1, i32 3 + %tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> undef, <3 x i32> + %tmp4 = bitcast <3 x i32> %tmp3 to <3 x float> + %a2.i123 = extractelement <3 x float> %tmp4, i32 2 + %tmp5 = bitcast float %a2.i123 to i32 + %__llpc_global_proxy_r2.0.vec.insert196 = insertelement <4 x i32> undef, i32 %tmp5, i32 0 + br label %bb12 + +bb12: + %__llpc_global_proxy_r2.0 = phi <4 x i32> [ %__llpc_global_proxy_r2.0.vec.insert196, %bb ], [ undef, %.entry ] + %tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> undef, <3 x i32> + %tmp7 = bitcast <3 x i32> %tmp6 to <3 x float> + %a0.i = extractelement <3 x float> %tmp7, i32 0 + ret void +} diff --git a/test/CodeGen/AMDGPU/detect-dead-lanes.mir b/test/CodeGen/AMDGPU/detect-dead-lanes.mir index c265b8e2ad2ea..b2f5e816b2637 100644 --- a/test/CodeGen/AMDGPU/detect-dead-lanes.mir +++ b/test/CodeGen/AMDGPU/detect-dead-lanes.mir @@ -6,12 +6,12 @@ # CHECK: S_NOP 0, implicit-def %0 # CHECK: S_NOP 0, implicit-def %1 # CHECK: S_NOP 0, implicit-def dead %2 -# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}} +# CHECK: %3:sreg_128 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}} # CHECK: S_NOP 0, implicit %3.sub0 # CHECK: S_NOP 0, implicit %3.sub1 # CHECK: S_NOP 0, implicit undef %3.sub2 -# CHECK: %4 = COPY %3.sub0_sub1 -# CHECK: %5 = COPY undef %3.sub2_sub3 +# CHECK: %4:sreg_64 = COPY %3.sub0_sub1 +# CHECK: %5:sreg_64 = COPY undef %3.sub2_sub3 # CHECK: S_NOP 0, implicit %4.sub0 # CHECK: S_NOP 0, implicit %4.sub1 # CHECK: S_NOP 0, implicit undef %5.sub0 @@ -42,9 +42,9 @@ body: | # Check defined lanes transfer; Includes checking for some special cases like # undef operands or IMPLICIT_DEF definitions. 
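# For reading these checks: subN names the Nth 32-bit lane of the wider
# register (sub0_sub1 is the low 64 bits, and so on), and a use printed as
# "undef %X.subN" means the pass concluded that no live definition reaches
# that lane.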
# CHECK-LABEL: name: test1 -# CHECK: %0 = REG_SEQUENCE %sgpr0, {{[0-9]+}}, %sgpr0, {{[0-9]+}} -# CHECK: %1 = INSERT_SUBREG %0, %sgpr1, {{[0-9]+}} -# CHECK: %2 = INSERT_SUBREG %0.sub2_sub3, %sgpr42, {{[0-9]+}} +# CHECK: %0:sreg_128 = REG_SEQUENCE %sgpr0, {{[0-9]+}}, %sgpr0, {{[0-9]+}} +# CHECK: %1:sreg_128 = INSERT_SUBREG %0, %sgpr1, {{[0-9]+}} +# CHECK: %2:sreg_64 = INSERT_SUBREG %0.sub2_sub3, %sgpr42, {{[0-9]+}} # CHECK: S_NOP 0, implicit %1.sub0 # CHECK: S_NOP 0, implicit undef %1.sub1 # CHECK: S_NOP 0, implicit %1.sub2 @@ -52,25 +52,25 @@ body: | # CHECK: S_NOP 0, implicit %2.sub0 # CHECK: S_NOP 0, implicit undef %2.sub1 -# CHECK: %3 = IMPLICIT_DEF -# CHECK: %4 = INSERT_SUBREG %0, undef %3, {{[0-9]+}} +# CHECK: %3:sreg_32_xm0 = IMPLICIT_DEF +# CHECK: %4:sreg_128 = INSERT_SUBREG %0, undef %3, {{[0-9]+}} # CHECK: S_NOP 0, implicit undef %4.sub0 # CHECK: S_NOP 0, implicit undef %4.sub1 # CHECK: S_NOP 0, implicit %4.sub2 # CHECK: S_NOP 0, implicit undef %4.sub3 -# CHECK: %5 = EXTRACT_SUBREG %0, {{[0-9]+}} -# CHECK: %6 = EXTRACT_SUBREG %5, {{[0-9]+}} -# CHECK: %7 = EXTRACT_SUBREG %5, {{[0-9]+}} +# CHECK: %5:sreg_64 = EXTRACT_SUBREG %0, {{[0-9]+}} +# CHECK: %6:sreg_32_xm0 = EXTRACT_SUBREG %5, {{[0-9]+}} +# CHECK: %7:sreg_32_xm0 = EXTRACT_SUBREG %5, {{[0-9]+}} # CHECK: S_NOP 0, implicit %5 # CHECK: S_NOP 0, implicit %6 # CHECK: S_NOP 0, implicit undef %7 -# CHECK: %8 = IMPLICIT_DEF -# CHECK: %9 = EXTRACT_SUBREG undef %8, {{[0-9]+}} +# CHECK: %8:sreg_64 = IMPLICIT_DEF +# CHECK: %9:sreg_32_xm0 = EXTRACT_SUBREG undef %8, {{[0-9]+}} # CHECK: S_NOP 0, implicit undef %9 -# CHECK: %10 = EXTRACT_SUBREG undef %0, {{[0-9]+}} +# CHECK: %10:sreg_128 = EXTRACT_SUBREG undef %0, {{[0-9]+}} # CHECK: S_NOP 0, implicit undef %10 name: test1 registers: @@ -125,29 +125,29 @@ body: | # CHECK: S_NOP 0, implicit-def dead %0 # CHECK: S_NOP 0, implicit-def %1 # CHECK: S_NOP 0, implicit-def %2 -# CHECK: %3 = REG_SEQUENCE undef %0, {{[0-9]+}}, %1, {{[0-9]+}}, %2, {{[0-9]+}} +# CHECK: %3:sreg_128 = REG_SEQUENCE undef %0, {{[0-9]+}}, %1, {{[0-9]+}}, %2, {{[0-9]+}} # CHECK: S_NOP 0, implicit %3.sub1 # CHECK: S_NOP 0, implicit %3.sub3 # CHECK: S_NOP 0, implicit-def %4 # CHECK: S_NOP 0, implicit-def dead %5 -# CHECK: %6 = REG_SEQUENCE %4, {{[0-9]+}}, undef %5, {{[0-9]+}} +# CHECK: %6:sreg_64 = REG_SEQUENCE %4, {{[0-9]+}}, undef %5, {{[0-9]+}} # CHECK: S_NOP 0, implicit %6 # CHECK: S_NOP 0, implicit-def dead %7 # CHECK: S_NOP 0, implicit-def %8 -# CHECK: %9 = INSERT_SUBREG undef %7, %8, {{[0-9]+}} +# CHECK: %9:sreg_128 = INSERT_SUBREG undef %7, %8, {{[0-9]+}} # CHECK: S_NOP 0, implicit %9.sub2 # CHECK: S_NOP 0, implicit-def %10 # CHECK: S_NOP 0, implicit-def dead %11 -# CHECK: %12 = INSERT_SUBREG %10, undef %11, {{[0-9]+}} +# CHECK: %12:sreg_128 = INSERT_SUBREG %10, undef %11, {{[0-9]+}} # CHECK: S_NOP 0, implicit %12.sub3 # CHECK: S_NOP 0, implicit-def %13 # CHECK: S_NOP 0, implicit-def dead %14 -# CHECK: %15 = REG_SEQUENCE %13, {{[0-9]+}}, undef %14, {{[0-9]+}} -# CHECK: %16 = EXTRACT_SUBREG %15, {{[0-9]+}} +# CHECK: %15:sreg_128 = REG_SEQUENCE %13, {{[0-9]+}}, undef %14, {{[0-9]+}} +# CHECK: %16:sreg_64 = EXTRACT_SUBREG %15, {{[0-9]+}} # CHECK: S_NOP 0, implicit %16.sub1 name: test2 @@ -205,7 +205,7 @@ body: | # CHECK-LABEL: name: test3 # CHECK: S_NOP 0, implicit-def %0 # CHECK: %vcc = COPY %0 -# CHECK: %1 = COPY %vcc +# CHECK: %1:sreg_64 = COPY %vcc # CHECK: S_NOP 0, implicit %1 name: test3 tracksRegLiveness: true @@ -225,7 +225,7 @@ body: | # CHECK-LABEL: name: test4 # CHECK: S_NOP 0, implicit-def dead %0 # CHECK: KILL 
undef %0 -# CHECK: %1 = IMPLICIT_DEF +# CHECK: %1:sreg_64 = IMPLICIT_DEF # CHECK: S_NOP 0, implicit undef %1 name: test4 tracksRegLiveness: true @@ -245,7 +245,7 @@ body: | # used. # CHECK-LABEL: name: test5 # CHECK: S_NOP 0, implicit-def %0 -# CHECK: %1 = REG_SEQUENCE undef %0, {{[0-9]+}}, %0, {{[0-9]+}} +# CHECK: %1:sreg_64 = REG_SEQUENCE undef %0, {{[0-9]+}}, %0, {{[0-9]+}} # CHECK: S_NOP 0, implicit %1.sub1 name: test5 tracksRegLiveness: true @@ -265,10 +265,10 @@ body: | # CHECK: S_NOP 0, implicit-def %0 # CHECK: S_NOP 0, implicit-def dead %1 # CHECK: S_NOP 0, implicit-def dead %2 -# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, undef %1, {{[0-9]+}}, undef %2, {{[0-9]+}} +# CHECK: %3:sreg_128 = REG_SEQUENCE %0, {{[0-9]+}}, undef %1, {{[0-9]+}}, undef %2, {{[0-9]+}} # CHECK: bb.1: -# CHECK: %4 = PHI %3, %bb.0, %5, %bb.1 +# CHECK: %4:sreg_128 = PHI %3, %bb.0, %5, %bb.1 # CHECK: bb.2: # CHECK: S_NOP 0, implicit %4.sub0 @@ -315,12 +315,12 @@ body: | # CHECK: S_NOP 0, implicit-def %1 # CHECK: S_NOP 0, implicit-def dead %2 # CHECK: S_NOP 0, implicit-def %3 -# CHECK: %4 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}}, %3, {{[0-9]+}} +# CHECK: %4:sreg_128 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}}, %3, {{[0-9]+}} # CHECK: bb.1: -# CHECK: %5 = PHI %4, %bb.0, %6, %bb.1 +# CHECK: %5:sreg_128 = PHI %4, %bb.0, %6, %bb.1 -# CHECK: %6 = REG_SEQUENCE %5.sub1, {{[0-9]+}}, %5.sub3, {{[0-9]+}}, undef %5.sub2, {{[0-9]+}}, %5.sub0, {{[0-9]+}} +# CHECK: %6:sreg_128 = REG_SEQUENCE %5.sub1, {{[0-9]+}}, %5.sub3, {{[0-9]+}}, undef %5.sub2, {{[0-9]+}}, %5.sub0, {{[0-9]+}} # CHECK: bb.2: # CHECK: S_NOP 0, implicit %6.sub3 @@ -361,12 +361,12 @@ body: | # CHECK-LABEL: name: loop2 # CHECK: bb.0: # CHECK: S_NOP 0, implicit-def %0 -# CHECK: %1 = REG_SEQUENCE %0, {{[0-9]+}} +# CHECK: %1:sreg_128 = REG_SEQUENCE %0, {{[0-9]+}} # CHECK: bb.1: -# CHECK: %2 = PHI %1, %bb.0, %3, %bb.1 +# CHECK: %2:sreg_128 = PHI %1, %bb.0, %3, %bb.1 -# CHECK: %3 = REG_SEQUENCE %2.sub3, {{[0-9]+}}, undef %2.sub1, {{[0-9]+}}, %2.sub0, {{[0-9]+}}, %2.sub2, {{[0-9]+}} +# CHECK: %3:sreg_128 = REG_SEQUENCE %2.sub3, {{[0-9]+}}, undef %2.sub1, {{[0-9]+}}, %2.sub0, {{[0-9]+}}, %2.sub2, {{[0-9]+}} # CHECK: bb.2: # CHECK: S_NOP 0, implicit %2.sub0 diff --git a/test/CodeGen/AMDGPU/early-inline.ll b/test/CodeGen/AMDGPU/early-inline.ll index c871d54bec7ed..a4f970ee238a8 100644 --- a/test/CodeGen/AMDGPU/early-inline.ll +++ b/test/CodeGen/AMDGPU/early-inline.ll @@ -1,6 +1,5 @@ ; RUN: opt -mtriple=amdgcn-- -O1 -S -inline-threshold=1 -amdgpu-early-inline-all %s | FileCheck %s -; CHECK: @c_alias @c_alias = alias i32 (i32), i32 (i32)* @callee define i32 @callee(i32 %x) { @@ -17,6 +16,7 @@ entry: ; CHECK: mul i32 ; CHECK-NOT: call i32 +; CHECK: define i32 @c_alias define amdgpu_kernel void @caller(i32 %x) { entry: %res = call i32 @callee(i32 %x) diff --git a/test/CodeGen/AMDGPU/elf-header.ll b/test/CodeGen/AMDGPU/elf-header.ll new file mode 100644 index 0000000000000..192eb780f70e8 --- /dev/null +++ b/test/CodeGen/AMDGPU/elf-header.ll @@ -0,0 +1,49 @@ +; RUN: llc -march=r600 -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc -mtriple=r600-- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc -mtriple=r600-amd- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc -mtriple=r600-amd-unknown 
-filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s +; RUN: llc -mtriple=r600-unknown-unknown -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=R600 --check-prefix=R600-OSABI-NONE %s + +; RUN: llc -march=amdgcn -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn-- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn-amd- -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn-amd-unknown -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s +; RUN: llc -mtriple=amdgcn-unknown-unknown -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-NONE %s + +; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s +; RUN: llc -mtriple=amdgcn-unknown-amdhsa -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-HSA %s + +; RUN: llc -mtriple=amdgcn--amdpal -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s +; RUN: llc -mtriple=amdgcn-unknown-amdpal -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-PAL %s + +; RUN: llc -mtriple=amdgcn--mesa3d -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s +; RUN: llc -mtriple=amdgcn-amd-mesa3d -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s +; RUN: llc -mtriple=amdgcn-unknown-mesa3d -filetype=obj < %s | llvm-readobj -file-headers - | FileCheck --check-prefix=GCN --check-prefix=GCN-OSABI-MESA3D %s + +; R600: Format: ELF32-amdgpu +; R600: Arch: r600 +; R600: AddressSize: 32bit +; GCN: Format: ELF64-amdgpu +; GCN: Arch: amdgcn +; GCN: AddressSize: 64bit + +; R600-OSABI-NONE: OS/ABI: SystemV (0x0) +; GCN-OSABI-NONE: OS/ABI: SystemV (0x0) +; GCN-OSABI-HSA: OS/ABI: AMDGPU_HSA (0x40) +; GCN-OSABI-PAL: OS/ABI: AMDGPU_PAL (0x41) +; GCN-OSABI-MESA3D: OS/ABI: AMDGPU_MESA3D (0x42) + +; R600: Machine: EM_AMDGPU (0xE0) +; R600: Flags [ (0x1) +; R600: EF_AMDGPU_ARCH_R600 (0x1) +; R600: ] +; GCN: Machine: EM_AMDGPU (0xE0) +; GCN: Flags [ (0x2) +; GCN: EF_AMDGPU_ARCH_GCN (0x2) +; GCN: ] + +define amdgpu_kernel void @elf_header() { + ret void +} diff --git a/test/CodeGen/AMDGPU/elf-notes.ll b/test/CodeGen/AMDGPU/elf-notes.ll new file mode 100644 index 0000000000000..cd3c91719592a --- /dev/null +++ b/test/CodeGen/AMDGPU/elf-notes.ll @@ -0,0 +1,85 @@ +; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=gfx800 -mattr=+code-object-v3 < %s | FileCheck --check-prefix=GCN --check-prefix=OSABI-UNK --check-prefix=GFX800 %s +; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=iceland -mattr=+code-object-v3 < %s | FileCheck --check-prefix=GCN 
--check-prefix=OSABI-UNK --check-prefix=GFX800 %s +; RUN: llc -mtriple=amdgcn-amd-unknown -mcpu=gfx800 -mattr=+code-object-v3 -filetype=obj < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=GCN --check-prefix=OSABI-UNK-ELF --check-prefix=GFX800 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -mattr=+code-object-v3 < %s | FileCheck --check-prefix=GCN --check-prefix=OSABI-HSA --check-prefix=GFX800 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -mattr=+code-object-v3 < %s | FileCheck --check-prefix=GCN --check-prefix=OSABI-HSA --check-prefix=GFX800 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -mattr=+code-object-v3 -filetype=obj < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=GCN --check-prefix=OSABI-HSA-ELF --check-prefix=GFX800 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx800 -mattr=+code-object-v3 < %s | FileCheck --check-prefix=GCN --check-prefix=OSABI-PAL --check-prefix=GFX800 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=iceland -mattr=+code-object-v3 < %s | FileCheck --check-prefix=GCN --check-prefix=OSABI-PAL --check-prefix=GFX800 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx800 -mattr=+code-object-v3 -filetype=obj < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=GCN --check-prefix=OSABI-PAL-ELF --check-prefix=GFX800 %s +; RUN: llc -march=r600 -mattr=+code-object-v3 < %s | FileCheck --check-prefix=R600 %s + +; OSABI-UNK-NOT: .hsa_code_object_version +; OSABI-UNK-NOT: .hsa_code_object_isa +; OSABI-UNK: .amd_amdgpu_isa "amdgcn-amd-unknown--gfx800" +; OSABI-UNK-NOT: .amd_amdgpu_hsa_metadata +; OSABI-UNK-NOT: .amd_amdgpu_pal_metadata + +; OSABI-UNK-ELF-NOT: Unknown note type +; OSABI-UNK-ELF: NT_AMD_AMDGPU_ISA (ISA Version) +; OSABI-UNK-ELF: ISA Version: +; OSABI-UNK-ELF: amdgcn-amd-unknown--gfx800 +; OSABI-UNK-ELF-NOT: Unknown note type +; OSABI-UNK-ELF-NOT: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) +; OSABI-UNK-ELF-NOT: Unknown note type +; OSABI-UNK-ELF-NOT: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata) +; OSABI-UNK-ELF-NOT: Unknown note type + +; OSABI-HSA-NOT: .hsa_code_object_version +; OSABI-HSA-NOT: .hsa_code_object_isa +; OSABI-HSA: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx800" +; OSABI-HSA: .amd_amdgpu_hsa_metadata +; OSABI-HSA-NOT: .amd_amdgpu_pal_metadata + +; OSABI-HSA-ELF-NOT: Unknown note type +; OSABI-HSA-ELF: NT_AMD_AMDGPU_ISA (ISA Version) +; OSABI-HSA-ELF: ISA Version: +; OSABI-HSA-ELF: amdgcn-amd-amdhsa--gfx800 +; OSABI-HSA-ELF-NOT: Unknown note type +; OSABI-HSA-ELF: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) +; OSABI-HSA-ELF: HSA Metadata: +; OSABI-HSA-ELF: --- +; OSABI-HSA-ELF: Version: [ 1, 0 ] +; OSABI-HSA-ELF: Kernels: +; OSABI-HSA-ELF: - Name: elf_notes +; OSABI-HSA-ELF: SymbolName: 'elf_notes@kd' +; OSABI-HSA-ELF: CodeProps: +; OSABI-HSA-ELF: KernargSegmentSize: 0 +; OSABI-HSA-ELF: GroupSegmentFixedSize: 0 +; OSABI-HSA-ELF: PrivateSegmentFixedSize: 0 +; OSABI-HSA-ELF: KernargSegmentAlign: 4 +; OSABI-HSA-ELF: WavefrontSize: 64 +; OSABI-HSA-ELF: NumSGPRs: 96 +; OSABI-HSA-ELF: ... 
+; OSABI-HSA-ELF-NOT: Unknown note type +; OSABI-HSA-ELF-NOT: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata) +; OSABI-HSA-ELF-NOT: Unknown note type + +; OSABI-PAL-NOT: .hsa_code_object_version +; OSABI-PAL-NOT: .hsa_code_object_isa +; OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx800" +; OSABI-PAL-NOT: .amd_amdgpu_hsa_metadata +; OSABI-PAL: .amd_amdgpu_pal_metadata + +; OSABI-PAL-ELF-NOT: Unknown note type +; OSABI-PAL-ELF: NT_AMD_AMDGPU_ISA (ISA Version) +; OSABI-PAL-ELF: ISA Version: +; OSABI-PAL-ELF: amdgcn-amd-amdpal--gfx800 +; OSABI-PAL-ELF-NOT: Unknown note type +; OSABI-PAL-ELF-NOT: NT_AMD_AMDGPU_HSA_METADATA (HSA Metadata) +; OSABI-PAL-ELF-NOT: Unknown note type +; OSABI-PAL-ELF: NT_AMD_AMDGPU_PAL_METADATA (PAL Metadata) +; OSABI-PAL-ELF: PAL Metadata: +; TODO: Following check line fails on mips: +; OSABI-PAL-ELF-XXX: 0x2e12,0xac02c0,0x2e13,0x80,0x1000001b,0x1,0x10000022,0x60,0x1000003e,0x0 +; OSABI-PAL-ELF-NOT: Unknown note type + +; R600-NOT: .hsa_code_object_version +; R600-NOT: .hsa_code_object_isa +; R600-NOT: .amd_amdgpu_isa +; R600-NOT: .amd_amdgpu_hsa_metadata +; R600-NOT: .amd_amdgpu_pal_metadata + +define amdgpu_kernel void @elf_notes() { + ret void +} diff --git a/test/CodeGen/AMDGPU/elf.ll b/test/CodeGen/AMDGPU/elf.ll index b22f8608d7e33..de8c010f204aa 100644 --- a/test/CodeGen/AMDGPU/elf.ll +++ b/test/CodeGen/AMDGPU/elf.ll @@ -8,8 +8,8 @@ ; Test that we don't try to produce a COFF file on windows ; RUN: llc < %s -mtriple=amdgcn-pc-mingw -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols -file-headers - | FileCheck --check-prefix=ELF %s -; ELF: Format: ELF64 -; ELF: OS/ABI: AMDGPU_HSA (0x40) +; ELF: Format: ELF64-amdgpu +; ELF: OS/ABI: SystemV (0x0) ; ELF: Machine: EM_AMDGPU (0xE0) ; ELF: Name: .AMDGPU.config ; ELF: Type: SHT_PROGBITS diff --git a/test/CodeGen/AMDGPU/elf.r600.ll b/test/CodeGen/AMDGPU/elf.r600.ll index 93c5e55750336..1ca1524cbaa1c 100644 --- a/test/CodeGen/AMDGPU/elf.r600.ll +++ b/test/CodeGen/AMDGPU/elf.r600.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=r600 -mcpu=redwood -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF %s ; RUN: llc < %s -march=r600 -mcpu=redwood -o - | FileCheck --check-prefix=CONFIG %s -; ELF: Format: ELF32 +; ELF: Format: ELF32-amdgpu ; ELF: Name: .AMDGPU.config ; CONFIG: .section .AMDGPU.config diff --git a/test/CodeGen/AMDGPU/endpgm-dce.mir b/test/CodeGen/AMDGPU/endpgm-dce.mir index 59802ca97924e..9833cc10d40af 100644 --- a/test/CodeGen/AMDGPU/endpgm-dce.mir +++ b/test/CodeGen/AMDGPU/endpgm-dce.mir @@ -25,7 +25,7 @@ body: | --- # GCN-LABEL: name: load_without_memoperand # GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc -# GCN-NEXT: dead %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr +# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr # GCN-NEXT: S_ENDPGM name: load_without_memoperand tracksRegLiveness: true @@ -49,7 +49,7 @@ body: | --- # GCN-LABEL: name: load_volatile # GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc -# GCN-NEXT: dead %1 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile load 4) +# GCN-NEXT: dead %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile load 4) # GCN-NEXT: S_ENDPGM name: load_volatile tracksRegLiveness: true @@ -120,7 +120,7 @@ body: | --- # GCN-LABEL: name: exp # GCN: %sgpr0_sgpr1 = S_OR_B64 %exec, killed %vcc, implicit-def %scc -# GCN-NEXT: EXP 32, undef %0, undef %1, %2, undef %3, 0, 0, 15, implicit %exec +#
GCN-NEXT: EXP 32, undef %0:vgpr_32, undef %1:vgpr_32, %2, undef %3:vgpr_32, 0, 0, 15, implicit %exec # GCN-NEXT: S_ENDPGM name: exp tracksRegLiveness: true diff --git a/test/CodeGen/AMDGPU/enqueue-kernel.ll b/test/CodeGen/AMDGPU/enqueue-kernel.ll new file mode 100644 index 0000000000000..b1b83c2b4a101 --- /dev/null +++ b/test/CodeGen/AMDGPU/enqueue-kernel.ll @@ -0,0 +1,92 @@ +; RUN: opt -amdgpu-lower-enqueued-block -S < %s | FileCheck %s + +; CHECK: @__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)* +; CHECK: @__test_block_invoke_2_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)* + +target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" +target triple = "amdgcn-amdhsa-amd-opencl" + +%struct.ndrange_t = type { i32 } +%opencl.queue_t = type opaque + +define amdgpu_kernel void @test(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr + !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { +entry: + %block = alloca <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, align 8 + %tmp = alloca %struct.ndrange_t, align 4 + %block2 = alloca <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, align 8 + %tmp3 = alloca %struct.ndrange_t, align 4 + %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block, i32 0, i32 0 + store i32 25, i32* %block.size, align 8 + %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block, i32 0, i32 1 + store i32 8, i32* %block.align, align 4 + %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block, i32 0, i32 2 + store i8 addrspace(4)* addrspacecast (i8* bitcast (void (<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 8 + %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block, i32 0, i32 3 + store i8 addrspace(1)* %a, i8 addrspace(1)** %block.captured, align 8 + %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block, i32 0, i32 4 + store i8 %b, i8* %block.captured1, align 8 + %tmp1 = bitcast <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>* %block to void ()* + %tmp2 = bitcast void ()* %tmp1 to i8* + %tmp4 = addrspacecast i8* %tmp2 to i8 addrspace(4)* + %tmp5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t* byval nonnull %tmp, i8 addrspace(4)* nonnull %tmp4) #2 + %block.size4 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 0 + store i32 41, i32* %block.size4, align 8 + %block.align5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 
addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 1 + store i32 8, i32* %block.align5, align 4 + %block.invoke6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 2 + store i8 addrspace(4)* addrspacecast (i8* bitcast (void (<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>)* @__test_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke6, align 8 + %block.captured7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 3 + store i8 addrspace(1)* %a, i8 addrspace(1)** %block.captured7, align 8 + %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 6 + store i8 %b, i8* %block.captured8, align 8 + %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 4 + store i64 addrspace(1)* %c, i64 addrspace(1)** %block.captured9, align 8 + %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2, i32 0, i32 5 + store i64 %d, i64* %block.captured10, align 8 + %tmp6 = bitcast <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>* %block2 to void ()* + %tmp7 = bitcast void ()* %tmp6 to i8* + %tmp8 = addrspacecast i8* %tmp7 to i8 addrspace(4)* + %tmp9 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t* byval nonnull %tmp3, i8 addrspace(4)* nonnull %tmp8) #2 + ret void +} + +; CHECK: define amdgpu_kernel void @__test_block_invoke_kernel({{.*}}) #[[AT1:[0-9]+]] +define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %arg) #0 + !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { +entry: + %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %arg, 3 + %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %arg, 4 + store i8 %.fca.4.extract, i8 addrspace(1)* %.fca.3.extract, align 1 + ret void +} + +declare i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)*, i32, %struct.ndrange_t*, i8 addrspace(4)*) local_unnamed_addr + +; CHECK: define amdgpu_kernel void @__test_block_invoke_2_kernel({{.*}}) #[[AT2:[0-9]+]] +define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, + i64 addrspace(1)*, i64, i8 }> %arg) #0 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 + !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 { +entry: + %.fca.3.extract = extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 3 + %.fca.4.extract = extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 4 + %.fca.5.extract = 
extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 5 + %.fca.6.extract = extractvalue <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> %arg, 6 + store i8 %.fca.6.extract, i8 addrspace(1)* %.fca.3.extract, align 1 + store i64 %.fca.5.extract, i64 addrspace(1)* %.fca.4.extract, align 8 + ret void +} + +; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel_runtime_handle" +; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel_runtime_handle" + +attributes #0 = { "enqueued-block" } + +!3 = !{i32 1, i32 0, i32 1, i32 0} +!4 = !{!"none", !"none", !"none", !"none"} +!5 = !{!"char*", !"char", !"long*", !"long"} +!6 = !{!"", !"", !"", !""} +!14 = !{i32 0} +!15 = !{!"none"} +!16 = !{!"__block_literal"} +!17 = !{!""} diff --git a/test/CodeGen/AMDGPU/fabs.ll b/test/CodeGen/AMDGPU/fabs.ll index 600c6cd8230eb..550ad7956c929 100644 --- a/test/CodeGen/AMDGPU/fabs.ll +++ b/test/CodeGen/AMDGPU/fabs.ll @@ -83,7 +83,7 @@ define amdgpu_kernel void @fabs_fn_fold(float addrspace(1)* %out, float %in0, fl ret void } -; GCN-LABEL: {{^}}fabs_fold: +; FUNC-LABEL: {{^}}fabs_fold: ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb ; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c ; GCN-NOT: and @@ -95,6 +95,18 @@ define amdgpu_kernel void @fabs_fold(float addrspace(1)* %out, float %in0, float ret void } +; Make sure we turn some integer operations back into fabs +; FUNC-LABEL: {{^}}bitpreserve_fabs_f32: +; GCN: v_add_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, 1.0 +define amdgpu_kernel void @bitpreserve_fabs_f32(float addrspace(1)* %out, float %in) { + %in.bc = bitcast float %in to i32 + %int.abs = and i32 %in.bc, 2147483647 + %bc = bitcast i32 %int.abs to float + %fadd = fadd float %bc, 1.0 + store float %fadd, float addrspace(1)* %out + ret void +} + declare float @fabs(float) readnone declare float @llvm.fabs.f32(float) readnone declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone diff --git a/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll index f35fe098569da..f66278845c1f6 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll @@ -519,6 +519,10 @@ define amdgpu_kernel void @test_fold_canonicalize_load_nnan_value_f16(half addrs ret void } +; Avoid failing the test on FreeBSD11.0 which will match the GCN-NOT: 1.0 +; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive +; CHECK: .amd_amdgpu_isa + declare float @llvm.canonicalize.f32(float) #0 declare double @llvm.canonicalize.f64(double) #0 declare half @llvm.canonicalize.f16(half) #0 diff --git a/test/CodeGen/AMDGPU/fneg.ll b/test/CodeGen/AMDGPU/fneg.ll index d1eabfb13c9af..94ec61622bd26 100644 --- a/test/CodeGen/AMDGPU/fneg.ll +++ b/test/CodeGen/AMDGPU/fneg.ll @@ -84,3 +84,15 @@ define amdgpu_kernel void @fneg_fold_f32(float addrspace(1)* %out, float %in) { store float %fmul, float addrspace(1)* %out ret void } + +; Make sure we turn some integer operations back into fneg +; FUNC-LABEL: {{^}}bitpreserve_fneg_f32: +; GCN: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -4.0 +define amdgpu_kernel void @bitpreserve_fneg_f32(float addrspace(1)* %out, float %in) { + %in.bc = bitcast float %in to i32 + %int.abs = xor i32 %in.bc, 2147483648 + %bc = bitcast i32 %int.abs to float + %fadd = fmul float %bc, 4.0 + store float %fadd, float addrspace(1)* %out + ret void +} diff --git
a/test/CodeGen/AMDGPU/fold-cndmask.mir b/test/CodeGen/AMDGPU/fold-cndmask.mir index 8dfec91663038..1ddb02a59b962 100644 --- a/test/CodeGen/AMDGPU/fold-cndmask.mir +++ b/test/CodeGen/AMDGPU/fold-cndmask.mir @@ -1,11 +1,11 @@ # RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s -# CHECK: %1 = V_MOV_B32_e32 0, implicit %exec -# CHECK: %2 = V_MOV_B32_e32 0, implicit %exec -# CHECK: %4 = COPY %3 -# CHECK: %5 = V_MOV_B32_e32 0, implicit %exec -# CHECK: %6 = V_MOV_B32_e32 0, implicit %exec -# CHECK: %7 = COPY %3 +# CHECK: %1:vgpr_32 = V_MOV_B32_e32 0, implicit %exec +# CHECK: %2:vgpr_32 = V_MOV_B32_e32 0, implicit %exec +# CHECK: %4:vgpr_32 = COPY %3 +# CHECK: %5:vgpr_32 = V_MOV_B32_e32 0, implicit %exec +# CHECK: %6:vgpr_32 = V_MOV_B32_e32 0, implicit %exec +# CHECK: %7:vgpr_32 = COPY %3 --- name: fold_cndmask diff --git a/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir b/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir index 3155b7a8664fb..9831538aa66a3 100644 --- a/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir +++ b/test/CodeGen/AMDGPU/fold-immediate-output-mods.mir @@ -1,8 +1,8 @@ # RUN: llc -march=amdgcn -run-pass peephole-opt -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s ... # GCN-LABEL: name: no_fold_imm_madak_mac_clamp_f32 -# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec -# GCN-NEXT: %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec +# GCN: %23:vgpr_32 = V_MOV_B32_e32 1090519040, implicit %exec +# GCN-NEXT: %24:vgpr_32 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec name: no_fold_imm_madak_mac_clamp_f32 tracksRegLiveness: true @@ -72,8 +72,8 @@ body: | ... --- # GCN-LABEL: name: no_fold_imm_madak_mac_omod_f32 -# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec -# GCN: %24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit %exec +# GCN: %23:vgpr_32 = V_MOV_B32_e32 1090519040, implicit %exec +# GCN: %24:vgpr_32 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit %exec name: no_fold_imm_madak_mac_omod_f32 tracksRegLiveness: true @@ -143,8 +143,8 @@ body: | ... --- # GCN: name: no_fold_imm_madak_mad_clamp_f32 -# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec -# GCN: %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec +# GCN: %23:vgpr_32 = V_MOV_B32_e32 1090519040, implicit %exec +# GCN: %24:vgpr_32 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit %exec name: no_fold_imm_madak_mad_clamp_f32 tracksRegLiveness: true @@ -214,8 +214,8 @@ body: | ... --- # GCN: name: no_fold_imm_madak_mad_omod_f32 -# GCN: %23 = V_MOV_B32_e32 1090519040, implicit %exec -# GCN: %24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit %exec +# GCN: %23:vgpr_32 = V_MOV_B32_e32 1090519040, implicit %exec +# GCN: %24:vgpr_32 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit %exec name: no_fold_imm_madak_mad_omod_f32 tracksRegLiveness: true diff --git a/test/CodeGen/AMDGPU/fold-operands-order.mir b/test/CodeGen/AMDGPU/fold-operands-order.mir index 51bb357fcf6ee..3f28f39930f57 100644 --- a/test/CodeGen/AMDGPU/fold-operands-order.mir +++ b/test/CodeGen/AMDGPU/fold-operands-order.mir @@ -6,10 +6,10 @@ # aren't made in users before the def is seen. 
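# A minimal sketch of the ordering hazard (illustrative only; the block layout
# and virtual register numbers below are placeholders, not checks from this
# test): if a use is visited before its def,
#
#   bb.1:
#     %3:vgpr_32 = COPY %1            ; user block visited first
#   bb.2:
#     %1:vgpr_32 = V_MOV_B32_e32 0, implicit %exec
#
# then rewriting the COPY into an immediate move must be deferred until the
# defining V_MOV_B32 has actually been seen.
#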
# GCN-LABEL: name: mov_in_use_list_2x{{$}} -# GCN: %2 = V_MOV_B32_e32 0, implicit %exec -# GCN-NEXT: %3 = COPY undef %0 +# GCN: %2:vgpr_32 = V_MOV_B32_e32 0, implicit %exec +# GCN-NEXT: %3:vgpr_32 = COPY undef %0 -# GCN: %1 = V_MOV_B32_e32 0, implicit %exec +# GCN: %1:vgpr_32 = V_MOV_B32_e32 0, implicit %exec name: mov_in_use_list_2x diff --git a/test/CodeGen/AMDGPU/fpext-free.ll b/test/CodeGen/AMDGPU/fpext-free.ll new file mode 100644 index 0000000000000..0a504b3e03e4e --- /dev/null +++ b/test/CodeGen/AMDGPU/fpext-free.ll @@ -0,0 +1,384 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32FLUSH %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9,GFX9-F32DENORM %s +; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32FLUSH %s +; RUN: llc -march=amdgcn -mcpu=gfx803 -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI,VI-F32DENORM %s + +; fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + +; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32: +; GCN: s_waitcnt +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}} +; GFX9-F32FLUSH-NEXT: s_setpc_b64 + +; GFX9-F32DENORM-NEXT: v_mul_f16 +; GFX9-F32DENORM-NEXT: v_cvt_f32_f16 +; GFX9-F32DENORM-NEXT: v_add_f32 +define float @fadd_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 { +entry: + %mul = fmul half %x, %y + %mul.ext = fpext half %mul to float + %add = fadd float %mul.ext, %z + ret float %add +} + +; f16->f64 is not free. +; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f64: +; GFX89: v_mul_f16 +; GFX89: v_cvt_f32_f16 +; GFX89: v_cvt_f64_f32 +; GFX89: v_add_f64 +define double @fadd_fpext_fmul_f16_to_f64(half %x, half %y, double %z) #0 { +entry: + %mul = fmul half %x, %y + %mul.ext = fpext half %mul to double + %add = fadd double %mul.ext, %z + ret double %add +} + +; f32->f64 is not free. 
+; GCN-LABEL: {{^}}fadd_fpext_fmul_f32_to_f64: +; GCN: v_mul_f32 +; GCN: v_cvt_f64_f32 +; GCN: v_add_f64 +define double @fadd_fpext_fmul_f32_to_f64(float %x, float %y, double %z) #0 { +entry: + %mul = fmul float %x, %y + %mul.ext = fpext float %mul to double + %add = fadd double %mul.ext, %z + ret double %add +} + +; fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) +; GCN-LABEL: {{^}}fadd_fpext_fmul_f16_to_f32_commute: +; GCN: s_waitcnt +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]{{$}} +; GFX9-F32FLUSH-NEXT: s_setpc_b64 + +; GFX9-F32DENORM-NEXT: v_mul_f16 +; GFX9-F32DENORM-NEXT: v_cvt_f32_f16 +; GFX9-F32DENORM-NEXT: v_add_f32 +; GFX9-F32DENORM-NEXT: s_setpc_b64 +define float @fadd_fpext_fmul_f16_to_f32_commute(half %x, half %y, float %z) #0 { +entry: + %mul = fmul half %x, %y + %mul.ext = fpext half %mul to float + %add = fadd float %z, %mul.ext + ret float %add +} + +; fold (fadd (fma x, y, (fpext (fmul u, v))), z) +; -> (fma x, y, (fma (fpext u), (fpext v), z)) + +; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32: +; GCN: s_waitcnt +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0] +; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1 +; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-F32FLUSH-NEXT: s_setpc_b64 + +; GFX9-F32DENORM-NEXT: v_mul_f16 +; GFX9-F32DENORM-NEXT: v_cvt_f32_f16 +; GFX9-F32DENORM-NEXT: v_fma_f32 +; GFX9-F32DENORM-NEXT: v_add_f32 +; GFX9-F32DENORM-NEXT: s_setpc_b64 +define float @fadd_muladd_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 { +entry: + %mul = fmul half %u, %v + %mul.ext = fpext half %mul to float + %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext) + %add = fadd float %fma, %z + ret float %add +} + +; fold (fadd x, (fma y, z, (fpext (fmul u, v))) +; -> (fma y, z, (fma (fpext u), (fpext v), x)) +; GCN-LABEL: {{^}}fadd_muladd_fpext_fmul_f16_to_f32_commute: +; GCN: s_waitcnt +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0] +; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1 +; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-F32FLUSH-NEXT: s_setpc_b64 + +; GFX9-F32DENORM-NEXT: v_mul_f16 +; GFX9-F32DENORM-NEXT: v_cvt_f32_f16 +; GFX9-F32DENORM-NEXT: v_fma_f32 +; GFX9-F32DENORM-NEXT: v_add_f32 +; GFX9-F32DENORM-NEXT: s_setpc_b64 +define float @fadd_muladd_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 { +entry: + %mul = fmul half %u, %v + %mul.ext = fpext half %mul to float + %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext) + %add = fadd float %z, %fma + ret float %add +} + +; GCN-LABEL: {{^}}fadd_fmad_fpext_fmul_f16_to_f32: +; GCN: s_waitcnt +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v2, v3, v4 op_sel_hi:[1,1,0] +; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1 +; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-F32FLUSH-NEXT: s_setpc_b64 + +; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v2, v2, v3 +; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v2, v2 +; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v2 +define float @fadd_fmad_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 { +entry: + %mul = fmul half %u, %v + %mul.ext = fpext half %mul to float + %mul1 = fmul contract float %x, %y + %fmad = fadd contract float %mul1, %mul.ext + %add = fadd float %fmad, %z + ret float %add +} + +; fold (fadd (fma x, y, (fpext (fmul u, v))), z) +; -> (fma x, y, (fma (fpext u), (fpext v), z)) + +; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32: +; GCN: s_waitcnt +; GFX89: v_mul_f16 +; GFX89: 
v_cvt_f32_f16 +; GFX89: v_fma_f32 +; GFX89: v_add_f32 +define float @fadd_fma_fpext_fmul_f16_to_f32(float %x, float %y, half %u, half %v, float %z) #0 { +entry: + %mul = fmul contract half %u, %v + %mul.ext = fpext half %mul to float + %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext) + %add = fadd float %fma, %z + ret float %add +} + +; GCN-LABEL: {{^}}fadd_fma_fpext_fmul_f16_to_f32_commute: +; GCN: s_waitcnt +; GFX89: v_mul_f16 +; GFX89: v_cvt_f32_f16 +; GFX89: v_fma_f32 +; GFX89: v_add_f32 +define float @fadd_fma_fpext_fmul_f16_to_f32_commute(float %x, float %y, half %u, half %v, float %z) #0 { +entry: + %mul = fmul contract half %u, %v + %mul.ext = fpext half %mul to float + %fma = call float @llvm.fma.f32(float %x, float %y, float %mul.ext) + %add = fadd float %z, %fma + ret float %add +} + +; fold (fadd x, (fpext (fma y, z, (fmul u, v))) +; -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) + +; GCN-LABEL: {{^}}fadd_fpext_fmuladd_f16_to_f32: +; GFX9: v_mul_f16 +; GFX9: v_fma_legacy_f16 +; GFX9: v_cvt_f32_f16 +; GFX9: v_add_f32_e32 +define float @fadd_fpext_fmuladd_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 { +entry: + %mul = fmul contract half %u, %v + %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul) + %ext.fma = fpext half %fma to float + %add = fadd float %x, %ext.fma + ret float %add +} + +; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32: +; GFX9: v_mul_f16 +; GFX9: v_fma_legacy_f16 +; GFX9: v_cvt_f32_f16 +; GFX9: v_add_f32_e32 +define float @fadd_fpext_fma_f16_to_f32(float %x, half %y, half %z, half %u, half %v) #0 { +entry: + %mul = fmul contract half %u, %v + %fma = call half @llvm.fma.f16(half %y, half %z, half %mul) + %ext.fma = fpext half %fma to float + %add = fadd float %x, %ext.fma + ret float %add +} + +; GCN-LABEL: {{^}}fadd_fpext_fma_f16_to_f32_commute: +; GFX9: v_mul_f16 +; GFX9: v_fma_legacy_f16 +; GFX9: v_cvt_f32_f16 +; GFX9: v_add_f32_e32 +define float @fadd_fpext_fma_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 { +entry: + %mul = fmul contract half %u, %v + %fma = call half @llvm.fma.f16(half %y, half %z, half %mul) + %ext.fma = fpext half %fma to float + %add = fadd float %ext.fma, %x + ret float %add +} + +; fold (fsub (fpext (fmul x, y)), z) +; -> (fma (fpext x), (fpext y), (fneg z)) + +; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32: +; GCN: s_waitcnt +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]{{$}} +; GFX9-F32FLUSH-NEXT: s_setpc_b64 + +; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX9-F32DENORM-NEXT: s_setpc_b64 +define float @fsub_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 { +entry: + %mul = fmul half %x, %y + %mul.ext = fpext half %mul to float + %add = fsub float %mul.ext, %z + ret float %add +} + +; fold (fsub x, (fpext (fmul y, z))) +; -> (fma (fneg (fpext y)), (fpext z), x) + +; GCN-LABEL: {{^}}fsub_fpext_fmul_f16_to_f32_commute: +; GCN: s_waitcnt +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v1, v2, v0 op_sel_hi:[1,1,0] +; GFX9-F32FLUSH-NEXT: s_setpc_b64 + +; GFX9-F32DENORM-NEXT: v_mul_f16_e32 +; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 +; GFX9-F32DENORM-NEXT: v_sub_f32_e32 +; GFX9-F32DENORM-NEXT: s_setpc_b64 +define float @fsub_fpext_fmul_f16_to_f32_commute(float %x, half %y, half %z) #0 { +entry: + %mul = fmul contract half %y, %z + %mul.ext = fpext half %mul to float + %add = fsub contract float %x, %mul.ext + ret float %add +} + +; fold (fsub 
(fpext (fneg (fmul x, y))), z) +; -> (fneg (fma (fpext x), (fpext y), z)) + +; GCN-LABEL: {{^}}fsub_fpext_fneg_fmul_f16_to_f32: +; GCN: s_waitcnt +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}} +; GFX9-F32FLUSH-NEXT: s_setpc_b64 + +; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1 +; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX9-F32DENORM-NEXT: s_setpc_b64 +define float @fsub_fpext_fneg_fmul_f16_to_f32(half %x, half %y, float %z) #0 { +entry: + %mul = fmul half %x, %y + %neg.mul = fsub half -0.0, %mul + %neg.mul.ext = fpext half %neg.mul to float + %add = fsub float %neg.mul.ext, %z + ret float %add +} + +; fold (fsub (fneg (fpext (fmul x, y))), z) +; -> (fneg (fma (fpext x), (fpext y), z)) + +; GCN-LABEL: {{^}}fsub_fneg_fpext_fmul_f16_to_f32: +; GCN: s_waitcnt +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, v0, -v1, -v2 op_sel_hi:[1,1,0]{{$}} +; GFX9-F32FLUSH-NEXT: s_setpc_b64 + +; GFX9-F32DENORM-NEXT: v_mul_f16_e64 v0, v0, -v1 +; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX9-F32DENORM-NEXT: s_setpc_b64 +define float @fsub_fneg_fpext_fmul_f16_to_f32(half %x, half %y, float %z) #0 { +entry: + %mul = fmul half %x, %y + %mul.ext = fpext half %mul to float + %neg.mul.ext = fsub float -0.0, %mul.ext + %add = fsub float %neg.mul.ext, %z + ret float %add +} + +; fold (fsub (fmad x, y, (fpext (fmul u, v))), z) +; -> (fmad x, y, (fmad (fpext u), (fpext v), (fneg z))) +; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32: +; GCN: s_waitcnt +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v2, v3, v4, -v2 op_sel_hi:[1,1,0]{{$}} +; GFX9-F32FLUSH-NEXT: v_mac_f32_e32 v2, v0, v1 +; GFX9-F32FLUSH-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-F32FLUSH-NEXT: s_setpc_b64 + +; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4 +; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX9-F32DENORM-NEXT: v_fma_f32 v0, v0, v1, v3 +; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v2 +; GFX9-F32DENORM-NEXT: s_setpc_b64 +define float @fsub_muladd_fpext_mul_f16_to_f32(float %x, float %y, float %z, half %u, half %v) #0 { +entry: + %mul = fmul half %u, %v + %mul.ext = fpext half %mul to float + %fma = call float @llvm.fmuladd.f32(float %x, float %y, float %mul.ext) + %add = fsub float %fma, %z + ret float %add +} + +; fold (fsub (fpext (fmad x, y, (fmul u, v))), z) +; -> (fmad (fpext x), (fpext y), +; (fmad (fpext u), (fpext v), (fneg z))) + +; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32: +; GFX9: v_mul_f16 +; GFX9: v_fma_legacy_f16 +; GFX9: v_cvt_f32_f16 +; GFX9: v_sub_f32 +; GCN: s_setpc_b64 +define float @fsub_fpext_muladd_mul_f16_to_f32(half %x, half %y, float %z, half %u, half %v) #0 { +entry: + %mul = fmul half %u, %v + %fma = call half @llvm.fmuladd.f16(half %x, half %y, half %mul) + %fma.ext = fpext half %fma to float + %add = fsub float %fma.ext, %z + ret float %add +} + +; fold (fsub x, (fmad y, z, (fpext (fmul u, v)))) +; -> (fmad (fneg y), z, (fmad (fneg (fpext u)), (fpext v), x)) +; GCN-LABEL: {{^}}fsub_muladd_fpext_mul_f16_to_f32_commute: +; GCN: s_waitcnt +; GFX9-F32FLUSH-NEXT: v_mad_mix_f32 v0, -v3, v4, v0 op_sel_hi:[1,1,0]{{$}} +; GFX9-F32FLUSH-NEXT: v_mad_f32 v0, -v1, v2, v0{{$}} +; GFX9-F32FLUSH-NEXT: s_setpc_b64 + +; GFX9-F32DENORM-NEXT: v_mul_f16_e32 v3, v3, v4 +; GFX9-F32DENORM-NEXT: v_cvt_f32_f16_e32 v3, v3 +; GFX9-F32DENORM-NEXT: v_fma_f32 v1, v1, v2, v3 +; GFX9-F32DENORM-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX9-F32DENORM-NEXT: s_setpc_b64 +define float
@fsub_muladd_fpext_mul_f16_to_f32_commute(float %x, float %y, float %z, half %u, half %v) #0 { +entry: + %mul = fmul half %u, %v + %mul.ext = fpext half %mul to float + %fma = call float @llvm.fmuladd.f32(float %y, float %z, float %mul.ext) + %add = fsub float %x, %fma + ret float %add +} + +; fold (fsub x, (fpext (fma y, z, (fmul u, v)))) +; -> (fma (fneg (fpext y)), (fpext z), +; (fma (fneg (fpext u)), (fpext v), x)) +; GCN-LABEL: {{^}}fsub_fpext_muladd_mul_f16_to_f32_commute: +; GCN: s_waitcnt +; GFX9-NEXT: v_mul_f16_e32 v3, v3, v4 +; GFX9-NEXT: v_fma_legacy_f16 v1, v1, v2, v3 +; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX9-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 +define float @fsub_fpext_muladd_mul_f16_to_f32_commute(float %x, half %y, half %z, half %u, half %v) #0 { +entry: + %mul = fmul half %u, %v + %fma = call half @llvm.fmuladd.f16(half %y, half %z, half %mul) + %fma.ext = fpext half %fma to float + %add = fsub float %x, %fma.ext + ret float %add +} + +declare float @llvm.fmuladd.f32(float, float, float) #0 +declare float @llvm.fma.f32(float, float, float) #0 +declare half @llvm.fmuladd.f16(half, half, half) #0 +declare half @llvm.fma.f16(half, half, half) #0 + +attributes #0 = { nounwind readnone speculatable } diff --git a/test/CodeGen/AMDGPU/code-object-metadata-deduce-ro-arg.ll b/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll similarity index 76% rename from test/CodeGen/AMDGPU/code-object-metadata-deduce-ro-arg.ll rename to test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll index a33c3646e253e..c07c5556ce382 100644 --- a/test/CodeGen/AMDGPU/code-object-metadata-deduce-ro-arg.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg.ll @@ -1,24 +1,24 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s ; CHECK: - Name: test_ro_arg -; CHECK: Args: -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: SymbolName: 'test_ro_arg@kd' +; CHECK-NEXT: Args: +; CHECK-NEXT: - TypeName: 'float*' +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: F32 -; CHECK-NEXT: AccQual: ReadOnly ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: AccQual: ReadOnly ; CHECK-NEXT: IsConst: true ; CHECK-NEXT: IsRestrict: true -; CHECK-NEXT: TypeName: 'float*' - -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: 'float*' +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: F32 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: TypeName: 'float*' +; CHECK-NEXT: AccQual: Default define amdgpu_kernel void @test_ro_arg(float addrspace(1)* noalias readonly %in, float addrspace(1)* %out) !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2 @@ -30,4 +30,3 @@ define amdgpu_kernel void @test_ro_arg(float addrspace(1)* noalias readonly %in, !1 = !{!"none", !"none"} !2 = !{!"float*", !"float*"} !3 = !{!"const restrict", !""} - diff --git a/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll b/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll similarity index 85% rename from test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll rename to test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll index 37fd08242fbaa..4ac9bacebe1c3 100644 --- a/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll +++ 
b/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll @@ -1,9 +1,9 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-comd -amdgpu-verify-comd -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -amdgpu-dump-comd -amdgpu-verify-comd -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-comd -amdgpu-verify-comd -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s %struct.A = type { i8, float } %opencl.image1d_t = type opaque @@ -14,6 +14,8 @@ %struct.B = type { i32 addrspace(1)*} %opencl.clk_event_t = type opaque +@__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)* + ; CHECK: --- ; CHECK: Version: [ 1, 0 ] ; CHECK: Printf: @@ -22,15 +24,16 @@ ; CHECK: Kernels: ; CHECK: - Name: test_char +; CHECK-NEXT: SymbolName: 'test_char@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 1 +; CHECK-NEXT: - TypeName: char +; CHECK-NEXT: Size: 1 ; CHECK-NEXT: Align: 1 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: char ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -55,15 +58,16 @@ define amdgpu_kernel void @test_char(i8 %a) } ; CHECK: - Name: test_ushort2 +; CHECK-NEXT: SymbolName: 'test_ushort2@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: ushort2 +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: 
ByValue ; CHECK-NEXT: ValueType: U16 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: ushort2 ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -88,15 +92,16 @@ define amdgpu_kernel void @test_ushort2(<2 x i16> %a) } ; CHECK: - Name: test_int3 +; CHECK-NEXT: SymbolName: 'test_int3@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 16 +; CHECK-NEXT: - TypeName: int3 +; CHECK-NEXT: Size: 16 ; CHECK-NEXT: Align: 16 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: int3 ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -121,15 +126,16 @@ define amdgpu_kernel void @test_int3(<3 x i32> %a) } ; CHECK: - Name: test_ulong4 +; CHECK-NEXT: SymbolName: 'test_ulong4@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 32 +; CHECK-NEXT: - TypeName: ulong4 +; CHECK-NEXT: Size: 32 ; CHECK-NEXT: Align: 32 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: U64 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: ulong4 ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -154,15 +160,16 @@ define amdgpu_kernel void @test_ulong4(<4 x i64> %a) } ; CHECK: - Name: test_half8 +; CHECK-NEXT: SymbolName: 'test_half8@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 16 +; CHECK-NEXT: - TypeName: half8 +; CHECK-NEXT: Size: 16 ; CHECK-NEXT: Align: 16 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: F16 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: half8 ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -187,15 +194,16 @@ define amdgpu_kernel void @test_half8(<8 x half> %a) } ; CHECK: - Name: test_float16 +; CHECK-NEXT: SymbolName: 'test_float16@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 64 +; CHECK-NEXT: - TypeName: float16 +; CHECK-NEXT: Size: 64 ; CHECK-NEXT: Align: 64 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: F32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: float16 ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -220,15 +228,16 @@ define amdgpu_kernel void @test_float16(<16 x float> %a) } ; CHECK: - Name: test_double16 +; CHECK-NEXT: SymbolName: 'test_double16@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 128 +; CHECK-NEXT: - TypeName: double16 +; CHECK-NEXT: Size: 128 ; CHECK-NEXT: Align: 128 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: F64 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: double16 ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -253,16 +262,17 @@ define amdgpu_kernel void @test_double16(<16 x double> %a) } ; CHECK: - Name: test_pointer +; CHECK-NEXT: SymbolName: 'test_pointer@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: I32 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: TypeName: 
'int *' +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -287,16 +297,17 @@ define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) } ; CHECK: - Name: test_image +; CHECK-NEXT: SymbolName: 'test_image@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: image2d_t +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Image ; CHECK-NEXT: ValueType: Struct -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: TypeName: image2d_t +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -321,15 +332,16 @@ define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) } ; CHECK: - Name: test_sampler +; CHECK-NEXT: SymbolName: 'test_sampler@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: sampler_t +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: Sampler ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: sampler_t ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -354,16 +366,17 @@ define amdgpu_kernel void @test_sampler(i32 %a) } ; CHECK: - Name: test_queue +; CHECK-NEXT: SymbolName: 'test_queue@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: queue_t +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Queue ; CHECK-NEXT: ValueType: Struct -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: TypeName: queue_t +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -388,16 +401,17 @@ define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) } ; CHECK: - Name: test_struct +; CHECK-NEXT: SymbolName: 'test_struct@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: struct A +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: Struct -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Private -; CHECK-NEXT: TypeName: struct A +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -422,15 +436,16 @@ define amdgpu_kernel void @test_struct(%struct.A* byval %a) } ; CHECK: - Name: test_i128 +; CHECK-NEXT: SymbolName: 'test_i128@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 16 +; CHECK-NEXT: - TypeName: i128 +; CHECK-NEXT: Size: 16 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: Struct ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: i128 ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -455,27 +470,28 @@ define amdgpu_kernel void @test_i128(i128 %a) } ; CHECK: - Name: test_multi_arg +; CHECK-NEXT: SymbolName: 'test_multi_arg@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: 
Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: int -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: short2 +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I16 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: short2 -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: char3 +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: char3 ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -500,31 +516,32 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) } ; CHECK: - Name: test_addr_space +; CHECK-NEXT: SymbolName: 'test_addr_space@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: I32 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: TypeName: 'int *' -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: I32 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Constant -; CHECK-NEXT: TypeName: 'int *' -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: PointeeAlign: 4 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: TypeName: 'int *' +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -551,34 +568,35 @@ define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, } ; CHECK: - Name: test_type_qual +; CHECK-NEXT: SymbolName: 'test_type_qual@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: I32 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: IsVolatile: true -; CHECK-NEXT: TypeName: 'int *' -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: I32 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: IsConst: true ; CHECK-NEXT: IsRestrict: true -; CHECK-NEXT: TypeName: 'int *' -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: 'int *' +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Pipe ; CHECK-NEXT: ValueType: Struct -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: IsPipe: true -; CHECK-NEXT: TypeName: 'int *' ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -605,30 +623,31 @@ define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, } ; CHECK: - Name: test_access_qual +; CHECK-NEXT: 
SymbolName: 'test_access_qual@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: image1d_t +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Image ; CHECK-NEXT: ValueType: Struct -; CHECK-NEXT: AccQual: ReadOnly ; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: TypeName: image1d_t -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: AccQual: ReadOnly +; CHECK-NEXT: - TypeName: image2d_t +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Image ; CHECK-NEXT: ValueType: Struct -; CHECK-NEXT: AccQual: WriteOnly ; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: TypeName: image2d_t -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: AccQual: WriteOnly +; CHECK-NEXT: - TypeName: image3d_t +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: Image ; CHECK-NEXT: ValueType: Struct -; CHECK-NEXT: AccQual: ReadWrite ; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: TypeName: image3d_t +; CHECK-NEXT: AccQual: ReadWrite ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -655,17 +674,18 @@ define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, } ; CHECK: - Name: test_vec_type_hint_half +; CHECK-NEXT: SymbolName: 'test_vec_type_hint_half@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: half ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: int ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -690,17 +710,18 @@ define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) } ; CHECK: - Name: test_vec_type_hint_float +; CHECK-NEXT: SymbolName: 'test_vec_type_hint_float@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: float ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: int ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -725,17 +746,18 @@ define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) } ; CHECK: - Name: test_vec_type_hint_double +; CHECK-NEXT: SymbolName: 'test_vec_type_hint_double@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: double ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: int ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -760,17 +782,18 @@ define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) } ; CHECK: - Name: test_vec_type_hint_char +; CHECK-NEXT: SymbolName: 'test_vec_type_hint_char@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: char ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: 
ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: int ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -795,17 +818,18 @@ define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) } ; CHECK: - Name: test_vec_type_hint_short +; CHECK-NEXT: SymbolName: 'test_vec_type_hint_short@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: short ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: int ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -830,17 +854,18 @@ define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) } ; CHECK: - Name: test_vec_type_hint_long +; CHECK-NEXT: SymbolName: 'test_vec_type_hint_long@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: long ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: int ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -865,17 +890,18 @@ define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) } ; CHECK: - Name: test_vec_type_hint_unknown +; CHECK-NEXT: SymbolName: 'test_vec_type_hint_unknown@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Attrs: ; CHECK-NEXT: VecTypeHint: unknown ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: int ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -900,18 +926,19 @@ define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) } ; CHECK: - Name: test_reqd_wgs_vec_type_hint +; CHECK-NEXT: SymbolName: 'test_reqd_wgs_vec_type_hint@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Attrs: ; CHECK-NEXT: ReqdWorkGroupSize: [ 1, 2, 4 ] ; CHECK-NEXT: VecTypeHint: int ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: int ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -937,18 +964,19 @@ define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) } ; CHECK: - Name: test_wgs_hint_vec_type_hint +; CHECK-NEXT: SymbolName: 'test_wgs_hint_vec_type_hint@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Attrs: ; CHECK-NEXT: WorkGroupSizeHint: [ 8, 16, 32 ] ; CHECK-NEXT: VecTypeHint: uint4 ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: int +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: int ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -974,16 
+1002,17 @@ define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) } ; CHECK: - Name: test_arg_ptr_to_ptr +; CHECK-NEXT: SymbolName: 'test_arg_ptr_to_ptr@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: 'int **' +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: I32 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: TypeName: 'int **' +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -1008,16 +1037,17 @@ define amdgpu_kernel void @test_arg_ptr_to_ptr(i32* addrspace(1)* %a) } ; CHECK: - Name: test_arg_struct_contains_ptr +; CHECK-NEXT: SymbolName: 'test_arg_struct_contains_ptr@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: - TypeName: struct B +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: Struct -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Private -; CHECK-NEXT: TypeName: struct B +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -1042,15 +1072,16 @@ define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B* byval %a) } ; CHECK: - Name: test_arg_vector_of_ptr +; CHECK-NEXT: SymbolName: 'test_arg_vector_of_ptr@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 16 +; CHECK-NEXT: - TypeName: 'global int* __attribute__((ext_vector_type(2)))' +; CHECK-NEXT: Size: 16 ; CHECK-NEXT: Align: 16 ; CHECK-NEXT: ValueKind: ByValue ; CHECK-NEXT: ValueType: I32 ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: TypeName: 'global int* __attribute__((ext_vector_type(2)))' ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -1075,16 +1106,17 @@ define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) } ; CHECK: - Name: test_arg_unknown_builtin_type +; CHECK-NEXT: SymbolName: 'test_arg_unknown_builtin_type@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: clk_event_t +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: Struct -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: TypeName: clk_event_t +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -1110,64 +1142,65 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type( } ; CHECK: - Name: test_pointee_align +; CHECK-NEXT: SymbolName: 'test_pointee_align@kd' ; CHECK-NEXT: Language: OpenCL C ; CHECK-NEXT: LanguageVersion: [ 2, 0 ] ; CHECK-NEXT: Args: -; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: - TypeName: 'long *' +; CHECK-NEXT: Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: GlobalBuffer ; CHECK-NEXT: ValueType: I64 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Global -; CHECK-NEXT: TypeName: 'long *' -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - TypeName: 'char *' +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: 
PointeeAlign: 1 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: TypeName: 'char *' -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - TypeName: 'char2 *' +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: PointeeAlign: 2 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: TypeName: 'char2 *' -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - TypeName: 'char3 *' +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: PointeeAlign: 4 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: TypeName: 'char3 *' -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - TypeName: 'char4 *' +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: PointeeAlign: 4 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: TypeName: 'char4 *' -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - TypeName: 'char8 *' +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: PointeeAlign: 8 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: TypeName: 'char8 *' -; CHECK-NEXT: - Size: 4 +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - TypeName: 'char16 *' +; CHECK-NEXT: Size: 4 ; CHECK-NEXT: Align: 4 ; CHECK-NEXT: ValueKind: DynamicSharedPointer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: PointeeAlign: 16 -; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: AddrSpaceQual: Local -; CHECK-NEXT: TypeName: 'char16 *' +; CHECK-NEXT: AccQual: Default ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX @@ -1197,6 +1230,45 @@ define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, ret void } +; CHECK: - Name: __test_block_invoke_kernel +; CHECK-NEXT: SymbolName: '__test_block_invoke_kernel@kd' +; CHECK-NEXT: Language: OpenCL C +; CHECK-NEXT: LanguageVersion: [ 2, 0 ] +; CHECK-NEXT: Attrs: +; CHECK-NEXT: RuntimeHandle: __test_block_invoke_kernel_runtime_handle +; CHECK-NEXT: Args: +; CHECK-NEXT: - TypeName: __block_literal +; CHECK-NEXT: Size: 25 +; CHECK-NEXT: Align: 1 +; CHECK-NEXT: ValueKind: ByValue +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: AccQual: Default +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenPrintfBuffer +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +define amdgpu_kernel void @__test_block_invoke_kernel( + <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }> %arg) #0 + !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110 + !kernel_arg_base_type !110 !kernel_arg_type_qual !4 { + ret void +} + +attributes #0 = { "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } + !llvm.printf.fmts = !{!100, !101} !1 = !{i32 0} @@ -1250,13 +1322,6 @@ define amdgpu_kernel void 
@test_pointee_align(i64 addrspace(1)* %a, !94 = !{!"", !"", !"", !"", !"", !"", !""} !100 = !{!"1:1:4:%d\5Cn"} !101 = !{!"2:1:8:%g\5Cn"} +!110 = !{!"__block_literal"} -; NOTES: Displaying notes found at file offset 0x{{[0-9]+}} -; NOTES-NEXT: Owner Data size Description -; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001) -; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003) -; GFX700: AMD 0x00008b0a Unknown note type: (0x0000000a) -; GFX800: AMD 0x00008e6e Unknown note type: (0x0000000a) -; GFX900: AMD 0x00008b0a Unknown note type: (0x0000000a) - -; PARSER: AMDGPU Code Object Metadata Parser Test: PASS +; PARSER: AMDGPU HSA Metadata Parser Test: PASS diff --git a/test/CodeGen/AMDGPU/code-object-metadata-images.ll b/test/CodeGen/AMDGPU/hsa-metadata-images.ll similarity index 62% rename from test/CodeGen/AMDGPU/code-object-metadata-images.ll rename to test/CodeGen/AMDGPU/hsa-metadata-images.ll index 918560469852b..286f57399b71b 100644 --- a/test/CodeGen/AMDGPU/code-object-metadata-images.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-images.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s %opencl.image1d_t = type opaque %opencl.image1d_array_t = type opaque @@ -19,44 +19,45 @@ ; CHECK: Version: [ 1, 0 ] ; CHECK: Kernels: -; CHECK: - Name: test +; CHECK: - Name: test +; CHECK: SymbolName: 'test@kd' ; CHECK: Args: -; CHECK: - Size: 8 +; CHECK: - TypeName: image1d_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: image1d_t -; CHECK: - Size: 8 +; CHECK: - TypeName: image1d_array_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: image1d_array_t -; CHECK: - Size: 8 +; CHECK: - TypeName: image1d_buffer_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: image1d_buffer_t -; CHECK: - Size: 8 +; CHECK: - TypeName: image2d_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: image2d_t -; CHECK: - Size: 8 +; CHECK: - TypeName: image2d_array_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: image2d_array_t -; CHECK: - Size: 8 +; CHECK: - TypeName: image2d_array_depth_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: image2d_array_depth_t -; CHECK: - Size: 8 +; CHECK: - TypeName: image2d_array_msaa_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: 
image2d_array_msaa_t -; CHECK: - Size: 8 +; CHECK: - TypeName: image2d_array_msaa_depth_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: image2d_array_msaa_depth_t -; CHECK: - Size: 8 +; CHECK: - TypeName: image2d_depth_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: image2d_depth_t -; CHECK: - Size: 8 +; CHECK: - TypeName: image2d_msaa_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: image2d_msaa_t -; CHECK: - Size: 8 +; CHECK: - TypeName: image2d_msaa_depth_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: image2d_msaa_depth_t -; CHECK: - Size: 8 +; CHECK: - TypeName: image3d_t +; CHECK: Size: 8 ; CHECK: ValueKind: Image -; CHECK: TypeName: image3d_t define amdgpu_kernel void @test(%opencl.image1d_t addrspace(1)* %a, %opencl.image1d_array_t addrspace(1)* %b, %opencl.image1d_buffer_t addrspace(1)* %c, diff --git a/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-1.ll b/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll similarity index 78% rename from test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-1.ll rename to test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll index f41da9f921361..f46b07d80b5d1 100644 --- a/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-1.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. diff --git a/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-2.ll b/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll similarity index 79% rename from test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-2.ll rename to test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll index 0509663d9849a..f4e584b5ae399 100644 --- a/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-2.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. diff --git a/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-3.ll b/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll similarity index 80% rename from test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-3.ll rename to test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll index 7404cec5d78ac..5e951dd3f8f7e 100644 --- a/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-3.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s ; Make sure llc does not crash for invalid opencl version metadata. 
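The three invalid-ocl-version tests above keep their IR unchanged; only the RUN lines move from the removed -amdgpu-code-object-metadata dump to the generic note reader. A minimal sketch of that updated pattern, assuming a hypothetical test file with a trivial kernel (the CHECK values mirror the renamed tests above and are illustrative, not part of this patch):

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s

; Illustrative sketch, not part of this patch: the HSA metadata note is
; emitted for every amdgpu_kernel function, so a trivial kernel with no
; OpenCL module metadata is enough to exercise it. Name and the 'Name@kd'
; SymbolName come from the function itself.
; CHECK: Version: [ 1, 0 ]
; CHECK: Kernels:
; CHECK: - Name: minimal
; CHECK: SymbolName: 'minimal@kd'
define amdgpu_kernel void @minimal() {
entry:
  ret void
}

Passing -elf-output-style=GNU only affects how llvm-readobj formats the note; the metadata itself is produced by llc regardless of how it is dumped.
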
diff --git a/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll b/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll new file mode 100644 index 0000000000000..2d02b46e479d7 --- /dev/null +++ b/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll @@ -0,0 +1,34 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s + +; CHECK: --- +; CHECK: Version: [ 1, 0 ] + +; CHECK: Kernels: +; CHECK: - Name: test +; CHECK: SymbolName: 'test@kd' +; CHECK: CodeProps: +; CHECK: KernargSegmentSize: 24 +; CHECK: GroupSegmentFixedSize: 0 +; CHECK: PrivateSegmentFixedSize: 0 +; CHECK: KernargSegmentAlign: 8 +; CHECK: WavefrontSize: 64 +; GFX700: NumSGPRs: 6 +; GFX800: NumSGPRs: 96 +; GFX900: NumSGPRs: 6 +; GFX700: NumVGPRs: 4 +; GFX800: NumVGPRs: 6 +; GFX900: NumVGPRs: 6 +; CHECK: MaxFlatWorkGroupSize: 256 +define amdgpu_kernel void @test( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} diff --git a/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll b/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll similarity index 87% rename from test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll rename to test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll index 0ffc922031539..f9b94d1914320 100644 --- a/test/CodeGen/AMDGPU/code-object-metadata-kernel-debug-props.ll +++ b/test/CodeGen/AMDGPU/hsa-metadata-kernel-debug-props.ll @@ -1,6 +1,6 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s declare void @llvm.dbg.declare(metadata, metadata, metadata) @@ -8,13 +8,14 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; 
CHECK: Version: [ 1, 0 ] ; CHECK: Kernels: -; CHECK: - Name: test +; CHECK: - Name: test +; CHECK: SymbolName: 'test@kd' ; CHECK: DebugProps: ; CHECK: DebuggerABIVersion: [ 1, 0 ] ; CHECK: ReservedNumVGPRs: 4 ; GFX700: ReservedFirstVGPR: 8 ; GFX800: ReservedFirstVGPR: 8 -; GFX9: ReservedFirstVGPR: 14 +; GFX900: ReservedFirstVGPR: 11 ; CHECK: PrivateSegmentBufferSGPR: 0 ; CHECK: WavefrontPrivateSegmentOffsetSGPR: 11 define amdgpu_kernel void @test(i32 addrspace(1)* %A) #0 !dbg !7 !kernel_arg_addr_space !12 !kernel_arg_access_qual !13 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !15 { diff --git a/test/CodeGen/AMDGPU/inline-attr.ll b/test/CodeGen/AMDGPU/inline-attr.ll new file mode 100644 index 0000000000000..6f6b5f4c0b023 --- /dev/null +++ b/test/CodeGen/AMDGPU/inline-attr.ll @@ -0,0 +1,33 @@ +; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-unsafe-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=UNSAFE %s +; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-nans-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NONANS %s +; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s + +; GCN: define float @foo(float %x) local_unnamed_addr #0 { +; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 { +; GCN: %mul.i = fmul float %load, 1.500000e+01 + +; UNSAFE: attributes #0 = { norecurse nounwind readnone "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" } +; UNSAFE: attributes #1 = { norecurse nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" } + +; NOINFS: attributes #0 = { norecurse nounwind readnone "no-infs-fp-math"="true" } +; NOINFS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" } + +; NONANS: attributes #0 = { norecurse nounwind readnone "no-nans-fp-math"="true" } +; NONANS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" } + +define float @foo(float %x) #0 { +entry: + %mul = fmul float %x, 1.500000e+01 + ret float %mul +} + +define amdgpu_kernel void @caller(float addrspace(1)* %p) #1 { +entry: + %load = load float, float addrspace(1)* %p, align 4 + %call = call fast float @foo(float %load) #0 + store float %call, float addrspace(1)* %p, align 4 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" } diff --git a/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir b/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir index bd5f296affb54..e3a559998be29 100644 --- a/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir +++ b/test/CodeGen/AMDGPU/insert-skips-kill-uncond.mir @@ -33,7 +33,7 @@ body: | bb.1: successors: %bb.2 %vgpr0 = V_MOV_B32_e32 0, implicit %exec - SI_KILL_TERMINATOR %vgpr0, implicit-def %exec, implicit-def %vcc, implicit %exec + SI_KILL_F32_COND_IMM_TERMINATOR %vgpr0, 0, 3, implicit-def %exec, implicit-def %vcc, implicit %exec S_BRANCH %bb.2 bb.2: diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll index 98f7058b5ef80..b6f72a114d93a 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -3,7 +3,7 @@ 
;CHECK-LABEL: {{^}}test1: ;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc -;VI: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff +;VI: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_atomic_swap v0, v1, s[0:3], 0 idxen glc ;CHECK: s_waitcnt vmcnt(0) @@ -14,7 +14,7 @@ ;CHECK: buffer_atomic_swap v0, v2, s[0:3], 0 offen offset:42 glc ;CHECK-DAG: s_waitcnt vmcnt(0) ;SICI: buffer_atomic_swap v0, v1, s[0:3], 0 offen glc -;VI: buffer_atomic_swap v0, off, s[0:3], [[SOFS]] offset:1 glc +;VI: buffer_atomic_swap v0, off, s[0:3], [[SOFS]] offset:4 glc ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_atomic_swap v0, off, s[0:3], 0{{$}} define amdgpu_ps float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) { @@ -71,24 +71,24 @@ main_body: ;CHECK-LABEL: {{^}}test3: ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc ;CHECK: s_waitcnt vmcnt(0) -;VI: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff +;VI: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v2, s[0:3], 0 idxen glc ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v3, s[0:3], 0 offen glc ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v[2:3], s[0:3], 0 idxen offen glc ;CHECK: s_waitcnt vmcnt(0) -;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v3, s[0:3], 0 offen offset:42 glc +;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v3, s[0:3], 0 offen offset:44 glc ;CHECK-DAG: s_waitcnt vmcnt(0) ;SICI: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen glc -;VI: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:1 glc +;VI: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:4 glc define amdgpu_ps float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) { main_body: %o1 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0) %o2 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %o1, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0) %o3 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %o2, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 %voffset, i1 0) %o4 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %o3, i32 %cmp, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i1 0) - %ofs.5 = add i32 %voffset, 42 + %ofs.5 = add i32 %voffset, 44 %o5 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %o4, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 %ofs.5, i1 0) %o6 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %o5, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 8192, i1 0) diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll index 9cb9f25520b8b..d5159934d3f92 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll @@ -27,20 +27,20 @@ main_body: } ;CHECK-LABEL: {{^}}buffer_load_immoffs_large: -;SICI: v_mov_b32_e32 [[VOFS:v[0-9]+]], 0x103c +;SICI: v_mov_b32_e32 [[VOFS:v[0-9]+]], 0x1038 ;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[VOFS]], s[0:3], 0 offen ;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen -;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 61 offset:4095 -;VI-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7fff -;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS1]] offset:4093 +;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 60 offset:4092 
+;VI-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7ffc +;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS1]] offset:4092 ;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen -;VI-DAG: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff -;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS2]] offset:1 +;VI-DAG: s_mov_b32 [[OFS2:s[0-9]+]], 0x8ffc +;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS2]] offset:4 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) { main_body: - %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4156, i1 0, i1 0) - %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 36860, i1 0, i1 0) + %d.0 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4152, i1 0, i1 0) + %d.1 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 36856, i1 0, i1 0) %d.2 = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 36864, i1 0, i1 0) %d.3 = fadd <4 x float> %d.0, %d.1 %data = fadd <4 x float> %d.2, %d.3 @@ -48,10 +48,10 @@ main_body: } ;CHECK-LABEL: {{^}}buffer_load_immoffs_reuse: -;VI: s_movk_i32 [[OFS:s[0-9]+]], 0xfff -;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:65 +;VI: s_movk_i32 [[OFS:s[0-9]+]], 0xffc +;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:68 ;VI-NOT: s_mov -;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:81 +;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:84 ;VI: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) { main_body: @@ -80,11 +80,11 @@ main_body: } ;CHECK-LABEL: {{^}}buffer_load_ofs_imm: -;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen offset:58 +;CHECK: buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 offen offset:60 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) { main_body: - %ofs = add i32 %1, 58 + %ofs = add i32 %1, 60 %data = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0) ret <4 x float> %data } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll index 5c93ae0e78672..03caca8d29c45 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll @@ -18,18 +18,18 @@ main_body: } ;CHECK-LABEL: {{^}}buffer_load_immoffs: -;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:42 +;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) { main_body: - %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 42, i1 0, i1 0) + %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 40, i1 0, i1 0) ret <4 x float> %data } ;CHECK-LABEL: {{^}}buffer_load_immoffs_large: ;SICI: buffer_load_dwordx4 v[0:3], {{v[0-9]+}}, s[0:3], 0 offen -;VI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x1fff -;VI: buffer_load_dwordx4 v[0:3], off, s[0:3], [[OFFSET]] offset:1 +;VI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x1ffc +;VI: buffer_load_dwordx4 v[0:3], off, s[0:3], [[OFFSET]] offset:4 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) { main_body: @@ -56,11 +56,11 @@ main_body: } 
;CHECK-LABEL: {{^}}buffer_load_ofs_imm: -;CHECK: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:58 +;CHECK: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) { main_body: - %ofs = add i32 %1, 58 + %ofs = add i32 %1, 60 %data = call <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32> %0, i32 0, i32 %ofs, i1 0, i1 0) ret <4 x float> %data } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll new file mode 100644 index 0000000000000..a1ecb7f750c7b --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll @@ -0,0 +1,241 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s + +; SI-LABEL: {{^}}gs_const: +; SI-NOT: v_cmpx +; SI: s_mov_b64 exec, 0 +define amdgpu_gs void @gs_const() { + %tmp = icmp ule i32 0, 3 + %tmp1 = select i1 %tmp, float 1.000000e+00, float -1.000000e+00 + %c1 = fcmp oge float %tmp1, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + %tmp2 = icmp ule i32 3, 0 + %tmp3 = select i1 %tmp2, float 1.000000e+00, float -1.000000e+00 + %c2 = fcmp oge float %tmp3, 0.0 + call void @llvm.amdgcn.kill(i1 %c2) + ret void +} + +; SI-LABEL: {{^}}vcc_implicit_def: +; SI-NOT: v_cmp_gt_f32_e32 vcc, +; SI: v_cmp_gt_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], 0, v{{[0-9]+}} +; SI: v_cmpx_le_f32_e32 vcc, 0, v{{[0-9]+}} +; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1.0, [[CMP]] +define amdgpu_ps void @vcc_implicit_def(float %arg13, float %arg14) { + %tmp0 = fcmp olt float %arg13, 0.000000e+00 + %c1 = fcmp oge float %arg14, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + %tmp1 = select i1 %tmp0, float 1.000000e+00, float 0.000000e+00 + call void @llvm.amdgcn.exp.f32(i32 1, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0 + ret void +} + +; SI-LABEL: {{^}}true: +; SI-NEXT: BB# +; SI-NEXT: BB# +; SI-NEXT: s_endpgm +define amdgpu_gs void @true() { + call void @llvm.amdgcn.kill(i1 true) + ret void +} + +; SI-LABEL: {{^}}false: +; SI-NOT: v_cmpx +; SI: s_mov_b64 exec, 0 +define amdgpu_gs void @false() { + call void @llvm.amdgcn.kill(i1 false) + ret void +} + +; SI-LABEL: {{^}}and: +; SI: v_cmp_lt_i32 +; SI: v_cmp_lt_i32 +; SI: s_or_b64 s[0:1] +; SI: s_and_b64 exec, exec, s[0:1] +define amdgpu_gs void @and(i32 %a, i32 %b, i32 %c, i32 %d) { + %c1 = icmp slt i32 %a, %b + %c2 = icmp slt i32 %c, %d + %x = or i1 %c1, %c2 + call void @llvm.amdgcn.kill(i1 %x) + ret void +} + +; SI-LABEL: {{^}}andn2: +; SI: v_cmp_lt_i32 +; SI: v_cmp_lt_i32 +; SI: s_xor_b64 s[0:1] +; SI: s_andn2_b64 exec, exec, s[0:1] +define amdgpu_gs void @andn2(i32 %a, i32 %b, i32 %c, i32 %d) { + %c1 = icmp slt i32 %a, %b + %c2 = icmp slt i32 %c, %d + %x = xor i1 %c1, %c2 + %y = xor i1 %x, 1 + call void @llvm.amdgcn.kill(i1 %y) + ret void +} + +; SI-LABEL: {{^}}oeq: +; SI: v_cmpx_eq_f32 +; SI-NOT: s_and +define amdgpu_gs void @oeq(float %a) { + %c1 = fcmp oeq float %a, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}ogt: +; SI: v_cmpx_lt_f32 +; SI-NOT: s_and +define amdgpu_gs void @ogt(float %a) { + %c1 = fcmp ogt float %a, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}oge: +; SI: v_cmpx_le_f32 +; SI-NOT: s_and +define amdgpu_gs void @oge(float %a) { + %c1 = fcmp oge float %a, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}olt: 
+; SI: v_cmpx_gt_f32 +; SI-NOT: s_and +define amdgpu_gs void @olt(float %a) { + %c1 = fcmp olt float %a, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}ole: +; SI: v_cmpx_ge_f32 +; SI-NOT: s_and +define amdgpu_gs void @ole(float %a) { + %c1 = fcmp ole float %a, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}one: +; SI: v_cmpx_lg_f32 +; SI-NOT: s_and +define amdgpu_gs void @one(float %a) { + %c1 = fcmp one float %a, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}ord: +; FIXME: This is absolutely unimportant, but we could use the cmpx variant here. +; SI: v_cmp_o_f32 +define amdgpu_gs void @ord(float %a) { + %c1 = fcmp ord float %a, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}uno: +; FIXME: This is absolutely unimportant, but we could use the cmpx variant here. +; SI: v_cmp_u_f32 +define amdgpu_gs void @uno(float %a) { + %c1 = fcmp uno float %a, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}ueq: +; SI: v_cmpx_nlg_f32 +; SI-NOT: s_and +define amdgpu_gs void @ueq(float %a) { + %c1 = fcmp ueq float %a, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}ugt: +; SI: v_cmpx_nge_f32 +; SI-NOT: s_and +define amdgpu_gs void @ugt(float %a) { + %c1 = fcmp ugt float %a, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}uge: +; SI: v_cmpx_ngt_f32_e32 vcc, -1.0 +; SI-NOT: s_and +define amdgpu_gs void @uge(float %a) { + %c1 = fcmp uge float %a, -1.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}ult: +; SI: v_cmpx_nle_f32_e32 vcc, -2.0 +; SI-NOT: s_and +define amdgpu_gs void @ult(float %a) { + %c1 = fcmp ult float %a, -2.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}ule: +; SI: v_cmpx_nlt_f32_e32 vcc, 2.0 +; SI-NOT: s_and +define amdgpu_gs void @ule(float %a) { + %c1 = fcmp ule float %a, 2.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}une: +; SI: v_cmpx_neq_f32_e32 vcc, 0 +; SI-NOT: s_and +define amdgpu_gs void @une(float %a) { + %c1 = fcmp une float %a, 0.0 + call void @llvm.amdgcn.kill(i1 %c1) + ret void +} + +; SI-LABEL: {{^}}neg_olt: +; SI: v_cmpx_ngt_f32_e32 vcc, 1.0 +; SI-NOT: s_and +define amdgpu_gs void @neg_olt(float %a) { + %c1 = fcmp olt float %a, 1.0 + %c2 = xor i1 %c1, 1 + call void @llvm.amdgcn.kill(i1 %c2) + ret void +} + +; SI-LABEL: {{^}}fcmp_x2: +; FIXME: LLVM should be able to combine these fcmp opcodes. 
+; SI: v_cmp_gt_f32 +; SI: v_cndmask_b32 +; SI: v_cmpx_le_f32 +define amdgpu_ps void @fcmp_x2(float %a) #0 { + %ogt = fcmp nsz ogt float %a, 2.500000e-01 + %k = select i1 %ogt, float -1.000000e+00, float 0.000000e+00 + %c = fcmp nsz oge float %k, 0.000000e+00 + call void @llvm.amdgcn.kill(i1 %c) #1 + ret void +} + +; SI-LABEL: {{^}}wqm: +; SI: v_cmp_neq_f32_e32 vcc, 0 +; SI: s_wqm_b64 s[0:1], vcc +; SI: s_and_b64 exec, exec, s[0:1] +define amdgpu_ps void @wqm(float %a) { + %c1 = fcmp une float %a, 0.0 + %c2 = call i1 @llvm.amdgcn.wqm.vote(i1 %c1) + call void @llvm.amdgcn.kill(i1 %c2) + ret void +} + +declare void @llvm.amdgcn.kill(i1) #0 +declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 +declare i1 @llvm.amdgcn.wqm.vote(i1) + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll index 1c3cba8d3e4fe..3061bd91c9c55 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll @@ -1,7 +1,10 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: {{^}}test1: -; CHECK: v_cndmask_b32_e64 v0, 0, 1, exec +; CHECK: s_mov_b64 s[0:1], exec +; CHECK: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; +; Note: The hardware doesn't implement EXEC as src2 for v_cndmask. ; ; Note: We could generate better code here if we recognized earlier that ; there is no WQM use and therefore llvm.amdgcn.ps.live is constant. However, diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll new file mode 100644 index 0000000000000..1946e6a361867 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=CHECK %s + +;CHECK-LABEL: {{^}}ret: +;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1 +;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[CMP]] +;CHECK: v_cndmask_b32_e64 v0, 0, 1.0, [[WQM]] +define amdgpu_ps float @ret(i32 %v0, i32 %v1) #1 { +main_body: + %c = icmp eq i32 %v0, %v1 + %w = call i1 @llvm.amdgcn.wqm.vote(i1 %c) + %r = select i1 %w, float 1.0, float 0.0 + ret float %r +} + +;CHECK-LABEL: {{^}}true: +;CHECK: s_wqm_b64 +define amdgpu_ps float @true() #1 { +main_body: + %w = call i1 @llvm.amdgcn.wqm.vote(i1 true) + %r = select i1 %w, float 1.0, float 0.0 + ret float %r +} + +;CHECK-LABEL: {{^}}false: +;CHECK: s_wqm_b64 +define amdgpu_ps float @false() #1 { +main_body: + %w = call i1 @llvm.amdgcn.wqm.vote(i1 false) + %r = select i1 %w, float 1.0, float 0.0 + ret float %r +} + +;CHECK-LABEL: {{^}}kill: +;CHECK: v_cmp_eq_u32_e32 [[CMP:[^,]+]], v0, v1 +;CHECK: s_wqm_b64 [[WQM:[^,]+]], [[CMP]] +;FIXME: This could just be: s_and_b64 exec, exec, [[WQM]] +;CHECK: v_cndmask_b32_e64 [[KILL:[^,]+]], -1.0, 1.0, [[WQM]] +;CHECK: v_cmpx_le_f32_e32 {{[^,]+}}, 0, [[KILL]] +;CHECK: s_endpgm +define amdgpu_ps void @kill(i32 %v0, i32 %v1) #1 { +main_body: + %c = icmp eq i32 %v0, %v1 + %w = call i1 @llvm.amdgcn.wqm.vote(i1 %c) + %r = select i1 %w, float 1.0, float -1.0 + call void @llvm.AMDGPU.kill(float %r) + ret void +} + +declare void @llvm.AMDGPU.kill(float) #1 +declare i1 @llvm.amdgcn.wqm.vote(i1) + +attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/load-hi16.ll b/test/CodeGen/AMDGPU/load-hi16.ll index 806664bb32ec0..88a60935c743e 100644 --- a/test/CodeGen/AMDGPU/load-hi16.ll +++ b/test/CodeGen/AMDGPU/load-hi16.ll @@ -7,7 +7,7 @@ ; GFX9-NEXT: s_waitcnt ; GFX9-NEXT: s_setpc_b64 -; VI: 
ds_read_u16 +; VI: ds_read_u16 v define <2 x i16> @load_local_hi_v2i16_undeflo(i16 addrspace(3)* %in) #0 { entry: %load = load i16, i16 addrspace(3)* %in @@ -22,7 +22,7 @@ entry: ; GFX9-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-NEXT: s_setpc_b64 -; VI: ds_read_u16 +; VI: ds_read_u16 v define <2 x i16> @load_local_hi_v2i16_reglo(i16 addrspace(3)* %in, i16 %reg) #0 { entry: %load = load i16, i16 addrspace(3)* %in @@ -40,7 +40,7 @@ entry: ; GFX9-NEXT: s_waitcnt ; GFX9-NEXT: s_setpc_b64 -; VI: ds_read_u16 +; VI: ds_read_u16 v define void @load_local_hi_v2i16_reglo_vreg(i16 addrspace(3)* %in, i16 %reg) #0 { entry: %load = load i16, i16 addrspace(3)* %in @@ -58,7 +58,7 @@ entry: ; GFX9-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-NEXT: s_setpc_b64 -; VI: ds_read_u16 +; VI: ds_read_u16 v define <2 x i16> @load_local_hi_v2i16_zerolo(i16 addrspace(3)* %in) #0 { entry: %load = load i16, i16 addrspace(3)* %in @@ -75,7 +75,7 @@ entry: ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_setpc_b64 -; VI: ds_read_u16 +; VI: ds_read_u16 v ; VI: v_lshlrev_b32_e32 v0, 16, v0 define i32 @load_local_hi_v2i16_zerolo_shift(i16 addrspace(3)* %in) #0 { entry: @@ -93,7 +93,7 @@ entry: ; GFX9-NEXT: s_waitcnt ; GFX9-NEXT: s_setpc_b64 -; VI: ds_read_u16 +; VI: ds_read_u16 v define void @load_local_hi_v2f16_reglo_vreg(half addrspace(3)* %in, half %reg) #0 { entry: %load = load half, half addrspace(3)* %in @@ -111,7 +111,7 @@ entry: ; GFX9-NEXT: s_waitcnt ; GFX9-NEXT: s_setpc_b64 -; VI: ds_read_u8 +; VI: ds_read_u8 v define void @load_local_hi_v2i16_reglo_vreg_zexti8(i8 addrspace(3)* %in, i16 %reg) #0 { entry: %load = load i8, i8 addrspace(3)* %in @@ -130,7 +130,7 @@ entry: ; GFX9-NEXT: s_waitcnt ; GFX9-NEXT: s_setpc_b64 -; VI: ds_read_i8 +; VI: ds_read_i8 v define void @load_local_hi_v2i16_reglo_vreg_sexti8(i8 addrspace(3)* %in, i16 %reg) #0 { entry: %load = load i8, i8 addrspace(3)* %in diff --git a/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll b/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll new file mode 100644 index 0000000000000..e158677e0982b --- /dev/null +++ b/test/CodeGen/AMDGPU/load-private-double16-amdgiz.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" + +; GCN-LABEL: @test_unaligned_load +; GCN: buffer_load_dword +; GCN-NOT: flat_load_dword +define amdgpu_kernel void @test_unaligned_load(<16 x double> addrspace(1)* %results, i32 %i) { +entry: + %a = inttoptr i32 %i to <16 x double> addrspace(5)* + %v = load <16 x double>, <16 x double> addrspace(5)* %a, align 8 + store <16 x double> %v, <16 x double> addrspace(1)* %results, align 128 + ret void +} + +; GCN-LABEL: @test_unaligned_store +; GCN: buffer_store_dword +; GCN-NOT: flat_store_dword +define amdgpu_kernel void @test_unaligned_store(<16 x double> %v, i32 %i) { +entry: + %a = inttoptr i32 %i to <16 x double> addrspace(5)* + store <16 x double> %v, <16 x double> addrspace(5)* %a, align 8 + ret void +} diff --git a/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir b/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir index 2a431fe7946a8..6c6b19a04c622 100644 --- a/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir +++ b/test/CodeGen/AMDGPU/macro-fusion-cluster-vcc-uses.mir @@ -2,16 +2,16 @@ # GCN-LABEL: name: cluster_add_addc # GCN: S_NOP 0, implicit-def %vcc -# GCN: dead %2, %3 = 
V_ADD_I32_e64 %0, %1, implicit %exec -# GCN: dead %4, dead %5 = V_ADDC_U32_e64 %6, %7, %3, implicit %exec +# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit %exec +# GCN: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %3, implicit %exec name: cluster_add_addc registers: - { id: 0, class: vgpr_32 } - { id: 1, class: vgpr_32 } - { id: 2, class: vgpr_32 } - - { id: 3, class: sreg_64 } + - { id: 3, class: sreg_64_xexec } - { id: 4, class: vgpr_32 } - - { id: 5, class: sreg_64 } + - { id: 5, class: sreg_64_xexec } - { id: 6, class: vgpr_32 } - { id: 7, class: vgpr_32 } @@ -27,10 +27,10 @@ body: | ... # GCN-LABEL: name: interleave_add64s -# GCN: dead %8, %9 = V_ADD_I32_e64 %0, %1, implicit %exec -# GCN-NEXT: dead %12, dead %13 = V_ADDC_U32_e64 %4, %5, %9, implicit %exec -# GCN-NEXT: dead %10, %11 = V_ADD_I32_e64 %2, %3, implicit %exec -# GCN-NEXT: dead %14, dead %15 = V_ADDC_U32_e64 %6, %7, %11, implicit %exec +# GCN: dead %8:vgpr_32, %9:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit %exec +# GCN-NEXT: dead %12:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %4, %5, %9, implicit %exec +# GCN-NEXT: dead %10:vgpr_32, %11:sreg_64_xexec = V_ADD_I32_e64 %2, %3, implicit %exec +# GCN-NEXT: dead %14:vgpr_32, dead %15:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %11, implicit %exec name: interleave_add64s registers: - { id: 0, class: vgpr_32 } @@ -42,13 +42,13 @@ registers: - { id: 6, class: vgpr_32 } - { id: 7, class: vgpr_32 } - { id: 8, class: vgpr_32 } - - { id: 9, class: sreg_64 } + - { id: 9, class: sreg_64_xexec } - { id: 10, class: vgpr_32 } - - { id: 11, class: sreg_64 } + - { id: 11, class: sreg_64_xexec } - { id: 12, class: vgpr_32 } - - { id: 13, class: sreg_64 } + - { id: 13, class: sreg_64_xexec } - { id: 14, class: vgpr_32 } - - { id: 15, class: sreg_64 } + - { id: 15, class: sreg_64_xexec } body: | bb.0: @@ -71,15 +71,15 @@ body: | # GCN-LABEL: name: cluster_mov_addc # GCN: S_NOP 0, implicit-def %vcc -# GCN-NEXT: %2 = S_MOV_B64 0 -# GCN-NEXT: dead %3, dead %4 = V_ADDC_U32_e64 %0, %1, %2, implicit %exec +# GCN-NEXT: %2:sreg_64_xexec = S_MOV_B64 0 +# GCN-NEXT: dead %3:vgpr_32, dead %4:sreg_64_xexec = V_ADDC_U32_e64 %0, %1, %2, implicit %exec name: cluster_mov_addc registers: - { id: 0, class: vgpr_32 } - { id: 1, class: vgpr_32 } - - { id: 2, class: sreg_64 } + - { id: 2, class: sreg_64_xexec } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_xexec } - { id: 6, class: vgpr_32 } - { id: 7, class: vgpr_32 } @@ -93,23 +93,23 @@ body: | ... 
# GCN-LABEL: name: no_cluster_add_addc_diff_sgpr -# GCN: dead %2, dead %3 = V_ADD_I32_e64 %0, %1, implicit %exec -# GCN-NEXT: %6 = V_MOV_B32_e32 0, implicit %exec -# GCN-NEXT: %7 = V_MOV_B32_e32 0, implicit %exec +# GCN: dead %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 %0, %1, implicit %exec +# GCN-NEXT: %6:vgpr_32 = V_MOV_B32_e32 0, implicit %exec +# GCN-NEXT: %7:vgpr_32 = V_MOV_B32_e32 0, implicit %exec # GCN-NEXT: S_NOP 0, implicit-def %vcc -# GCN-NEXT: %8 = S_MOV_B64 0 -# GCN-NEXT: dead %4, dead %5 = V_ADDC_U32_e64 %6, %7, %8, implicit %exec +# GCN-NEXT: %8:sreg_64_xexec = S_MOV_B64 0 +# GCN-NEXT: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %6, %7, %8, implicit %exec name: no_cluster_add_addc_diff_sgpr registers: - { id: 0, class: vgpr_32 } - { id: 1, class: vgpr_32 } - { id: 2, class: vgpr_32 } - - { id: 3, class: sreg_64 } + - { id: 3, class: sreg_64_xexec } - { id: 4, class: vgpr_32 } - - { id: 5, class: sreg_64 } + - { id: 5, class: sreg_64_xexec } - { id: 6, class: vgpr_32 } - { id: 7, class: vgpr_32 } - - { id: 8, class: sreg_64 } + - { id: 8, class: sreg_64_xexec } body: | bb.0: %0 = V_MOV_B32_e32 0, implicit %exec @@ -123,16 +123,16 @@ body: | ... # GCN-LABEL: name: cluster_sub_subb # GCN: S_NOP 0, implicit-def %vcc -# GCN: dead %2, %3 = V_SUB_I32_e64 %0, %1, implicit %exec -# GCN: dead %4, dead %5 = V_SUBB_U32_e64 %6, %7, %3, implicit %exec +# GCN: dead %2:vgpr_32, %3:sreg_64_xexec = V_SUB_I32_e64 %0, %1, implicit %exec +# GCN: dead %4:vgpr_32, dead %5:sreg_64_xexec = V_SUBB_U32_e64 %6, %7, %3, implicit %exec name: cluster_sub_subb registers: - { id: 0, class: vgpr_32 } - { id: 1, class: vgpr_32 } - { id: 2, class: vgpr_32 } - - { id: 3, class: sreg_64 } + - { id: 3, class: sreg_64_xexec } - { id: 4, class: vgpr_32 } - - { id: 5, class: sreg_64 } + - { id: 5, class: sreg_64_xexec } - { id: 6, class: vgpr_32 } - { id: 7, class: vgpr_32 } @@ -149,16 +149,16 @@ body: | # GCN-LABEL: name: cluster_cmp_cndmask # GCN: S_NOP 0, implicit-def %vcc -# GCN-NEXT: %3 = V_CMP_EQ_I32_e64 %0, %1, implicit %exec -# GCN-NEXT: dead %4 = V_CNDMASK_B32_e64 %0, %1, %3, implicit %exec +# GCN-NEXT: %3:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit %exec +# GCN-NEXT: dead %4:vgpr_32 = V_CNDMASK_B32_e64 %0, %1, %3, implicit %exec name: cluster_cmp_cndmask registers: - { id: 0, class: vgpr_32 } - { id: 1, class: vgpr_32 } - { id: 2, class: vgpr_32 } - - { id: 3, class: sreg_64 } + - { id: 3, class: sreg_64_xexec } - { id: 4, class: vgpr_32 } - - { id: 5, class: sreg_64 } + - { id: 5, class: sreg_64_xexec } - { id: 6, class: vgpr_32 } - { id: 7, class: vgpr_32 } @@ -172,16 +172,16 @@ body: | ... # GCN-LABEL: name: cluster_multi_use_cmp_cndmask -# GCN: %4 = V_CMP_EQ_I32_e64 %0, %1, implicit %exec -# GCN-NEXT: dead %5 = V_CNDMASK_B32_e64 %2, %1, %4, implicit %exec -# GCN-NEXT: dead %6 = V_CNDMASK_B32_e64 %1, %3, %4, implicit %exec +# GCN: %4:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit %exec +# GCN-NEXT: dead %5:vgpr_32 = V_CNDMASK_B32_e64 %2, %1, %4, implicit %exec +# GCN-NEXT: dead %6:vgpr_32 = V_CNDMASK_B32_e64 %1, %3, %4, implicit %exec name: cluster_multi_use_cmp_cndmask registers: - { id: 0, class: vgpr_32 } - { id: 1, class: vgpr_32 } - { id: 2, class: vgpr_32 } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_xexec } - { id: 5, class: vgpr_32 } - { id: 6, class: vgpr_32 } - { id: 7, class: vgpr_32 } @@ -200,17 +200,17 @@ body: | ... 
# GCN-LABEL: name: cluster_multi_use_cmp_cndmask2 -# GCN: %4 = V_CMP_EQ_I32_e64 %0, %1, implicit %exec -# GCN-NEXT: dead %5 = V_CNDMASK_B32_e64 %2, %1, %4, implicit %exec -# GCN-NEXT: %3 = V_MOV_B32_e32 0, implicit %exec -# GCN-NEXT: dead %6 = V_CNDMASK_B32_e64 %1, %3, %4, implicit %exec +# GCN: %4:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit %exec +# GCN-NEXT: dead %5:vgpr_32 = V_CNDMASK_B32_e64 %2, %1, %4, implicit %exec +# GCN-NEXT: %3:vgpr_32 = V_MOV_B32_e32 0, implicit %exec +# GCN-NEXT: dead %6:vgpr_32 = V_CNDMASK_B32_e64 %1, %3, %4, implicit %exec name: cluster_multi_use_cmp_cndmask2 registers: - { id: 0, class: vgpr_32 } - { id: 1, class: vgpr_32 } - { id: 2, class: vgpr_32 } - { id: 3, class: vgpr_32 } - - { id: 4, class: sreg_64 } + - { id: 4, class: sreg_64_xexec } - { id: 5, class: vgpr_32 } - { id: 6, class: vgpr_32 } - { id: 7, class: vgpr_32 } diff --git a/test/CodeGen/AMDGPU/mad-mix.ll b/test/CodeGen/AMDGPU/mad-mix.ll index dfecdb7675190..abffc3af2aae9 100644 --- a/test/CodeGen/AMDGPU/mad-mix.ll +++ b/test/CodeGen/AMDGPU/mad-mix.ll @@ -398,6 +398,106 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src ret float %result } +; GCN-LABEL: {{^}}v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: +; GFX9: s_waitcnt +; GFX9-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 ; encoding +; GFX9-NEXT: s_setpc_b64 + +; CIVI: v_mad_f32 +define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> + %src0 = extractelement <2 x half> %src0.arg.bc, i32 0 + %src0.neg = fsub half -0.0, %src0 + %src0.ext = fpext half %src0.neg to float + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float +; %src0.ext.neg = fsub float -0.0, %src0.ext + %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) + ret float %result +} + +; Make sure we don't fold pre-cvt fneg if we already have a fabs +; GCN-LABEL: {{^}}v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: +; GFX9: s_waitcnt +define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> + %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 + %src0.neg = fsub half -0.0, %src0 + %src0.ext = fpext half %src0.neg to float + %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext) + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float + %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext) + ret float %result +} + +; GCN-LABEL: {{^}}v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: +; GFX9: s_waitcnt +; GFX9-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] +; GFX9-NEXT: s_setpc_b64 +define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> + %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 + %src0.abs = call half @llvm.fabs.f16(half %src0) + %src0.ext = fpext half %src0.abs to float + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float + %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) + ret float %result +} + +; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: +; GFX9: s_waitcnt +; GFX9-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] +; GFX9-NEXT: s_setpc_b64 +define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, 
half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> + %fneg = fsub <2 x half> <half -0.0, half -0.0>, %src0.arg.bc + %src0 = extractelement <2 x half> %fneg, i32 1 + %src0.ext = fpext half %src0 to float + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float + %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) + ret float %result +} + +; FIXME: Should be able to fold +; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: +; GFX9: s_waitcnt +; GFX9-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0 +; GFX9-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,0,0] +; GFX9-NEXT: s_setpc_b64 +define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> + %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) + %src0 = extractelement <2 x half> %fabs, i32 1 + %src0.ext = fpext half %src0 to float + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float + %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) + ret float %result +} + +; FIXME: Should be able to fold +; GCN-LABEL: {{^}}v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: +; GFX9: s_waitcnt +; GFX9-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0 +; GFX9-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] +; GFX9-NEXT: s_setpc_b64 +define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { + %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> + %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) + %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs + %src0 = extractelement <2 x half> %fneg.fabs, i32 1 + %src0.ext = fpext half %src0 to float + %src1.ext = fpext half %src1 to float + %src2.ext = fpext half %src2 to float + %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) + ret float %result +} + +declare half @llvm.fabs.f16(half) #2 +declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2 declare float @llvm.fabs.f32(float) #2 declare float @llvm.minnum.f32(float, float) #2 declare float @llvm.maxnum.f32(float, float) #2 diff --git a/test/CodeGen/AMDGPU/memory-legalizer-atomic-load.ll b/test/CodeGen/AMDGPU/memory-legalizer-atomic-load.ll deleted file mode 100644 index 0434978caa283..0000000000000 --- a/test/CodeGen/AMDGPU/memory-legalizer-atomic-load.ll +++ /dev/null @@ -1,282 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s - -; CHECK-LABEL: {{^}}system_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @system_unordered( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in unordered, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}system_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @system_monotonic( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
-entry: - %val = load atomic i32, i32 addrspace(4)* %in monotonic, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}system_acquire -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} -; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @system_acquire( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in acquire, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}system_seq_cst -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} -; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @system_seq_cst( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}singlethread_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @singlethread_unordered( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") unordered, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}singlethread_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @singlethread_monotonic( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") monotonic, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}singlethread_acquire -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @singlethread_acquire( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") acquire, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}singlethread_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @singlethread_seq_cst( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}agent_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: 
flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @agent_unordered( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") unordered, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}agent_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @agent_monotonic( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") monotonic, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}agent_acquire -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} -; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @agent_acquire( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") acquire, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}agent_seq_cst -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} -; CHECK-NEXT: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @agent_seq_cst( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}workgroup_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @workgroup_unordered( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") unordered, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}workgroup_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @workgroup_monotonic( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") monotonic, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}workgroup_acquire -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @workgroup_acquire( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") acquire, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}workgroup_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} 
-; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @workgroup_seq_cst( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}wavefront_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @wavefront_unordered( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") unordered, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}wavefront_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @wavefront_monotonic( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") monotonic, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}wavefront_acquire -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @wavefront_acquire( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") acquire, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; CHECK-LABEL: {{^}}wavefront_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK-NOT: buffer_wbinvl1_vol -; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] -define amdgpu_kernel void @wavefront_seq_cst( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") seq_cst, align 4 - store i32 %val, i32 addrspace(4)* %out - ret void -} diff --git a/test/CodeGen/AMDGPU/memory-legalizer-atomic-store.ll b/test/CodeGen/AMDGPU/memory-legalizer-atomic-store.ll deleted file mode 100644 index 14b7d3e62f865..0000000000000 --- a/test/CodeGen/AMDGPU/memory-legalizer-atomic-store.ll +++ /dev/null @@ -1,202 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck %s - -; CHECK-LABEL: {{^}}system_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @system_unordered( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out unordered, align 4 - ret void -} - -; CHECK-LABEL: {{^}}system_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @system_monotonic( 
- i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out monotonic, align 4 - ret void -} - -; CHECK-LABEL: {{^}}system_release -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @system_release( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out release, align 4 - ret void -} - -; CHECK-LABEL: {{^}}system_seq_cst -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @system_seq_cst( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4 - ret void -} - -; CHECK-LABEL: {{^}}singlethread_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @singlethread_unordered( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") unordered, align 4 - ret void -} - -; CHECK-LABEL: {{^}}singlethread_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @singlethread_monotonic( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") monotonic, align 4 - ret void -} - -; CHECK-LABEL: {{^}}singlethread_release -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @singlethread_release( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") release, align 4 - ret void -} - -; CHECK-LABEL: {{^}}singlethread_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @singlethread_seq_cst( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") seq_cst, align 4 - ret void -} - -; CHECK-LABEL: {{^}}agent_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @agent_unordered( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") unordered, align 4 - ret void -} - -; CHECK-LABEL: {{^}}agent_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @agent_monotonic( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") monotonic, align 4 - ret void -} - -; CHECK-LABEL: {{^}}agent_release -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @agent_release( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") release, align 4 - ret void -} - -; CHECK-LABEL: {{^}}agent_seq_cst -; CHECK: s_waitcnt vmcnt(0){{$}} -; CHECK-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @agent_seq_cst( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") seq_cst, align 4 - ret void -} - -; CHECK-LABEL: {{^}}workgroup_unordered -; 
CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @workgroup_unordered( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") unordered, align 4 - ret void -} - -; CHECK-LABEL: {{^}}workgroup_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @workgroup_monotonic( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") monotonic, align 4 - ret void -} - -; CHECK-LABEL: {{^}}workgroup_release -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @workgroup_release( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") release, align 4 - ret void -} - -; CHECK-LABEL: {{^}}workgroup_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @workgroup_seq_cst( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") seq_cst, align 4 - ret void -} - -; CHECK-LABEL: {{^}}wavefront_unordered -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @wavefront_unordered( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") unordered, align 4 - ret void -} - -; CHECK-LABEL: {{^}}wavefront_monotonic -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @wavefront_monotonic( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") monotonic, align 4 - ret void -} - -; CHECK-LABEL: {{^}}wavefront_release -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @wavefront_release( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") release, align 4 - ret void -} - -; CHECK-LABEL: {{^}}wavefront_seq_cst -; CHECK-NOT: s_waitcnt vmcnt(0){{$}} -; CHECK: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} -define amdgpu_kernel void @wavefront_seq_cst( - i32 %in, i32 addrspace(4)* %out) { -entry: - store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") seq_cst, align 4 - ret void -} diff --git a/test/CodeGen/AMDGPU/memory-legalizer-load.ll b/test/CodeGen/AMDGPU/memory-legalizer-load.ll new file mode 100644 index 0000000000000..57e705f2732a7 --- /dev/null +++ b/test/CodeGen/AMDGPU/memory-legalizer-load.ll @@ -0,0 +1,377 @@ +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s + +declare i32 @llvm.amdgcn.workitem.id.x() + +; GCN-LABEL: 
{{^}}system_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @system_unordered( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in unordered, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}system_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @system_monotonic( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in monotonic, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}system_acquire +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; GCN-NEXT: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @system_acquire( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in acquire, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}system_seq_cst +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; GCN-NEXT: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @system_seq_cst( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in seq_cst, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}singlethread_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @singlethread_unordered( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") unordered, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}singlethread_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @singlethread_monotonic( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") monotonic, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}singlethread_acquire +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @singlethread_acquire( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") acquire, align 4 + store i32 %val, i32 addrspace(4)* %out + 
ret void +} + +; GCN-LABEL: {{^}}singlethread_seq_cst +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @singlethread_seq_cst( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("singlethread") seq_cst, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}agent_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @agent_unordered( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") unordered, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}agent_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @agent_monotonic( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") monotonic, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}agent_acquire +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; GCN-NEXT: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @agent_acquire( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") acquire, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}agent_seq_cst +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} +; GCN-NEXT: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @agent_seq_cst( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("agent") seq_cst, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}workgroup_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @workgroup_unordered( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") unordered, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}workgroup_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @workgroup_monotonic( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in 
syncscope("workgroup") monotonic, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}workgroup_acquire +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @workgroup_acquire( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") acquire, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}workgroup_seq_cst +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @workgroup_seq_cst( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("workgroup") seq_cst, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}wavefront_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @wavefront_unordered( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") unordered, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}wavefront_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @wavefront_monotonic( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") monotonic, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}wavefront_acquire +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @wavefront_acquire( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") acquire, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}wavefront_seq_cst +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}} +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN-NOT: buffer_wbinvl1_vol +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]] +define amdgpu_kernel void @wavefront_seq_cst( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load atomic i32, i32 addrspace(4)* %in syncscope("wavefront") seq_cst, align 4 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_private_0 +; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} +define amdgpu_kernel void @nontemporal_private_0( + i32* %in, i32 addrspace(4)* %out) { +entry: + %val = load i32, i32* %in, align 4, !nontemporal !0 + store i32 %val, i32 
addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_private_1 +; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} +define amdgpu_kernel void @nontemporal_private_1( + i32* %in, i32 addrspace(4)* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid + %val = load i32, i32* %val.gep, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_global_0 +; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}} +define amdgpu_kernel void @nontemporal_global_0( + i32 addrspace(1)* %in, i32 addrspace(4)* %out) { +entry: + %val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_global_1 +; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} +; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off glc slc{{$}} +define amdgpu_kernel void @nontemporal_global_1( + i32 addrspace(1)* %in, i32 addrspace(4)* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid + %val = load i32, i32 addrspace(1)* %val.gep, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_local_0 +; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} +define amdgpu_kernel void @nontemporal_local_0( + i32 addrspace(3)* %in, i32 addrspace(4)* %out) { +entry: + %val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_local_1 +; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} +define amdgpu_kernel void @nontemporal_local_1( + i32 addrspace(3)* %in, i32 addrspace(4)* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid + %val = load i32, i32 addrspace(3)* %val.gep, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_flat_0 +; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} +define amdgpu_kernel void @nontemporal_flat_0( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %val = load i32, i32 addrspace(4)* %in, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +; GCN-LABEL: {{^}}nontemporal_flat_1 +; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} +define amdgpu_kernel void @nontemporal_flat_1( + i32 addrspace(4)* %in, i32 addrspace(4)* %out) { +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %val.gep = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tid + %val = load i32, i32 addrspace(4)* %val.gep, align 4, !nontemporal !0 + store i32 %val, i32 addrspace(4)* %out + ret void +} + +!0 = !{i32 1} diff --git a/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-load.ll b/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-load.ll deleted file mode 100644 index 2157ca33d22a2..0000000000000 --- a/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-load.ll +++ /dev/null @@ -1,97 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx800 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck 
--check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s - -declare i32 @llvm.amdgcn.workitem.id.x() - -; GCN-LABEL: {{^}}nontemporal_load_private_0 -; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} -define amdgpu_kernel void @nontemporal_load_private_0( - i32* %in, i32 addrspace(4)* %out) { -entry: - %val = load i32, i32* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_private_1 -; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} -define amdgpu_kernel void @nontemporal_load_private_1( - i32* %in, i32 addrspace(4)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32* %in, i32 %tid - %val = load i32, i32* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_global_0 -; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0x0{{$}} -define amdgpu_kernel void @nontemporal_load_global_0( - i32 addrspace(1)* %in, i32 addrspace(4)* %out) { -entry: - %val = load i32, i32 addrspace(1)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_global_1 -; GFX8: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} -; GFX9: global_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], off glc slc{{$}} -define amdgpu_kernel void @nontemporal_load_global_1( - i32 addrspace(1)* %in, i32 addrspace(4)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_local_0 -; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} -define amdgpu_kernel void @nontemporal_load_local_0( - i32 addrspace(3)* %in, i32 addrspace(4)* %out) { -entry: - %val = load i32, i32 addrspace(3)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_local_1 -; GCN: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} -define amdgpu_kernel void @nontemporal_load_local_1( - i32 addrspace(3)* %in, i32 addrspace(4)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(3)* %in, i32 %tid - %val = load i32, i32 addrspace(3)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_flat_0 -; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} -define amdgpu_kernel void @nontemporal_load_flat_0( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load i32, i32 addrspace(4)* %in, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -; GCN-LABEL: {{^}}nontemporal_load_flat_1 -; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}] glc slc{{$}} -define amdgpu_kernel void @nontemporal_load_flat_1( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %tid = call i32 
@llvm.amdgcn.workitem.id.x() - %val.gep = getelementptr inbounds i32, i32 addrspace(4)* %in, i32 %tid - %val = load i32, i32 addrspace(4)* %val.gep, align 4, !nontemporal !0 - store i32 %val, i32 addrspace(4)* %out - ret void -} - -!0 = !{i32 1} diff --git a/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-store.ll b/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-store.ll deleted file mode 100644 index 78604797a345b..0000000000000 --- a/test/CodeGen/AMDGPU/memory-legalizer-nontemporal-store.ll +++ /dev/null @@ -1,97 +0,0 @@ -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx800 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s -; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s - -declare i32 @llvm.amdgcn.workitem.id.x() - -; GCN-LABEL: {{^}}nontemporal_store_private_0 -; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_private_0( - i32 addrspace(4)* %in, i32* %out) { -entry: - %val = load i32, i32 addrspace(4)* %in, align 4 - store i32 %val, i32* %out, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_private_1 -; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_private_1( - i32 addrspace(4)* %in, i32* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid - store i32 %val, i32* %out.gep, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_global_0 -; GCN: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_global_0( - i32 addrspace(4)* %in, i32 addrspace(1)* %out) { -entry: - %val = load i32, i32 addrspace(4)* %in, align 4 - store i32 %val, i32 addrspace(1)* %out, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_global_1 -; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} -; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_global_1( - i32 addrspace(4)* %in, i32 addrspace(1)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid - store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_local_0 -; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} -define amdgpu_kernel void @nontemporal_store_local_0( - i32 addrspace(4)* %in, i32 addrspace(3)* %out) { -entry: - %val = load i32, i32 addrspace(4)* %in, align 4 - store i32 %val, i32 addrspace(3)* %out, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_local_1 -; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}} -define amdgpu_kernel void @nontemporal_store_local_1( - i32 addrspace(4)* %in, i32 addrspace(3)* %out) { -entry: - %tid = call i32 
@llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid - store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_flat_0 -; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_flat_0( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %val = load i32, i32 addrspace(4)* %in, align 4 - store i32 %val, i32 addrspace(4)* %out, !nontemporal !0 - ret void -} - -; GCN-LABEL: {{^}}nontemporal_store_flat_1 -; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}} -define amdgpu_kernel void @nontemporal_store_flat_1( - i32 addrspace(4)* %in, i32 addrspace(4)* %out) { -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %val = load i32, i32 addrspace(4)* %in, align 4 - %out.gep = getelementptr inbounds i32, i32 addrspace(4)* %out, i32 %tid - store i32 %val, i32 addrspace(4)* %out.gep, !nontemporal !0 - ret void -} - -!0 = !{i32 1} diff --git a/test/CodeGen/AMDGPU/memory-legalizer-store.ll b/test/CodeGen/AMDGPU/memory-legalizer-store.ll new file mode 100644 index 0000000000000..50df0bc166390 --- /dev/null +++ b/test/CodeGen/AMDGPU/memory-legalizer-store.ll @@ -0,0 +1,298 @@ +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX8 %s +; RUN: llc -mtriple=amdgcn-amd- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX9 %s + +declare i32 @llvm.amdgcn.workitem.id.x() + +; GCN-LABEL: {{^}}system_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @system_unordered( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out unordered, align 4 + ret void +} + +; GCN-LABEL: {{^}}system_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @system_monotonic( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out monotonic, align 4 + ret void +} + +; GCN-LABEL: {{^}}system_release +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @system_release( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out release, align 4 + ret void +} + +; GCN-LABEL: {{^}}system_seq_cst +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @system_seq_cst( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out seq_cst, align 4 + ret void +} + +; GCN-LABEL: {{^}}singlethread_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @singlethread_unordered( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") unordered, align 4 + ret void +} + +; GCN-LABEL: {{^}}singlethread_monotonic +; GCN-NOT: s_waitcnt 
vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @singlethread_monotonic( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") monotonic, align 4 + ret void +} + +; GCN-LABEL: {{^}}singlethread_release +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @singlethread_release( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") release, align 4 + ret void +} + +; GCN-LABEL: {{^}}singlethread_seq_cst +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @singlethread_seq_cst( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out syncscope("singlethread") seq_cst, align 4 + ret void +} + +; GCN-LABEL: {{^}}agent_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @agent_unordered( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") unordered, align 4 + ret void +} + +; GCN-LABEL: {{^}}agent_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @agent_monotonic( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") monotonic, align 4 + ret void +} + +; GCN-LABEL: {{^}}agent_release +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @agent_release( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") release, align 4 + ret void +} + +; GCN-LABEL: {{^}}agent_seq_cst +; GCN: s_waitcnt vmcnt(0){{$}} +; GCN-NEXT: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @agent_seq_cst( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out syncscope("agent") seq_cst, align 4 + ret void +} + +; GCN-LABEL: {{^}}workgroup_unordered +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @workgroup_unordered( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") unordered, align 4 + ret void +} + +; GCN-LABEL: {{^}}workgroup_monotonic +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @workgroup_monotonic( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") monotonic, align 4 + ret void +} + +; GCN-LABEL: {{^}}workgroup_release +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @workgroup_release( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic i32 %in, i32 addrspace(4)* %out syncscope("workgroup") release, align 4 + ret void +} + +; GCN-LABEL: {{^}}workgroup_seq_cst +; GCN-NOT: s_waitcnt vmcnt(0){{$}} +; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}} +define amdgpu_kernel void @workgroup_seq_cst( + i32 %in, i32 addrspace(4)* %out) { +entry: + store atomic 
i32 %in, i32 addrspace(4)* %out syncscope("workgroup") seq_cst, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_unordered
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @wavefront_unordered(
+    i32 %in, i32 addrspace(4)* %out) {
+entry:
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") unordered, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_monotonic
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @wavefront_monotonic(
+    i32 %in, i32 addrspace(4)* %out) {
+entry:
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") monotonic, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_release
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @wavefront_release(
+    i32 %in, i32 addrspace(4)* %out) {
+entry:
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") release, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}wavefront_seq_cst
+; GCN-NOT: s_waitcnt vmcnt(0){{$}}
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @wavefront_seq_cst(
+    i32 %in, i32 addrspace(4)* %out) {
+entry:
+  store atomic i32 %in, i32 addrspace(4)* %out syncscope("wavefront") seq_cst, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}nontemporal_private_0
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
+define amdgpu_kernel void @nontemporal_private_0(
+    i32 addrspace(4)* %in, i32* %out) {
+entry:
+  %val = load i32, i32 addrspace(4)* %in, align 4
+  store i32 %val, i32* %out, !nontemporal !0
+  ret void
+}
+
+; GCN-LABEL: {{^}}nontemporal_private_1
+; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen glc slc{{$}}
+define amdgpu_kernel void @nontemporal_private_1(
+    i32 addrspace(4)* %in, i32* %out) {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %val = load i32, i32 addrspace(4)* %in, align 4
+  %out.gep = getelementptr inbounds i32, i32* %out, i32 %tid
+  store i32 %val, i32* %out.gep, !nontemporal !0
+  ret void
+}
+
+; GCN-LABEL: {{^}}nontemporal_global_0
+; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
+; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
+define amdgpu_kernel void @nontemporal_global_0(
+    i32 addrspace(4)* %in, i32 addrspace(1)* %out) {
+entry:
+  %val = load i32, i32 addrspace(4)* %in, align 4
+  store i32 %val, i32 addrspace(1)* %out, !nontemporal !0
+  ret void
+}
+
+; GCN-LABEL: {{^}}nontemporal_global_1
+; GFX8: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
+; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}}, off glc slc{{$}}
+define amdgpu_kernel void @nontemporal_global_1(
+    i32 addrspace(4)* %in, i32 addrspace(1)* %out) {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %val = load i32, i32 addrspace(4)* %in, align 4
+  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %tid
+  store i32 %val, i32 addrspace(1)* %out.gep, !nontemporal !0
+  ret void
+}
+
+; GCN-LABEL: {{^}}nontemporal_local_0
+; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @nontemporal_local_0(
+    i32 addrspace(4)* %in, i32 addrspace(3)* %out) {
+entry:
+  %val = load i32, i32 addrspace(4)* %in, align 4
+  store i32 %val, i32 addrspace(3)* %out, !nontemporal !0
+  ret void
+}
+
+; GCN-LABEL: {{^}}nontemporal_local_1
+; GCN: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+define amdgpu_kernel void @nontemporal_local_1(
+    i32 addrspace(4)* %in, i32 addrspace(3)* %out) {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %val = load i32, i32 addrspace(4)* %in, align 4
+  %out.gep = getelementptr inbounds i32, i32 addrspace(3)* %out, i32 %tid
+  store i32 %val, i32 addrspace(3)* %out.gep, !nontemporal !0
+  ret void
+}
+
+; GCN-LABEL: {{^}}nontemporal_flat_0
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
+define amdgpu_kernel void @nontemporal_flat_0(
+    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+entry:
+  %val = load i32, i32 addrspace(4)* %in, align 4
+  store i32 %val, i32 addrspace(4)* %out, !nontemporal !0
+  ret void
+}
+
+; GCN-LABEL: {{^}}nontemporal_flat_1
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} glc slc{{$}}
+define amdgpu_kernel void @nontemporal_flat_1(
+    i32 addrspace(4)* %in, i32 addrspace(4)* %out) {
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %val = load i32, i32 addrspace(4)* %in, align 4
+  %out.gep = getelementptr inbounds i32, i32 addrspace(4)* %out, i32 %tid
+  store i32 %val, i32 addrspace(4)* %out.gep, !nontemporal !0
+  ret void
+}
+
+!0 = !{i32 1}
diff --git a/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir b/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
index 0a6c8a41130d4..6c6590a154a07 100644
--- a/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
+++ b/test/CodeGen/AMDGPU/opt-sgpr-to-vgpr-copy.mir
@@ -3,21 +3,21 @@
 # Check that constant is in SGPR registers
 # GCN-LABEL: {{^}}name: const_to_sgpr{{$}}
-# GCN: %[[HI:[0-9]+]] = S_MOV_B32 0
-# GCN-NEXT: %[[LO:[0-9]+]] = S_MOV_B32 1048576
-# GCN-NEXT: %[[SGPR_PAIR:[0-9]+]] = REG_SEQUENCE killed %[[LO]], 1, killed %[[HI]], 2
+# GCN: %[[HI:[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
+# GCN-NEXT: %[[LO:[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1048576
+# GCN-NEXT: %[[SGPR_PAIR:[0-9]+]]:sreg_64 = REG_SEQUENCE killed %[[LO]], 1, killed %[[HI]], 2
 # GCN-NEXT: V_CMP_LT_U64_e64 killed %{{[0-9]+}}, %[[SGPR_PAIR]], implicit %exec
 
 # GCN-LABEL: {{^}}name: const_to_sgpr_multiple_use{{$}}
-# GCN: %[[HI:[0-9]+]] = S_MOV_B32 0
-# GCN-NEXT: %[[LO:[0-9]+]] = S_MOV_B32 1048576
-# GCN-NEXT: %[[SGPR_PAIR:[0-9]+]] = REG_SEQUENCE killed %[[LO]], 1, killed %[[HI]], 2
+# GCN: %[[HI:[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
+# GCN-NEXT: %[[LO:[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1048576
+# GCN-NEXT: %[[SGPR_PAIR:[0-9]+]]:sreg_64 = REG_SEQUENCE killed %[[LO]], 1, killed %[[HI]], 2
 # GCN-NEXT: V_CMP_LT_U64_e64 killed %{{[0-9]+}}, %[[SGPR_PAIR]], implicit %exec
 # GCN-NEXT: V_CMP_LT_U64_e64 killed %{{[0-9]+}}, %[[SGPR_PAIR]], implicit %exec
 
 # GCN-LABEL: {{^}}name: const_to_sgpr_subreg{{$}}
-# GCN: %[[OP0:[0-9]+]] = REG_SEQUENCE killed %{{[0-9]+}}, 1, killed %{{[0-9]+}}, 2
+# GCN: %[[OP0:[0-9]+]]:vreg_64 = REG_SEQUENCE killed %{{[0-9]+}}, 1, killed %{{[0-9]+}}, 2
 # GCN-NEXT: V_CMP_LT_U32_e64 killed %[[OP0]].sub0, 12, implicit %exec
 
 --- |
diff --git a/test/CodeGen/AMDGPU/readlane_exec0.mir b/test/CodeGen/AMDGPU/readlane_exec0.mir
new file mode 100644
index 0000000000000..b6d58d74ebddb
--- /dev/null
+++ b/test/CodeGen/AMDGPU/readlane_exec0.mir
@@ -0,0 +1,32 @@
+# RUN: llc -o - %s -march=amdgcn -mcpu=fiji -run-pass=si-insert-skips -verify-machineinstrs | FileCheck -check-prefix=GCN %s
+
+# GCN-LABEL: readlane_exec0
+# GCN: bb.0
+# GCN: S_CBRANCH_EXECZ %bb.2
+
+---
+name: readlane_exec0
+
+body: |
+  bb.0:
+    successors: %bb.1, %bb.2
+    liveins: %vgpr1_vgpr2:0x00000001, %vgpr2_vgpr3:0x00000003
+
+    %vgpr4 = V_AND_B32_e32 1, %vgpr1, implicit %exec
+    V_CMP_EQ_U32_e32 1, killed %vgpr4, implicit-def %vcc, implicit %exec
+    %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed %vcc, implicit-def %exec, implicit-def %scc, implicit %exec
+    SI_MASK_BRANCH %bb.2, implicit %exec
+    S_BRANCH %bb.1
+
+  bb.1:
+
+    %sgpr10 = V_READFIRSTLANE_B32 %vgpr2, implicit %exec
+    %sgpr11 = V_READFIRSTLANE_B32 %vgpr3, implicit %exec
+    %sgpr10 = S_LOAD_DWORD_IMM killed %sgpr10_sgpr11, 0, 0
+    S_WAITCNT 127
+    %vgpr0 = V_XOR_B32_e32 killed %sgpr10, killed %vgpr0, implicit %exec
+
+  bb.2:
+
+    %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc
+...
diff --git a/test/CodeGen/AMDGPU/regcoal-subrange-join.mir b/test/CodeGen/AMDGPU/regcoal-subrange-join.mir
index bac348aaed709..a03135348ce19 100644
--- a/test/CodeGen/AMDGPU/regcoal-subrange-join.mir
+++ b/test/CodeGen/AMDGPU/regcoal-subrange-join.mir
@@ -4,10 +4,10 @@
 # This test will provoke a subrange join (see annotations below) during simple register coalescing
 # Without a fix for PR33524 this causes an unreachable in SubRange Join
 #
-# GCN-DAG: undef %[[REG0:[0-9]+]].sub0 = COPY %sgpr5
-# GCN-DAG: undef %[[REG1:[0-9]+]].sub0 = COPY %sgpr2
-# GCN-DAG: %[[REG0]].sub1 = S_MOV_B32 1
-# GCN-DAG: %[[REG1]].sub1 = S_MOV_B32 1
+# GCN-DAG: undef %[[REG0:[0-9]+]].sub0:sgpr_64 = COPY %sgpr5
+# GCN-DAG: undef %[[REG1:[0-9]+]].sub0:sgpr_64 = COPY %sgpr2
+# GCN-DAG: %[[REG0]].sub1:sgpr_64 = S_MOV_B32 1
+# GCN-DAG: %[[REG1]].sub1:sgpr_64 = S_MOV_B32 1
 
 --- |
   define amdgpu_vs void @regcoal-subrange-join(i32 inreg %arg, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 %arg6) local_unnamed_addr #0 {
diff --git a/test/CodeGen/AMDGPU/regcoalesce-dbg.mir b/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
index ecf94b5772ffc..c5a9a0ad01abe 100644
--- a/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
+++ b/test/CodeGen/AMDGPU/regcoalesce-dbg.mir
@@ -4,12 +4,12 @@
 # LIS->getInstructionIndex with a DBG_VALUE instruction, which does not have
 # a slot index.
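For context, a minimal sketch of the invariant the two comment lines above describe, in LLVM's C++ API: DBG_VALUE machine instructions are not assigned slot indexes, so LiveIntervals may only be queried for real instructions. The surrounding loop and variable names are hypothetical illustration, not code from this patch:

    // Hypothetical illustration: skip debug instructions before asking
    // LiveIntervals for a slot index. DBG_VALUE has no slot index, and
    // calling LIS->getInstructionIndex() on one would assert.
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugValue())
        continue;
      SlotIndex Idx = LIS->getInstructionIndex(MI);
      (void)Idx; // index of a real instruction, usable for range queries
    }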
-# CHECK: %13.sub2 = S_MOV_B32 0
+# CHECK: %13.sub2:sgpr_128 = S_MOV_B32 0
 # CHECK: DBG_VALUE{{.*}}debug-use %13.sub2
 
 --- |
   define amdgpu_kernel void @test(i32 addrspace(1)* %out) { ret void }
-  
+
   !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !4, producer: "llvm", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !4)
   !1 = !DILocalVariable(name: "a", scope: !2, file: !4, line: 126, type: !6)
   !2 = distinct !DISubprogram(name: "test", scope: !4, file: !4, line: 1, type: !3, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !5)
@@ -25,7 +25,7 @@
 ---
 name: test
 tracksRegLiveness: true
-registers: 
+registers:
   - { id: 0, class: sgpr_64 }
   - { id: 1, class: sreg_32_xm0 }
   - { id: 2, class: sgpr_32 }
@@ -47,13 +47,13 @@ registers:
   - { id: 18, class: vgpr_32 }
   - { id: 19, class: vreg_64 }
   - { id: 20, class: vreg_64 }
-liveins: 
+liveins:
   - { reg: '%sgpr0_sgpr1', virtual-reg: '%0' }
   - { reg: '%vgpr0', virtual-reg: '%3' }
 body: |
   bb.0:
     liveins: %sgpr0_sgpr1, %vgpr0
-    
+
     %3 = COPY killed %vgpr0
     %0 = COPY killed %sgpr0_sgpr1
     %4 = S_LOAD_DWORDX2_IMM %0, 9, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
diff --git a/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir b/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
index a52b80ba86e59..08b3ecf8dbac3 100644
--- a/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
+++ b/test/CodeGen/AMDGPU/rename-independent-subregs-mac-operands.mir
@@ -2,7 +2,7 @@
 ---
 # GCN-LABEL: name: mac_invalid_operands
-# GCN: undef %18.sub0 = V_MAC_F32_e32 undef %3, undef %9, undef %18.sub0, implicit %exec
+# GCN: undef %18.sub0:vreg_128 = V_MAC_F32_e32 undef %3:vgpr_32, undef %9:vgpr_32, undef %18.sub0, implicit %exec
 
 name: mac_invalid_operands
 alignment: 0
@@ -73,13 +73,13 @@ body: |
 
 # GCN-LABEL: name: vreg_does_not_dominate
-# GCN: undef %8.sub1 = V_MAC_F32_e32 undef %2, undef %1, undef %8.sub1, implicit %exec
-# GCN: undef %7.sub0 = V_MOV_B32_e32 0, implicit %exec
-# GCN: undef %9.sub2 = COPY %7.sub0
+# GCN: undef %8.sub1:vreg_128 = V_MAC_F32_e32 undef %2:vgpr_32, undef %1:vgpr_32, undef %8.sub1, implicit %exec
+# GCN: undef %7.sub0:vreg_128 = V_MOV_B32_e32 0, implicit %exec
+# GCN: undef %9.sub2:vreg_128 = COPY %7.sub0
 
-# GCN: undef %6.sub3 = V_ADD_F32_e32 undef %3, undef %3, implicit %exec
-# GCN: undef %7.sub0 = V_ADD_F32_e64 0, 0, 0, 0, 0, 0, implicit %exec
-# GCN: %8.sub1 = V_ADD_F32_e32 %8.sub1, %8.sub1, implicit %exec
+# GCN: undef %6.sub3:vreg_128 = V_ADD_F32_e32 undef %3:vgpr_32, undef %3:vgpr_32, implicit %exec
+# GCN: undef %7.sub0:vreg_128 = V_ADD_F32_e64 0, 0, 0, 0, 0, 0, implicit %exec
+# GCN: %8.sub1:vreg_128 = V_ADD_F32_e32 %8.sub1, %8.sub1, implicit %exec
 
 # GCN: BUFFER_STORE_DWORD_OFFEN %6.sub3, %0,
 # GCN: BUFFER_STORE_DWORD_OFFEN %9.sub2, %0,
@@ -137,8 +137,8 @@ body: |
 
 # GCN-LABEL: name: inf_loop_tied_operand
 # GCN: bb.0:
-# GCN-NEXT: undef %2.sub0 = V_MAC_F32_e32 1073741824, undef %0, undef %2.sub0, implicit %exec
-# GCN-NEXT: dead undef %3.sub1 = COPY %2.sub0
+# GCN-NEXT: undef %2.sub0:vreg_128 = V_MAC_F32_e32 1073741824, undef %0:vgpr_32, undef %2.sub0, implicit %exec
+# GCN-NEXT: dead undef %3.sub1:vreg_128 = COPY %2.sub0
 
 name: inf_loop_tied_operand
 tracksRegLiveness: true
diff --git a/test/CodeGen/AMDGPU/sdivrem64.ll b/test/CodeGen/AMDGPU/sdivrem64.ll
index 5ad0d8efaed3f..d51eededd1c48 100644
--- a/test/CodeGen/AMDGPU/sdivrem64.ll
+++ b/test/CodeGen/AMDGPU/sdivrem64.ll
@@ -36,40 +36,13 @@
 ;EG: BFE_UINT
 ;EG: BFE_UINT
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN: s_bfe_u32
-; GCN-NOT: v_mad_f32
-; SI-NOT: v_lshr_b64
-; VI-NOT: v_lshrrev_b64
-; GCN: s_endpgm
+;GCN: v_mac_f32_e32 v{{[0-9]+}}, 0x4f800000,
+;GCN: v_rcp_f32_e32
+;GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x5f7ffffc
+;GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x2f800000
+;GCN: v_trunc_f32_e32
+;GCN: v_mac_f32_e32 v{{[0-9]+}}, 0xcf800000
+;GCN: s_endpgm
 define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
   %result = sdiv i64 %x, %y
   store i64 %result, i64 addrspace(1)* %out
@@ -110,39 +83,12 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: BFE_UINT
 ;EG: AND_INT {{.*}}, 1,
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
+;GCN: v_mac_f32_e32 v{{[0-9]+}}, 0x4f800000,
+;GCN: v_rcp_f32_e32
+;GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x5f7ffffc
+;GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x2f800000
+;GCN: v_trunc_f32_e32
+;GCN: v_mac_f32_e32 v{{[0-9]+}}, 0xcf800000
 ;GCN: s_endpgm
 define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
   %result = urem i64 %x, %y
diff --git a/test/CodeGen/AMDGPU/sdwa-gfx9.mir b/test/CodeGen/AMDGPU/sdwa-gfx9.mir
index 90cb14bf50d30..2196e7e65c0cf 100644
--- a/test/CodeGen/AMDGPU/sdwa-gfx9.mir
+++ b/test/CodeGen/AMDGPU/sdwa-gfx9.mir
@@ -3,20 +3,20 @@
 # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-peephole-sdwa -o - %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
 
 # GCN-LABEL: {{^}}name: add_shr_i32
-# GCN: [[SMOV:%[0-9]+]] = S_MOV_B32 123
+# GCN: [[SMOV:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 123
 
-# CI: [[SHIFT:%[0-9]+]] = V_LSHRREV_B32_e64 16, %{{[0-9]+}}, implicit %exec
-# CI: %{{[0-9]+}} = V_ADD_I32_e32 [[SMOV]], killed [[SHIFT]], implicit-def %vcc, implicit %exec
+# CI: [[SHIFT:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, %{{[0-9]+}}, implicit %exec
+# CI: %{{[0-9]+}}:vgpr_32 = V_ADD_I32_e32 [[SMOV]], killed [[SHIFT]], implicit-def %vcc, implicit %exec
 
-# VI: [[VMOV:%[0-9]+]] = V_MOV_B32_e32 [[SMOV]], implicit %exec
-# VI: %{{[0-9]+}} = V_ADD_I32_sdwa 0, [[VMOV]], 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit-def %vcc, implicit %exec
+# VI: [[VMOV:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[SMOV]], implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_ADD_I32_sdwa 0, [[VMOV]], 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit-def %vcc, implicit %exec
 
-# GFX9: %{{[0-9]+}} = V_ADD_I32_sdwa 0, [[SMOV]], 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit-def %vcc, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_I32_sdwa 0, [[SMOV]], 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit-def %vcc, implicit %exec
 
 ---
 name: add_shr_i32
 tracksRegLiveness: true
-registers: 
+registers:
   - { id: 0, class: vreg_64 }
   - { id: 1, class: vreg_64 }
   - { id: 2, class: sreg_64 }
@@ -33,7 +33,7 @@ registers:
 body: |
   bb.0:
     liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
-    
+
     %2 = COPY %sgpr30_sgpr31
     %1 = COPY %vgpr2_vgpr3
     %0 = COPY %vgpr0_vgpr1
@@ -49,18 +49,18 @@ body: |
 
 # GCN-LABEL: {{^}}name: trunc_shr_f32
 
-# CI: [[SHIFT:%[0-9]+]] = V_LSHRREV_B32_e64 16, %{{[0-9]+}}, implicit %exec
-# CI: %{{[0-9]+}} = V_TRUNC_F32_e64 0, killed [[SHIFT]], 1, 2, implicit-def %vcc, implicit %exec
+# CI: [[SHIFT:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, %{{[0-9]+}}, implicit %exec
+# CI: %{{[0-9]+}}:vgpr_32 = V_TRUNC_F32_e64 0, killed [[SHIFT]], 1, 2, implicit-def %vcc, implicit %exec
 
-# VI: [[SHIFT:%[0-9]+]] = V_LSHRREV_B32_e64 16, %{{[0-9]+}}, implicit %exec
-# VI: %{{[0-9]+}} = V_TRUNC_F32_e64 0, killed [[SHIFT]], 1, 2, implicit-def %vcc, implicit %exec
+# VI: [[SHIFT:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, %{{[0-9]+}}, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_TRUNC_F32_e64 0, killed [[SHIFT]], 1, 2, implicit-def %vcc, implicit %exec
 
-#GFX9: %{{[0-9]+}} = V_TRUNC_F32_sdwa 0, %{{[0-9]+}}, 1, 2, 6, 0, 5, implicit %exec
+#GFX9: %{{[0-9]+}}:vgpr_32 = V_TRUNC_F32_sdwa 0, %{{[0-9]+}}, 1, 2, 6, 0, 5, implicit %exec
 
 ---
 name: trunc_shr_f32
 tracksRegLiveness: true
-registers: 
+registers:
   - { id: 0, class: vreg_64 }
   - { id: 1, class: vreg_64 }
   - { id: 2, class: sreg_64 }
@@ -76,7 +76,7 @@ registers:
 body: |
   bb.0:
     liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
-    
+
     %2 = COPY %sgpr30_sgpr31
     %1 = COPY %vgpr2_vgpr3
     %0 = COPY %vgpr0_vgpr1
diff --git a/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir b/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
index ff1b2ad73ef0b..77c231c584a24 100644
--- a/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
+++ b/test/CodeGen/AMDGPU/sdwa-peephole-instr.mir
@@ -3,35 +3,35 @@
 
 # GFX89-LABEL: {{^}}name: vop1_instructions
 
-# GFX89: %{{[0-9]+}} = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
-# GFX89: %{{[0-9]+}} = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
-# GFX89: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
-# GFX89: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
-# GFX89: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
 
-# GFX89: %{{[0-9]+}} = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 6, 0, 5, implicit %exec
-# GFX89: %{{[0-9]+}} = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
-# GFX89: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
-# GFX89: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
-# GFX89: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_sdwa 0, %{{[0-9]+}}, 0, 6, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 0, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, implicit %exec
 
-# VI: %{{[0-9]+}} = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
-# VI: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
-# VI: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
-# VI: %{{[0-9]+}} = V_CVT_F32_I32_e64 %{{[0-9]+}}, 0, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_e64 %{{[0-9]+}}, 0, 1, implicit %exec
 
-# GFX9: %{{[0-9]+}} = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
-# GFX9: %{{[0-9]+}} = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
-# GFX9: %{{[0-9]+}} = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
-# GFX9: %{{[0-9]+}} = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_FRACT_F32_sdwa 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_SIN_F32_sdwa 0, %{{[0-9]+}}, 1, 0, 5, 0, 5, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_CVT_U32_F32_sdwa 1, %{{[0-9]+}}, 0, 5, 0, 5, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_CVT_F32_I32_sdwa 0, %{{[0-9]+}}, 0, 1, 5, 0, 5, implicit %exec
 
 ---
 name: vop1_instructions
 tracksRegLiveness: true
-registers: 
+registers:
   - { id: 0, class: vreg_64 }
   - { id: 1, class: vreg_64 }
   - { id: 2, class: sreg_64 }
@@ -85,7 +85,7 @@ registers:
 body: |
   bb.0:
     liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
-    
+
     %2 = COPY %sgpr30_sgpr31
     %1 = COPY %vgpr2_vgpr3
     %0 = COPY %vgpr0_vgpr1
@@ -148,45 +148,45 @@ body: |
 
 # GCN-LABEL: {{^}}name: vop2_instructions
 
-# VI: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
-# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
-# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
-# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
-# VI: %{{[0-9]+}} = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
 
-# GFX9: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
-# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
-# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
-# GFX9: %{{[0-9]+}} = V_MAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec
-# GFX9: %{{[0-9]+}} = V_MAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 0, 6, 5, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e32 %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}, implicit %exec
 
-# VI: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
-# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
-# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
-# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
-# VI: %{{[0-9]+}} = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, %{{[0-9]+}}, 0, 0, 6, 0, 5, 1, implicit %exec
 
-# GFX9: %{{[0-9]+}} = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
-# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
-# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
-# GFX9: %{{[0-9]+}} = V_MAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec
-# GFX9: %{{[0-9]+}} = V_MAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_AND_B32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 5, 0, 6, 5, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_e64 0, 23, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e64 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 0, implicit %exec
 
-# VI: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
-# VI: %{{[0-9]+}} = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
-# VI: %{{[0-9]+}} = V_MAC_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, %{{[0-9]+}}, 1, 0, 6, 0, 6, 1, implicit %exec
-# VI: %{{[0-9]+}} = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, %{{[0-9]+}}, 1, 0, 6, 0, 6, 1, implicit %exec
+# VI: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec
 
-# GFX9: %{{[0-9]+}} = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
-# GFX9: %{{[0-9]+}} = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
-# GFX9: %{{[0-9]+}} = V_MAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit %exec
-# GFX9: %{{[0-9]+}} = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_ADD_F32_sdwa 0, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 5, 1, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_SUB_F16_sdwa 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 0, 5, 0, 6, 1, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F32_e64 1, 23, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, 0, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_MAC_F16_e64 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 1, %{{[0-9]+}}, 0, 2, implicit %exec
 
 name: vop2_instructions
 tracksRegLiveness: true
-registers: 
+registers:
   - { id: 0, class: vreg_64 }
   - { id: 1, class: vreg_64 }
   - { id: 2, class: sreg_64 }
@@ -252,7 +252,7 @@ registers:
 body: |
   bb.0:
     liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
-    
+
     %2 = COPY %sgpr30_sgpr31
     %1 = COPY %vgpr2_vgpr3
     %0 = COPY %vgpr0_vgpr1
@@ -324,7 +324,7 @@ body: |
 
 # GCN-LABEL: {{^}}name: vopc_instructions
 
-# GFX89: %{{[0-9]+}} = V_MOV_B32_e32 123, implicit %exec
+# GFX89: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 123, implicit %exec
 # GFX89: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
 # GFX89: %vcc = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
 # GFX89: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
@@ -332,16 +332,16 @@ body: |
 
 # VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
-# VI: %{{[0-9]+}} = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, implicit-def %exec, implicit %exec
+# VI: %{{[0-9]+}}:sreg_64 = V_CMPX_GT_F32_e64 0, 23, 0, killed %{{[0-9]+}}, 0, implicit-def %exec, implicit %exec
 # VI: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %3, 0, 6, 4, implicit-def %vcc, implicit %exec
-# VI: %{{[0-9]+}} = V_CMPX_EQ_I32_e64 23, killed %{{[0-9]+}}, implicit-def %exec, implicit %exec
+# VI: %{{[0-9]+}}:sreg_64 = V_CMPX_EQ_I32_e64 23, killed %{{[0-9]+}}, implicit-def %exec, implicit %exec
 
 # GFX9: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
-# GFX9: %{{[0-9]+}} = V_MOV_B32_e32 23, implicit %exec
-# GFX9: %{{[0-9]+}} = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 23, implicit %exec
+# GFX9: %{{[0-9]+}}:sreg_64 = V_CMPX_GT_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
 # GFX9: %vcc = V_CMP_LT_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit %exec
-# GFX9: %{{[0-9]+}} = V_MOV_B32_e32 23, implicit %exec
-# GFX9: %{{[0-9]+}} = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
+# GFX9: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 23, implicit %exec
+# GFX9: %{{[0-9]+}}:sreg_64 = V_CMPX_EQ_I32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 0, 6, 4, implicit-def %vcc, implicit-def %exec, implicit %exec
 
 # VI: %vcc = V_CMP_EQ_F32_sdwa 0, %{{[0-9]+}}, 0, %{{[0-9]+}}, 1, 6, 4, implicit-def %vcc, implicit %exec
@@ -364,7 +364,7 @@ body: |
 
 name: vopc_instructions
 tracksRegLiveness: true
-registers: 
+registers:
   - { id: 0, class: vreg_64 }
   - { id: 1, class: vreg_64 }
   - { id: 2, class: sreg_64 }
@@ -397,7 +397,7 @@ registers:
 body: |
   bb.0:
     liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
-    
+
     %2 = COPY %sgpr30_sgpr31
     %1 = COPY %vgpr2_vgpr3
     %0 = COPY %vgpr0_vgpr1
diff --git a/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir b/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
index bd222adf6a68c..c50601e79f2e1 100644
--- a/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
+++ b/test/CodeGen/AMDGPU/sdwa-vop2-64bit.mir
@@ -6,15 +6,15 @@
 
 # GCN-LABEL: {{^}}name: vop2_64bit
 
-# GCN: %{{[0-9]+}} = V_BCNT_U32_B32_e64 %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
-# GCN: %{{[0-9]+}} = V_BFM_B32_e64 %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
-# GCN: %{{[0-9]+}} = V_CVT_PKNORM_I16_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 0, implicit-def %vcc, implicit %exec
-# GCN: %{{[0-9]+}} = V_READLANE_B32 killed %{{[0-9]+}}, 0, implicit-def %vcc, implicit %exec
+# GCN: %{{[0-9]+}}:vgpr_32 = V_BCNT_U32_B32_e64 %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
+# GCN: %{{[0-9]+}}:vgpr_32 = V_BFM_B32_e64 %{{[0-9]+}}, killed %{{[0-9]+}}, implicit-def %vcc, implicit %exec
+# GCN: %{{[0-9]+}}:vgpr_32 = V_CVT_PKNORM_I16_F32_e64 0, %{{[0-9]+}}, 0, killed %{{[0-9]+}}, 0, implicit-def %vcc, implicit %exec
+# GCN: %{{[0-9]+}}:sgpr_32 = V_READLANE_B32 killed %{{[0-9]+}}, 0, implicit-def %vcc, implicit %exec
 
 ---
 name: vop2_64bit
 tracksRegLiveness: true
-registers: 
+registers:
   - { id: 0, class: vreg_64 }
   - { id: 1, class: vreg_64 }
   - { id: 2, class: sreg_64 }
@@ -37,7 +37,7 @@ registers:
 body: |
   bb.0:
     liveins: %vgpr0_vgpr1, %vgpr2_vgpr3, %sgpr30_sgpr31
-    
+
     %2 = COPY %sgpr30_sgpr31
     %1 = COPY %vgpr2_vgpr3
     %0 = COPY %vgpr0_vgpr1
diff --git a/test/CodeGen/AMDGPU/setcc.ll b/test/CodeGen/AMDGPU/setcc.ll
index a3bf167e756af..122f2432eac7f 100644
--- a/test/CodeGen/AMDGPU/setcc.ll
+++ b/test/CodeGen/AMDGPU/setcc.ll
@@ -416,4 +416,56 @@ bb2:
   ret void
 }
 
+; FUNC-LABEL: setcc_v2i32_expand
+; GCN: v_cmp_gt_i32
+; GCN: v_cmp_gt_i32
+define amdgpu_kernel void @setcc_v2i32_expand(
+  <2 x i32> addrspace(1)* %a,
+  <2 x i32> addrspace(1)* %b,
+  <2 x i32> addrspace(1)* %c,
+  <2 x float> addrspace(1)* %r) {
+entry:
+  %a.val = load <2 x i32>, <2 x i32> addrspace(1)* %a
+  %b.val = load <2 x i32>, <2 x i32> addrspace(1)* %b
+  %c.val = load <2 x i32>, <2 x i32> addrspace(1)* %c
+
+  %icmp.val.1 = icmp sgt <2 x i32> %a.val,
+  %zext.val.1 = zext <2 x i1> %icmp.val.1 to <2 x i32>
+  %shl.val.1 = shl nuw <2 x i32> %zext.val.1,
+  %xor.val.1 = xor <2 x i32> %shl.val.1, %b.val
+  %bitcast.val.1 = bitcast <2 x i32> %xor.val.1 to <2 x float>
+  %icmp.val.2 = icmp sgt <2 x i32> %c.val,
+  %select.val.1 = select <2 x i1> %icmp.val.2, <2 x float> , <2 x float> %bitcast.val.1
+
+  store <2 x float> %select.val.1, <2 x float> addrspace(1)* %r
+  ret void
+}
+
+; FUNC-LABEL: setcc_v4i32_expand
+; GCN: v_cmp_gt_i32
+; GCN: v_cmp_gt_i32
+; GCN: v_cmp_gt_i32
+; GCN: v_cmp_gt_i32
+define amdgpu_kernel void @setcc_v4i32_expand(
+  <4 x i32> addrspace(1)* %a,
+  <4 x i32> addrspace(1)* %b,
+  <4 x i32> addrspace(1)* %c,
+  <4 x float> addrspace(1)* %r) {
+entry:
+  %a.val = load <4 x i32>, <4 x i32> addrspace(1)* %a
+  %b.val = load <4 x i32>, <4 x i32> addrspace(1)* %b
+  %c.val = load <4 x i32>, <4 x i32> addrspace(1)* %c
+
+  %icmp.val.1 = icmp sgt <4 x i32> %a.val,
+  %zext.val.1 = zext <4 x i1> %icmp.val.1 to <4 x i32>
+  %shl.val.1 = shl nuw <4 x i32> %zext.val.1,
+  %xor.val.1 = xor <4 x i32> %shl.val.1, %b.val
+  %bitcast.val.1 = bitcast <4 x i32> %xor.val.1 to <4 x float>
+  %icmp.val.2 = icmp sgt <4 x i32> %c.val,
+  %select.val.1 = select <4 x i1> %icmp.val.2, <4 x float> , <4 x float> %bitcast.val.1
+
+  store <4 x float> %select.val.1, <4 x float> addrspace(1)* %r
+  ret void
+}
+
 attributes #0 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/shrink-carry.mir b/test/CodeGen/AMDGPU/shrink-carry.mir
index d5d6223cc0614..cf000ffa7747b 100644
--- a/test/CodeGen/AMDGPU/shrink-carry.mir
+++ b/test/CodeGen/AMDGPU/shrink-carry.mir
@@ -10,9 +10,9 @@ registers:
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
 
 body: |
   bb.0:
@@ -34,9 +34,9 @@ registers:
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
 
 body: |
   bb.0:
@@ -58,9 +58,9 @@ registers:
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
 
 body: |
   bb.0:
@@ -82,9 +82,9 @@ registers:
   - { id: 0, class: vgpr_32 }
   - { id: 1, class: vgpr_32 }
   - { id: 2, class: vgpr_32 }
-  - { id: 3, class: sreg_64 }
+  - { id: 3, class: sreg_64_xexec }
   - { id: 4, class: vgpr_32 }
-  - { id: 5, class: sreg_64 }
+  - { id: 5, class: sreg_64_xexec }
 
 body: |
   bb.0:
diff --git a/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir b/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
index 767118eb8d118..0ffee0c4fcf4c 100644
--- a/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
+++ b/test/CodeGen/AMDGPU/shrink-vop3-carry-out.mir
@@ -8,8 +8,8 @@
 ...
 # GCN-LABEL: name: shrink_add_vop3{{$}}
-# GCN: %29, %9 = V_ADD_I32_e64 %19, %17, implicit %exec
-# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
+# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_ADD_I32_e64 %19, %17, implicit %exec
+# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
 name: shrink_add_vop3
 alignment: 0
 exposesReturnsTwice: false
@@ -27,7 +27,7 @@ registers:
   - { id: 6, class: sreg_32 }
   - { id: 7, class: sreg_32 }
   - { id: 8, class: sreg_32_xm0 }
-  - { id: 9, class: sreg_64 }
+  - { id: 9, class: sreg_64_xexec }
   - { id: 10, class: sreg_32_xm0 }
   - { id: 11, class: sreg_32_xm0 }
   - { id: 12, class: sgpr_64 }
@@ -91,8 +91,8 @@ body: |
 ...
 ---
 # GCN-LABEL: name: shrink_sub_vop3{{$}}
-# GCN: %29, %9 = V_SUB_I32_e64 %19, %17, implicit %exec
-# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
+# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUB_I32_e64 %19, %17, implicit %exec
+# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
 
 name: shrink_sub_vop3
 alignment: 0
@@ -111,7 +111,7 @@ registers:
   - { id: 6, class: sreg_32 }
   - { id: 7, class: sreg_32 }
   - { id: 8, class: sreg_32_xm0 }
-  - { id: 9, class: sreg_64 }
+  - { id: 9, class: sreg_64_xexec }
   - { id: 10, class: sreg_32_xm0 }
   - { id: 11, class: sreg_32_xm0 }
   - { id: 12, class: sgpr_64 }
@@ -175,8 +175,8 @@ body: |
 ...
 ---
 # GCN-LABEL: name: shrink_subrev_vop3{{$}}
-# GCN: %29, %9 = V_SUBREV_I32_e64 %19, %17, implicit %exec
-# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
+# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUBREV_I32_e64 %19, %17, implicit %exec
+# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit %exec
 
 name: shrink_subrev_vop3
 alignment: 0
@@ -195,7 +195,7 @@ registers:
   - { id: 6, class: sreg_32 }
   - { id: 7, class: sreg_32 }
   - { id: 8, class: sreg_32_xm0 }
-  - { id: 9, class: sreg_64 }
+  - { id: 9, class: sreg_64_xexec }
   - { id: 10, class: sreg_32_xm0 }
   - { id: 11, class: sreg_32_xm0 }
   - { id: 12, class: sgpr_64 }
@@ -259,8 +259,8 @@ body: |
 ...
 ---
 # GCN-LABEL: name: check_addc_src2_vop3{{$}}
-# GCN: %29, %vcc = V_ADDC_U32_e64 %19, %17, %9, implicit %exec
-# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+# GCN: %29:vgpr_32, %vcc = V_ADDC_U32_e64 %19, %17, %9, implicit %exec
+# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
 name: check_addc_src2_vop3
 alignment: 0
 exposesReturnsTwice: false
@@ -278,7 +278,7 @@ registers:
   - { id: 6, class: sreg_32 }
   - { id: 7, class: sreg_32 }
   - { id: 8, class: sreg_32_xm0 }
-  - { id: 9, class: sreg_64 }
+  - { id: 9, class: sreg_64_xexec }
   - { id: 10, class: sreg_32_xm0 }
   - { id: 11, class: sreg_32_xm0 }
   - { id: 12, class: sgpr_64 }
@@ -343,7 +343,7 @@ body: |
 ...
 ---
 # GCN-LABEL: name: shrink_addc_vop3{{$}}
-# GCN: %29 = V_ADDC_U32_e32 %19, %17, implicit-def %vcc, implicit %vcc, implicit %exec
+# GCN: %29:vgpr_32 = V_ADDC_U32_e32 %19, %17, implicit-def %vcc, implicit %vcc, implicit %exec
 # GCN %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
 
 name: shrink_addc_vop3
@@ -429,8 +429,8 @@ body: |
 ---
 # GCN-LABEL: name: shrink_addc_undef_vcc{{$}}
-# GCN: %29 = V_ADDC_U32_e32 %19, %17, implicit-def %vcc, implicit undef %vcc, implicit %exec
-# GCN: %24 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
+# GCN: %29:vgpr_32 = V_ADDC_U32_e32 %19, %17, implicit-def %vcc, implicit undef %vcc, implicit %exec
+# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed %vcc, implicit %exec
 name: shrink_addc_undef_vcc
 alignment: 0
 exposesReturnsTwice: false
diff --git a/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll b/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll
index 580268deb85d1..7ae4636a0b5df 100644
--- a/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll
+++ b/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll
@@ -3,7 +3,7 @@
 ; register operands in the correct order when modifying the opcode of an
 ; instruction to V_ADD_I32_e32.
 
-; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def %vcc, implicit %exec
+; CHECK: %{{[0-9]+}}:vgpr_32 = V_ADD_I32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def %vcc, implicit %exec
 
 define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
diff --git a/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll b/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
new file mode 100644
index 0000000000000..f8077cd8e3ab0
--- /dev/null
+++ b/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
@@ -0,0 +1,95 @@
+; RUN: llc -march=amdgcn < %s | FileCheck %s
+
+; Check we can compile this bugpoint-reduced test without an
+; infinite loop in TLI.SimplifyDemandedBits() due to failure
+; to use return value of TLO.DAG.UpdateNodeOperands()
+
+; Check that code was generated; we know there will be
+; a s_endpgm, so check for it.
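The test comment above names the root cause; as a hedged sketch of the general pattern (illustrative variable names, not the actual TargetLowering code from this fix): SelectionDAG::UpdateNodeOperands can CSE to an existing node and return that one instead of updating the original in place, so a caller must continue with the returned node:

    // UpdateNodeOperands may return a different (CSE'd) SDNode rather than
    // modifying the original. Ignoring the return value and re-simplifying
    // the stale node is what lets SimplifyDemandedBits spin forever.
    SDNode *Updated = TLO.DAG.UpdateNodeOperands(Op.getNode(), NewLHS, NewRHS);
    if (Updated != Op.getNode())
      return TLO.CombineTo(Op, SDValue(Updated, 0));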
+
+@0 = external unnamed_addr addrspace(3) global [462 x float], align 4
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+; Function Attrs: nounwind readnone speculatable
+declare float @llvm.fmuladd.f32(float, float, float) #0
+
+; CHECK: s_endpgm
+define amdgpu_kernel void @foo(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1, float addrspace(1)* noalias nocapture %arg2, float %arg3) local_unnamed_addr !reqd_work_group_size !0 {
+bb:
+  %tmp = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %tmp5 = and i32 %tmp, 15
+  %tmp6 = mul nuw nsw i32 %tmp5, 21
+  %tmp7 = sub i32 %tmp6, 0
+  %tmp8 = add i32 %tmp7, 0
+  %tmp9 = add i32 %tmp8, 0
+  %tmp10 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 0
+  br label %bb12
+
+bb11:                                             ; preds = %bb30
+  br i1 undef, label %bb37, label %bb38
+
+bb12:                                             ; preds = %bb30, %bb
+  br i1 false, label %.preheader, label %.loopexit145
+
+.loopexit145:                                     ; preds = %.preheader, %bb12
+  br label %bb13
+
+bb13:                                             ; preds = %.loopexit, %.loopexit145
+  %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ]
+  %tmp15 = add nsw i32 %tmp14, -3
+  %tmp16 = mul i32 %tmp14, 21
+  br i1 undef, label %bb17, label %.loopexit
+
+bb17:                                             ; preds = %bb13
+  %tmp18 = mul i32 %tmp15, 224
+  %tmp19 = add i32 undef, %tmp18
+  br label %bb21
+
+.loopexit:                                        ; preds = %bb21, %bb13
+  %tmp20 = add nuw nsw i32 %tmp14, 16
+  br i1 undef, label %bb13, label %bb26
+
+bb21:                                             ; preds = %bb21, %bb17
+  %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ]
+  %tmp23 = add i32 %tmp22, %tmp16
+  %tmp24 = getelementptr inbounds float, float addrspace(3)* %tmp10, i32 %tmp23
+  store float undef, float addrspace(3)* %tmp24, align 4
+  %tmp25 = add nuw i32 %tmp22, 8
+  br i1 undef, label %bb21, label %.loopexit
+
+bb26:                                             ; preds = %.loopexit
+  br label %bb31
+
+.preheader:                                       ; preds = %.preheader, %bb12
+  %tmp27 = phi i32 [ %tmp28, %.preheader ], [ undef, %bb12 ]
+  %tmp28 = add nuw i32 %tmp27, 128
+  %tmp29 = icmp ult i32 %tmp28, 1568
+  br i1 %tmp29, label %.preheader, label %.loopexit145
+
+bb30:                                             ; preds = %bb31
+  br i1 undef, label %bb11, label %bb12
+
+bb31:                                             ; preds = %bb31, %bb26
+  %tmp32 = phi i32 [ %tmp9, %bb26 ], [ undef, %bb31 ]
+  %tmp33 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 %tmp32
+  %tmp34 = load float, float addrspace(3)* %tmp33, align 4
+  %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float undef, float undef)
+  %tmp36 = tail call float @llvm.fmuladd.f32(float undef, float undef, float %tmp35)
+  br i1 undef, label %bb30, label %bb31
+
+bb37:                                             ; preds = %bb11
+  br label %bb38
+
+bb38:                                             ; preds = %bb37, %bb11
+  ret void
+}
+
+attributes #0 = { nounwind readnone speculatable }
+
+!0 = !{i32 8, i32 16, i32 1}
diff --git a/test/CodeGen/AMDGPU/spill-empty-live-interval.mir b/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
index 6c2e8093c0259..aceac34f286ac 100644
--- a/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
+++ b/test/CodeGen/AMDGPU/spill-empty-live-interval.mir
@@ -7,13 +7,13 @@
 
 # CHECK-LABEL: name: expecting_non_empty_interval
 
-# CHECK: undef %7.sub1 = V_MAC_F32_e32 0, undef %1, undef %7.sub1, implicit %exec
+# CHECK: undef %7.sub1:vreg_64 = V_MAC_F32_e32 0, undef %1:vgpr_32, undef %7.sub1, implicit %exec
 # CHECK-NEXT: SI_SPILL_V64_SAVE %7, %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (store 8 into %stack.0, align 4)
-# CHECK-NEXT: undef %5.sub1 = V_MOV_B32_e32 1786773504, implicit %exec
-# CHECK-NEXT: dead %2 = V_MUL_F32_e32 0, %5.sub1, implicit %exec
+# CHECK-NEXT: undef %5.sub1:vreg_64 = V_MOV_B32_e32 1786773504, implicit %exec
+# CHECK-NEXT: dead %2:vgpr_32 = V_MUL_F32_e32 0, %5.sub1, implicit %exec
 
 # CHECK: S_NOP 0, implicit %6.sub1
-# CHECK-NEXT: %8 = SI_SPILL_V64_RESTORE %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (load 8 from %stack.0, align 4)
+# CHECK-NEXT: %8:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr5, 0, implicit %exec :: (load 8 from %stack.0, align 4)
 # CHECK-NEXT: S_NOP 0, implicit %8.sub1
 # CHECK-NEXT: S_NOP 0, implicit undef %9.sub0
 
@@ -44,12 +44,12 @@
 # CHECK-LABEL: name: rematerialize_empty_interval_has_reference
 
 # CHECK-NOT: MOV
-# CHECK: undef %3.sub2 = V_MOV_B32_e32 1786773504, implicit %exec
+# CHECK: undef %3.sub2:vreg_128 = V_MOV_B32_e32 1786773504, implicit %exec
 
 # CHECK: bb.1:
 # CHECK-NEXT: S_NOP 0, implicit %3.sub2
 # CHECK-NEXT: S_NOP 0, implicit undef %6.sub0
-# CHECK-NEXT: undef %4.sub2 = V_MOV_B32_e32 0, implicit %exec
+# CHECK-NEXT: undef %4.sub2:vreg_128 = V_MOV_B32_e32 0, implicit %exec
 # CHECK-NEXT: S_NOP 0, implicit %4.sub2
 
 name: rematerialize_empty_interval_has_reference
 tracksRegLiveness: true
diff --git a/test/CodeGen/AMDGPU/twoaddr-mad.mir b/test/CodeGen/AMDGPU/twoaddr-mad.mir
index b4e42e8337483..707676d848976 100644
--- a/test/CodeGen/AMDGPU/twoaddr-mad.mir
+++ b/test/CodeGen/AMDGPU/twoaddr-mad.mir
@@ -112,8 +112,8 @@ body: |
 
 # Make sure constant bus restriction isn't violated if src0 is an SGPR.
 
 # GCN-LABEL: name: test_madak_sgpr_src0_f32
-# GCN: %1 = V_MOV_B32_e32 1078523331, implicit %exec
-# GCN: %2 = V_MAD_F32 0, killed %0, 0, %1, 0, %3, 0, 0, implicit %exec
+# GCN: %1:vgpr_32 = V_MOV_B32_e32 1078523331, implicit %exec
+# GCN: %2:vgpr_32 = V_MAD_F32 0, killed %0, 0, %1, 0, %3:vgpr_32, 0, 0, implicit %exec
 
 ---
 name: test_madak_sgpr_src0_f32
@@ -134,7 +134,7 @@ body: |
 # This can still fold if this is an inline immediate.
 
 # GCN-LABEL: name: test_madak_inlineimm_src0_f32
-# GCN: %1 = V_MADMK_F32 1073741824, 1078523331, %2, implicit %exec
+# GCN: %1:vgpr_32 = V_MADMK_F32 1073741824, 1078523331, %2:vgpr_32, implicit %exec
 
 ---
 name: test_madak_inlineimm_src0_f32
@@ -152,7 +152,7 @@ body: |
 # Non-inline immediate uses constant bus already.
 
 # GCN-LABEL: name: test_madak_otherimm_src0_f32
-# GCN: %1 = V_MAC_F32_e32 1120403456, %0, %1, implicit %exec
+# GCN: %1:vgpr_32 = V_MAC_F32_e32 1120403456, %0, %1, implicit %exec
 
 ---
 name: test_madak_otherimm_src0_f32
@@ -170,7 +170,7 @@ body: |
 # Non-inline immediate uses constant bus already.
 # GCN-LABEL: name: test_madak_other_constantlike_src0_f32
-# GCN: %1 = V_MAC_F32_e32 %stack.0, %0, %1, implicit %exec
+# GCN: %1:vgpr_32 = V_MAC_F32_e32 %stack.0, %0, %1, implicit %exec
 
 ---
 name: test_madak_other_constantlike_src0_f32
 registers:
diff --git a/test/CodeGen/AMDGPU/udivrem64.ll b/test/CodeGen/AMDGPU/udivrem64.ll
index bd297920d5634..91c27b09b5fa5 100644
--- a/test/CodeGen/AMDGPU/udivrem64.ll
+++ b/test/CodeGen/AMDGPU/udivrem64.ll
@@ -36,39 +36,12 @@
 ;EG: BFE_UINT
 ;EG: BFE_UINT
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
+;GCN: v_mac_f32_e32 v{{[0-9]+}}, 0x4f800000,
+;GCN: v_rcp_f32_e32
+;GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x5f7ffffc
+;GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x2f800000
+;GCN: v_trunc_f32_e32
+;GCN: v_mac_f32_e32 v{{[0-9]+}}, 0xcf800000
 ;GCN: s_endpgm
 define amdgpu_kernel void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
   %result = udiv i64 %x, %y
@@ -110,39 +83,12 @@ define amdgpu_kernel void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: BFE_UINT
 ;EG: AND_INT {{.*}}, 1,
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN: s_bfe_u32
-;GCN-NOT: v_mad_f32
-;SI-NOT: v_lshr_b64
-;VI-NOT: v_lshrrev_b64
+;GCN: v_mac_f32_e32 v{{[0-9]+}}, 0x4f800000,
+;GCN: v_rcp_f32_e32
+;GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x5f7ffffc
+;GCN: v_mul_f32_e32 v{{[0-9]+}}, 0x2f800000
+;GCN: v_trunc_f32_e32
+;GCN: v_mac_f32_e32 v{{[0-9]+}}, 0xcf800000
 ;GCN: s_endpgm
 define amdgpu_kernel void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
   %result = urem i64 %x, %y
diff --git a/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir b/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir
index f8a2339626cf1..1c34789ed6006 100644
--- a/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir
+++ b/test/CodeGen/AMDGPU/vop-shrink-frame-index.mir
@@ -33,9 +33,9 @@
 ...
 # GCN-LABEL: name: fold_fi_vgpr{{$}}
-# GCN: %1 = IMPLICIT_DEF
+# GCN: %1:vgpr_32 = IMPLICIT_DEF
 
-# GCN: %2 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def %vcc, implicit %exec
+# GCN: %2:vgpr_32 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def %vcc, implicit %exec
 name: fold_fi_vgpr
 tracksRegLiveness: true
 registers:
@@ -55,8 +55,8 @@ body: |
 ...
 # GCN-LABEL: name: fold_vgpr_fi{{$}}
-# GCN: %1 = IMPLICIT_DEF
-# GCN: %2 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def %vcc, implicit %exec
+# GCN: %1:vgpr_32 = IMPLICIT_DEF
+# GCN: %2:vgpr_32 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def %vcc, implicit %exec
 name: fold_vgpr_fi
 tracksRegLiveness: true
 registers:
@@ -76,9 +76,9 @@ body: |
 ...
 # GCN-LABEL: name: fold_sgpr_fi{{$}}
-# GCN: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
-# GCN: %1 = IMPLICIT_DEF
-# GCN: %2 = V_ADD_I32_e32 %1, %0, implicit-def %vcc, implicit %exec
+# GCN: %0:vgpr_32 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+# GCN: %1:sgpr_32 = IMPLICIT_DEF
+# GCN: %2:vgpr_32 = V_ADD_I32_e32 %1, %0, implicit-def %vcc, implicit %exec
 name: fold_sgpr_fi
 tracksRegLiveness: true
 registers:
@@ -98,9 +98,9 @@ body: |
 ...
 # GCN-LABEL: name: fold_fi_sgpr{{$}}
-# GCN: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
-# GCN: %1 = IMPLICIT_DEF
-# GCN: %2 = V_ADD_I32_e32 %1, %0, implicit-def %vcc, implicit %exec
+# GCN: %0:vgpr_32 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+# GCN: %1:sgpr_32 = IMPLICIT_DEF
+# GCN: %2:vgpr_32 = V_ADD_I32_e32 %1, %0, implicit-def %vcc, implicit %exec
 name: fold_fi_sgpr
 tracksRegLiveness: true
 registers:
@@ -120,8 +120,8 @@ body: |
 ...
 # TODO: Should probably prefer folding immediate first
 # GCN-LABEL: name: fold_fi_imm{{$}}
-# GCN: %1 = V_MOV_B32_e32 999, implicit %exec
-# GCN: %2 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def %vcc, implicit %exec
+# GCN: %1:vgpr_32 = V_MOV_B32_e32 999, implicit %exec
+# GCN: %2:vgpr_32 = V_ADD_I32_e32 %stack.0.alloca, %1, implicit-def %vcc, implicit %exec
 name: fold_fi_imm
 tracksRegLiveness: true
 registers:
@@ -141,8 +141,8 @@ body: |
 ...
 # GCN-LABEL: name: fold_imm_fi{{$}}
-# GCN: %0 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
-# GCN: %2 = V_ADD_I32_e32 999, %0, implicit-def %vcc, implicit %exec
+# GCN: %0:vgpr_32 = V_MOV_B32_e32 %stack.0.alloca, implicit %exec
+# GCN: %2:vgpr_32 = V_ADD_I32_e32 999, %0, implicit-def %vcc, implicit %exec
 name: fold_imm_fi
 tracksRegLiveness: true
 registers:
diff --git a/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir b/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir
index b4c0c93347c20..a190324cdc25f 100644
--- a/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir
+++ b/test/CodeGen/AMDGPU/vop-shrink-non-ssa.mir
@@ -1,8 +1,8 @@
 # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-shrink-instructions -o - %s | FileCheck -check-prefix=GCN %s
 ...
 # GCN-LABEL: name: fold_imm_non_ssa{{$}}
-# GCN: %0 = V_MOV_B32_e32 123, implicit %exec
-# GCN: %2 = V_ADD_I32_e32 456, %0, implicit-def %vcc, implicit %exec
+# GCN: %0:vgpr_32 = V_MOV_B32_e32 123, implicit %exec
+# GCN: %2:vgpr_32 = V_ADD_I32_e32 456, %0, implicit-def %vcc, implicit %exec
 
 name: fold_imm_non_ssa
 tracksRegLiveness: true
@@ -21,8 +21,8 @@ body: |
 ...
 # GCN-LABEL: name: fold_partially_defined_superreg{{$}}
-# GCN: %1 = V_MOV_B32_e32 456, implicit %exec
-# GCN: %2 = V_ADD_I32_e32 123, %1, implicit-def %vcc, implicit %exec
+# GCN: %1:vgpr_32 = V_MOV_B32_e32 456, implicit %exec
+# GCN: %2:vgpr_32 = V_ADD_I32_e32 123, %1, implicit-def %vcc, implicit %exec
 name: fold_partially_defined_superreg
 tracksRegLiveness: true
 registers:
diff --git a/test/CodeGen/AMDGPU/zext-lid.ll b/test/CodeGen/AMDGPU/zext-lid.ll
index 066f29277270a..9a9c1fe755007 100644
--- a/test/CodeGen/AMDGPU/zext-lid.ll
+++ b/test/CodeGen/AMDGPU/zext-lid.ll
@@ -63,6 +63,26 @@ bb:
   ret void
 }
 
+; OPT-LABEL: @func_test_workitem_id_x_known_max_range(
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0
+define void @func_test_workitem_id_x_known_max_range(i32 addrspace(1)* nocapture %out) #0 {
+entry:
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %and = and i32 %id, 1023
+  store i32 %and, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; OPT-LABEL: @func_test_workitem_id_x_default_range(
+; OPT: tail call i32 @llvm.amdgcn.workitem.id.x(), !range !6
+define void @func_test_workitem_id_x_default_range(i32 addrspace(1)* nocapture %out) #4 {
+entry:
+  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %and = and i32 %id, 1023
+  store i32 %and, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #2
 
 declare i32 @llvm.amdgcn.workitem.id.y() #2
@@ -73,6 +93,7 @@ attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" }
 attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
 attributes #2 = { nounwind readnone speculatable }
 attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
 
 !0 = !{i32 32, i32 4, i32 1}
@@ -82,3 +103,4 @@ attributes #3 = { nounwind readnone }
 ; OPT: !3 = !{i32 0, i32 4}
 ; OPT: !4 = !{i32 0, i32 1}
 ; OPT: !5 = !{i32 0, i32 512}
+; OPT: !6 = !{i32 0, i32 1024}
diff --git a/test/CodeGen/ARM/ARMLoadStoreDBG.mir b/test/CodeGen/ARM/ARMLoadStoreDBG.mir
index cf5388ac1ccb9..1ff3bffd38772 100644
--- a/test/CodeGen/ARM/ARMLoadStoreDBG.mir
+++ b/test/CodeGen/ARM/ARMLoadStoreDBG.mir
@@ -114,8 +114,8 @@ frameInfo:
   hasVAStart: false
   hasMustTailInVarArgFunc: false
 stack:
-  - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '%lr' }
-  - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '%r7' }
+  - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '%lr', callee-saved-restored: false }
+  - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '%r7', callee-saved-restored: true }
 body: |
   bb.0.entry:
     liveins: %r0, %r1, %r2, %r3, %lr, %r7
diff --git a/test/CodeGen/ARM/GlobalISel/arm-call-lowering.ll b/test/CodeGen/ARM/GlobalISel/arm-call-lowering.ll
new file mode 100644
index 0000000000000..c1dd9276ddd84
--- /dev/null
+++ b/test/CodeGen/ARM/GlobalISel/arm-call-lowering.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple arm-unknown -mattr=-v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=CHECK,NOV4T
+; RUN: llc -mtriple arm-unknown -mattr=+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=CHECK,V4T
+; RUN: llc -mtriple arm-unknown -mattr=+v5t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=CHECK,V5T
+
+define arm_aapcscc void @test_indirect_call(void() *%fptr) {
+; CHECK-LABEL: name: test_indirect_call
+; V5T: %[[FPTR:[0-9]+]]:gpr(p0) = COPY %r0
+; V4T: %[[FPTR:[0-9]+]]:tgpr(p0) = COPY %r0
+; NOV4T: %[[FPTR:[0-9]+]]:tgpr(p0) = COPY %r0
+; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp
+; V5T: BLX %[[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp
+; V4T: BX_CALL %[[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp
+; NOV4T: BMOVPCRX_CALL %[[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp
+; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp
+entry:
+  notail call arm_aapcscc void %fptr()
+  ret void
+}
+
+declare arm_aapcscc void @call_target()
+
+define arm_aapcscc void @test_direct_call() {
+; CHECK-LABEL: name: test_direct_call
+; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: BL @call_target, csr_aapcs, implicit-def %lr, implicit %sp
+; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp
+entry:
+  notail call arm_aapcscc void @call_target()
+  ret void
+}
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir
index 6c8bc7123a1ab..e2b6f878e6bf2 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-cmp.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -O0 -mtriple arm-- -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
 --- |
   define void @test_icmp_eq_s32() { ret void }
@@ -53,11 +54,9 @@
 ...
 ---
 name: test_icmp_eq_s32
-# CHECK-LABEL: name: test_icmp_eq_s32
 legalized: true
 regBankSelected: true
 selected: false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: gprb }
@@ -67,33 +66,27 @@ body: |
   bb.0:
     liveins: %r0, %r1
 
+    ; CHECK-LABEL: name: test_icmp_eq_s32
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1
+    ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _
+    ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr
+    ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 0, %cpsr
+    ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _
+    ; CHECK: %r0 = COPY [[ANDri]]
+    ; CHECK: BX_RET 14, _, implicit %r0
     %0(s32) = COPY %r0
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
-
     %1(s32) = COPY %r1
-    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
-
     %2(s1) = G_ICMP intpred(eq), %0(s32), %1
-    ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _
-    ; CHECK-NEXT: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr
-    ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 0, %cpsr
-
     %3(s32) = G_ZEXT %2(s1)
-    ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _
-
     %r0 = COPY %3(s32)
-    ; CHECK: %r0 = COPY [[RET]]
-
     BX_RET 14, _, implicit %r0
-    ; CHECK: BX_RET 14, _, implicit %r0
 
 ...
 ---
 name: test_icmp_ne_s32
-# CHECK-LABEL: name: test_icmp_ne_s32
 legalized: true
 regBankSelected: true
 selected: false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: gprb }
@@ -103,33 +96,27 @@ body: |
   bb.0:
     liveins: %r0, %r1
 
+    ; CHECK-LABEL: name: test_icmp_ne_s32
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1
+    ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _
+    ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr
+    ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 1, %cpsr
+    ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _
+    ; CHECK: %r0 = COPY [[ANDri]]
+    ; CHECK: BX_RET 14, _, implicit %r0
    %0(s32) = COPY %r0
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
-
     %1(s32) = COPY %r1
-    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
-
     %2(s1) = G_ICMP intpred(ne), %0(s32), %1
-    ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _
-    ; CHECK-NEXT: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr
-    ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 1, %cpsr
-
     %3(s32) = G_ZEXT %2(s1)
-    ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _
-
     %r0 = COPY %3(s32)
-    ; CHECK: %r0 = COPY [[RET]]
-
     BX_RET 14, _, implicit %r0
-    ; CHECK: BX_RET 14, _, implicit %r0
 
 ...
 ---
 name: test_icmp_ugt_s32
-# CHECK-LABEL: name: test_icmp_ugt_s32
 legalized: true
 regBankSelected: true
 selected: false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: gprb }
@@ -139,33 +126,27 @@ body: |
   bb.0:
     liveins: %r0, %r1
 
+    ; CHECK-LABEL: name: test_icmp_ugt_s32
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1
+    ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _
+    ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr
+    ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 8, %cpsr
+    ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _
+    ; CHECK: %r0 = COPY [[ANDri]]
+    ; CHECK: BX_RET 14, _, implicit %r0
     %0(s32) = COPY %r0
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
-
     %1(s32) = COPY %r1
-    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
-
     %2(s1) = G_ICMP intpred(ugt), %0(s32), %1
-    ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _
-    ; CHECK-NEXT: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr
-    ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 8, %cpsr
-
     %3(s32) = G_ZEXT %2(s1)
-    ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _
-
     %r0 = COPY %3(s32)
-    ; CHECK: %r0 = COPY [[RET]]
-
     BX_RET 14, _, implicit %r0
-    ; CHECK: BX_RET 14, _, implicit %r0
 
 ...
 ---
 name: test_icmp_uge_s32
-# CHECK-LABEL: name: test_icmp_uge_s32
 legalized: true
 regBankSelected: true
 selected: false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: gprb }
@@ -175,33 +156,27 @@ body: |
   bb.0:
     liveins: %r0, %r1
 
+    ; CHECK-LABEL: name: test_icmp_uge_s32
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1
+    ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _
+    ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr
+    ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 2, %cpsr
+    ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _
+    ; CHECK: %r0 = COPY [[ANDri]]
+    ; CHECK: BX_RET 14, _, implicit %r0
     %0(s32) = COPY %r0
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
-
     %1(s32) = COPY %r1
-    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
-
     %2(s1) = G_ICMP intpred(uge), %0(s32), %1
-    ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _
-    ; CHECK-NEXT: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr
-    ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 2, %cpsr
-
     %3(s32) = G_ZEXT %2(s1)
-    ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _
-
     %r0 = COPY %3(s32)
-    ; CHECK: %r0 = COPY [[RET]]
-
     BX_RET 14, _, implicit %r0
-    ; CHECK: BX_RET 14, _, implicit %r0
 
 ...
 ---
 name: test_icmp_ult_s32
-# CHECK-LABEL: name: test_icmp_ult_s32
 legalized: true
 regBankSelected: true
 selected: false
-# CHECK: selected: true
 registers:
   - { id: 0, class: gprb }
   - { id: 1, class: gprb }
@@ -211,33 +186,27 @@ body: |
   bb.0:
     liveins: %r0, %r1
 
+    ; CHECK-LABEL: name: test_icmp_ult_s32
+    ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1
+    ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _
+    ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr
+    ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 3, %cpsr
+    ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _
+    ; CHECK: %r0 = COPY [[ANDri]]
+    ; CHECK: BX_RET 14, _, implicit %r0
     %0(s32) = COPY %r0
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
-
     %1(s32) = COPY %r1
-    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
-
     %2(s1) = G_ICMP intpred(ult), %0(s32), %1
-    ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _
-    ; CHECK-NEXT: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr
-    ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 3, %cpsr
-
     %3(s32) = G_ZEXT %2(s1)
-    ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _
-
     %r0 = COPY %3(s32)
-    ; CHECK: %r0 = COPY [[RET]]
-
     BX_RET 14, _, implicit %r0
-    ; CHECK: BX_RET 14, _, implicit %r0
 
 ...
--- name: test_icmp_ule_s32 -# CHECK-LABEL: name: test_icmp_ule_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: gprb } @@ -247,33 +216,27 @@ body: | bb.0: liveins: %r0, %r1 + ; CHECK-LABEL: name: test_icmp_ule_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 9, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - %2(s1) = G_ICMP intpred(ule), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 9, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_icmp_sgt_s32 -# CHECK-LABEL: name: test_icmp_sgt_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: gprb } @@ -283,33 +246,27 @@ body: | bb.0: liveins: %r0, %r1 + ; CHECK-LABEL: name: test_icmp_sgt_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 12, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - %2(s1) = G_ICMP intpred(sgt), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 12, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_icmp_sge_s32 -# CHECK-LABEL: name: test_icmp_sge_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: gprb } @@ -319,33 +276,27 @@ body: | bb.0: liveins: %r0, %r1 + ; CHECK-LABEL: name: test_icmp_sge_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 10, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - %2(s1) = G_ICMP intpred(sge), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 10, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_icmp_slt_s32 -# CHECK-LABEL: name: test_icmp_slt_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: gprb } @@ -355,33 +306,27 @@ body: | bb.0: liveins: %r0, %r1 + ; CHECK-LABEL: name: test_icmp_slt_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 11, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - %2(s1) = G_ICMP intpred(slt), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 11, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_icmp_sle_s32 -# CHECK-LABEL: name: test_icmp_sle_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: gprb } - { id: 1, class: gprb } @@ -391,33 +336,27 @@ body: | bb.0: liveins: %r0, %r1 + ; CHECK-LABEL: name: test_icmp_sle_s32 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY %r0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY %r1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: CMPrr [[COPY]], [[COPY1]], 14, _, implicit-def %cpsr + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 13, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - %2(s1) = G_ICMP intpred(sle), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: CMPrr [[VREGX]], [[VREGY]], 14, _, implicit-def %cpsr - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 13, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_true_s32 -# CHECK-LABEL: name: test_fcmp_true_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -427,28 +366,23 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_true_s32 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 - %2(s1) = G_FCMP floatpred(true), %0(s32), %1 - ; CHECK: [[RES:%[0-9]+]] = MOVi 1, 14, _, _ - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_false_s32 -# CHECK-LABEL: name: test_fcmp_false_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -458,28 +392,23 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_false_s32 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 %1(s32) = COPY %s1 - %2(s1) = G_FCMP floatpred(false), %0(s32), %1 - ; CHECK: [[RES:%[0-9]+]] = MOVi 0, 14, _, _ - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
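[Editor's note] The same materialization idiom repeats in every case above: an unconditional `MOVi 0`, a predicated `MOVCCi ..., 1, <cc>, %cpsr` that overwrites the zero when the condition holds, and an `ANDri ..., 1` implementing the `G_ZEXT` of the `s1` result. `floatpred(true)` and `floatpred(false)` need no compare at all and fold to `MOVi 1` / `MOVi 0`. A plain C++ sketch of what the selected sequence computes — an illustration under those assumptions, not selector code:

```cpp
#include <cstdio>

// What the selected ARM sequence computes for one compare, given the
// flags-derived condition `condHolds`.
static unsigned materializeBool(bool condHolds) {
  unsigned R = 0;       // MOVi 0, 14, _, _   (14 = AL, unconditional)
  if (condHolds) R = 1; // MOVCCi R, 1, <cc>, %cpsr
  return R & 1;         // ANDri R, 1, ...    (G_ZEXT of the s1 result)
}

int main() {
  std::printf("%u %u\n", materializeBool(true), materializeBool(false));
  return 0;
}
```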
--- name: test_fcmp_oeq_s32 -# CHECK-LABEL: name: test_fcmp_oeq_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -489,34 +418,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_oeq_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 0, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(oeq), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 0, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_ogt_s32 -# CHECK-LABEL: name: test_fcmp_ogt_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -526,34 +449,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_ogt_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 12, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(ogt), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 12, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_oge_s32 -# CHECK-LABEL: name: test_fcmp_oge_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -563,34 +480,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_oge_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 10, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(oge), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 10, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_olt_s32 -# CHECK-LABEL: name: test_fcmp_olt_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -600,34 +511,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_olt_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 4, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(olt), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 4, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_ole_s32 -# CHECK-LABEL: name: test_fcmp_ole_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -637,34 +542,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_ole_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 9, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(ole), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 9, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_ord_s32 -# CHECK-LABEL: name: test_fcmp_ord_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -674,34 +573,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_ord_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 7, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(ord), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 7, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_ugt_s32 -# CHECK-LABEL: name: test_fcmp_ugt_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -711,34 +604,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_ugt_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 8, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(ugt), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 8, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_uge_s32 -# CHECK-LABEL: name: test_fcmp_uge_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -748,34 +635,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_uge_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 5, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(uge), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 5, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_ult_s32 -# CHECK-LABEL: name: test_fcmp_ult_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -785,34 +666,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_ult_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 11, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(ult), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 11, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_ule_s32 -# CHECK-LABEL: name: test_fcmp_ule_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -822,34 +697,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_ule_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 13, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(ule), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 13, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_une_s32 -# CHECK-LABEL: name: test_fcmp_une_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -859,34 +728,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_une_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 1, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(une), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 1, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_uno_s32 -# CHECK-LABEL: name: test_fcmp_uno_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -896,34 +759,28 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_uno_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 6, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(uno), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 6, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
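[Editor's note] Taken together, the single-compare floating-point cases above pin down one ARM condition per predicate, evaluated on the flags that `FMSTAT` copies from `%fpscr_nzcv` into `%cpsr`. The table below is read directly off the `MOVCCi` operands in these tests (illustrative C++, same assumed ARMCC numbering as before):

```cpp
#include <cstdio>

int main() {
  const struct { const char *Pred; int CC; const char *Name; } FCmp[] = {
      {"oeq", 0, "EQ"},  {"ogt", 12, "GT"}, {"oge", 10, "GE"},
      {"olt", 4, "MI"},  {"ole", 9, "LS"},  {"ord", 7, "VC"},
      {"ugt", 8, "HI"},  {"uge", 5, "PL"},  {"ult", 11, "LT"},
      {"ule", 13, "LE"}, {"une", 1, "NE"},  {"uno", 6, "VS"}};
  for (const auto &E : FCmp)
    std::printf("floatpred(%s) -> %2d (%s)\n", E.Pred, E.CC, E.Name);
  return 0;
}
```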
--- name: test_fcmp_one_s32 -# CHECK-LABEL: name: test_fcmp_one_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -933,37 +790,31 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_one_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 12, %cpsr + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi1:%[0-9]+]]:gpr = MOVCCi [[MOVCCi]], 1, 4, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(one), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES1:%[0-9]+]] = MOVCCi [[ZERO]], 1, 12, %cpsr - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[RES1]], 1, 4, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_ueq_s32 -# CHECK-LABEL: name: test_fcmp_ueq_s32 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -973,37 +824,31 @@ body: | bb.0: liveins: %s0, %s1 + ; CHECK-LABEL: name: test_fcmp_ueq_s32 + ; CHECK: [[COPY:%[0-9]+]]:spr = COPY %s0 + ; CHECK: [[COPY1:%[0-9]+]]:spr = COPY %s1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 0, %cpsr + ; CHECK: VCMPS [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi1:%[0-9]+]]:gpr = MOVCCi [[MOVCCi]], 1, 6, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %s0 - %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 - %2(s1) = G_FCMP floatpred(ueq), %0(s32), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES1:%[0-9]+]] = MOVCCi [[ZERO]], 1, 0, %cpsr - ; CHECK-NEXT: VCMPS [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[RES1]], 1, 6, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_true_s64 -# CHECK-LABEL: name: test_fcmp_true_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1013,28 +858,23 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_true_s64 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 1, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 - %2(s1) = G_FCMP floatpred(true), %0(s64), %1 - ; CHECK: [[RES:%[0-9]+]] = MOVi 1, 14, _, _ - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_false_s64 -# CHECK-LABEL: name: test_fcmp_false_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1044,28 +884,23 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_false_s64 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 %1(s64) = COPY %d1 - %2(s1) = G_FCMP floatpred(false), %0(s64), %1 - ; CHECK: [[RES:%[0-9]+]] = MOVi 0, 14, _, _ - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
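[Editor's note] `floatpred(one)` and `floatpred(ueq)` are the outliers: no single ARM condition covers them, so the tests expect the compare to be issued twice with a second predicated `MOVCCi` folding in the extra condition — effectively `one = GT || MI` (codes 12, then 4) and `ueq = EQ || VS` (codes 0, then 6). A sketch of the computed result, under the same illustrative conventions:

```cpp
#include <cstdio>

// Two predicated moves OR two conditions into one boolean result.
static unsigned selectTwoConds(bool c1, bool c2) {
  unsigned R = 0; // MOVi 0
  if (c1) R = 1;  // first  VCMP/FMSTAT + MOVCCi R, 1, <cc1>, %cpsr
  if (c2) R = 1;  // second VCMP/FMSTAT + MOVCCi R, 1, <cc2>, %cpsr
  return R & 1;   // ANDri for the G_ZEXT
}

int main() {
  // e.g. floatpred(one): true iff GT or MI holds after the FP compare.
  std::printf("%u\n", selectTwoConds(false, true)); // prints 1
  return 0;
}
```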
--- name: test_fcmp_oeq_s64 -# CHECK-LABEL: name: test_fcmp_oeq_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1075,34 +910,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_oeq_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 0, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(oeq), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 0, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_ogt_s64 -# CHECK-LABEL: name: test_fcmp_ogt_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1112,34 +941,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_ogt_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 12, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(ogt), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 12, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_oge_s64 -# CHECK-LABEL: name: test_fcmp_oge_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1149,34 +972,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_oge_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 10, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(oge), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 10, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_olt_s64 -# CHECK-LABEL: name: test_fcmp_olt_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1186,34 +1003,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_olt_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 4, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(olt), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 4, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_ole_s64 -# CHECK-LABEL: name: test_fcmp_ole_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1223,34 +1034,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_ole_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 9, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(ole), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 9, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_ord_s64 -# CHECK-LABEL: name: test_fcmp_ord_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1260,34 +1065,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_ord_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 7, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(ord), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 7, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_ugt_s64 -# CHECK-LABEL: name: test_fcmp_ugt_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1297,34 +1096,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_ugt_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 8, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(ugt), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 8, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_uge_s64 -# CHECK-LABEL: name: test_fcmp_uge_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1334,34 +1127,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_uge_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 5, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(uge), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 5, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_ult_s64 -# CHECK-LABEL: name: test_fcmp_ult_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1371,34 +1158,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_ult_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 11, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(ult), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 11, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_ule_s64 -# CHECK-LABEL: name: test_fcmp_ule_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1408,34 +1189,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_ule_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 13, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(ule), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 13, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_une_s64 -# CHECK-LABEL: name: test_fcmp_une_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1445,34 +1220,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_une_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 1, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(une), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 1, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... --- name: test_fcmp_uno_s64 -# CHECK-LABEL: name: test_fcmp_uno_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1482,34 +1251,28 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_uno_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 6, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(uno), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[ZERO]], 1, 6, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_one_s64 -# CHECK-LABEL: name: test_fcmp_one_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1519,37 +1282,31 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_one_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 12, %cpsr + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi1:%[0-9]+]]:gpr = MOVCCi [[MOVCCi]], 1, 4, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(one), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES1:%[0-9]+]] = MOVCCi [[ZERO]], 1, 12, %cpsr - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[RES1]], 1, 4, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- name: test_fcmp_ueq_s64 -# CHECK-LABEL: name: test_fcmp_ueq_s64 legalized: true regBankSelected: true selected: false -# CHECK: selected: true registers: - { id: 0, class: fprb } - { id: 1, class: fprb } @@ -1559,27 +1316,23 @@ body: | bb.0: liveins: %d0, %d1 + ; CHECK-LABEL: name: test_fcmp_ueq_s64 + ; CHECK: [[COPY:%[0-9]+]]:dpr = COPY %d0 + ; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY %d1 + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14, _, _ + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi:%[0-9]+]]:gpr = MOVCCi [[MOVi]], 1, 0, %cpsr + ; CHECK: VCMPD [[COPY]], [[COPY1]], 14, _, implicit-def %fpscr_nzcv + ; CHECK: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv + ; CHECK: [[MOVCCi1:%[0-9]+]]:gpr = MOVCCi [[MOVCCi]], 1, 6, %cpsr + ; CHECK: [[ANDri:%[0-9]+]]:gpr = ANDri [[MOVCCi1]], 1, 14, _, _ + ; CHECK: %r0 = COPY [[ANDri]] + ; CHECK: BX_RET 14, _, implicit %r0 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 - %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 - %2(s1) = G_FCMP floatpred(ueq), %0(s64), %1 - ; CHECK: [[ZERO:%[0-9]+]] = MOVi 0, 14, _, _ - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES1:%[0-9]+]] = MOVCCi [[ZERO]], 1, 0, %cpsr - ; CHECK-NEXT: VCMPD [[VREGX]], [[VREGY]], 14, _, implicit-def %fpscr_nzcv - ; CHECK-NEXT: FMSTAT 14, _, implicit-def %cpsr, implicit %fpscr_nzcv - ; CHECK-NEXT: [[RES:%[0-9]+]] = MOVCCi [[RES1]], 1, 6, %cpsr - %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[RET:%[0-9]+]] = ANDri [[RES]], 1, 14, _, _ - %r0 = COPY %3(s32) - ; CHECK: %r0 = COPY [[RET]] - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 ... 
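[Editor's note] Before the next file, a note on the rewritten CHECK syntax used throughout: a pattern such as `[[MOVCCi:%[0-9]+]]:gpr = MOVCCi ...` makes FileCheck capture the virtual-register token into a variable while matching the printed `:gpr` register class literally — the class annotation on definitions being the new MIR-printer output these tests were regenerated against. A rough `std::regex` analogue of one such check (illustration only; the tests use FileCheck's variable syntax, not raw regexes):

```cpp
#include <cstdio>
#include <regex>
#include <string>

int main() {
  std::string Line = "%3:gpr = MOVCCi %2, 1, 1, %cpsr";
  // "(%[0-9]+)" plays the role of the [[MOVCCi:%[0-9]+]] capture;
  // ":gpr" must match the printed register class exactly.
  std::regex Re("(%[0-9]+):gpr = MOVCCi .*");
  std::smatch M;
  if (std::regex_match(Line, M, Re))
    std::printf("captured vreg: %s\n", M.str(1).c_str()); // -> %3
  return 0;
}
```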
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
index d7f208d4cf595..d96463f00c7bb 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
@@ -30,13 +30,13 @@ body: |
     %0(s32) = COPY %r0
     %1(s32) = COPY %r1
     %2(s32) = COPY %r2
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
-    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
-    ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+    ; CHECK: [[VREGX:%[0-9]+]]:gprnopc = COPY %r0
+    ; CHECK: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1
+    ; CHECK: [[VREGZ:%[0-9]+]]:gprnopc = COPY %r2

     %3(s32) = G_MUL %0, %1
     %4(s32) = G_ADD %3, %2
-    ; CHECK: [[VREGR:%[0-9]+]] = MLA [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _
+    ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = MLA [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _

     %r0 = COPY %4(s32)
     ; CHECK: %r0 = COPY [[VREGR]]
@@ -64,13 +64,13 @@ body: |
     %0(s32) = COPY %r0
     %1(s32) = COPY %r1
     %2(s32) = COPY %r2
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
-    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
-    ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+    ; CHECK: [[VREGX:%[0-9]+]]:gprnopc = COPY %r0
+    ; CHECK: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1
+    ; CHECK: [[VREGZ:%[0-9]+]]:gprnopc = COPY %r2

     %3(s32) = G_MUL %0, %1
     %4(s32) = G_ADD %3, %2
-    ; CHECK: [[VREGR:%[0-9]+]] = MLAv5 [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _
+    ; CHECK: [[VREGR:%[0-9]+]]:gprnopc = MLAv5 [[VREGX]], [[VREGY]], [[VREGZ]], 14, _, _

     %r0 = COPY %4(s32)
     ; CHECK: %r0 = COPY [[VREGR]]
@@ -98,13 +98,13 @@ body: |
     %0(s32) = COPY %r0
     %1(s32) = COPY %r1
     %2(s32) = COPY %r2
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
-    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
-    ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+    ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0
+    ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1
+    ; CHECK: [[VREGZ:%[0-9]+]]:gpr = COPY %r2

     %3(s32) = G_MUL %0, %1
     %4(s32) = G_SUB %2, %3
-    ; CHECK: [[VREGR:%[0-9]+]] = MLS [[VREGX]], [[VREGY]], [[VREGZ]], 14, _
+    ; CHECK: [[VREGR:%[0-9]+]]:gpr = MLS [[VREGX]], [[VREGY]], [[VREGZ]], 14, _

     %r0 = COPY %4(s32)
     ; CHECK: %r0 = COPY [[VREGR]]
@@ -132,14 +132,14 @@ body: |
     %0(s32) = COPY %r0
     %1(s32) = COPY %r1
     %2(s32) = COPY %r2
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
-    ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1
-    ; CHECK: [[VREGZ:%[0-9]+]] = COPY %r2
+    ; CHECK: [[VREGX:%[0-9]+]]:gprnopc = COPY %r0
+    ; CHECK: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1
+    ; CHECK: [[VREGZ:%[0-9]+]]:gpr = COPY %r2

     %3(s32) = G_MUL %0, %1
     %4(s32) = G_SUB %2, %3
-    ; CHECK: [[VREGM:%[0-9]+]] = MULv5 [[VREGX]], [[VREGY]], 14, _, _
-    ; CHECK: [[VREGR:%[0-9]+]] = SUBrr [[VREGZ]], [[VREGM]], 14, _, _
+    ; CHECK: [[VREGM:%[0-9]+]]:gprnopc = MULv5 [[VREGX]], [[VREGY]], 14, _, _
+    ; CHECK: [[VREGR:%[0-9]+]]:gpr = SUBrr [[VREGZ]], [[VREGM]], 14, _, _

     %r0 = COPY %4(s32)
     ; CHECK: %r0 = COPY [[VREGR]]
diff --git a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
index 0e3ef479bc3c8..3e78f459f45fe 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
+++ b/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir
@@ -1,27 +1,19 @@
 # RUN: llc -O0 -mtriple arm-- -global-isel -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s

 --- |
-  define void @test_zext_s1() { ret void }
-  define void @test_sext_s1() { ret void }
-  define void @test_sext_s8() { ret void }
-  define void @test_zext_s16() { ret void }
-  define void @test_anyext_s8() { ret void }
-  define void @test_anyext_s16() { ret void }
+  define void @test_trunc_and_zext_s1() { ret void }
+  define void @test_trunc_and_sext_s1() { ret void }
+  define void @test_trunc_and_sext_s8() { ret void }
+  define void @test_trunc_and_zext_s16() { ret void }
+  define void @test_trunc_and_anyext_s8() { ret void }
+  define void @test_trunc_and_anyext_s16() { ret void }

-  define void @test_trunc_s32_16() { ret void }
-
-  define void @test_add_s8() { ret void }
-  define void @test_add_s16() { ret void }
   define void @test_add_s32() { ret void }

   define void @test_fadd_s32() #0 { ret void }
   define void @test_fadd_s64() #0 { ret void }

-  define void @test_sub_s8() { ret void }
-  define void @test_sub_s16() { ret void }
   define void @test_sub_s32() { ret void }

-  define void @test_mul_s8() #1 { ret void }
-  define void @test_mul_s16() #1 { ret void }
   define void @test_mul_s32() #1 { ret void }
   define void @test_mulv5_s32() { ret void }
@@ -32,6 +24,10 @@
   define void @test_or_s32() { ret void }
   define void @test_xor_s32() { ret void }

+  define void @test_lshr_s32() { ret void }
+  define void @test_ashr_s32() { ret void }
+  define void @test_shl_s32() { ret void }
+
   define void @test_load_from_stack() { ret void }
   define void @test_load_f32() #0 { ret void }
   define void @test_load_f64() #0 { ret void }
@@ -54,34 +50,8 @@
   attributes #2 = { "target-features"="+hwdiv-arm" }
 ...
 ---
-name: test_zext_s1
-# CHECK-LABEL: name: test_zext_s1
-legalized: true
-regBankSelected: true
-selected: false
-# CHECK: selected: true
-registers:
-  - { id: 0, class: gprb }
-  - { id: 1, class: gprb }
-body: |
-  bb.0:
-    liveins: %r0
-
-    %0(s1) = COPY %r0
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
-
-    %1(s32) = G_ZEXT %0(s1)
-    ; CHECK: [[VREGEXT:%[0-9]+]] = ANDri [[VREGX]], 1, 14, _, _
-
-    %r0 = COPY %1(s32)
-    ; CHECK: %r0 = COPY [[VREGEXT]]
-
-    BX_RET 14, _, implicit %r0
-    ; CHECK: BX_RET 14, _, implicit %r0
-...
----
-name: test_sext_s1
-# CHECK-LABEL: name: test_sext_s1
+name: test_trunc_and_zext_s1
+# CHECK-LABEL: name: test_trunc_and_zext_s1
 legalized: true
 regBankSelected: true
 selected: false
@@ -94,48 +64,24 @@ body: |
   bb.0:
     liveins: %r0

-    %0(s1) = COPY %r0
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
-
-    %1(s32) = G_SEXT %0(s1)
-    ; CHECK: [[VREGAND:%[0-9]+]] = ANDri [[VREGX]], 1, 14, _, _
-    ; CHECK: [[VREGEXT:%[0-9]+]] = RSBri [[VREGAND]], 0, 14, _, _
-
-    %r0 = COPY %1(s32)
-    ; CHECK: %r0 = COPY [[VREGEXT]]
-
-    BX_RET 14, _, implicit %r0
-    ; CHECK: BX_RET 14, _, implicit %r0
-...
----
-name: test_sext_s8
-# CHECK-LABEL: name: test_sext_s8
-legalized: true
-regBankSelected: true
-selected: false
-# CHECK: selected: true
-registers:
-  - { id: 0, class: gprb }
-  - { id: 1, class: gprb }
-body: |
-  bb.0:
-    liveins: %r0
+    %0(s32) = COPY %r0
+    ; CHECK: [[VREG:%[0-9]+]]:gpr = COPY %r0

-    %0(s8) = COPY %r0
-    ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0
+    %1(s1) = G_TRUNC %0(s32)
+    ; CHECK: [[VREGTRUNC:%[0-9]+]]:gpr = COPY [[VREG]]

-    %1(s32) = G_SEXT %0(s8)
-    ; CHECK: [[VREGEXT:%[0-9]+]] = SXTB [[VREGX]], 0, 14, _
+    %2(s32) = G_ZEXT %1(s1)
+    ; CHECK: [[VREGEXT:%[0-9]+]]:gpr = ANDri [[VREGTRUNC]], 1, 14, _, _

-    %r0 = COPY %1(s32)
+    %r0 = COPY %2(s32)
     ; CHECK: %r0 = COPY [[VREGEXT]]

     BX_RET 14, _, implicit %r0
     ; CHECK: BX_RET 14, _, implicit %r0
 ...
--- -name: test_zext_s16 -# CHECK-LABEL: name: test_zext_s16 +name: test_trunc_and_sext_s1 +# CHECK-LABEL: name: test_trunc_and_sext_s1 legalized: true regBankSelected: true selected: false @@ -143,51 +89,30 @@ selected: false registers: - { id: 0, class: gprb } - { id: 1, class: gprb } + - { id: 2, class: gprb } body: | bb.0: liveins: %r0 - %0(s16) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - - %1(s32) = G_ZEXT %0(s16) - ; CHECK: [[VREGEXT:%[0-9]+]] = UXTH [[VREGX]], 0, 14, _ - - %r0 = COPY %1(s32) - ; CHECK: %r0 = COPY [[VREGEXT]] - - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 -... ---- -name: test_anyext_s8 -# CHECK-LABEL: name: test_anyext_s8 -legalized: true -regBankSelected: true -selected: false -# CHECK: selected: true -registers: - - { id: 0, class: gprb } - - { id: 1, class: gprb } -body: | - bb.0: - liveins: %r0 + %0(s32) = COPY %r0 + ; CHECK: [[VREG:%[0-9]+]]:gpr = COPY %r0 - %0(s8) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + %1(s1) = G_TRUNC %0(s32) + ; CHECK: [[VREGTRUNC:%[0-9]+]]:gpr = COPY [[VREG]] - %1(s32) = G_ANYEXT %0(s8) - ; CHECK: [[VREGEXT:%[0-9]+]] = COPY [[VREGX]] + %2(s32) = G_SEXT %1(s1) + ; CHECK: [[VREGAND:%[0-9]+]]:gpr = ANDri [[VREGTRUNC]], 1, 14, _, _ + ; CHECK: [[VREGEXT:%[0-9]+]]:gpr = RSBri [[VREGAND]], 0, 14, _, _ - %r0 = COPY %1(s32) + %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGEXT]] BX_RET 14, _, implicit %r0 ; CHECK: BX_RET 14, _, implicit %r0 ... --- -name: test_anyext_s16 -# CHECK-LABEL: name: test_anyext_s16 +name: test_trunc_and_sext_s8 +# CHECK-LABEL: name: test_trunc_and_sext_s8 legalized: true regBankSelected: true selected: false @@ -195,25 +120,29 @@ selected: false registers: - { id: 0, class: gprb } - { id: 1, class: gprb } + - { id: 2, class: gprb } body: | bb.0: liveins: %r0 - %0(s16) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + %0(s32) = COPY %r0 + ; CHECK: [[VREG:%[0-9]+]]:gpr = COPY %r0 + + %1(s8) = G_TRUNC %0(s32) + ; CHECK: [[VREGTRUNC:%[0-9]+]]:gprnopc = COPY [[VREG]] - %1(s32) = G_ANYEXT %0(s16) - ; CHECK: [[VREGEXT:%[0-9]+]] = COPY [[VREGX]] + %2(s32) = G_SEXT %1(s8) + ; CHECK: [[VREGEXT:%[0-9]+]]:gprnopc = SXTB [[VREGTRUNC]], 0, 14, _ - %r0 = COPY %1(s32) + %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGEXT]] BX_RET 14, _, implicit %r0 ; CHECK: BX_RET 14, _, implicit %r0 ... --- -name: test_trunc_s32_16 -# CHECK-LABEL: name: test_trunc_s32_16 +name: test_trunc_and_zext_s16 +# CHECK-LABEL: name: test_trunc_and_zext_s16 legalized: true regBankSelected: true selected: false @@ -221,27 +150,29 @@ selected: false registers: - { id: 0, class: gprb } - { id: 1, class: gprb } -# CHECK-DAG: id: 0, class: gpr -# CHECK-DAG: id: 1, class: gpr + - { id: 2, class: gprb } body: | bb.0: liveins: %r0 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREG:%[0-9]+]]:gpr = COPY %r0 %1(s16) = G_TRUNC %0(s32) - ; CHECK: [[VREGTRUNC:%[0-9]+]] = COPY [[VREGX]] + ; CHECK: [[VREGTRUNC:%[0-9]+]]:gprnopc = COPY [[VREG]] - %r0 = COPY %1(s16) - ; CHECK: %r0 = COPY [[VREGTRUNC]] + %2(s32) = G_ZEXT %1(s16) + ; CHECK: [[VREGEXT:%[0-9]+]]:gprnopc = UXTH [[VREGTRUNC]], 0, 14, _ + + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGEXT]] BX_RET 14, _, implicit %r0 ; CHECK: BX_RET 14, _, implicit %r0 ... 
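# NOTE: The three tests above pin down how each narrow extension is selected.
# Sign-extending s1 needs no dedicated instruction: ANDri keeps only bit 0,
# and RSBri (reverse subtract) computes 0 - x, so 0 stays 0 and 1 becomes
# 0xFFFFFFFF. The wider types use the dedicated extend instructions instead
# (SXTB for sign-extending s8, UXTH for zero-extending s16). A sketch of the
# s1 case, with placeholder registers:
#
#     %2(s32) = G_SEXT %1(s1)
#     ; ANDri %1, 1, 14, _, _      ; mask the value down to bit 0
#     ; RSBri %masked, 0, 14, _, _ ; 0 - bit0: yields 0 or -1 (all ones)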
--- -name: test_add_s8 -# CHECK-LABEL: name: test_add_s8 +name: test_trunc_and_anyext_s8 +# CHECK-LABEL: name: test_trunc_and_anyext_s8 legalized: true regBankSelected: true selected: false @@ -250,46 +181,28 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } - - { id: 3, class: gprb } - - { id: 4, class: gprb } - - { id: 5, class: gprb } -# CHECK-DAG: id: 0, class: gpr -# CHECK-DAG: id: 1, class: gpr -# CHECK-DAG: id: 2, class: gpr -# CHECK-DAG: id: 3, class: gpr -# CHECK-DAG: id: 4, class: gpr -# CHECK-DAG: id: 5, class: gpr body: | bb.0: - liveins: %r0, %r1 - - %0(s8) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - - %1(s8) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - - %2(s32) = G_ANYEXT %0(s8) - ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] + liveins: %r0 - %3(s32) = G_ANYEXT %1(s8) - ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] + %0(s32) = COPY %r0 + ; CHECK: [[VREG:%[0-9]+]]:gpr = COPY %r0 - %4(s32) = G_ADD %2, %3 - ; CHECK: [[VREGSUM:%[0-9]+]] = ADDrr [[VREGXEXT]], [[VREGYEXT]], 14, _, _ + %1(s8) = G_TRUNC %0(s32) + ; CHECK: [[VREGTRUNC:%[0-9]+]]:gpr = COPY [[VREG]] - %5(s8) = G_TRUNC %4(s32) - ; CHECK: [[VREGSUMTR:%[0-9]+]] = COPY [[VREGSUM]] + %2(s32) = G_ANYEXT %1(s8) + ; CHECK: [[VREGEXT:%[0-9]+]]:gpr = COPY [[VREGTRUNC]] - %r0 = COPY %5(s8) - ; CHECK: %r0 = COPY [[VREGSUMTR]] + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGEXT]] BX_RET 14, _, implicit %r0 ; CHECK: BX_RET 14, _, implicit %r0 ... --- -name: test_add_s16 -# CHECK-LABEL: name: test_add_s16 +name: test_trunc_and_anyext_s16 +# CHECK-LABEL: name: test_trunc_and_anyext_s16 legalized: true regBankSelected: true selected: false @@ -298,39 +211,21 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } - - { id: 3, class: gprb } - - { id: 4, class: gprb } - - { id: 5, class: gprb } -# CHECK-DAG: id: 0, class: gpr -# CHECK-DAG: id: 1, class: gpr -# CHECK-DAG: id: 2, class: gpr -# CHECK-DAG: id: 3, class: gpr -# CHECK-DAG: id: 4, class: gpr -# CHECK-DAG: id: 5, class: gpr body: | bb.0: - liveins: %r0, %r1 - - %0(s16) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - - %1(s16) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - - %2(s32) = G_ANYEXT %0(s16) - ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] + liveins: %r0 - %3(s32) = G_ANYEXT %1(s16) - ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] + %0(s32) = COPY %r0 + ; CHECK: [[VREG:%[0-9]+]]:gpr = COPY %r0 - %4(s32) = G_ADD %2, %3 - ; CHECK: [[VREGSUM:%[0-9]+]] = ADDrr [[VREGXEXT]], [[VREGYEXT]], 14, _, _ + %1(s16) = G_TRUNC %0(s32) + ; CHECK: [[VREGTRUNC:%[0-9]+]]:gpr = COPY [[VREG]] - %5(s16) = G_TRUNC %4(s32) - ; CHECK: [[VREGSUMTR:%[0-9]+]] = COPY [[VREGSUM]] + %2(s32) = G_ANYEXT %1(s16) + ; CHECK: [[VREGEXT:%[0-9]+]]:gpr = COPY [[VREGTRUNC]] - %r0 = COPY %5(s16) - ; CHECK: %r0 = COPY [[VREGSUMTR]] + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGEXT]] BX_RET 14, _, implicit %r0 ; CHECK: BX_RET 14, _, implicit %r0 @@ -354,13 +249,13 @@ body: | liveins: %r0, %r1 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 %2(s32) = G_ADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]] = ADDrr [[VREGX]], [[VREGY]], 14, _, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:gpr = ADDrr [[VREGX]], [[VREGY]], 14, _, _ %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGSUM]] @@ -387,13 +282,13 @@ body: | liveins: %s0, %s1 %0(s32) = COPY %s0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY 
%s0 + ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY %s0 %1(s32) = COPY %s1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %s1 + ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY %s1 %2(s32) = G_FADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]] = VADDS [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VADDS [[VREGX]], [[VREGY]], 14, _ %s0 = COPY %2(s32) ; CHECK: %s0 = COPY [[VREGSUM]] @@ -420,13 +315,13 @@ body: | liveins: %d0, %d1 %0(s64) = COPY %d0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %d0 + ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY %d0 %1(s64) = COPY %d1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %d1 + ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY %d1 %2(s64) = G_FADD %0, %1 - ; CHECK: [[VREGSUM:%[0-9]+]] = VADDD [[VREGX]], [[VREGY]], 14, _ + ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VADDD [[VREGX]], [[VREGY]], 14, _ %d0 = COPY %2(s64) ; CHECK: %d0 = COPY [[VREGSUM]] @@ -435,56 +330,8 @@ body: | ; CHECK: BX_RET 14, _, implicit %d0 ... --- -name: test_sub_s8 -# CHECK-LABEL: name: test_sub_s8 -legalized: true -regBankSelected: true -selected: false -# CHECK: selected: true -registers: - - { id: 0, class: gprb } - - { id: 1, class: gprb } - - { id: 2, class: gprb } - - { id: 3, class: gprb } - - { id: 4, class: gprb } - - { id: 5, class: gprb } -# CHECK-DAG: id: 0, class: gpr -# CHECK-DAG: id: 1, class: gpr -# CHECK-DAG: id: 2, class: gpr -# CHECK-DAG: id: 3, class: gpr -# CHECK-DAG: id: 4, class: gpr -# CHECK-DAG: id: 5, class: gpr -body: | - bb.0: - liveins: %r0, %r1 - - %0(s8) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - - %1(s8) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - - %2(s32) = G_ANYEXT %0(s8) - ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] - - %3(s32) = G_ANYEXT %1(s8) - ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] - - %4(s32) = G_SUB %2, %3 - ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGXEXT]], [[VREGYEXT]], 14, _, _ - - %5(s8) = G_TRUNC %4(s32) - ; CHECK: [[VREGRESTR:%[0-9]+]] = COPY [[VREGRES]] - - %r0 = COPY %5(s8) - ; CHECK: %r0 = COPY [[VREGRESTR]] - - BX_RET 14, _, implicit %r0 - ; CHECK: BX_RET 14, _, implicit %r0 -... ---- -name: test_sub_s16 -# CHECK-LABEL: name: test_sub_s16 +name: test_sub_s32 +# CHECK-LABEL: name: test_sub_s32 legalized: true regBankSelected: true selected: false @@ -493,46 +340,28 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } - - { id: 3, class: gprb } - - { id: 4, class: gprb } - - { id: 5, class: gprb } -# CHECK-DAG: id: 0, class: gpr -# CHECK-DAG: id: 1, class: gpr -# CHECK-DAG: id: 2, class: gpr -# CHECK-DAG: id: 3, class: gpr -# CHECK-DAG: id: 4, class: gpr -# CHECK-DAG: id: 5, class: gpr body: | bb.0: liveins: %r0, %r1 - %0(s16) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - - %1(s16) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - - %2(s32) = G_ANYEXT %0(s16) - ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] - - %3(s32) = G_ANYEXT %1(s16) - ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 - %4(s32) = G_SUB %2, %3 - ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGXEXT]], [[VREGYEXT]], 14, _, _ + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 - %5(s16) = G_TRUNC %4(s32) - ; CHECK: [[VREGRESTR:%[0-9]+]] = COPY [[VREGRES]] + %2(s32) = G_SUB %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]]:gpr = SUBrr [[VREGX]], [[VREGY]], 14, _, _ - %r0 = COPY %5(s16) - ; CHECK: %r0 = COPY [[VREGRESTR]] + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] BX_RET 14, _, implicit %r0 ; CHECK: BX_RET 14, _, implicit %r0 ... 
--- -name: test_sub_s32 -# CHECK-LABEL: name: test_sub_s32 +name: test_mul_s32 +# CHECK-LABEL: name: test_mul_s32 legalized: true regBankSelected: true selected: false @@ -541,21 +370,18 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } -# CHECK: id: 0, class: gpr -# CHECK: id: 1, class: gpr -# CHECK: id: 2, class: gpr body: | bb.0: liveins: %r0, %r1 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gprnopc = COPY %r0 %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1 - %2(s32) = G_SUB %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]] = SUBrr [[VREGX]], [[VREGY]], 14, _, _ + %2(s32) = G_MUL %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MUL [[VREGX]], [[VREGY]], 14, _, _ %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] @@ -564,8 +390,8 @@ body: | ; CHECK: BX_RET 14, _, implicit %r0 ... --- -name: test_mul_s8 -# CHECK-LABEL: name: test_mul_s8 +name: test_mulv5_s32 +# CHECK-LABEL: name: test_mulv5_s32 legalized: true regBankSelected: true selected: false @@ -574,46 +400,28 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } - - { id: 3, class: gprb } - - { id: 4, class: gprb } - - { id: 5, class: gprb } -# CHECK-DAG: id: 0, class: gpr -# CHECK-DAG: id: 1, class: gpr -# CHECK-DAG: id: 2, class: gprnopc -# CHECK-DAG: id: 3, class: gprnopc -# CHECK-DAG: id: 4, class: gprnopc -# CHECK-DAG: id: 5, class: gpr body: | bb.0: liveins: %r0, %r1 - %0(s8) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - - %1(s8) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - - %2(s32) = G_ANYEXT %0(s8) - ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] - - %3(s32) = G_ANYEXT %1(s8) - ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gprnopc = COPY %r0 - %4(s32) = G_MUL %2, %3 - ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGXEXT]], [[VREGYEXT]], 14, _, _ + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gprnopc = COPY %r1 - %5(s8) = G_TRUNC %4(s32) - ; CHECK: [[VREGRESTR:%[0-9]+]] = COPY [[VREGRES]] + %2(s32) = G_MUL %0, %1 + ; CHECK: early-clobber [[VREGRES:%[0-9]+]]:gprnopc = MULv5 [[VREGX]], [[VREGY]], 14, _, _ - %r0 = COPY %5(s8) - ; CHECK: %r0 = COPY [[VREGRESTR]] + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] BX_RET 14, _, implicit %r0 ; CHECK: BX_RET 14, _, implicit %r0 ... 
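# NOTE: test_mulv5_s32 exercises the pre-ARMv6 multiply, where the destination
# register must not alias the first source register; the selector therefore
# emits MULv5 with an early-clobber def. Both forms use gprnopc operands,
# since PC is not a legal multiply operand. Sketch (placeholder registers):
#
#     %2(s32) = G_MUL %0, %1
#     ; pre-v6: early-clobber %2:gprnopc = MULv5 %0, %1, 14, _, _
#     ; v6+:    %2:gprnopc = MUL %0, %1, 14, _, _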
--- -name: test_mul_s16 -# CHECK-LABEL: name: test_mul_s16 +name: test_sdiv_s32 +# CHECK-LABEL: name: test_sdiv_s32 legalized: true regBankSelected: true selected: false @@ -622,46 +430,28 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } - - { id: 3, class: gprb } - - { id: 4, class: gprb } - - { id: 5, class: gprb } -# CHECK-DAG: id: 0, class: gpr -# CHECK-DAG: id: 1, class: gpr -# CHECK-DAG: id: 2, class: gprnopc -# CHECK-DAG: id: 3, class: gprnopc -# CHECK-DAG: id: 4, class: gprnopc -# CHECK-DAG: id: 5, class: gpr body: | bb.0: liveins: %r0, %r1 - %0(s16) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 - - %1(s16) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 - - %2(s32) = G_ANYEXT %0(s16) - ; CHECK: [[VREGXEXT:%[0-9]+]] = COPY [[VREGX]] - - %3(s32) = G_ANYEXT %1(s16) - ; CHECK: [[VREGYEXT:%[0-9]+]] = COPY [[VREGY]] + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 - %4(s32) = G_MUL %2, %3 - ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGXEXT]], [[VREGYEXT]], 14, _, _ + %1(s32) = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 - %5(s16) = G_TRUNC %4(s32) - ; CHECK: [[VREGRESTR:%[0-9]+]] = COPY [[VREGRES]] + %2(s32) = G_SDIV %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]]:gpr = SDIV [[VREGX]], [[VREGY]], 14, _ - %r0 = COPY %5(s16) - ; CHECK: %r0 = COPY [[VREGRESTR]] + %r0 = COPY %2(s32) + ; CHECK: %r0 = COPY [[VREGRES]] BX_RET 14, _, implicit %r0 ; CHECK: BX_RET 14, _, implicit %r0 ... --- -name: test_mul_s32 -# CHECK-LABEL: name: test_mul_s32 +name: test_udiv_s32 +# CHECK-LABEL: name: test_udiv_s32 legalized: true regBankSelected: true selected: false @@ -670,21 +460,18 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } -# CHECK: id: 0, class: gprnopc -# CHECK: id: 1, class: gprnopc -# CHECK: id: 2, class: gprnopc body: | bb.0: liveins: %r0, %r1 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 - %2(s32) = G_MUL %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]] = MUL [[VREGX]], [[VREGY]], 14, _, _ + %2(s32) = G_UDIV %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]]:gpr = UDIV [[VREGX]], [[VREGY]], 14, _ %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] @@ -693,8 +480,8 @@ body: | ; CHECK: BX_RET 14, _, implicit %r0 ... --- -name: test_mulv5_s32 -# CHECK-LABEL: name: test_mulv5_s32 +name: test_and_s32 +# CHECK-LABEL: name: test_and_s32 legalized: true regBankSelected: true selected: false @@ -703,21 +490,18 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } -# CHECK: id: 0, class: gprnopc -# CHECK: id: 1, class: gprnopc -# CHECK: id: 2, class: gprnopc body: | bb.0: liveins: %r0, %r1 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 - %2(s32) = G_MUL %0, %1 - ; CHECK: early-clobber [[VREGRES:%[0-9]+]] = MULv5 [[VREGX]], [[VREGY]], 14, _, _ + %2(s32) = G_AND %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]]:gpr = ANDrr [[VREGX]], [[VREGY]], 14, _ %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] @@ -726,8 +510,8 @@ body: | ; CHECK: BX_RET 14, _, implicit %r0 ... 
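# NOTE: The test_sdiv_s32/test_udiv_s32 hunks above only apply to subtargets
# with hardware division, which is what the "target-features"="+hwdiv-arm"
# attribute on these test functions provides. SDIV and UDIV also take just a
# predicate (14, _) and no flag-setting operand, which is why their CHECK
# lines are one operand shorter than the ones for MUL (14, _, _). Sketch:
#
#     %2(s32) = G_SDIV %0, %1   ; selects to: %2:gpr = SDIV %0, %1, 14, _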
--- -name: test_sdiv_s32 -# CHECK-LABEL: name: test_sdiv_s32 +name: test_or_s32 +# CHECK-LABEL: name: test_or_s32 legalized: true regBankSelected: true selected: false @@ -736,21 +520,18 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } -# CHECK: id: 0, class: gpr -# CHECK: id: 1, class: gpr -# CHECK: id: 2, class: gpr body: | bb.0: liveins: %r0, %r1 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 - %2(s32) = G_SDIV %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]] = SDIV [[VREGX]], [[VREGY]], 14, _ + %2(s32) = G_OR %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]]:gpr = ORRrr [[VREGX]], [[VREGY]], 14, _ %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] @@ -759,8 +540,8 @@ body: | ; CHECK: BX_RET 14, _, implicit %r0 ... --- -name: test_udiv_s32 -# CHECK-LABEL: name: test_udiv_s32 +name: test_xor_s32 +# CHECK-LABEL: name: test_xor_s32 legalized: true regBankSelected: true selected: false @@ -769,21 +550,18 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } -# CHECK: id: 0, class: gpr -# CHECK: id: 1, class: gpr -# CHECK: id: 2, class: gpr body: | bb.0: liveins: %r0, %r1 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 - %2(s32) = G_UDIV %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]] = UDIV [[VREGX]], [[VREGY]], 14, _ + %2(s32) = G_XOR %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]]:gpr = EORrr [[VREGX]], [[VREGY]], 14, _ %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] @@ -792,8 +570,8 @@ body: | ; CHECK: BX_RET 14, _, implicit %r0 ... --- -name: test_and_s32 -# CHECK-LABEL: name: test_and_s32 +name: test_lshr_s32 +# CHECK-LABEL: name: test_lshr_s32 legalized: true regBankSelected: true selected: false @@ -802,21 +580,18 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } -# CHECK: id: 0, class: gpr -# CHECK: id: 1, class: gpr -# CHECK: id: 2, class: gpr body: | bb.0: liveins: %r0, %r1 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 - %2(s32) = G_AND %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]] = ANDrr [[VREGX]], [[VREGY]], 14, _ + %2(s32) = G_LSHR %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MOVsr [[VREGX]], [[VREGY]], 3, 14, _, _ %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] @@ -825,8 +600,8 @@ body: | ; CHECK: BX_RET 14, _, implicit %r0 ... 
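# NOTE: All three new shift tests select to the same MOVsr instruction; the
# immediate after the two register operands encodes the shift kind. Judging
# from the constants in these CHECK lines (and in the test_ashr_s32 and
# test_shl_s32 hunks that follow): 1 = asr, 2 = lsl, 3 = lsr. Sketch, with
# placeholder registers:
#
#     %2(s32) = G_LSHR %0, %1   ; selects to: MOVsr %0, %1, 3, 14, _, _
#     %2(s32) = G_ASHR %0, %1   ; selects to: MOVsr %0, %1, 1, 14, _, _
#     %2(s32) = G_SHL %0, %1    ; selects to: MOVsr %0, %1, 2, 14, _, _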
--- -name: test_or_s32 -# CHECK-LABEL: name: test_or_s32 +name: test_ashr_s32 +# CHECK-LABEL: name: test_ashr_s32 legalized: true regBankSelected: true selected: false @@ -835,21 +610,18 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } -# CHECK: id: 0, class: gpr -# CHECK: id: 1, class: gpr -# CHECK: id: 2, class: gpr body: | bb.0: liveins: %r0, %r1 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 - %2(s32) = G_OR %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]] = ORRrr [[VREGX]], [[VREGY]], 14, _ + %2(s32) = G_ASHR %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MOVsr [[VREGX]], [[VREGY]], 1, 14, _, _ %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] @@ -858,8 +630,8 @@ body: | ; CHECK: BX_RET 14, _, implicit %r0 ... --- -name: test_xor_s32 -# CHECK-LABEL: name: test_xor_s32 +name: test_shl_s32 +# CHECK-LABEL: name: test_shl_s32 legalized: true regBankSelected: true selected: false @@ -868,21 +640,18 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } -# CHECK: id: 0, class: gpr -# CHECK: id: 1, class: gpr -# CHECK: id: 2, class: gpr body: | bb.0: liveins: %r0, %r1 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 - %2(s32) = G_XOR %0, %1 - ; CHECK: [[VREGRES:%[0-9]+]] = EORrr [[VREGX]], [[VREGY]], 14, _ + %2(s32) = G_SHL %0, %1 + ; CHECK: [[VREGRES:%[0-9]+]]:gprnopc = MOVsr [[VREGX]], [[VREGY]], 2, 14, _, _ %r0 = COPY %2(s32) ; CHECK: %r0 = COPY [[VREGRES]] @@ -902,10 +671,7 @@ registers: - { id: 1, class: gprb } - { id: 2, class: gprb } - { id: 3, class: gprb } -# CHECK-DAG: id: 0, class: gpr -# CHECK-DAG: id: 1, class: gpr -# CHECK-DAG: id: 2, class: gpr -# CHECK-DAG: id: 3, class: gpr + - { id: 4, class: gprb } fixedStack: - { id: 0, offset: 0, size: 1, alignment: 4, isImmutable: true, isAliased: false } - { id: 1, offset: 4, size: 4, alignment: 4, isImmutable: true, isAliased: false } @@ -917,22 +683,25 @@ body: | liveins: %r0, %r1, %r2, %r3 %0(p0) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[FI32VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI32]], 0, 14, _, _ + ; CHECK: [[FI32VREG:%[0-9]+]]:gpr = ADDri %fixed-stack.[[FI32]], 0, 14, _, _ %1(s32) = G_LOAD %0(p0) :: (load 4) - ; CHECK: [[LD32VREG:%[0-9]+]] = LDRi12 [[FI32VREG]], 0, 14, _ + ; CHECK: [[LD32VREG:%[0-9]+]]:gpr = LDRi12 [[FI32VREG]], 0, 14, _ %r0 = COPY %1 ; CHECK: %r0 = COPY [[LD32VREG]] %2(p0) = G_FRAME_INDEX %fixed-stack.0 - ; CHECK: [[FI1VREG:%[0-9]+]] = ADDri %fixed-stack.[[FI1]], 0, 14, _, _ + ; CHECK: [[FI1VREG:%[0-9]+]]:gpr = ADDri %fixed-stack.[[FI1]], 0, 14, _, _ %3(s1) = G_LOAD %2(p0) :: (load 1) - ; CHECK: [[LD1VREG:%[0-9]+]] = LDRBi12 [[FI1VREG]], 0, 14, _ + ; CHECK: [[LD1VREG:%[0-9]+]]:gprnopc = LDRBi12 [[FI1VREG]], 0, 14, _ - %r0 = COPY %3 - ; CHECK: %r0 = COPY [[LD1VREG]] + %4(s32) = G_ANYEXT %3(s1) + ; CHECK: [[RES:%[0-9]+]]:gpr = COPY [[LD1VREG]] + + %r0 = COPY %4 + ; CHECK: %r0 = COPY [[RES]] BX_RET 14, _ ; CHECK: BX_RET 14, _ @@ -947,16 +716,15 @@ selected: false registers: - { id: 0, class: gprb } - { id: 1, class: fprb } -# CHECK-DAG: id: [[P:[0-9]+]], class: gpr -# CHECK-DAG: id: [[V:[0-9]+]], class: spr body: | bb.0: - liveins: %r0, %r1, %r2, %r3 + liveins: %r0 %0(p0) = COPY %r0 + ; CHECK: %[[P:[0-9]+]]:gpr = COPY %r0 %1(s32) = G_LOAD 
%0(p0) :: (load 4) - ; CHECK: %[[V]] = VLDRS %[[P]], 0, 14, _ + ; CHECK: %[[V:[0-9]+]]:spr = VLDRS %[[P]], 0, 14, _ %s0 = COPY %1 ; CHECK: %s0 = COPY %[[V]] @@ -974,16 +742,15 @@ selected: false registers: - { id: 0, class: gprb } - { id: 1, class: fprb } -# CHECK-DAG: id: [[P:[0-9]+]], class: gpr -# CHECK-DAG: id: [[V:[0-9]+]], class: dpr body: | bb.0: - liveins: %r0, %r1, %r2, %r3 + liveins: %r0 %0(p0) = COPY %r0 + ; CHECK: %[[P:[0-9]+]]:gpr = COPY %r0 %1(s64) = G_LOAD %0(p0) :: (load 8) - ; CHECK: %[[V]] = VLDRD %[[P]], 0, 14, _ + ; CHECK: %[[V:[0-9]+]]:dpr = VLDRD %[[P]], 0, 14, _ %d0 = COPY %1 ; CHECK: %d0 = COPY %[[V]] @@ -1013,14 +780,14 @@ registers: # CHECK: id: [[F64:[0-9]+]], class: dpr body: | bb.0: - liveins: %r0, %r1, %r2, %r3 + liveins: %r0, %r1, %s0, %d0 %0(p0) = COPY %r0 - %1(s8) = COPY %r3 - %2(s16) = COPY %r2 %3(s32) = COPY %r1 %4(s32) = COPY %s0 %5(s64) = COPY %d2 + %1(s8) = G_TRUNC %3(s32) + %2(s16) = G_TRUNC %3(s32) G_STORE %1(s8), %0(p0) :: (store 1) ; CHECK: STRBi12 %[[I8]], %[[P]], 0, 14, _ @@ -1050,18 +817,18 @@ registers: - { id: 0, class: gprb } - { id: 1, class: gprb } - { id: 2, class: gprb } -# CHECK: id: [[PTR:[0-9]+]], class: gpr -# CHECK: id: [[OFF:[0-9]+]], class: gpr -# CHECK: id: [[GEP:[0-9]+]], class: gpr body: | bb.0: liveins: %r0, %r1 %0(p0) = COPY %r0 + ; CHECK: %[[PTR:[0-9]+]]:gpr = COPY %r0 + %1(s32) = COPY %r1 + ; CHECK: %[[OFF:[0-9]+]]:gpr = COPY %r1 %2(p0) = G_GEP %0, %1(s32) - ; CHECK: %[[GEP]] = ADDrr %[[PTR]], %[[OFF]], 14, _, _ + ; CHECK: %[[GEP:[0-9]+]]:gpr = ADDrr %[[PTR]], %[[OFF]], 14, _, _ %r0 = COPY %2(p0) BX_RET 14, _, implicit %r0 @@ -1075,11 +842,10 @@ selected: false # CHECK: selected: true registers: - { id: 0, class: gprb } -# CHECK: id: [[C:[0-9]+]], class: gpr body: | bb.0: %0(s32) = G_CONSTANT 42 - ; CHECK: %[[C]] = MOVi 42, 14, _, _ + ; CHECK: %[[C:[0-9]+]]:gpr = MOVi 42, 14, _, _ %r0 = COPY %0(s32) BX_RET 14, _, implicit %r0 @@ -1093,13 +859,12 @@ selected: false # CHECK: selected: true registers: - { id: 0, class: gprb } -# CHECK: id: [[C:[0-9]+]], class: gpr body: | bb.0: ; Adding a type on G_CONSTANT changes its operand from an Imm into a CImm. ; We still want to see the same thing in the output though. 
%0(s32) = G_CONSTANT i32 42 - ; CHECK: %[[C]] = MOVi 42, 14, _, _ + ; CHECK: %[[C:[0-9]+]]:gpr = MOVi 42, 14, _, _ %r0 = COPY %0(s32) BX_RET 14, _, implicit %r0 @@ -1118,20 +883,20 @@ registers: - { id: 3, class: gprb } body: | bb.0: - liveins: %r0, %r1, %r2 + liveins: %r0, %r1 %0(s32) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 %1(s32) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 - %2(s1) = COPY %r2 - ; CHECK: [[VREGC:%[0-9]+]] = COPY %r2 + %2(s1) = G_TRUNC %1(s32) + ; CHECK: [[VREGC:%[0-9]+]]:gpr = COPY [[VREGY]] %3(s32) = G_SELECT %2(s1), %0, %1 ; CHECK: CMPri [[VREGC]], 0, 14, _, implicit-def %cpsr - ; CHECK: [[RES:%[0-9]+]] = MOVCCr [[VREGX]], [[VREGY]], 0, %cpsr + ; CHECK: [[RES:%[0-9]+]]:gpr = MOVCCr [[VREGX]], [[VREGY]], 0, %cpsr %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[RES]] @@ -1153,20 +918,20 @@ registers: - { id: 3, class: gprb } body: | bb.0: - liveins: %r0, %r1, %r2 + liveins: %r0, %r1 %0(p0) = COPY %r0 - ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY %r0 %1(p0) = COPY %r1 - ; CHECK: [[VREGY:%[0-9]+]] = COPY %r1 + ; CHECK: [[VREGY:%[0-9]+]]:gpr = COPY %r1 - %2(s1) = COPY %r2 - ; CHECK: [[VREGC:%[0-9]+]] = COPY %r2 + %2(s1) = G_TRUNC %1(p0) + ; CHECK: [[VREGC:%[0-9]+]]:gpr = COPY [[VREGY]] %3(p0) = G_SELECT %2(s1), %0, %1 ; CHECK: CMPri [[VREGC]], 0, 14, _, implicit-def %cpsr - ; CHECK: [[RES:%[0-9]+]] = MOVCCr [[VREGX]], [[VREGY]], 0, %cpsr + ; CHECK: [[RES:%[0-9]+]]:gpr = MOVCCr [[VREGX]], [[VREGY]], 0, %cpsr %r0 = COPY %3(p0) ; CHECK: %r0 = COPY [[RES]] @@ -1183,16 +948,19 @@ selected: false # CHECK: selected: true registers: - { id: 0, class: gprb } + - { id: 1, class: gprb } body: | bb.0: ; CHECK: bb.0 successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: %r0 - %0(s1) = COPY %r0 - ; CHECK: [[COND:%[0-9]+]] = COPY %r0 + %0(s32) = COPY %r0 + ; CHECK: [[COND32:%[0-9]+]]:gpr = COPY %r0 + %1(s1) = G_TRUNC %0(s32) + ; CHECK: [[COND:%[0-9]+]]:gpr = COPY [[COND32]] - G_BRCOND %0(s1), %bb.1 + G_BRCOND %1(s1), %bb.1 ; CHECK: TSTri [[COND]], 1, 14, _, implicit-def %cpsr ; CHECK: Bcc %bb.1, 0, %cpsr G_BR %bb.2 @@ -1224,26 +992,21 @@ registers: - { id: 2, class: fprb } - { id: 3, class: gprb } - { id: 4, class: gprb } -# CHECK-DAG: id: {{[0-9]+}}, class: gpr -# CHECK-DAG: id: {{[0-9]+}}, class: gpr -# CHECK-DAG: id: {{[0-9]+}}, class: gpr -# CHECK-DAG: id: {{[0-9]+}}, class: gpr -# CHECK-DAG: id: [[DREG:[0-9]+]], class: dpr body: | bb.0: liveins: %r0, %r1, %r2, %r3 %0(s32) = COPY %r2 - ; CHECK: [[IN1:%[0-9]+]] = COPY %r2 + ; CHECK: [[IN1:%[0-9]+]]:gpr = COPY %r2 %1(s32) = COPY %r3 - ; CHECK: [[IN2:%[0-9]+]] = COPY %r3 + ; CHECK: [[IN2:%[0-9]+]]:gpr = COPY %r3 %2(s64) = G_MERGE_VALUES %0(s32), %1(s32) - ; CHECK: %[[DREG]] = VMOVDRR [[IN1]], [[IN2]] + ; CHECK: %[[DREG:[0-9]+]]:dpr = VMOVDRR [[IN1]], [[IN2]] %3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64) - ; CHECK: [[OUT1:%[0-9]+]], [[OUT2:%[0-9]+]] = VMOVRRD %[[DREG]] + ; CHECK: [[OUT1:%[0-9]+]]:gpr, [[OUT2:%[0-9]+]]:gpr = VMOVRRD %[[DREG]] %r0 = COPY %3 ; CHECK: %r0 = COPY [[OUT1]] diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll index 82e9b20731e42..0994455916edc 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll @@ -11,10 +11,12 @@ entry: define signext i1 @test_add_i1(i1 %x, i1 %y) { ; CHECK-LABEL: name: test_add_i1 ; CHECK: liveins: %r0, %r1 -; CHECK-DAG: [[VREGX:%[0-9]+]](s1) 
= COPY %r0 -; CHECK-DAG: [[VREGY:%[0-9]+]](s1) = COPY %r1 -; CHECK: [[SUM:%[0-9]+]](s1) = G_ADD [[VREGX]], [[VREGY]] -; CHECK: [[EXT:%[0-9]+]](s32) = G_SEXT [[SUM]] +; CHECK-DAG: [[VREGR0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[VREGX:%[0-9]+]]:_(s1) = G_TRUNC [[VREGR0]] +; CHECK-DAG: [[VREGR1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK-DAG: [[VREGY:%[0-9]+]]:_(s1) = G_TRUNC [[VREGR1]] +; CHECK: [[SUM:%[0-9]+]]:_(s1) = G_ADD [[VREGX]], [[VREGY]] +; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_SEXT [[SUM]] ; CHECK: %r0 = COPY [[EXT]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -25,10 +27,13 @@ entry: define i8 @test_add_i8(i8 %x, i8 %y) { ; CHECK-LABEL: name: test_add_i8 ; CHECK: liveins: %r0, %r1 -; CHECK-DAG: [[VREGX:%[0-9]+]](s8) = COPY %r0 -; CHECK-DAG: [[VREGY:%[0-9]+]](s8) = COPY %r1 -; CHECK: [[SUM:%[0-9]+]](s8) = G_ADD [[VREGX]], [[VREGY]] -; CHECK: %r0 = COPY [[SUM]](s8) +; CHECK-DAG: [[VREGR0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[VREGX:%[0-9]+]]:_(s8) = G_TRUNC [[VREGR0]] +; CHECK-DAG: [[VREGR1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK-DAG: [[VREGY:%[0-9]+]]:_(s8) = G_TRUNC [[VREGR1]] +; CHECK: [[SUM:%[0-9]+]]:_(s8) = G_ADD [[VREGX]], [[VREGY]] +; CHECK: [[SUM_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUM]] +; CHECK: %r0 = COPY [[SUM_EXT]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: %sum = add i8 %x, %y @@ -38,10 +43,13 @@ entry: define i8 @test_sub_i8(i8 %x, i8 %y) { ; CHECK-LABEL: name: test_sub_i8 ; CHECK: liveins: %r0, %r1 -; CHECK-DAG: [[VREGX:%[0-9]+]](s8) = COPY %r0 -; CHECK-DAG: [[VREGY:%[0-9]+]](s8) = COPY %r1 -; CHECK: [[RES:%[0-9]+]](s8) = G_SUB [[VREGX]], [[VREGY]] -; CHECK: %r0 = COPY [[RES]](s8) +; CHECK-DAG: [[VREGR0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[VREGX:%[0-9]+]]:_(s8) = G_TRUNC [[VREGR0]] +; CHECK-DAG: [[VREGR1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK-DAG: [[VREGY:%[0-9]+]]:_(s8) = G_TRUNC [[VREGR1]] +; CHECK: [[RES:%[0-9]+]]:_(s8) = G_SUB [[VREGX]], [[VREGY]] +; CHECK: [[RES_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]] +; CHECK: %r0 = COPY [[RES_EXT]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: %res = sub i8 %x, %y @@ -51,8 +59,9 @@ entry: define signext i8 @test_return_sext_i8(i8 %x) { ; CHECK-LABEL: name: test_return_sext_i8 ; CHECK: liveins: %r0 -; CHECK: [[VREG:%[0-9]+]](s8) = COPY %r0 -; CHECK: [[VREGEXT:%[0-9]+]](s32) = G_SEXT [[VREG]] +; CHECK: [[VREGR0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[VREG:%[0-9]+]]:_(s8) = G_TRUNC [[VREGR0]] +; CHECK: [[VREGEXT:%[0-9]+]]:_(s32) = G_SEXT [[VREG]] ; CHECK: %r0 = COPY [[VREGEXT]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -62,10 +71,13 @@ entry: define i16 @test_add_i16(i16 %x, i16 %y) { ; CHECK-LABEL: name: test_add_i16 ; CHECK: liveins: %r0, %r1 -; CHECK-DAG: [[VREGX:%[0-9]+]](s16) = COPY %r0 -; CHECK-DAG: [[VREGY:%[0-9]+]](s16) = COPY %r1 -; CHECK: [[SUM:%[0-9]+]](s16) = G_ADD [[VREGX]], [[VREGY]] -; CHECK: %r0 = COPY [[SUM]](s16) +; CHECK-DAG: [[VREGR0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[VREGX:%[0-9]+]]:_(s16) = G_TRUNC [[VREGR0]] +; CHECK-DAG: [[VREGR1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK-DAG: [[VREGY:%[0-9]+]]:_(s16) = G_TRUNC [[VREGR1]] +; CHECK: [[SUM:%[0-9]+]]:_(s16) = G_ADD [[VREGX]], [[VREGY]] +; CHECK: [[SUM_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUM]] +; CHECK: %r0 = COPY [[SUM_EXT]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: %sum = add i16 %x, %y @@ -75,10 +87,13 @@ entry: define i16 @test_sub_i16(i16 %x, i16 %y) { ; CHECK-LABEL: name: test_sub_i16 ; CHECK: liveins: %r0, %r1 -; CHECK-DAG: [[VREGX:%[0-9]+]](s16) = COPY %r0 -; CHECK-DAG: [[VREGY:%[0-9]+]](s16) = COPY %r1 -; CHECK: 
[[RES:%[0-9]+]](s16) = G_SUB [[VREGX]], [[VREGY]] -; CHECK: %r0 = COPY [[RES]](s16) +; CHECK-DAG: [[VREGR0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[VREGX:%[0-9]+]]:_(s16) = G_TRUNC [[VREGR0]] +; CHECK-DAG: [[VREGR1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK-DAG: [[VREGY:%[0-9]+]]:_(s16) = G_TRUNC [[VREGR1]] +; CHECK: [[RES:%[0-9]+]]:_(s16) = G_SUB [[VREGX]], [[VREGY]] +; CHECK: [[RES_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[RES]] +; CHECK: %r0 = COPY [[RES_EXT]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: %res = sub i16 %x, %y @@ -88,8 +103,9 @@ entry: define zeroext i16 @test_return_zext_i16(i16 %x) { ; CHECK-LABEL: name: test_return_zext_i16 ; CHECK: liveins: %r0 -; CHECK: [[VREG:%[0-9]+]](s16) = COPY %r0 -; CHECK: [[VREGEXT:%[0-9]+]](s32) = G_ZEXT [[VREG]] +; CHECK: [[VREGR0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[VREG:%[0-9]+]]:_(s16) = G_TRUNC [[VREGR0]] +; CHECK: [[VREGEXT:%[0-9]+]]:_(s32) = G_ZEXT [[VREG]] ; CHECK: %r0 = COPY [[VREGEXT]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -99,9 +115,9 @@ entry: define i32 @test_add_i32(i32 %x, i32 %y) { ; CHECK-LABEL: name: test_add_i32 ; CHECK: liveins: %r0, %r1 -; CHECK-DAG: [[VREGX:%[0-9]+]](s32) = COPY %r0 -; CHECK-DAG: [[VREGY:%[0-9]+]](s32) = COPY %r1 -; CHECK: [[SUM:%[0-9]+]](s32) = G_ADD [[VREGX]], [[VREGY]] +; CHECK-DAG: [[VREGX:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[VREGY:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: [[SUM:%[0-9]+]]:_(s32) = G_ADD [[VREGX]], [[VREGY]] ; CHECK: %r0 = COPY [[SUM]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -112,9 +128,9 @@ entry: define i32 @test_sub_i32(i32 %x, i32 %y) { ; CHECK-LABEL: name: test_sub_i32 ; CHECK: liveins: %r0, %r1 -; CHECK-DAG: [[VREGX:%[0-9]+]](s32) = COPY %r0 -; CHECK-DAG: [[VREGY:%[0-9]+]](s32) = COPY %r1 -; CHECK: [[RES:%[0-9]+]](s32) = G_SUB [[VREGX]], [[VREGY]] +; CHECK-DAG: [[VREGX:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[VREGY:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SUB [[VREGX]], [[VREGY]] ; CHECK: %r0 = COPY [[RES]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -128,10 +144,10 @@ define i32 @test_stack_args(i32 %p0, i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5 ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 4 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 4 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[VREGP2:%[0-9]+]](s32) = COPY %r2 -; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5:%[0-9]+]](s32) = G_LOAD [[FIP5]]{{.*}}load 4 -; CHECK: [[SUM:%[0-9]+]](s32) = G_ADD [[VREGP2]], [[VREGP5]] +; CHECK: [[VREGP2:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK: [[FIP5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]]{{.*}}load 4 +; CHECK: [[SUM:%[0-9]+]]:_(s32) = G_ADD [[VREGP2]], [[VREGP5]] ; CHECK: %r0 = COPY [[SUM]] ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -146,12 +162,14 @@ define i16 @test_stack_args_signext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[VREGP1:%[0-9]+]](s16) = COPY %r1 -; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5EXT:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 -; CHECK: [[VREGP5:%[0-9]+]](s16) = G_TRUNC [[VREGP5EXT]] -; CHECK: [[SUM:%[0-9]+]](s16) = G_ADD [[VREGP1]], [[VREGP5]] -; CHECK: %r0 = COPY [[SUM]] +; CHECK: [[VREGR1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: [[VREGP1:%[0-9]+]]:_(s16) = G_TRUNC [[VREGR1]] +; CHECK: 
[[FIP5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5EXT:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 +; CHECK: [[VREGP5:%[0-9]+]]:_(s16) = G_TRUNC [[VREGP5EXT]] +; CHECK: [[SUM:%[0-9]+]]:_(s16) = G_ADD [[VREGP1]], [[VREGP5]] +; CHECK: [[SUM_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUM]] +; CHECK: %r0 = COPY [[SUM_EXT]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: %sum = add i16 %p1, %p5 @@ -165,12 +183,14 @@ define i8 @test_stack_args_zeroext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[VREGP2:%[0-9]+]](s8) = COPY %r2 -; CHECK: [[FIP4:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P4]] -; CHECK: [[VREGP4EXT:%[0-9]+]](s32) = G_LOAD [[FIP4]](p0){{.*}}load 4 -; CHECK: [[VREGP4:%[0-9]+]](s8) = G_TRUNC [[VREGP4EXT]] -; CHECK: [[SUM:%[0-9]+]](s8) = G_ADD [[VREGP2]], [[VREGP4]] -; CHECK: %r0 = COPY [[SUM]] +; CHECK: [[VREGR2:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK: [[VREGP2:%[0-9]+]]:_(s8) = G_TRUNC [[VREGR2]] +; CHECK: [[FIP4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P4]] +; CHECK: [[VREGP4EXT:%[0-9]+]]:_(s32) = G_LOAD [[FIP4]](p0){{.*}}load 4 +; CHECK: [[VREGP4:%[0-9]+]]:_(s8) = G_TRUNC [[VREGP4EXT]] +; CHECK: [[SUM:%[0-9]+]]:_(s8) = G_ADD [[VREGP2]], [[VREGP4]] +; CHECK: [[SUM_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUM]] +; CHECK: %r0 = COPY [[SUM_EXT]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: %sum = add i8 %p2, %p4 @@ -184,11 +204,13 @@ define i8 @test_stack_args_noext(i32 %p0, i16 %p1, i8 %p2, i1 %p3, ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[VREGP2:%[0-9]+]](s8) = COPY %r2 -; CHECK: [[FIP4:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P4]] -; CHECK: [[VREGP4:%[0-9]+]](s8) = G_LOAD [[FIP4]](p0){{.*}}load 1 -; CHECK: [[SUM:%[0-9]+]](s8) = G_ADD [[VREGP2]], [[VREGP4]] -; CHECK: %r0 = COPY [[SUM]] +; CHECK: [[VREGR2:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK: [[VREGP2:%[0-9]+]]:_(s8) = G_TRUNC [[VREGR2]] +; CHECK: [[FIP4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P4]] +; CHECK: [[VREGP4:%[0-9]+]]:_(s8) = G_LOAD [[FIP4]](p0){{.*}}load 1 +; CHECK: [[SUM:%[0-9]+]]:_(s8) = G_ADD [[VREGP2]], [[VREGP4]] +; CHECK: [[SUM_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUM]] +; CHECK: %r0 = COPY [[SUM_EXT]](s32) ; CHECK: BX_RET 14, _, implicit %r0 entry: %sum = add i8 %p2, %p4 @@ -202,10 +224,10 @@ define zeroext i16 @test_stack_args_extend_the_extended(i32 %p0, i16 %p1, i8 %p2 ; CHECK-DAG: id: [[P4:[0-9]]]{{.*}}offset: 0{{.*}}size: 1 ; CHECK-DAG: id: [[P5:[0-9]]]{{.*}}offset: 4{{.*}}size: 2 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5SEXT:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 -; CHECK: [[VREGP5:%[0-9]+]](s16) = G_TRUNC [[VREGP5SEXT]] -; CHECK: [[VREGP5ZEXT:%[0-9]+]](s32) = G_ZEXT [[VREGP5]] +; CHECK: [[FIP5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5SEXT:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 +; CHECK: [[VREGP5:%[0-9]+]]:_(s16) = G_TRUNC [[VREGP5SEXT]] +; CHECK: [[VREGP5ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[VREGP5]] ; CHECK: %r0 = COPY [[VREGP5ZEXT]] ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -215,8 +237,8 @@ entry: define i16 @test_ptr_arg(i16* %p) { ; CHECK-LABEL: name: test_ptr_arg ; CHECK: liveins: %r0 -; CHECK: [[VREGP:%[0-9]+]](p0) = COPY %r0 -; CHECK: [[VREGV:%[0-9]+]](s16) = G_LOAD 
[[VREGP]](p0){{.*}}load 2 +; CHECK: [[VREGP:%[0-9]+]]:_(p0) = COPY %r0 +; CHECK: [[VREGV:%[0-9]+]]:_(s16) = G_LOAD [[VREGP]](p0){{.*}}load 2 entry: %v = load i16, i16* %p ret i16 %v @@ -226,8 +248,8 @@ define i32* @test_ptr_ret(i32** %p) { ; Test pointer returns and pointer-to-pointer arguments ; CHECK-LABEL: name: test_ptr_ret ; CHECK: liveins: %r0 -; CHECK: [[VREGP:%[0-9]+]](p0) = COPY %r0 -; CHECK: [[VREGV:%[0-9]+]](p0) = G_LOAD [[VREGP]](p0){{.*}}load 4 +; CHECK: [[VREGP:%[0-9]+]]:_(p0) = COPY %r0 +; CHECK: [[VREGV:%[0-9]+]]:_(p0) = G_LOAD [[VREGP]](p0){{.*}}load 4 ; CHECK: %r0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -240,9 +262,9 @@ define i32 @test_ptr_arg_on_stack(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32* %p) { ; CHECK: fixedStack: ; CHECK: id: [[P:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[FIP:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P]] -; CHECK: [[VREGP:%[0-9]+]](p0) = G_LOAD [[FIP]](p0){{.*}}load 4 -; CHECK: [[VREGV:%[0-9]+]](s32) = G_LOAD [[VREGP]](p0){{.*}}load 4 +; CHECK: [[FIP:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P]] +; CHECK: [[VREGP:%[0-9]+]]:_(p0) = G_LOAD [[FIP]](p0){{.*}}load 4 +; CHECK: [[VREGV:%[0-9]+]]:_(s32) = G_LOAD [[VREGP]](p0){{.*}}load 4 ; CHECK: %r0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -257,10 +279,10 @@ define arm_aapcscc float @test_float_aapcscc(float %p0, float %p1, float %p2, ; CHECK-DAG: id: [[P4:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4 ; CHECK-DAG: id: [[P5:[0-9]+]]{{.*}}offset: 4{{.*}}size: 4 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[VREGP1:%[0-9]+]](s32) = COPY %r1 -; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5:%[0-9]+]](s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 -; CHECK: [[VREGV:%[0-9]+]](s32) = G_FADD [[VREGP1]], [[VREGP5]] +; CHECK: [[VREGP1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: [[FIP5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5:%[0-9]+]]:_(s32) = G_LOAD [[FIP5]](p0){{.*}}load 4 +; CHECK: [[VREGV:%[0-9]+]]:_(s32) = G_FADD [[VREGP1]], [[VREGP5]] ; CHECK: %r0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %r0 entry: @@ -286,10 +308,10 @@ define arm_aapcs_vfpcc float @test_float_vfpcc(float %p0, float %p1, float %p2, ; CHECK-DAG: id: [[Q0:[0-9]+]]{{.*}}offset: 0{{.*}}size: 4 ; CHECK-DAG: id: [[Q1:[0-9]+]]{{.*}}offset: 4{{.*}}size: 4 ; CHECK: liveins: %s0, %s1, %s2, %s3, %s4, %s5, %s6, %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15 -; CHECK: [[VREGP1:%[0-9]+]](s32) = COPY %s1 -; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] -; CHECK: [[VREGQ1:%[0-9]+]](s32) = G_LOAD [[FIQ1]](p0){{.*}}load 4 -; CHECK: [[VREGV:%[0-9]+]](s32) = G_FADD [[VREGP1]], [[VREGQ1]] +; CHECK: [[VREGP1:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK: [[FIQ1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] +; CHECK: [[VREGQ1:%[0-9]+]]:_(s32) = G_LOAD [[FIQ1]](p0){{.*}}load 4 +; CHECK: [[VREGV:%[0-9]+]]:_(s32) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: %s0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %s0 entry: @@ -307,10 +329,10 @@ define arm_aapcs_vfpcc double @test_double_vfpcc(double %p0, double %p1, double ; CHECK-DAG: id: [[Q0:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8 ; CHECK-DAG: id: [[Q1:[0-9]+]]{{.*}}offset: 8{{.*}}size: 8 ; CHECK: liveins: %d0, %d1, %d2, %d3, %d4, %d5, %d6, %d7 -; CHECK: [[VREGP1:%[0-9]+]](s64) = COPY %d1 -; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] -; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8 -; CHECK: [[VREGV:%[0-9]+]](s64) = 
G_FADD [[VREGP1]], [[VREGQ1]] +; CHECK: [[VREGP1:%[0-9]+]]:_(s64) = COPY %d1 +; CHECK: [[FIQ1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] +; CHECK: [[VREGQ1:%[0-9]+]]:_(s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8 +; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: %d0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %d0 entry: @@ -327,15 +349,15 @@ define arm_aapcscc double @test_double_aapcscc(double %p0, double %p1, double %p ; CHECK-DAG: id: [[P4:[0-9]+]]{{.*}}offset: 16{{.*}}size: 8 ; CHECK-DAG: id: [[P5:[0-9]+]]{{.*}}offset: 24{{.*}}size: 8 ; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK-DAG: [[VREGP1LO:%[0-9]+]](s32) = COPY %r2 -; CHECK-DAG: [[VREGP1HI:%[0-9]+]](s32) = COPY %r3 -; LITTLE: [[VREGP1:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP1LO]](s32), [[VREGP1HI]](s32) -; BIG: [[VREGP1:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP1HI]](s32), [[VREGP1LO]](s32) -; CHECK: [[FIP5:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P5]] -; CHECK: [[VREGP5:%[0-9]+]](s64) = G_LOAD [[FIP5]](p0){{.*}}load 8 -; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGP5]] -; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) -; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; CHECK-DAG: [[VREGP1LO:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK-DAG: [[VREGP1HI:%[0-9]+]]:_(s32) = COPY %r3 +; LITTLE: [[VREGP1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP1LO]](s32), [[VREGP1HI]](s32) +; BIG: [[VREGP1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP1HI]](s32), [[VREGP1LO]](s32) +; CHECK: [[FIP5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P5]] +; CHECK: [[VREGP5:%[0-9]+]]:_(s64) = G_LOAD [[FIP5]](p0){{.*}}load 8 +; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP1]], [[VREGP5]] +; LITTLE: [[VREGVLO:%[0-9]+]]:_(s32), [[VREGVHI:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; BIG: [[VREGVHI:%[0-9]+]]:_(s32), [[VREGVLO:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 @@ -355,10 +377,10 @@ define arm_aapcs_vfpcc double @test_double_gap_vfpcc(double %p0, float %filler, ; CHECK-DAG: id: [[Q0:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8 ; CHECK-DAG: id: [[Q1:[0-9]+]]{{.*}}offset: 8{{.*}}size: 8 ; CHECK: liveins: %d0, %d2, %d3, %d4, %d5, %d6, %d7, %s2 -; CHECK: [[VREGP1:%[0-9]+]](s64) = COPY %d2 -; CHECK: [[FIQ1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] -; CHECK: [[VREGQ1:%[0-9]+]](s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8 -; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP1]], [[VREGQ1]] +; CHECK: [[VREGP1:%[0-9]+]]:_(s64) = COPY %d2 +; CHECK: [[FIQ1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Q1]] +; CHECK: [[VREGQ1:%[0-9]+]]:_(s64) = G_LOAD [[FIQ1]](p0){{.*}}load 8 +; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP1]], [[VREGQ1]] ; CHECK: %d0 = COPY [[VREGV]] ; CHECK: BX_RET 14, _, implicit %d0 entry: @@ -372,15 +394,15 @@ define arm_aapcscc double @test_double_gap_aapcscc(float %filler, double %p0, ; CHECK: fixedStack: ; CHECK-DAG: id: [[P1:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8 ; CHECK: liveins: %r0, %r2, %r3 -; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r2 -; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r3 -; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32) -; BIG: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32) -; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]] -; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD 
[[FIP1]](p0){{.*}}load 8 -; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]] -; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) -; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; CHECK-DAG: [[VREGP0LO:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK-DAG: [[VREGP0HI:%[0-9]+]]:_(s32) = COPY %r3 +; LITTLE: [[VREGP0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32) +; BIG: [[VREGP0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32) +; CHECK: [[FIP1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P1]] +; CHECK: [[VREGP1:%[0-9]+]]:_(s64) = G_LOAD [[FIP1]](p0){{.*}}load 8 +; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP0]], [[VREGP1]] +; LITTLE: [[VREGVLO:%[0-9]+]]:_(s32), [[VREGVHI:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; BIG: [[VREGVHI:%[0-9]+]]:_(s32), [[VREGVLO:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 @@ -395,15 +417,15 @@ define arm_aapcscc double @test_double_gap2_aapcscc(double %p0, float %filler, ; CHECK: fixedStack: ; CHECK-DAG: id: [[P1:[0-9]+]]{{.*}}offset: 0{{.*}}size: 8 ; CHECK: liveins: %r0, %r1, %r2 -; CHECK-DAG: [[VREGP0LO:%[0-9]+]](s32) = COPY %r0 -; CHECK-DAG: [[VREGP0HI:%[0-9]+]](s32) = COPY %r1 -; LITTLE: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32) -; BIG: [[VREGP0:%[0-9]+]](s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32) -; CHECK: [[FIP1:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[P1]] -; CHECK: [[VREGP1:%[0-9]+]](s64) = G_LOAD [[FIP1]](p0){{.*}}load 8 -; CHECK: [[VREGV:%[0-9]+]](s64) = G_FADD [[VREGP0]], [[VREGP1]] -; LITTLE: [[VREGVLO:%[0-9]+]](s32), [[VREGVHI:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) -; BIG: [[VREGVHI:%[0-9]+]](s32), [[VREGVLO:%[0-9]+]](s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; CHECK-DAG: [[VREGP0LO:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[VREGP0HI:%[0-9]+]]:_(s32) = COPY %r1 +; LITTLE: [[VREGP0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP0LO]](s32), [[VREGP0HI]](s32) +; BIG: [[VREGP0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[VREGP0HI]](s32), [[VREGP0LO]](s32) +; CHECK: [[FIP1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[P1]] +; CHECK: [[VREGP1:%[0-9]+]]:_(s64) = G_LOAD [[FIP1]](p0){{.*}}load 8 +; CHECK: [[VREGV:%[0-9]+]]:_(s64) = G_FADD [[VREGP0]], [[VREGP1]] +; LITTLE: [[VREGVLO:%[0-9]+]]:_(s32), [[VREGVHI:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) +; BIG: [[VREGVHI:%[0-9]+]]:_(s32), [[VREGVLO:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[VREGV]](s64) ; CHECK-DAG: %r0 = COPY [[VREGVLO]] ; CHECK-DAG: %r1 = COPY [[VREGVHI]] ; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 @@ -412,508 +434,13 @@ entry: ret double %v } -define arm_aapcscc void @test_indirect_call(void() *%fptr) { -; CHECK-LABEL: name: test_indirect_call -; CHECK: registers: -; CHECK-NEXT: id: [[FPTR:[0-9]+]], class: gpr -; CHECK: %[[FPTR]](p0) = COPY %r0 -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BLX %[[FPTR]](p0), csr_aapcs, implicit-def %lr, implicit %sp -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp -entry: - notail call arm_aapcscc void %fptr() - ret void -} - -declare arm_aapcscc void @call_target() - -define arm_aapcscc void @test_direct_call() { -; CHECK-LABEL: name: test_direct_call -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BLX @call_target, csr_aapcs, 
implicit-def %lr, implicit %sp -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp -entry: - notail call arm_aapcscc void @call_target() - ret void -} - -declare arm_aapcscc i32* @simple_reg_params_target(i32, i32*) - -define arm_aapcscc i32* @test_call_simple_reg_params(i32 *%a, i32 %b) { -; CHECK-LABEL: name: test_call_simple_reg_params -; CHECK-DAG: [[AVREG:%[0-9]+]](p0) = COPY %r0 -; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r1 -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK-DAG: %r0 = COPY [[BVREG]] -; CHECK-DAG: %r1 = COPY [[AVREG]] -; CHECK: BLX @simple_reg_params_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0 -; CHECK: [[RVREG:%[0-9]+]](p0) = COPY %r0 -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: %r0 = COPY [[RVREG]] -; CHECK: BX_RET 14, _, implicit %r0 -entry: - %r = notail call arm_aapcscc i32 *@simple_reg_params_target(i32 %b, i32 *%a) - ret i32 *%r -} - -declare arm_aapcscc i32* @simple_stack_params_target(i32, i32*, i32, i32*, i32, i32*) - -define arm_aapcscc i32* @test_call_simple_stack_params(i32 *%a, i32 %b) { -; CHECK-LABEL: name: test_call_simple_stack_params -; CHECK-DAG: [[AVREG:%[0-9]+]](p0) = COPY %r0 -; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r1 -; CHECK: ADJCALLSTACKDOWN 8, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK-DAG: %r0 = COPY [[BVREG]] -; CHECK-DAG: %r1 = COPY [[AVREG]] -; CHECK-DAG: %r2 = COPY [[BVREG]] -; CHECK-DAG: %r3 = COPY [[AVREG]] -; CHECK: [[SP1:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF1:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[FI1:%[0-9]+]](p0) = G_GEP [[SP1]], [[OFF1]](s32) -; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store 4 -; CHECK: [[SP2:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF2:%[0-9]+]](s32) = G_CONSTANT i32 4 -; CHECK: [[FI2:%[0-9]+]](p0) = G_GEP [[SP2]], [[OFF2]](s32) -; CHECK: G_STORE [[AVREG]](p0), [[FI2]](p0){{.*}}store 4 -; CHECK: BLX @simple_stack_params_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 -; CHECK: [[RVREG:%[0-9]+]](p0) = COPY %r0 -; CHECK: ADJCALLSTACKUP 8, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: %r0 = COPY [[RVREG]] -; CHECK: BX_RET 14, _, implicit %r0 -entry: - %r = notail call arm_aapcscc i32 *@simple_stack_params_target(i32 %b, i32 *%a, i32 %b, i32 *%a, i32 %b, i32 *%a) - ret i32 *%r -} - -declare arm_aapcscc signext i16 @ext_target(i8 signext, i8 zeroext, i16 signext, i16 zeroext, i8 signext, i8 zeroext, i16 signext, i16 zeroext, i1 zeroext) - -define arm_aapcscc signext i16 @test_call_ext_params(i8 %a, i16 %b, i1 %c) { -; CHECK-LABEL: name: test_call_ext_params -; CHECK-DAG: [[AVREG:%[0-9]+]](s8) = COPY %r0 -; CHECK-DAG: [[BVREG:%[0-9]+]](s16) = COPY %r1 -; CHECK-DAG: [[CVREG:%[0-9]+]](s1) = COPY %r2 -; CHECK: ADJCALLSTACKDOWN 20, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[SEXTA:%[0-9]+]](s32) = G_SEXT [[AVREG]](s8) -; CHECK: %r0 = COPY [[SEXTA]] -; CHECK: [[ZEXTA:%[0-9]+]](s32) = G_ZEXT [[AVREG]](s8) -; CHECK: %r1 = COPY [[ZEXTA]] -; CHECK: [[SEXTB:%[0-9]+]](s32) = G_SEXT [[BVREG]](s16) -; CHECK: %r2 = COPY [[SEXTB]] -; CHECK: [[ZEXTB:%[0-9]+]](s32) = G_ZEXT [[BVREG]](s16) -; CHECK: %r3 = COPY [[ZEXTB]] -; CHECK: [[SP1:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF1:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[FI1:%[0-9]+]](p0) = G_GEP [[SP1]], [[OFF1]](s32) -; CHECK: [[SEXTA2:%[0-9]+]](s32) = G_SEXT [[AVREG]] -; CHECK: G_STORE [[SEXTA2]](s32), [[FI1]](p0){{.*}}store 4 -; CHECK: 
[[SP2:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF2:%[0-9]+]](s32) = G_CONSTANT i32 4 -; CHECK: [[FI2:%[0-9]+]](p0) = G_GEP [[SP2]], [[OFF2]](s32) -; CHECK: [[ZEXTA2:%[0-9]+]](s32) = G_ZEXT [[AVREG]] -; CHECK: G_STORE [[ZEXTA2]](s32), [[FI2]](p0){{.*}}store 4 -; CHECK: [[SP3:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF3:%[0-9]+]](s32) = G_CONSTANT i32 8 -; CHECK: [[FI3:%[0-9]+]](p0) = G_GEP [[SP3]], [[OFF3]](s32) -; CHECK: [[SEXTB2:%[0-9]+]](s32) = G_SEXT [[BVREG]] -; CHECK: G_STORE [[SEXTB2]](s32), [[FI3]](p0){{.*}}store 4 -; CHECK: [[SP4:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF4:%[0-9]+]](s32) = G_CONSTANT i32 12 -; CHECK: [[FI4:%[0-9]+]](p0) = G_GEP [[SP4]], [[OFF4]](s32) -; CHECK: [[ZEXTB2:%[0-9]+]](s32) = G_ZEXT [[BVREG]] -; CHECK: G_STORE [[ZEXTB2]](s32), [[FI4]](p0){{.*}}store 4 -; CHECK: [[SP5:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF5:%[0-9]+]](s32) = G_CONSTANT i32 16 -; CHECK: [[FI5:%[0-9]+]](p0) = G_GEP [[SP5]], [[OFF5]](s32) -; CHECK: [[ZEXTC:%[0-9]+]](s32) = G_ZEXT [[CVREG]] -; CHECK: G_STORE [[ZEXTC]](s32), [[FI5]](p0){{.*}}store 4 -; CHECK: BLX @ext_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 -; CHECK: [[RVREG:%[0-9]+]](s16) = COPY %r0 -; CHECK: ADJCALLSTACKUP 20, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[RExtVREG:%[0-9]+]](s32) = G_SEXT [[RVREG]] -; CHECK: %r0 = COPY [[RExtVREG]] -; CHECK: BX_RET 14, _, implicit %r0 -entry: - %r = notail call arm_aapcscc signext i16 @ext_target(i8 signext %a, i8 zeroext %a, i16 signext %b, i16 zeroext %b, i8 signext %a, i8 zeroext %a, i16 signext %b, i16 zeroext %b, i1 zeroext %c) - ret i16 %r -} - -declare arm_aapcs_vfpcc double @vfpcc_fp_target(float, double) - -define arm_aapcs_vfpcc double @test_call_vfpcc_fp_params(double %a, float %b) { -; CHECK-LABEL: name: test_call_vfpcc_fp_params -; CHECK-DAG: [[AVREG:%[0-9]+]](s64) = COPY %d0 -; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %s2 -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK-DAG: %s0 = COPY [[BVREG]] -; CHECK-DAG: %d1 = COPY [[AVREG]] -; CHECK: BLX @vfpcc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %d1, implicit-def %d0 -; CHECK: [[RVREG:%[0-9]+]](s64) = COPY %d0 -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: %d0 = COPY [[RVREG]] -; CHECK: BX_RET 14, _, implicit %d0 -entry: - %r = notail call arm_aapcs_vfpcc double @vfpcc_fp_target(float %b, double %a) - ret double %r -} - -declare arm_aapcscc double @aapcscc_fp_target(float, double, float, double) - -define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) { -; CHECK-LABEL: name: test_call_aapcs_fp_params -; CHECK-DAG: [[A1:%[0-9]+]](s32) = COPY %r0 -; CHECK-DAG: [[A2:%[0-9]+]](s32) = COPY %r1 -; LITTLE-DAG: [[AVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[A1]](s32), [[A2]](s32) -; BIG-DAG: [[AVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[A2]](s32), [[A1]](s32) -; CHECK-DAG: [[BVREG:%[0-9]+]](s32) = COPY %r2 -; CHECK: ADJCALLSTACKDOWN 16, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK-DAG: %r0 = COPY [[BVREG]] -; CHECK-DAG: [[A1:%[0-9]+]](s32), [[A2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[AVREG]](s64) -; LITTLE-DAG: %r2 = COPY [[A1]] -; LITTLE-DAG: %r3 = COPY [[A2]] -; BIG-DAG: %r2 = COPY [[A2]] -; BIG-DAG: %r3 = COPY [[A1]] -; CHECK: [[SP1:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF1:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[FI1:%[0-9]+]](p0) = G_GEP [[SP1]], [[OFF1]](s32) -; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store 4 -; CHECK: 
[[SP2:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF2:%[0-9]+]](s32) = G_CONSTANT i32 8 -; CHECK: [[FI2:%[0-9]+]](p0) = G_GEP [[SP2]], [[OFF2]](s32) -; CHECK: G_STORE [[AVREG]](s64), [[FI2]](p0){{.*}}store 8 -; CHECK: BLX @aapcscc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 -; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r0 -; CHECK-DAG: [[R2:%[0-9]+]](s32) = COPY %r1 -; LITTLE: [[RVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[R1]](s32), [[R2]](s32) -; BIG: [[RVREG:%[0-9]+]](s64) = G_MERGE_VALUES [[R2]](s32), [[R1]](s32) -; CHECK: ADJCALLSTACKUP 16, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[RVREG]](s64) -; LITTLE-DAG: %r0 = COPY [[R1]] -; LITTLE-DAG: %r1 = COPY [[R2]] -; BIG-DAG: %r0 = COPY [[R2]] -; BIG-DAG: %r1 = COPY [[R1]] -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 -entry: - %r = notail call arm_aapcscc double @aapcscc_fp_target(float %b, double %a, float %b, double %a) - ret double %r -} - -declare arm_aapcscc float @different_call_conv_target(float) - -define arm_aapcs_vfpcc float @test_call_different_call_conv(float %x) { -; CHECK-LABEL: name: test_call_different_call_conv -; CHECK: [[X:%[0-9]+]](s32) = COPY %s0 -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: %r0 = COPY [[X]] -; CHECK: BLX @different_call_conv_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit-def %r0 -; CHECK: [[R:%[0-9]+]](s32) = COPY %r0 -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: %s0 = COPY [[R]] -; CHECK: BX_RET 14, _, implicit %s0 -entry: - %r = notail call arm_aapcscc float @different_call_conv_target(float %x) - ret float %r -} - -declare arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32]) - -define arm_aapcscc [3 x i32] @test_tiny_int_arrays([2 x i32] %arr) { -; CHECK-LABEL: name: test_tiny_int_arrays -; CHECK: liveins: %r0, %r1 -; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 -; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 -; CHECK: [[ARG_ARR:%[0-9]+]](s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARG_ARR]](s64) -; CHECK: %r0 = COPY [[R0]] -; CHECK: %r1 = COPY [[R1]] -; CHECK: BLX @tiny_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 -; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 -; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 -; CHECK: [[R2:%[0-9]+]](s32) = COPY %r2 -; CHECK: [[RES_ARR:%[0-9]+]](s96) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32) -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[RES_ARR]](s96) -; FIXME: This doesn't seem correct with regard to the AAPCS docs (which say -; that composite types larger than 4 bytes should be passed through memory), -; but it's what DAGISel does. We should fix it in the common code for both. 
-; CHECK: %r0 = COPY [[R0]] -; CHECK: %r1 = COPY [[R1]] -; CHECK: %r2 = COPY [[R2]] -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1, implicit %r2 -entry: - %r = notail call arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32] %arr) - ret [3 x i32] %r -} - -declare arm_aapcscc void @multiple_int_arrays_target([2 x i32], [2 x i32]) - -define arm_aapcscc void @test_multiple_int_arrays([2 x i32] %arr0, [2 x i32] %arr1) { -; CHECK-LABEL: name: test_multiple_int_arrays -; CHECK: liveins: %r0, %r1 -; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 -; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 -; CHECK: [[R2:%[0-9]+]](s32) = COPY %r2 -; CHECK: [[R3:%[0-9]+]](s32) = COPY %r3 -; CHECK: [[ARG_ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) -; CHECK: [[ARG_ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[R2]](s32), [[R3]](s32) -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARG_ARR0]](s64) -; CHECK: [[R2:%[0-9]+]](s32), [[R3:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARG_ARR1]](s64) -; CHECK: %r0 = COPY [[R0]] -; CHECK: %r1 = COPY [[R1]] -; CHECK: %r2 = COPY [[R2]] -; CHECK: %r3 = COPY [[R3]] -; CHECK: BLX @multiple_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3 -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BX_RET 14, _ -entry: - notail call arm_aapcscc void @multiple_int_arrays_target([2 x i32] %arr0, [2 x i32] %arr1) - ret void -} - -declare arm_aapcscc void @large_int_arrays_target([20 x i32]) - -define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) { -; CHECK-LABEL: name: test_large_int_arrays -; CHECK: fixedStack: -; The parameters live in separate stack locations, one for each element that -; doesn't fit in the registers. 
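(As a worked check of the layout the comment describes, for the [20 x i32] argument in this test:

  total     = 20 elements x 4 bytes each
  registers = r0..r3 carry elements 0..3
  stack     = 16 x 4 = 64 bytes, hence the ADJCALLSTACKDOWN/ADJCALLSTACKUP 64 below
  offsets   = 0, 4, ..., 60, matching the G_CONSTANT i32 0 and i32 60 checks
)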
-; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4, -; CHECK-DAG: id: [[LAST_STACK_ID:[-0-9]+]], type: default, offset: 60, size: 4 -; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK-DAG: [[R0:%[0-9]+]](s32) = COPY %r0 -; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r1 -; CHECK-DAG: [[R2:%[0-9]+]](s32) = COPY %r2 -; CHECK-DAG: [[R3:%[0-9]+]](s32) = COPY %r3 -; CHECK: [[FIRST_STACK_ELEMENT_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[FIRST_STACK_ID]] -; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]](s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]] -; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] -; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]](s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] -; CHECK: [[ARG_ARR:%[0-9]+]](s640) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32) -; CHECK: ADJCALLSTACKDOWN 64, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32), [[R3:%[0-9]+]](s32), [[FIRST_STACK_ELEMENT:%[0-9]+]](s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARG_ARR]](s640) -; CHECK: %r0 = COPY [[R0]] -; CHECK: %r1 = COPY [[R1]] -; CHECK: %r2 = COPY [[R2]] -; CHECK: %r3 = COPY [[R3]] -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF_FIRST_ELEMENT:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[FIRST_STACK_ARG_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF_FIRST_ELEMENT]](s32) -; CHECK: G_STORE [[FIRST_STACK_ELEMENT]](s32), [[FIRST_STACK_ARG_ADDR]]{{.*}}store 4 -; Match the second-to-last offset, so we can get the correct SP for the last element -; CHECK: G_CONSTANT i32 56 -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]](s32) = G_CONSTANT i32 60 -; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF_LAST_ELEMENT]](s32) -; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store 4 -; CHECK: BLX @large_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3 -; CHECK: ADJCALLSTACKUP 64, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: BX_RET 14, _ -entry: - notail call arm_aapcscc void @large_int_arrays_target([20 x i32] %arr) - ret void -} - -declare arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double]) - -define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) { -; CHECK-LABEL: name: test_fp_arrays_aapcs -; CHECK: fixedStack: -; CHECK: id: [[ARR2_ID:[0-9]+]], type: default, offset: 0, size: 8, -; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK: [[ARR0_0:%[0-9]+]](s32) = COPY %r0 -; CHECK: [[ARR0_1:%[0-9]+]](s32) = COPY %r1 -; LITTLE: [[ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR0_0]](s32), [[ARR0_1]](s32) -; BIG: [[ARR0:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR0_1]](s32), [[ARR0_0]](s32) -; CHECK: [[ARR1_0:%[0-9]+]](s32) = COPY %r2 -; CHECK: [[ARR1_1:%[0-9]+]](s32) = COPY %r3 -; LITTLE: [[ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR1_0]](s32), [[ARR1_1]](s32) -; BIG: [[ARR1:%[0-9]+]](s64) = G_MERGE_VALUES [[ARR1_1]](s32), [[ARR1_0]](s32) -; CHECK: [[ARR2_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]] -; CHECK: [[ARR2:%[0-9]+]](s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]] -; CHECK: [[ARR_MERGED:%[0-9]+]](s192) = G_MERGE_VALUES [[ARR0]](s64), [[ARR1]](s64), [[ARR2]](s64) -; CHECK: ADJCALLSTACKDOWN 8, 0, 14, _, implicit-def %sp, 
implicit %sp -; CHECK: [[ARR0:%[0-9]+]](s64), [[ARR1:%[0-9]+]](s64), [[ARR2:%[0-9]+]](s64) = G_UNMERGE_VALUES [[ARR_MERGED]](s192) -; CHECK: [[ARR0_0:%[0-9]+]](s32), [[ARR0_1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARR0]](s64) -; LITTLE: %r0 = COPY [[ARR0_0]](s32) -; LITTLE: %r1 = COPY [[ARR0_1]](s32) -; BIG: %r0 = COPY [[ARR0_1]](s32) -; BIG: %r1 = COPY [[ARR0_0]](s32) -; CHECK: [[ARR1_0:%[0-9]+]](s32), [[ARR1_1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARR1]](s64) -; LITTLE: %r2 = COPY [[ARR1_0]](s32) -; LITTLE: %r3 = COPY [[ARR1_1]](s32) -; BIG: %r2 = COPY [[ARR1_1]](s32) -; BIG: %r3 = COPY [[ARR1_0]](s32) -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[ARR2_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[ARR2_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[ARR2_OFFSET]](s32) -; CHECK: G_STORE [[ARR2]](s64), [[ARR2_ADDR]](p0){{.*}}store 8 -; CHECK: BLX @fp_arrays_aapcs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 -; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 -; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 -; CHECK: [[R_MERGED:%[0-9]+]](s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) -; CHECK: ADJCALLSTACKUP 8, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[R_MERGED]](s64) -; CHECK: %r0 = COPY [[R0]] -; CHECK: %r1 = COPY [[R1]] -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 -entry: - %r = notail call arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double] %arr) - ret [2 x float] %r -} - -declare arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double], [3 x float], [4 x double]) - -define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x float] %y, [4 x double] %z) { -; CHECK-LABEL: name: test_fp_arrays_aapcs_vfp -; CHECK: fixedStack: -; CHECK-DAG: id: [[Z0_ID:[0-9]+]], type: default, offset: 0, size: 8, -; CHECK-DAG: id: [[Z1_ID:[0-9]+]], type: default, offset: 8, size: 8, -; CHECK-DAG: id: [[Z2_ID:[0-9]+]], type: default, offset: 16, size: 8, -; CHECK-DAG: id: [[Z3_ID:[0-9]+]], type: default, offset: 24, size: 8, -; CHECK: liveins: %d0, %d1, %d2, %s6, %s7, %s8 -; CHECK: [[X0:%[0-9]+]](s64) = COPY %d0 -; CHECK: [[X1:%[0-9]+]](s64) = COPY %d1 -; CHECK: [[X2:%[0-9]+]](s64) = COPY %d2 -; CHECK: [[Y0:%[0-9]+]](s32) = COPY %s6 -; CHECK: [[Y1:%[0-9]+]](s32) = COPY %s7 -; CHECK: [[Y2:%[0-9]+]](s32) = COPY %s8 -; CHECK: [[Z0_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Z0_ID]] -; CHECK: [[Z0:%[0-9]+]](s64) = G_LOAD [[Z0_FI]]{{.*}}load 8 -; CHECK: [[Z1_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Z1_ID]] -; CHECK: [[Z1:%[0-9]+]](s64) = G_LOAD [[Z1_FI]]{{.*}}load 8 -; CHECK: [[Z2_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Z2_ID]] -; CHECK: [[Z2:%[0-9]+]](s64) = G_LOAD [[Z2_FI]]{{.*}}load 8 -; CHECK: [[Z3_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Z3_ID]] -; CHECK: [[Z3:%[0-9]+]](s64) = G_LOAD [[Z3_FI]]{{.*}}load 8 -; CHECK: [[X_ARR:%[0-9]+]](s192) = G_MERGE_VALUES [[X0]](s64), [[X1]](s64), [[X2]](s64) -; CHECK: [[Y_ARR:%[0-9]+]](s96) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32), [[Y2]](s32) -; CHECK: [[Z_ARR:%[0-9]+]](s256) = G_MERGE_VALUES [[Z0]](s64), [[Z1]](s64), [[Z2]](s64), [[Z3]](s64) -; CHECK: ADJCALLSTACKDOWN 32, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[X0:%[0-9]+]](s64), [[X1:%[0-9]+]](s64), [[X2:%[0-9]+]](s64) = G_UNMERGE_VALUES [[X_ARR]](s192) -; CHECK: [[Y0:%[0-9]+]](s32), [[Y1:%[0-9]+]](s32), [[Y2:%[0-9]+]](s32) = G_UNMERGE_VALUES [[Y_ARR]](s96) -; CHECK: [[Z0:%[0-9]+]](s64), 
[[Z1:%[0-9]+]](s64), [[Z2:%[0-9]+]](s64), [[Z3:%[0-9]+]](s64) = G_UNMERGE_VALUES [[Z_ARR]](s256) -; CHECK: %d0 = COPY [[X0]](s64) -; CHECK: %d1 = COPY [[X1]](s64) -; CHECK: %d2 = COPY [[X2]](s64) -; CHECK: %s6 = COPY [[Y0]](s32) -; CHECK: %s7 = COPY [[Y1]](s32) -; CHECK: %s8 = COPY [[Y2]](s32) -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[Z0_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[Z0_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Z0_OFFSET]](s32) -; CHECK: G_STORE [[Z0]](s64), [[Z0_ADDR]](p0){{.*}}store 8 -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[Z1_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 8 -; CHECK: [[Z1_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Z1_OFFSET]](s32) -; CHECK: G_STORE [[Z1]](s64), [[Z1_ADDR]](p0){{.*}}store 8 -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[Z2_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 16 -; CHECK: [[Z2_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Z2_OFFSET]](s32) -; CHECK: G_STORE [[Z2]](s64), [[Z2_ADDR]](p0){{.*}}store 8 -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[Z3_OFFSET:%[0-9]+]](s32) = G_CONSTANT i32 24 -; CHECK: [[Z3_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Z3_OFFSET]](s32) -; CHECK: G_STORE [[Z3]](s64), [[Z3_ADDR]](p0){{.*}}store 8 -; CHECK: BLX @fp_arrays_aapcs_vfp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit %d2, implicit %s6, implicit %s7, implicit %s8, implicit-def %s0, implicit-def %s1, implicit-def %s2, implicit-def %s3 -; CHECK: [[R0:%[0-9]+]](s32) = COPY %s0 -; CHECK: [[R1:%[0-9]+]](s32) = COPY %s1 -; CHECK: [[R2:%[0-9]+]](s32) = COPY %s2 -; CHECK: [[R3:%[0-9]+]](s32) = COPY %s3 -; CHECK: [[R_MERGED:%[0-9]+]](s128) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32) -; CHECK: ADJCALLSTACKUP 32, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32), [[R3:%[0-9]+]](s32) = G_UNMERGE_VALUES [[R_MERGED]](s128) -; CHECK: %s0 = COPY [[R0]] -; CHECK: %s1 = COPY [[R1]] -; CHECK: %s2 = COPY [[R2]] -; CHECK: %s3 = COPY [[R3]] -; CHECK: BX_RET 14, _, implicit %s0, implicit %s1, implicit %s2, implicit %s3 -entry: - %r = notail call arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double] %x, [3 x float] %y, [4 x double] %z) - ret [4 x float] %r -} - -declare arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr) - -define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) { -; CHECK-LABEL: name: test_tough_arrays -; CHECK: fixedStack: -; The parameters live in separate stack locations, one for each element that -; doesn't fit in the registers. 
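(The same layout arithmetic as in test_large_int_arrays applies here, with [6 x [4 x i32]] flattened to 24 words: 4 travel in r0..r3 and the remaining 20 x 4 = 80 bytes go on the stack, so the checks below expect ADJCALLSTACKDOWN/ADJCALLSTACKUP 80 and a last offset of 80 - 4 = 76.)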
-; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4, -; CHECK-DAG: id: [[LAST_STACK_ID:[-0-9]+]], type: default, offset: 76, size: 4 -; CHECK: liveins: %r0, %r1, %r2, %r3 -; CHECK-DAG: [[R0:%[0-9]+]](s32) = COPY %r0 -; CHECK-DAG: [[R1:%[0-9]+]](s32) = COPY %r1 -; CHECK-DAG: [[R2:%[0-9]+]](s32) = COPY %r2 -; CHECK-DAG: [[R3:%[0-9]+]](s32) = COPY %r3 -; CHECK: [[FIRST_STACK_ELEMENT_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[FIRST_STACK_ID]] -; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]](s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]] -; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] -; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]](s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] -; CHECK: [[ARG_ARR:%[0-9]+]](s768) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32) -; CHECK: ADJCALLSTACKDOWN 80, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32), [[R2:%[0-9]+]](s32), [[R3:%[0-9]+]](s32), [[FIRST_STACK_ELEMENT:%[0-9]+]](s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]](s32) = G_UNMERGE_VALUES [[ARG_ARR]](s768) -; CHECK: %r0 = COPY [[R0]] -; CHECK: %r1 = COPY [[R1]] -; CHECK: %r2 = COPY [[R2]] -; CHECK: %r3 = COPY [[R3]] -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF_FIRST_ELEMENT:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK: [[FIRST_STACK_ARG_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF_FIRST_ELEMENT]](s32) -; CHECK: G_STORE [[FIRST_STACK_ELEMENT]](s32), [[FIRST_STACK_ARG_ADDR]]{{.*}}store 4 -; Match the second-to-last offset, so we can get the correct SP for the last element -; CHECK: G_CONSTANT i32 72 -; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp -; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]](s32) = G_CONSTANT i32 76 -; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[OFF_LAST_ELEMENT]](s32) -; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store 4 -; CHECK: BLX @tough_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 -; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 -; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1 -; CHECK: [[RES_ARR:%[0-9]+]](s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) -; CHECK: ADJCALLSTACKUP 80, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[RES_ARR]](s64) -; CHECK: %r0 = COPY [[R0]] -; CHECK: %r1 = COPY [[R1]] -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 -entry: - %r = notail call arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr) - ret [2 x i32*] %r -} - -declare arm_aapcscc {i32, i32} @structs_target({i32, i32}) - -define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x) { -; CHECK-LABEL: test_structs -; CHECK: liveins: %r0, %r1 -; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 -; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 -; CHECK: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) -; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[X0:%[0-9]+]](s32), [[X1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[X]](s64) -; CHECK-DAG: %r0 = COPY [[X0]](s32) -; CHECK-DAG: %r1 = COPY [[X1]](s32) -; CHECK: BLX @structs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 -; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0 -; CHECK: [[R1:%[0-9]+]](s32) 
= COPY %r1 -; CHECK: [[R:%[0-9]+]](s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) -; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp -; CHECK: [[R0:%[0-9]+]](s32), [[R1:%[0-9]+]](s32) = G_UNMERGE_VALUES [[R]](s64) -; CHECK: %r0 = COPY [[R0]](s32) -; CHECK: %r1 = COPY [[R1]](s32) -; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 - %r = notail call arm_aapcscc {i32, i32} @structs_target({i32, i32} %x) - ret {i32, i32} %r -} - define i32 @test_shufflevector_s32_v2s32(i32 %arg) { ; CHECK-LABEL: name: test_shufflevector_s32_v2s32 -; CHECK: [[ARG:%[0-9]+]](s32) = COPY %r0 -; CHECK-DAG: [[UNDEF:%[0-9]+]](s32) = G_IMPLICIT_DEF -; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32) -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], [[MASK]](<2 x s32>) +; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF +; CHECK-DAG: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK-DAG: [[MASK:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], [[MASK]](<2 x s32>) ; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>) %vec = insertelement <1 x i32> undef, i32 %arg, i32 0 %shuffle = shufflevector <1 x i32> %vec, <1 x i32> undef, <2 x i32> zeroinitializer @@ -923,15 +450,15 @@ define i32 @test_shufflevector_s32_v2s32(i32 %arg) { define i32 @test_shufflevector_v2s32_v3s32(i32 %arg1, i32 %arg2) { ; CHECK-LABEL: name: test_shufflevector_v2s32_v3s32 -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0 -; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1 -; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF -; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK-DAG: [[MASK:%[0-9]+]](<3 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32), [[C1]](s32) -; CHECK-DAG: [[V1:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32) -; CHECK-DAG: [[V2:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32) -; CHECK: [[VEC:%[0-9]+]](<3 x s32>) = G_SHUFFLE_VECTOR [[V2]](<2 x s32>), [[UNDEF]], [[MASK]](<3 x s32>) +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[ARG2:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF +; CHECK-DAG: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK-DAG: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK-DAG: [[MASK:%[0-9]+]]:_(<3 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C0]](s32), [[C1]](s32) +; CHECK-DAG: [[V1:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32) +; CHECK-DAG: [[V2:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<3 x s32>) = G_SHUFFLE_VECTOR [[V2]](<2 x s32>), [[UNDEF]], [[MASK]](<3 x s32>) ; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<3 x s32>) %v1 = insertelement <2 x i32> undef, i32 %arg1, i32 0 %v2 = insertelement <2 x i32> %v1, i32 %arg2, i32 1 @@ -943,15 +470,15 @@ define i32 @test_shufflevector_v2s32_v3s32(i32 %arg1, i32 %arg2) { define i32 @test_shufflevector_v2s32_v4s32(i32 %arg1, i32 %arg2) { ; CHECK-LABEL: name: test_shufflevector_v2s32_v4s32 -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0 -; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1 -; CHECK-DAG: [[UNDEF:%[0-9]+]](<2 x s32>) = G_IMPLICIT_DEF -; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; 
CHECK-DAG: [[MASK:%[0-9]+]](<4 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32), [[C0]](s32), [[C0]](s32) -; CHECK-DAG: [[V1:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32) -; CHECK-DAG: [[V2:%[0-9]+]](<2 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32) -; CHECK: [[VEC:%[0-9]+]](<4 x s32>) = G_SHUFFLE_VECTOR [[V2]](<2 x s32>), [[UNDEF]], [[MASK]](<4 x s32>) +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[ARG2:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF +; CHECK-DAG: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK-DAG: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK-DAG: [[MASK:%[0-9]+]]:_(<4 x s32>) = G_MERGE_VALUES [[C0]](s32), [[C0]](s32), [[C0]](s32), [[C0]](s32) +; CHECK-DAG: [[V1:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32) +; CHECK-DAG: [[V2:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[V2]](<2 x s32>), [[UNDEF]], [[MASK]](<4 x s32>) ; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<4 x s32>) %v1 = insertelement <2 x i32> undef, i32 %arg1, i32 0 %v2 = insertelement <2 x i32> %v1, i32 %arg2, i32 1 @@ -962,21 +489,21 @@ define i32 @test_shufflevector_v2s32_v4s32(i32 %arg1, i32 %arg2) { define i32 @test_shufflevector_v4s32_v2s32(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4) { ; CHECK-LABEL: name: test_shufflevector_v4s32_v2s32 -; CHECK: [[ARG1:%[0-9]+]](s32) = COPY %r0 -; CHECK: [[ARG2:%[0-9]+]](s32) = COPY %r1 -; CHECK: [[ARG3:%[0-9]+]](s32) = COPY %r2 -; CHECK: [[ARG4:%[0-9]+]](s32) = COPY %r3 -; CHECK-DAG: [[UNDEF:%[0-9]+]](<4 x s32>) = G_IMPLICIT_DEF -; CHECK-DAG: [[C0:%[0-9]+]](s32) = G_CONSTANT i32 0 -; CHECK-DAG: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK-DAG: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 -; CHECK-DAG: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3 -; CHECK-DAG: [[MASK:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C3]](s32) -; CHECK-DAG: [[V1:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32) -; CHECK-DAG: [[V2:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32) -; CHECK-DAG: [[V3:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[V2]], [[ARG3]](s32), [[C2]](s32) -; CHECK-DAG: [[V4:%[0-9]+]](<4 x s32>) = G_INSERT_VECTOR_ELT [[V3]], [[ARG4]](s32), [[C3]](s32) -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_SHUFFLE_VECTOR [[V4]](<4 x s32>), [[UNDEF]], [[MASK]](<2 x s32>) +; CHECK: [[ARG1:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[ARG2:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: [[ARG3:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK: [[ARG4:%[0-9]+]]:_(s32) = COPY %r3 +; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF +; CHECK-DAG: [[C0:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK-DAG: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK-DAG: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 +; CHECK-DAG: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 +; CHECK-DAG: [[MASK:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C3]](s32) +; CHECK-DAG: [[V1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[UNDEF]], [[ARG1]](s32), [[C0]](s32) +; CHECK-DAG: [[V2:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[V1]], [[ARG2]](s32), [[C1]](s32) +; CHECK-DAG: [[V3:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[V2]], [[ARG3]](s32), [[C2]](s32) +; CHECK-DAG: [[V4:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[V3]], [[ARG4]](s32), [[C3]](s32) +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[V4]](<4 x s32>), [[UNDEF]], [[MASK]](<2 x 
s32>) ; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>) %v1 = insertelement <4 x i32> undef, i32 %arg1, i32 0 %v2 = insertelement <4 x i32> %v1, i32 %arg2, i32 1 @@ -991,9 +518,9 @@ define i32 @test_shufflevector_v4s32_v2s32(i32 %arg1, i32 %arg2, i32 %arg3, i32 define i32 @test_constantstruct_v2s32() { ; CHECK-LABEL: name: test_constantstruct_v2s32 -; CHECK: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) +; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) ; CHECK: G_EXTRACT_VECTOR_ELT [[VEC]](<2 x s32>) %vec = extractvalue %struct.v2s32 {<2 x i32>}, 0 %elt = extractelement <2 x i32> %vec, i32 0 @@ -1004,16 +531,16 @@ define i32 @test_constantstruct_v2s32() { define i32 @test_constantstruct_v2s32_s32_s32() { ; CHECK-LABEL: name: test_constantstruct_v2s32_s32_s32 -; CHECK: [[C1:%[0-9]+]](s32) = G_CONSTANT i32 1 -; CHECK: [[C2:%[0-9]+]](s32) = G_CONSTANT i32 2 -; CHECK: [[VEC:%[0-9]+]](<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) -; CHECK: [[C3:%[0-9]+]](s32) = G_CONSTANT i32 3 -; CHECK: [[C4:%[0-9]+]](s32) = G_CONSTANT i32 4 -; CHECK: [[C5:%[0-9]+]](s128) = G_IMPLICIT_DEF -; CHECK: [[C6:%[0-9]+]](s128) = G_INSERT [[C5]], [[VEC]](<2 x s32>), 0 -; CHECK: [[C7:%[0-9]+]](s128) = G_INSERT [[C6]], [[C3]](s32), 64 -; CHECK: [[C8:%[0-9]+]](s128) = G_INSERT [[C7]], [[C4]](s32), 96 -; CHECK: [[EXT:%[0-9]+]](<2 x s32>) = G_EXTRACT [[C8]](s128), 0 +; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 +; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_MERGE_VALUES [[C1]](s32), [[C2]](s32) +; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 +; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 +; CHECK: [[C5:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF +; CHECK: [[C6:%[0-9]+]]:_(s128) = G_INSERT [[C5]], [[VEC]](<2 x s32>), 0 +; CHECK: [[C7:%[0-9]+]]:_(s128) = G_INSERT [[C6]], [[C3]](s32), 64 +; CHECK: [[C8:%[0-9]+]]:_(s128) = G_INSERT [[C7]], [[C4]](s32), 96 +; CHECK: [[EXT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[C8]](s128), 0 ; CHECK: G_EXTRACT_VECTOR_ELT [[EXT]](<2 x s32>) %vec = extractvalue %struct.v2s32.s32.s32 {<2 x i32>, i32 3, i32 4}, 0 %elt = extractelement <2 x i32> %vec, i32 0 diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel-divmod.ll b/test/CodeGen/ARM/GlobalISel/arm-isel-divmod.ll index c2e8c5abca4e1..5d83adeb42a81 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-isel-divmod.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-isel-divmod.ll @@ -1,4 +1,3 @@ -; We use V6 ops so we can easily check for the extensions (sxth vs bit tricks). 
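(The deleted comment refers to the two ways ARM can sign-extend a halfword; a minimal sketch of both forms, assuming standard ARM mnemonics:

  sxth r0, r0          @ v6 and later: a single extend instruction
  lsl  r0, r0, #16     @ pre-v6 "bit tricks": shift the value up...
  asr  r0, r0, #16     @ ...then arithmetic-shift it back down

Forcing +v6 made the sxth/uxth forms trivial to match in FileCheck; the rewritten checks below no longer match the extensions at all.)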
; RUN: llc -mtriple arm-gnueabi -mattr=+v6,+hwdiv-arm -global-isel %s -o - | FileCheck %s -check-prefixes=CHECK,HWDIV ; RUN: llc -mtriple arm-gnueabi -mattr=+v6,-hwdiv-arm -global-isel %s -o - | FileCheck %s -check-prefixes=CHECK,SOFT-AEABI ; RUN: llc -mtriple arm-gnu -mattr=+v6,+hwdiv-arm -global-isel %s -o - | FileCheck %s -check-prefixes=CHECK,HWDIV @@ -6,136 +5,108 @@ define arm_aapcscc i32 @test_sdiv_i32(i32 %a, i32 %b) { ; CHECK-LABEL: test_sdiv_i32: -; HWDIV: sdiv r0, r0, r1 -; SOFT-AEABI: blx __aeabi_idiv -; SOFT-DEFAULT: blx __divsi3 +; HWDIV: sdiv +; SOFT-AEABI: bl __aeabi_idiv +; SOFT-DEFAULT: bl __divsi3 %r = sdiv i32 %a, %b ret i32 %r } define arm_aapcscc i32 @test_udiv_i32(i32 %a, i32 %b) { ; CHECK-LABEL: test_udiv_i32: -; HWDIV: udiv r0, r0, r1 -; SOFT-AEABI: blx __aeabi_uidiv -; SOFT-DEFAULT: blx __udivsi3 +; HWDIV: udiv +; SOFT-AEABI: bl __aeabi_uidiv +; SOFT-DEFAULT: bl __udivsi3 %r = udiv i32 %a, %b ret i32 %r } define arm_aapcscc i16 @test_sdiv_i16(i16 %a, i16 %b) { ; CHECK-LABEL: test_sdiv_i16: -; CHECK-DAG: sxth r0, r0 -; CHECK-DAG: sxth r1, r1 -; HWDIV: sdiv r0, r0, r1 -; SOFT-AEABI: blx __aeabi_idiv -; SOFT-DEFAULT: blx __divsi3 +; HWDIV: sdiv +; SOFT-AEABI: bl __aeabi_idiv +; SOFT-DEFAULT: bl __divsi3 %r = sdiv i16 %a, %b ret i16 %r } define arm_aapcscc i16 @test_udiv_i16(i16 %a, i16 %b) { ; CHECK-LABEL: test_udiv_i16: -; CHECK-DAG: uxth r0, r0 -; CHECK-DAG: uxth r1, r1 -; HWDIV: udiv r0, r0, r1 -; SOFT-AEABI: blx __aeabi_uidiv -; SOFT-DEFAULT: blx __udivsi3 +; HWDIV: udiv +; SOFT-AEABI: bl __aeabi_uidiv +; SOFT-DEFAULT: bl __udivsi3 %r = udiv i16 %a, %b ret i16 %r } define arm_aapcscc i8 @test_sdiv_i8(i8 %a, i8 %b) { ; CHECK-LABEL: test_sdiv_i8: -; CHECK-DAG: sxtb r0, r0 -; CHECK-DAG: sxtb r1, r1 -; HWDIV: sdiv r0, r0, r1 -; SOFT-AEABI: blx __aeabi_idiv -; SOFT-DEFAULT: blx __divsi3 +; HWDIV: sdiv +; SOFT-AEABI: bl __aeabi_idiv +; SOFT-DEFAULT: bl __divsi3 %r = sdiv i8 %a, %b ret i8 %r } define arm_aapcscc i8 @test_udiv_i8(i8 %a, i8 %b) { ; CHECK-LABEL: test_udiv_i8: -; CHECK-DAG: uxtb r0, r0 -; CHECK-DAG: uxtb r1, r1 -; HWDIV: udiv r0, r0, r1 -; SOFT-AEABI: blx __aeabi_uidiv -; SOFT-DEFAULT: blx __udivsi3 +; HWDIV: udiv +; SOFT-AEABI: bl __aeabi_uidiv +; SOFT-DEFAULT: bl __udivsi3 %r = udiv i8 %a, %b ret i8 %r } define arm_aapcscc i32 @test_srem_i32(i32 %x, i32 %y) { ; CHECK-LABEL: test_srem_i32: -; HWDIV: sdiv [[Q:r[0-9]+]], r0, r1 -; HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 -; HWDIV: sub r0, r0, [[P]] -; SOFT-AEABI: blx __aeabi_idivmod -; SOFT-DEFAULT: blx __modsi3 +; HWDIV: sdiv +; SOFT-AEABI: bl __aeabi_idivmod +; SOFT-DEFAULT: bl __modsi3 %r = srem i32 %x, %y ret i32 %r } define arm_aapcscc i32 @test_urem_i32(i32 %x, i32 %y) { ; CHECK-LABEL: test_urem_i32: -; HWDIV: udiv [[Q:r[0-9]+]], r0, r1 -; HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 -; HWDIV: sub r0, r0, [[P]] -; SOFT-AEABI: blx __aeabi_uidivmod -; SOFT-DEFAULT: blx __umodsi3 +; HWDIV: udiv +; SOFT-AEABI: bl __aeabi_uidivmod +; SOFT-DEFAULT: bl __umodsi3 %r = urem i32 %x, %y ret i32 %r } define arm_aapcscc i16 @test_srem_i16(i16 %x, i16 %y) { ; CHECK-LABEL: test_srem_i16: -; CHECK-DAG: sxth r0, r0 -; CHECK-DAG: sxth r1, r1 -; HWDIV: sdiv [[Q:r[0-9]+]], r0, r1 -; HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 -; HWDIV: sub r0, r0, [[P]] -; SOFT-AEABI: blx __aeabi_idivmod -; SOFT-DEFAULT: blx __modsi3 +; HWDIV: sdiv +; SOFT-AEABI: bl __aeabi_idivmod +; SOFT-DEFAULT: bl __modsi3 %r = srem i16 %x, %y ret i16 %r } define arm_aapcscc i16 @test_urem_i16(i16 %x, i16 %y) { ; CHECK-LABEL: test_urem_i16: -; CHECK-DAG: uxth r0, r0 -; 
CHECK-DAG: uxth r1, r1 -; HWDIV: udiv [[Q:r[0-9]+]], r0, r1 -; HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 -; HWDIV: sub r0, r0, [[P]] -; SOFT-AEABI: blx __aeabi_uidivmod -; SOFT-DEFAULT: blx __umodsi3 +; HWDIV: udiv +; SOFT-AEABI: bl __aeabi_uidivmod +; SOFT-DEFAULT: bl __umodsi3 %r = urem i16 %x, %y ret i16 %r } define arm_aapcscc i8 @test_srem_i8(i8 %x, i8 %y) { ; CHECK-LABEL: test_srem_i8: -; CHECK-DAG: sxtb r0, r0 -; CHECK-DAG: sxtb r1, r1 -; HWDIV: sdiv [[Q:r[0-9]+]], r0, r1 -; HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 -; HWDIV: sub r0, r0, [[P]] -; SOFT-AEABI: blx __aeabi_idivmod -; SOFT-DEFAULT: blx __modsi3 +; HWDIV: sdiv +; SOFT-AEABI: bl __aeabi_idivmod +; SOFT-DEFAULT: bl __modsi3 %r = srem i8 %x, %y ret i8 %r } define arm_aapcscc i8 @test_urem_i8(i8 %x, i8 %y) { ; CHECK-LABEL: test_urem_i8: -; CHECK-DAG: uxtb r0, r0 -; CHECK-DAG: uxtb r1, r1 -; HWDIV: udiv [[Q:r[0-9]+]], r0, r1 -; HWDIV: mul [[P:r[0-9]+]], [[Q]], r1 -; HWDIV: sub r0, r0, [[P]] -; SOFT-AEABI: blx __aeabi_uidivmod -; SOFT-DEFAULT: blx __umodsi3 +; HWDIV: udiv +; SOFT-AEABI: bl __aeabi_uidivmod +; SOFT-DEFAULT: bl __umodsi3 %r = urem i8 %x, %y ret i8 %r } diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll b/test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll index 98b39e444ac77..3fd3de2db867f 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-isel-fp.ll @@ -4,14 +4,14 @@ define arm_aapcscc float @test_frem_float(float %x, float %y) { ; CHECK-LABEL: test_frem_float: -; CHECK: blx fmodf +; CHECK: bl fmodf %r = frem float %x, %y ret float %r } define arm_aapcscc double @test_frem_double(double %x, double %y) { ; CHECK-LABEL: test_frem_double: -; CHECK: blx fmod +; CHECK: bl fmod %r = frem double %x, %y ret double %r } @@ -19,7 +19,7 @@ define arm_aapcscc double @test_frem_double(double %x, double %y) { declare float @llvm.pow.f32(float %x, float %y) define arm_aapcscc float @test_fpow_float(float %x, float %y) { ; CHECK-LABEL: test_fpow_float: -; CHECK: blx powf +; CHECK: bl powf %r = call float @llvm.pow.f32(float %x, float %y) ret float %r } @@ -27,7 +27,7 @@ define arm_aapcscc float @test_fpow_float(float %x, float %y) { declare double @llvm.pow.f64(double %x, double %y) define arm_aapcscc double @test_fpow_double(double %x, double %y) { ; CHECK-LABEL: test_fpow_double: -; CHECK: blx pow +; CHECK: bl pow %r = call double @llvm.pow.f64(double %x, double %y) ret double %r } @@ -35,8 +35,8 @@ define arm_aapcscc double @test_fpow_double(double %x, double %y) { define arm_aapcscc float @test_add_float(float %x, float %y) { ; CHECK-LABEL: test_add_float: ; HARD: vadd.f32 -; SOFT-AEABI: blx __aeabi_fadd -; SOFT-DEFAULT: blx __addsf3 +; SOFT-AEABI: bl __aeabi_fadd +; SOFT-DEFAULT: bl __addsf3 %r = fadd float %x, %y ret float %r } @@ -44,8 +44,8 @@ define arm_aapcscc float @test_add_float(float %x, float %y) { define arm_aapcscc double @test_add_double(double %x, double %y) { ; CHECK-LABEL: test_add_double: ; HARD: vadd.f64 -; SOFT-AEABI: blx __aeabi_dadd -; SOFT-DEFAULT: blx __adddf3 +; SOFT-AEABI: bl __aeabi_dadd +; SOFT-DEFAULT: bl __adddf3 %r = fadd double %x, %y ret double %r } @@ -55,8 +55,8 @@ define arm_aapcs_vfpcc i32 @test_cmp_float_ogt(float %x, float %y) { ; HARD: vcmp.f32 ; HARD: vmrs APSR_nzcv, fpscr ; HARD-NEXT: movgt -; SOFT-AEABI: blx __aeabi_fcmpgt -; SOFT-DEFAULT: blx __gtsf2 +; SOFT-AEABI: bl __aeabi_fcmpgt +; SOFT-DEFAULT: bl __gtsf2 entry: %v = fcmp ogt float %x, %y %r = zext i1 %v to i32 @@ -70,10 +70,10 @@ define arm_aapcs_vfpcc i32 @test_cmp_float_one(float %x, float %y) { 
; HARD: movgt ; HARD-NOT: vcmp ; HARD: movmi -; SOFT-AEABI-DAG: blx __aeabi_fcmpgt -; SOFT-AEABI-DAG: blx __aeabi_fcmplt -; SOFT-DEFAULT-DAG: blx __gtsf2 -; SOFT-DEFAULT-DAG: blx __ltsf2 +; SOFT-AEABI-DAG: bl __aeabi_fcmpgt +; SOFT-AEABI-DAG: bl __aeabi_fcmplt +; SOFT-DEFAULT-DAG: bl __gtsf2 +; SOFT-DEFAULT-DAG: bl __ltsf2 entry: %v = fcmp one float %x, %y %r = zext i1 %v to i32 diff --git a/test/CodeGen/ARM/GlobalISel/arm-isel.ll b/test/CodeGen/ARM/GlobalISel/arm-isel.ll index 419bcf71c1065..3582122ba0578 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-isel.ll +++ b/test/CodeGen/ARM/GlobalISel/arm-isel.ll @@ -388,8 +388,6 @@ entry: define arm_aapcscc i32 @test_cmp_i16_slt(i16 %a, i16 %b) { ; CHECK-LABEL: test_cmp_i16_slt: -; CHECK-DAG: sxth r0, r0 -; CHECK-DAG: sxth r1, r1 ; CHECK-DAG: mov [[V:r[0-9]+]], #0 ; CHECK: cmp r0, r1 ; CHECK: movlt [[V]], #1 @@ -441,9 +439,9 @@ define arm_aapcscc void @test_brcond(i32 %n) { ; CHECK-NEXT: movgt [[RCMP:r[0-9]+]], #1 ; CHECK: tst [[RCMP]], #1 ; CHECK-NEXT: bne [[FALSE:.L[[:alnum:]_]+]] -; CHECK: blx brcond1 +; CHECK: bl brcond1 ; CHECK: [[FALSE]]: -; CHECK: blx brcond2 +; CHECK: bl brcond2 entry: %cmp = icmp sgt i32 %n, 0 br i1 %cmp, label %if.true, label %if.false diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-divmod.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-divmod.mir index f436c3774c869..996f5406b160f 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalize-divmod.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-divmod.mir @@ -37,19 +37,19 @@ body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; HWDIV: [[R:%[0-9]+]](s32) = G_SDIV [[X]], [[Y]] + ; HWDIV: [[R:%[0-9]+]]:_(s32) = G_SDIV [[X]], [[Y]] ; SOFT-NOT: G_SDIV ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-AEABI: [[R:%[0-9]+]](s32) = COPY %r0 - ; SOFT-DEFAULT: BLX $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s32) = COPY %r0 + ; SOFT-DEFAULT: BL $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SDIV %2(s32) = G_SDIV %0, %1 @@ -73,19 +73,19 @@ body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; HWDIV: [[R:%[0-9]+]](s32) = G_UDIV [[X]], [[Y]] + ; HWDIV: [[R:%[0-9]+]]:_(s32) = G_UDIV [[X]], [[Y]] ; SOFT-NOT: G_UDIV ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-AEABI: [[R:%[0-9]+]](s32) = COPY %r0 - ; SOFT-DEFAULT: BLX $__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s32) = COPY %r0 + ; SOFT-DEFAULT: BL $__udivsi3, {{.*}}, implicit %r0, implicit %r1, 
implicit-def %r0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UDIV %2(s32) = G_UDIV %0, %1 @@ -105,32 +105,47 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s16) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s16) = COPY %r1 - ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_SEXT [[X]](s16) - ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_SEXT [[Y]](s16) - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 - ; HWDIV: [[R32:%[0-9]+]](s32) = G_SDIV [[X32]], [[Y32]] + ; CHECK-DAG: [[R0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY %r1 + ; The G_TRUNC will combine with the extensions introduced by the legalizer, + ; leading to the following complicated sequences. + ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY [[R0]] + ; CHECK: [[SHIFTEDX:%[0-9]+]]:_(s32) = G_SHL [[X]], [[BITS]] + ; CHECK: [[X32:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDX]], [[BITS]] + ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[Y:%[0-9]+]]:_(s32) = COPY [[R1]] + ; CHECK: [[SHIFTEDY:%[0-9]+]]:_(s32) = G_SHL [[Y]], [[BITS]] + ; CHECK: [[Y32:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDY]], [[BITS]] + %0(s32) = COPY %r0 + %1(s16) = G_TRUNC %0(s32) + %2(s32) = COPY %r1 + %3(s16) = G_TRUNC %2(s32) + ; HWDIV: [[R32:%[0-9]+]]:_(s32) = G_SDIV [[X32]], [[Y32]] ; SOFT-NOT: G_SDIV ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BLX $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r0 - ; SOFT-DEFAULT: BLX $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r0 + ; SOFT-DEFAULT: BL $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SDIV - ; CHECK: [[R:%[0-9]+]](s16) = G_TRUNC [[R32]] + ; CHECK: [[R16:%[0-9]+]]:_(s16) = G_TRUNC [[R32]] + ; CHECK: [[R:%[0-9]+]]:_(s32) = G_SEXT [[R16]] ; SOFT-NOT: G_SDIV - %2(s16) = G_SDIV %0, %1 + %4(s16) = G_SDIV %1, %3 ; CHECK: %r0 = COPY [[R]] - %r0 = COPY %2(s16) + %5(s32) = G_SEXT %4(s16) + %r0 = COPY %5(s32) BX_RET 14, _, implicit %r0 ... --- @@ -145,32 +160,45 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s16) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s16) = COPY %r1 - ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_ZEXT [[X]](s16) - ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_ZEXT [[Y]](s16) - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 - ; HWDIV: [[R32:%[0-9]+]](s32) = G_UDIV [[X32]], [[Y32]] + ; CHECK-DAG: [[R0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY %r1 + ; The G_TRUNC will combine with the extensions introduced by the legalizer, + ; leading to the following complicated sequences. 
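(Concretely, the combine the comment describes: the legalizer widens each s16 operand with G_ZEXT, and G_ZEXT(G_TRUNC(x)) then folds into a mask, so the unsigned extension is spelled as an AND in the sequence checked next:

  %t(s16) = G_TRUNC %x(s32)
  %z(s32) = G_ZEXT %t(s16)
  ; ...folds into:
  %m(s32) = G_CONSTANT i32 65535     ; (1 << 16) - 1
  %z(s32) = G_AND %x, %m
)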
+ ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY [[R0]] + ; CHECK: [[X32:%[0-9]+]]:_(s32) = G_AND [[X]], [[BITS]] + ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[Y:%[0-9]+]]:_(s32) = COPY [[R1]] + ; CHECK: [[Y32:%[0-9]+]]:_(s32) = G_AND [[Y]], [[BITS]] + %0(s32) = COPY %r0 + %1(s16) = G_TRUNC %0(s32) + %2(s32) = COPY %r1 + %3(s16) = G_TRUNC %2(s32) + ; HWDIV: [[R32:%[0-9]+]]:_(s32) = G_UDIV [[X32]], [[Y32]] ; SOFT-NOT: G_UDIV ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BLX $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r0 - ; SOFT-DEFAULT: BLX $__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r0 + ; SOFT-DEFAULT: BL $__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UDIV - ; CHECK: [[R:%[0-9]+]](s16) = G_TRUNC [[R32]] + ; CHECK: [[R16:%[0-9]+]]:_(s16) = G_TRUNC [[R32]] + ; CHECK: [[R:%[0-9]+]]:_(s32) = G_ZEXT [[R16]] ; SOFT-NOT: G_UDIV - %2(s16) = G_UDIV %0, %1 + %4(s16) = G_UDIV %1, %3 ; CHECK: %r0 = COPY [[R]] - %r0 = COPY %2(s16) + %5(s32) = G_ZEXT %4(s16) + %r0 = COPY %5(s32) BX_RET 14, _, implicit %r0 ... --- @@ -185,32 +213,47 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s8) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s8) = COPY %r1 - ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_SEXT [[X]](s8) - ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_SEXT [[Y]](s8) - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 - ; HWDIV: [[R32:%[0-9]+]](s32) = G_SDIV [[X32]], [[Y32]] + ; CHECK-DAG: [[R0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY %r1 + ; The G_TRUNC will combine with the extensions introduced by the legalizer, + ; leading to the following complicated sequences. 
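(Here the widening is signed, so G_SEXT(G_TRUNC(x)) folds into the shift pair checked next instead of an AND:

  %t(s8)  = G_TRUNC %x(s32)
  %s(s32) = G_SEXT %t(s8)
  ; ...folds into:
  %c(s32)  = G_CONSTANT i32 24       ; 32 - 8
  %hi(s32) = G_SHL %x, %c
  %s(s32)  = G_ASHR %hi, %c
)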
+ ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY [[R0]] + ; CHECK: [[SHIFTEDX:%[0-9]+]]:_(s32) = G_SHL [[X]], [[BITS]] + ; CHECK: [[X32:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDX]], [[BITS]] + ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[Y:%[0-9]+]]:_(s32) = COPY [[R1]] + ; CHECK: [[SHIFTEDY:%[0-9]+]]:_(s32) = G_SHL [[Y]], [[BITS]] + ; CHECK: [[Y32:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDY]], [[BITS]] + %0(s32) = COPY %r0 + %1(s8) = G_TRUNC %0(s32) + %2(s32) = COPY %r1 + %3(s8) = G_TRUNC %2(s32) + ; HWDIV: [[R32:%[0-9]+]]:_(s32) = G_SDIV [[X32]], [[Y32]] ; SOFT-NOT: G_SDIV ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BLX $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r0 - ; SOFT-DEFAULT: BLX $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_idiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r0 + ; SOFT-DEFAULT: BL $__divsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SDIV - ; CHECK: [[R:%[0-9]+]](s8) = G_TRUNC [[R32]] + ; CHECK: [[R8:%[0-9]+]]:_(s8) = G_TRUNC [[R32]] + ; CHECK: [[R:%[0-9]+]]:_(s32) = G_SEXT [[R8]] ; SOFT-NOT: G_SDIV - %2(s8) = G_SDIV %0, %1 + %4(s8) = G_SDIV %1, %3 ; CHECK: %r0 = COPY [[R]] - %r0 = COPY %2(s8) + %5(s32) = G_SEXT %4(s8) + %r0 = COPY %5(s32) BX_RET 14, _, implicit %r0 ... --- @@ -225,32 +268,45 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s8) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s8) = COPY %r1 - ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_ZEXT [[X]](s8) - ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_ZEXT [[Y]](s8) - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 - ; HWDIV: [[R32:%[0-9]+]](s32) = G_UDIV [[X32]], [[Y32]] + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 + ; The G_TRUNC will combine with the extensions introduced by the legalizer, + ; leading to the following complicated sequences. 
+ ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY [[R0]] + ; CHECK: [[X32:%[0-9]+]]:_(s32) = G_AND [[X]], [[BITS]] + ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[Y:%[0-9]+]]:_(s32) = COPY [[R1]] + ; CHECK: [[Y32:%[0-9]+]]:_(s32) = G_AND [[Y]], [[BITS]] + %0(s32) = COPY %r0 + %1(s8) = G_TRUNC %0(s32) + %2(s32) = COPY %r1 + %3(s8) = G_TRUNC %2(s32) + ; HWDIV: [[R32:%[0-9]+]]:_(s32) = G_UDIV [[X32]], [[Y32]] ; SOFT-NOT: G_UDIV ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BLX $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r0 - ; SOFT-DEFAULT: BLX $__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_uidiv, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r0 + ; SOFT-DEFAULT: BL $__udivsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UDIV - ; CHECK: [[R:%[0-9]+]](s8) = G_TRUNC [[R32]] + ; CHECK: [[R8:%[0-9]+]]:_(s8) = G_TRUNC [[R32]] + ; CHECK: [[R:%[0-9]+]]:_(s32) = G_ZEXT [[R8]] ; SOFT-NOT: G_UDIV - %2(s8) = G_UDIV %0, %1 + %4(s8) = G_UDIV %1, %3 ; CHECK: %r0 = COPY [[R]] - %r0 = COPY %2(s8) + %5(s32) = G_ZEXT %4(s8) + %r0 = COPY %5(s32) BX_RET 14, _, implicit %r0 ... --- @@ -269,21 +325,21 @@ body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; HWDIV: [[Q:%[0-9]+]](s32) = G_SDIV [[X]], [[Y]] - ; HWDIV: [[P:%[0-9]+]](s32) = G_MUL [[Q]], [[Y]] - ; HWDIV: [[R:%[0-9]+]](s32) = G_SUB [[X]], [[P]] + ; HWDIV: [[Q:%[0-9]+]]:_(s32) = G_SDIV [[X]], [[Y]] + ; HWDIV: [[P:%[0-9]+]]:_(s32) = G_MUL [[Q]], [[Y]] + ; HWDIV: [[R:%[0-9]+]]:_(s32) = G_SUB [[X]], [[P]] ; SOFT-NOT: G_SREM ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 - ; SOFT-AEABI: [[R:%[0-9]+]](s32) = COPY %r1 - ; SOFT-DEFAULT: BLX $__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s32) = COPY %r1 + ; SOFT-DEFAULT: BL $__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SREM %2(s32) = G_SREM %0, %1 @@ -307,21 +363,21 @@ body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; HWDIV: [[Q:%[0-9]+]](s32) = G_UDIV [[X]], [[Y]] - ; HWDIV: [[P:%[0-9]+]](s32) = G_MUL [[Q]], [[Y]] - ; HWDIV: [[R:%[0-9]+]](s32) = G_SUB [[X]], [[P]] + ; HWDIV: [[Q:%[0-9]+]]:_(s32) = G_UDIV [[X]], [[Y]] + ; HWDIV: [[P:%[0-9]+]]:_(s32) = G_MUL [[Q]], [[Y]] + ; HWDIV: [[R:%[0-9]+]]:_(s32) = G_SUB [[X]], [[P]] ; SOFT-NOT: G_UREM ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; 
SOFT-AEABI: BLX $__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 - ; SOFT-AEABI: [[R:%[0-9]+]](s32) = COPY %r1 - ; SOFT-DEFAULT: BLX $__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s32) = COPY %r1 + ; SOFT-DEFAULT: BL $__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UREM %2(s32) = G_UREM %0, %1 @@ -341,34 +397,49 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s16) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s16) = COPY %r1 - ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_SEXT [[X]](s16) - ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_SEXT [[Y]](s16) - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 - ; HWDIV: [[Q32:%[0-9]+]](s32) = G_SDIV [[X32]], [[Y32]] - ; HWDIV: [[P32:%[0-9]+]](s32) = G_MUL [[Q32]], [[Y32]] - ; HWDIV: [[R32:%[0-9]+]](s32) = G_SUB [[X32]], [[P32]] + ; CHECK-DAG: [[R0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY %r1 + ; The G_TRUNC will combine with the extensions introduced by the legalizer, + ; leading to the following complicated sequences. + ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY [[R0]] + ; CHECK: [[SHIFTEDX:%[0-9]+]]:_(s32) = G_SHL [[X]], [[BITS]] + ; CHECK: [[X32:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDX]], [[BITS]] + ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[Y:%[0-9]+]]:_(s32) = COPY [[R1]] + ; CHECK: [[SHIFTEDY:%[0-9]+]]:_(s32) = G_SHL [[Y]], [[BITS]] + ; CHECK: [[Y32:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDY]], [[BITS]] + %0(s32) = COPY %r0 + %1(s16) = G_TRUNC %0(s32) + %2(s32) = COPY %r1 + %3(s16) = G_TRUNC %2(s32) + ; HWDIV: [[Q32:%[0-9]+]]:_(s32) = G_SDIV [[X32]], [[Y32]] + ; HWDIV: [[P32:%[0-9]+]]:_(s32) = G_MUL [[Q32]], [[Y32]] + ; HWDIV: [[R32:%[0-9]+]]:_(s32) = G_SUB [[X32]], [[P32]] ; SOFT-NOT: G_SREM ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BLX $__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r1 - ; SOFT-DEFAULT: BLX $__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r1 + ; SOFT-DEFAULT: BL $__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SREM - ; CHECK: [[R:%[0-9]+]](s16) = G_TRUNC [[R32]] + ; CHECK: [[R16:%[0-9]+]]:_(s16) = G_TRUNC [[R32]] + ; CHECK: [[R:%[0-9]+]]:_(s32) = G_SEXT [[R16]] ; SOFT-NOT: G_SREM - %2(s16) = G_SREM %0, %1 + %4(s16) = G_SREM %1, %3 ; CHECK: %r0 = COPY [[R]] - %r0 = COPY %2(s16) + %5(s32) = G_SEXT %4(s16) + %r0 = COPY %5(s32) BX_RET 14, _, implicit %r0 ... 
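(A note on a difference that recurs through these rem tests: the AEABI helpers __aeabi_idivmod/__aeabi_uidivmod return { quotient, remainder } in { r0, r1 }, while the default __modsi3/__umodsi3 helpers return the remainder itself, hence:

  ; SOFT-AEABI:   [[R32:%[0-9]+]]:_(s32) = COPY %r1    ; remainder comes back in r1
  ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0    ; remainder is the return value
)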
--- @@ -383,34 +454,47 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s16) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s16) = COPY %r1 - ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_ZEXT [[X]](s16) - ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_ZEXT [[Y]](s16) - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 - ; HWDIV: [[Q32:%[0-9]+]](s32) = G_UDIV [[X32]], [[Y32]] - ; HWDIV: [[P32:%[0-9]+]](s32) = G_MUL [[Q32]], [[Y32]] - ; HWDIV: [[R32:%[0-9]+]](s32) = G_SUB [[X32]], [[P32]] + ; CHECK-DAG: [[R0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY %r1 + ; The G_TRUNC will combine with the extensions introduced by the legalizer, + ; leading to the following complicated sequences. + ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY [[R0]] + ; CHECK: [[X32:%[0-9]+]]:_(s32) = G_AND [[X]], [[BITS]] + ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[Y:%[0-9]+]]:_(s32) = COPY [[R1]] + ; CHECK: [[Y32:%[0-9]+]]:_(s32) = G_AND [[Y]], [[BITS]] + %0(s32) = COPY %r0 + %1(s16) = G_TRUNC %0(s32) + %2(s32) = COPY %r1 + %3(s16) = G_TRUNC %2(s32) + ; HWDIV: [[Q32:%[0-9]+]]:_(s32) = G_UDIV [[X32]], [[Y32]] + ; HWDIV: [[P32:%[0-9]+]]:_(s32) = G_MUL [[Q32]], [[Y32]] + ; HWDIV: [[R32:%[0-9]+]]:_(s32) = G_SUB [[X32]], [[P32]] ; SOFT-NOT: G_UREM ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BLX $__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r1 - ; SOFT-DEFAULT: BLX $__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r1 + ; SOFT-DEFAULT: BL $__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UREM - ; CHECK: [[R:%[0-9]+]](s16) = G_TRUNC [[R32]] + ; CHECK: [[R16:%[0-9]+]]:_(s16) = G_TRUNC [[R32]] + ; CHECK: [[R:%[0-9]+]]:_(s32) = G_ZEXT [[R16]] ; SOFT-NOT: G_UREM - %2(s16) = G_UREM %0, %1 + %4(s16) = G_UREM %1, %3 ; CHECK: %r0 = COPY [[R]] - %r0 = COPY %2(s16) + %5(s32) = G_ZEXT %4(s16) + %r0 = COPY %5(s32) BX_RET 14, _, implicit %r0 ... --- @@ -425,34 +509,49 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s8) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s8) = COPY %r1 - ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_SEXT [[X]](s8) - ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_SEXT [[Y]](s8) - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 - ; HWDIV: [[Q32:%[0-9]+]](s32) = G_SDIV [[X32]], [[Y32]] - ; HWDIV: [[P32:%[0-9]+]](s32) = G_MUL [[Q32]], [[Y32]] - ; HWDIV: [[R32:%[0-9]+]](s32) = G_SUB [[X32]], [[P32]] + ; CHECK-DAG: [[R0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY %r1 + ; The G_TRUNC will combine with the extensions introduced by the legalizer, + ; leading to the following complicated sequences. 
+ ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY [[R0]] + ; CHECK: [[SHIFTEDX:%[0-9]+]]:_(s32) = G_SHL [[X]], [[BITS]] + ; CHECK: [[X32:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDX]], [[BITS]] + ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[Y:%[0-9]+]]:_(s32) = COPY [[R1]] + ; CHECK: [[SHIFTEDY:%[0-9]+]]:_(s32) = G_SHL [[Y]], [[BITS]] + ; CHECK: [[Y32:%[0-9]+]]:_(s32) = G_ASHR [[SHIFTEDY]], [[BITS]] + %0(s32) = COPY %r0 + %1(s8) = G_TRUNC %0(s32) + %2(s32) = COPY %r1 + %3(s8) = G_TRUNC %2(s32) + ; HWDIV: [[Q32:%[0-9]+]]:_(s32) = G_SDIV [[X32]], [[Y32]] + ; HWDIV: [[P32:%[0-9]+]]:_(s32) = G_MUL [[Q32]], [[Y32]] + ; HWDIV: [[R32:%[0-9]+]]:_(s32) = G_SUB [[X32]], [[P32]] ; SOFT-NOT: G_SREM ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BLX $__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r1 - ; SOFT-DEFAULT: BLX $__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_idivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r1 + ; SOFT-DEFAULT: BL $__modsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_SREM - ; CHECK: [[R:%[0-9]+]](s8) = G_TRUNC [[R32]] + ; CHECK: [[R8:%[0-9]+]]:_(s8) = G_TRUNC [[R32]] + ; CHECK: [[R:%[0-9]+]]:_(s32) = G_SEXT [[R8]] ; SOFT-NOT: G_SREM - %2(s8) = G_SREM %0, %1 + %4(s8) = G_SREM %1, %3 ; CHECK: %r0 = COPY [[R]] - %r0 = COPY %2(s8) + %5(s32) = G_SEXT %4(s8) + %r0 = COPY %5(s32) BX_RET 14, _, implicit %r0 ... --- @@ -467,33 +566,46 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s8) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s8) = COPY %r1 - ; CHECK-DAG: [[X32:%[0-9]+]](s32) = G_ZEXT [[X]](s8) - ; CHECK-DAG: [[Y32:%[0-9]+]](s32) = G_ZEXT [[Y]](s8) - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 - ; HWDIV: [[Q32:%[0-9]+]](s32) = G_UDIV [[X32]], [[Y32]] - ; HWDIV: [[P32:%[0-9]+]](s32) = G_MUL [[Q32]], [[Y32]] - ; HWDIV: [[R32:%[0-9]+]](s32) = G_SUB [[X32]], [[P32]] + ; CHECK-DAG: [[R0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY %r1 + ; The G_TRUNC will combine with the extensions introduced by the legalizer, + ; leading to the following complicated sequences. 
+ ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[X:%[0-9]+]]:_(s32) = COPY [[R0]] + ; CHECK: [[X32:%[0-9]+]]:_(s32) = G_AND [[X]], [[BITS]] + ; CHECK: [[BITS:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK: [[Y:%[0-9]+]]:_(s32) = COPY [[R1]] + ; CHECK: [[Y32:%[0-9]+]]:_(s32) = G_AND [[Y]], [[BITS]] + %0(s32) = COPY %r0 + %1(s8) = G_TRUNC %0(s32) + %2(s32) = COPY %r1 + %3(s8) = G_TRUNC %2(s32) + ; HWDIV: [[Q32:%[0-9]+]]:_(s32) = G_UDIV [[X32]], [[Y32]] + ; HWDIV: [[P32:%[0-9]+]]:_(s32) = G_MUL [[Q32]], [[Y32]] + ; HWDIV: [[R32:%[0-9]+]]:_(s32) = G_SUB [[X32]], [[P32]] ; SOFT-NOT: G_UREM ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X32]] ; SOFT-DAG: %r1 = COPY [[Y32]] - ; SOFT-AEABI: BLX $__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-AEABI: [[R32:%[0-9]+]](s32) = COPY %r1 - ; SOFT-DEFAULT: BLX $__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: [[R32:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_uidivmod, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-AEABI: [[R32:%[0-9]+]]:_(s32) = COPY %r1 + ; SOFT-DEFAULT: BL $__umodsi3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: [[R32:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_UREM - ; CHECK: [[R:%[0-9]+]](s8) = G_TRUNC [[R32]] + ; CHECK: [[R8:%[0-9]+]]:_(s8) = G_TRUNC [[R32]] + ; CHECK: [[R:%[0-9]+]]:_(s32) = G_ZEXT [[R8]] ; SOFT-NOT: G_UREM - %2(s8) = G_UREM %0, %1 + %4(s8) = G_UREM %1, %3 ; CHECK: %r0 = COPY [[R]] - %r0 = COPY %2(s8) + %5(s32) = G_ZEXT %4(s8) + %r0 = COPY %5(s32) BX_RET 14, _, implicit %r0 ... diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir index 8ef1c065822de..bdb064a9c18df 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalize-fp.mir @@ -65,8 +65,8 @@ body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %0(s32) = COPY %r0 %1(s32) = COPY %r1 ; CHECK-NOT: G_FREM @@ -75,10 +75,10 @@ body: | ; SOFT-DAG: %r1 = COPY [[Y]] ; HARD-DAG: %s0 = COPY [[X]] ; HARD-DAG: %s1 = COPY [[Y]] - ; SOFT: BLX $fmodf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; HARD: BLX $fmodf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0 - ; SOFT: [[R:%[0-9]+]](s32) = COPY %r0 - ; HARD: [[R:%[0-9]+]](s32) = COPY %s0 + ; SOFT: BL $fmodf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; HARD: BL $fmodf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0 + ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY %r0 + ; HARD: [[R:%[0-9]+]]:_(s32) = COPY %s0 ; CHECK: ADJCALLSTACKUP ; CHECK-NOT: G_FREM %2(s32) = G_FREM %0, %1 @@ -114,16 +114,16 @@ body: | ; through R0-R1, ends up in R0-R1 or R1-R0, and the second value, received ; through R2-R3, ends up in R2-R3 or R3-R2, when passed to fmod. ; For hard float, the values need to end up in D0 and D1. 
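(A minimal sketch of the situation the comment describes; the same applies to the pow test further down:

  ; soft float: each s64 is split into two s32 halves, x in r0-r1 and y in
  ; r2-r3, in an endianness-dependent order, before the call to fmod
  ; hard float: the s64 values are passed directly in d0 and d1
  %x(s64) = G_MERGE_VALUES %x0(s32), %x1(s32)
  %y(s64) = G_MERGE_VALUES %y0(s32), %y1(s32)
  %r(s64) = G_FREM %x, %y            ; legalized to a libcall to fmod
)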
- ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]] - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]] + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]] %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) ; CHECK-NOT: G_FREM @@ -134,8 +134,8 @@ body: | ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] ; HARD-DAG: %d0 = COPY [[X]] ; HARD-DAG: %d1 = COPY [[Y]] - ; SOFT: BLX $fmod, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 - ; HARD: BLX $fmod, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 + ; SOFT: BL $fmod, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; HARD: BL $fmod, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 ; CHECK: ADJCALLSTACKUP ; CHECK-NOT: G_FREM %6(s64) = G_FREM %4, %5 @@ -160,8 +160,8 @@ body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %0(s32) = COPY %r0 %1(s32) = COPY %r1 ; CHECK-NOT: G_FPOW @@ -170,10 +170,10 @@ body: | ; SOFT-DAG: %r1 = COPY [[Y]] ; HARD-DAG: %s0 = COPY [[X]] ; HARD-DAG: %s1 = COPY [[Y]] - ; SOFT: BLX $powf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; HARD: BLX $powf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0 - ; SOFT: [[R:%[0-9]+]](s32) = COPY %r0 - ; HARD: [[R:%[0-9]+]](s32) = COPY %s0 + ; SOFT: BL $powf, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; HARD: BL $powf, {{.*}}, implicit %s0, implicit %s1, implicit-def %s0 + ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY %r0 + ; HARD: [[R:%[0-9]+]]:_(s32) = COPY %s0 ; CHECK: ADJCALLSTACKUP ; CHECK-NOT: G_FPOW %2(s32) = G_FPOW %0, %1 @@ -209,16 +209,16 @@ body: | ; through R0-R1, ends up in R0-R1 or R1-R0, and the second value, received ; through R2-R3, ends up in R2-R3 or R3-R2, when passed to pow. ; For hard float, the values need to end up in D0 and D1. 
- ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]] - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]] + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]] %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) ; CHECK-NOT: G_FPOW @@ -229,8 +229,8 @@ body: | ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] ; HARD-DAG: %d0 = COPY [[X]] ; HARD-DAG: %d1 = COPY [[Y]] - ; SOFT: BLX $pow, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 - ; HARD: BLX $pow, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 + ; SOFT: BL $pow, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; HARD: BL $pow, {{.*}}, implicit %d0, implicit %d1, implicit-def %d0 ; CHECK: ADJCALLSTACKUP ; CHECK-NOT: G_FPOW %6(s64) = G_FPOW %4, %5 @@ -255,18 +255,18 @@ body: | bb.0: liveins: %r0, %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; HARD: [[R:%[0-9]+]](s32) = G_FADD [[X]], [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s32) = G_FADD [[X]], [[Y]] ; SOFT-NOT: G_FADD ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fadd, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__addsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[R:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fadd, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__addsf3, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[R:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_FADD %2(s32) = G_FADD %0, %1 @@ -296,27 +296,27 @@ body: | bb.0: liveins: %r0, %r1, %r2, %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %0(s32) = COPY %r0 %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]] - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]] + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]] + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]] %4(s64) = G_MERGE_VALUES %0(s32), %1(s32) %5(s64) = G_MERGE_VALUES %2(s32), %3(s32) - ; HARD: [[R:%[0-9]+]](s64) = G_FADD [[X]], [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s64) = G_FADD [[X]], [[Y]] ; SOFT-NOT: G_FADD ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r{{[0-1]}} = COPY [[X0]] ; SOFT-DAG: %r{{[0-1]}} = COPY [[X1]] ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y0]] ; SOFT-DAG: %r{{[2-3]}} = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dadd, {{.*}}, implicit %r0, implicit %r1, implicit %r2, 
implicit %r3, implicit-def %r0, implicit-def %r1 - ; SOFT-DEFAULT: BLX $__adddf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-AEABI: BL $__aeabi_dadd, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 + ; SOFT-DEFAULT: BL $__adddf3, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 ; SOFT: ADJCALLSTACKUP ; SOFT-NOT: G_FADD %6(s64) = G_FADD %4, %5 @@ -345,16 +345,16 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(true), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(true), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(true), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP - ; SOFT: [[REXT:%[0-9]+]](s32) = G_CONSTANT i32 -1 - ; SOFT: [[R:%[0-9]+]](s1) = G_TRUNC [[REXT]](s32) + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; SOFT: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[REXT]](s32) ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -378,16 +378,16 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(false), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(false), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(false), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP - ; SOFT: [[REXT:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT: [[R:%[0-9]+]](s1) = G_TRUNC [[REXT]](s32) + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[REXT]](s32) ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -411,24 +411,24 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(oeq), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(oeq), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__eqsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__eqsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC 
[[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -452,24 +452,24 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(ogt), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ogt), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__gtsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__gtsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sgt), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -493,24 +493,24 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(oge), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(oge), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmpge, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__gesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmpge, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__gesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sge), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -534,24 +534,24 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(olt), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(olt), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(slt), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -575,24 +575,24 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(ole), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ole), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ole), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmple, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__lesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmple, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__lesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sle), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -616,23 +616,23 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: 
[[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(ord), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ord), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -656,24 +656,24 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(ugt), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ugt), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ugt), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmple, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__lesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmple, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__lesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sgt), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -697,24 +697,24 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(uge), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(uge), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(uge), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; 
SOFT-DEFAULT: BLX $__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sge), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -738,24 +738,24 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(ult), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ult), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ult), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmpge, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__gesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmpge, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__gesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(slt), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -779,24 +779,24 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(ule), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ule), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ule), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__gtsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__gtsf2, 
{{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sle), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -820,24 +820,24 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(une), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(une), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(une), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__nesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__nesf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(ne), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -861,24 +861,24 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(uno), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(uno), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(uno), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; 
SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(ne), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -902,39 +902,39 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(one), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(one), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(one), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__gtsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET1:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__gtsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET1:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R1:%[0-9]+]](s1) = G_ICMP intpred(sgt), [[RET1]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[RET1]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET2:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmplt, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__ltsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET2:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R2:%[0-9]+]](s1) = G_ICMP intpred(slt), [[RET2]](s32), [[ZERO]] - ; SOFT-AEABI: [[R1EXT:%[0-9]+]](s32) = COPY [[RET1]] - ; SOFT-AEABI: [[R2EXT:%[0-9]+]](s32) = COPY [[RET2]] - ; SOFT-DEFAULT: [[R1EXT:%[0-9]+]](s32) = G_ANYEXT [[R1]] - ; SOFT-DEFAULT: [[R2EXT:%[0-9]+]](s32) = G_ANYEXT [[R2]] - ; SOFT: [[REXT:%[0-9]+]](s32) = G_OR [[R1EXT]], [[R2EXT]] - ; SOFT: [[R:%[0-9]+]](s1) = G_TRUNC [[REXT]] + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[RET2]](s32), [[ZERO]] + ; SOFT-AEABI: [[R1EXT:%[0-9]+]]:_(s32) = COPY [[RET1]] + ; SOFT-AEABI: [[R2EXT:%[0-9]+]]:_(s32) = COPY [[RET2]] + ; SOFT-DEFAULT: [[R1EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[R1]] + ; SOFT-DEFAULT: [[R2EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[R2]] + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_OR [[R1EXT]], [[R2EXT]] + ; SOFT: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[REXT]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ 
-958,39 +958,39 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK-DAG: [[X:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[Y:%[0-9]+]](s32) = COPY %r1 + ; CHECK-DAG: [[X:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[Y:%[0-9]+]]:_(s32) = COPY %r1 %2(s1) = G_FCMP floatpred(ueq), %0(s32), %1 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ueq), [[X]](s32), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ueq), [[X]](s32), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__eqsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET1:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__eqsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET1:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R1:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET1]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET1]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X]] ; SOFT-DAG: %r1 = COPY [[Y]] - ; SOFT-AEABI: BLX $__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 - ; SOFT: [[RET2:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_fcmpun, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT-DEFAULT: BL $__unordsf2, {{.*}}, implicit %r0, implicit %r1, implicit-def %r0 + ; SOFT: [[RET2:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R2:%[0-9]+]](s1) = G_ICMP intpred(ne), [[RET2]](s32), [[ZERO]] - ; SOFT-AEABI: [[R1EXT:%[0-9]+]](s32) = COPY [[RET1]] - ; SOFT-AEABI: [[R2EXT:%[0-9]+]](s32) = COPY [[RET2]] - ; SOFT-DEFAULT: [[R1EXT:%[0-9]+]](s32) = G_ANYEXT [[R1]] - ; SOFT-DEFAULT: [[R2EXT:%[0-9]+]](s32) = G_ANYEXT [[R2]] - ; SOFT: [[REXT:%[0-9]+]](s32) = G_OR [[R1EXT]], [[R2EXT]] - ; SOFT: [[R:%[0-9]+]](s1) = G_TRUNC [[REXT]] + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[RET2]](s32), [[ZERO]] + ; SOFT-AEABI: [[R1EXT:%[0-9]+]]:_(s32) = COPY [[RET1]] + ; SOFT-AEABI: [[R2EXT:%[0-9]+]]:_(s32) = COPY [[RET2]] + ; SOFT-DEFAULT: [[R1EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[R1]] + ; SOFT-DEFAULT: [[R2EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[R2]] + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_OR [[R1EXT]], [[R2EXT]] + ; SOFT: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[REXT]] ; SOFT-NOT: G_FCMP %3(s32) = G_ZEXT %2(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %3(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1020,22 +1020,22 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 
%5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(true), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(true), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(true), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP - ; SOFT: [[REXT:%[0-9]+]](s32) = G_CONSTANT i32 -1 - ; SOFT: [[R:%[0-9]+]](s1) = G_TRUNC [[REXT]](s32) + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; SOFT: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[REXT]](s32) ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1065,22 +1065,22 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(false), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(false), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(false), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP - ; SOFT: [[REXT:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT: [[R:%[0-9]+]](s1) = G_TRUNC [[REXT]](s32) + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[REXT]](s32) ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1110,32 +1110,32 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(oeq), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(oeq), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; 
SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__eqdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__eqdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1165,32 +1165,32 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(ogt), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ogt), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sgt), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = 
G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1220,32 +1220,32 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(oge), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(oge), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmpge, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__gedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmpge, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__gedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sge), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1275,32 +1275,32 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(olt), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(olt), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(olt), 
[[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(slt), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1330,32 +1330,32 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(ole), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ole), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ole), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmple, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__ledf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmple, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__ledf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sle), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[RET]](s32), [[ZERO]] ; SOFT-NOT: 
G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1385,31 +1385,31 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(ord), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ord), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1439,32 +1439,32 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(ugt), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ugt), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ugt), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = 
COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmple, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__ledf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmple, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__ledf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sgt), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1494,32 +1494,32 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(uge), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(uge), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(uge), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sge), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT 
%6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1549,32 +1549,32 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(ult), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ult), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ult), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmpge, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__gedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmpge, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__gedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(slt), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1604,32 +1604,32 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(ule), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ule), 
[[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ule), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(sle), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1659,32 +1659,32 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(une), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(une), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(une), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__nedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__nedf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET]](s32), [[ZERO]] - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(ne), [[RET]](s32), [[ZERO]] + ; SOFT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_ICMP 
intpred(eq), [[RET]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1714,32 +1714,32 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(uno), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(uno), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(uno), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-AEABI: [[R:%[0-9]+]](s1) = G_TRUNC [[RET]](s32) - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R:%[0-9]+]](s1) = G_ICMP intpred(ne), [[RET]](s32), [[ZERO]] + ; SOFT-AEABI: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[RET]](s32) + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[RET]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1769,49 +1769,49 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: [[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(one), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(one), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(one), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET1:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmpgt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__gtdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET1:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R1:%[0-9]+]](s1) = G_ICMP intpred(sgt), [[RET1]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[RET1]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET2:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmplt, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__ltdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET2:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R2:%[0-9]+]](s1) = G_ICMP intpred(slt), [[RET2]](s32), [[ZERO]] - ; SOFT-AEABI: [[R1EXT:%[0-9]+]](s32) = COPY [[RET1]] - ; SOFT-AEABI: [[R2EXT:%[0-9]+]](s32) = COPY [[RET2]] - ; SOFT-DEFAULT: [[R1EXT:%[0-9]+]](s32) = G_ANYEXT [[R1]] - ; SOFT-DEFAULT: [[R2EXT:%[0-9]+]](s32) = G_ANYEXT [[R2]] - ; SOFT: [[REXT:%[0-9]+]](s32) = G_OR [[R1EXT]], [[R2EXT]] - ; SOFT: [[R:%[0-9]+]](s1) = G_TRUNC [[REXT]] + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[RET2]](s32), [[ZERO]] + ; SOFT-AEABI: [[R1EXT:%[0-9]+]]:_(s32) = COPY [[RET1]] + ; SOFT-AEABI: [[R2EXT:%[0-9]+]]:_(s32) = COPY [[RET2]] + ; SOFT-DEFAULT: [[R1EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[R1]] + ; SOFT-DEFAULT: [[R2EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[R2]] + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_OR [[R1EXT]], [[R2EXT]] + ; SOFT: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[REXT]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 @@ -1841,49 +1841,49 @@ body: | %1(s32) = COPY %r1 %2(s32) = COPY %r2 %3(s32) = COPY %r3 - ; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0 - ; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1 - ; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2 - ; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3 + ; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0 + ; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1 + ; CHECK-DAG: 
[[Y0:%[0-9]+]]:_(s32) = COPY %r2 + ; CHECK-DAG: [[Y1:%[0-9]+]]:_(s32) = COPY %r3 %4(s64) = G_MERGE_VALUES %0(s32), %1 %5(s64) = G_MERGE_VALUES %2(s32), %3 - ; HARD-DAG: [[X:%[0-9]+]](s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) - ; HARD-DAG: [[Y:%[0-9]+]](s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) + ; HARD-DAG: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32) + ; HARD-DAG: [[Y:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32) %6(s1) = G_FCMP floatpred(ueq), %4(s64), %5 - ; HARD: [[R:%[0-9]+]](s1) = G_FCMP floatpred(ueq), [[X]](s64), [[Y]] + ; HARD: [[R:%[0-9]+]]:_(s1) = G_FCMP floatpred(ueq), [[X]](s64), [[Y]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__eqdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET1:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmpeq, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__eqdf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET1:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R1:%[0-9]+]](s1) = G_ICMP intpred(eq), [[RET1]](s32), [[ZERO]] + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[RET1]](s32), [[ZERO]] ; SOFT-NOT: G_FCMP ; SOFT: ADJCALLSTACKDOWN ; SOFT-DAG: %r0 = COPY [[X0]] ; SOFT-DAG: %r1 = COPY [[X1]] ; SOFT-DAG: %r2 = COPY [[Y0]] ; SOFT-DAG: %r3 = COPY [[Y1]] - ; SOFT-AEABI: BLX $__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT-DEFAULT: BLX $__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 - ; SOFT: [[RET2:%[0-9]+]](s32) = COPY %r0 + ; SOFT-AEABI: BL $__aeabi_dcmpun, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT-DEFAULT: BL $__unorddf2, {{.*}}, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 + ; SOFT: [[RET2:%[0-9]+]]:_(s32) = COPY %r0 ; SOFT: ADJCALLSTACKUP - ; SOFT-DEFAULT: [[ZERO:%[0-9]+]](s32) = G_CONSTANT i32 0 - ; SOFT-DEFAULT: [[R2:%[0-9]+]](s1) = G_ICMP intpred(ne), [[RET2]](s32), [[ZERO]] - ; SOFT-AEABI: [[R1EXT:%[0-9]+]](s32) = COPY [[RET1]] - ; SOFT-AEABI: [[R2EXT:%[0-9]+]](s32) = COPY [[RET2]] - ; SOFT-DEFAULT: [[R1EXT:%[0-9]+]](s32) = G_ANYEXT [[R1]] - ; SOFT-DEFAULT: [[R2EXT:%[0-9]+]](s32) = G_ANYEXT [[R2]] - ; SOFT: [[REXT:%[0-9]+]](s32) = G_OR [[R1EXT]], [[R2EXT]] - ; SOFT: [[R:%[0-9]+]](s1) = G_TRUNC [[REXT]] + ; SOFT-DEFAULT: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SOFT-DEFAULT: [[R2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[RET2]](s32), [[ZERO]] + ; SOFT-AEABI: [[R1EXT:%[0-9]+]]:_(s32) = COPY [[RET1]] + ; SOFT-AEABI: [[R2EXT:%[0-9]+]]:_(s32) = COPY [[RET2]] + ; SOFT-DEFAULT: [[R1EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[R1]] + ; SOFT-DEFAULT: [[R2EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[R2]] + ; SOFT: [[REXT:%[0-9]+]]:_(s32) = G_OR [[R1EXT]], [[R2EXT]] + ; SOFT: [[R:%[0-9]+]]:_(s1) = G_TRUNC [[REXT]] ; SOFT-NOT: G_FCMP %7(s32) = G_ZEXT %6(s1) - ; CHECK: [[REXT:%[0-9]+]](s32) = G_ZEXT [[R]](s1) + ; CHECK: [[REXT:%[0-9]+]]:_(s32) = G_ZEXT [[R]](s1) %r0 = COPY %7(s32) ; 
CHECK: %r0 = COPY [[REXT]] BX_RET 14, _, implicit %r0 diff --git a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir index 1fb7c79cd24f2..6ac44aaad72ec 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-legalizer.mir @@ -27,6 +27,10 @@ define void @test_xor_s16() { ret void } define void @test_xor_s32() { ret void } + define void @test_lshr_s32() { ret void } + define void @test_ashr_s32() { ret void } + define void @test_shl_s32() { ret void } + define void @test_load_from_stack() { ret void } define void @test_legal_loads() #0 { ret void } define void @test_legal_stores() #0 { ret void } @@ -44,9 +48,6 @@ define void @test_brcond() { ret void } - define void @test_fadd_s32() #0 { ret void } - define void @test_fadd_s64() #0 { ret void } - @a_global = global i32 42 define void @test_global_variable() { ret void } @@ -67,10 +68,10 @@ body: | bb.0: liveins: %r0 - %0(s8) = COPY %r0 + %0(s8) = G_CONSTANT i8 42 %1(s32) = G_SEXT %0 ; G_SEXT with s8 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s32) = G_SEXT {{%[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_SEXT {{%[0-9]+}} %r0 = COPY %1(s32) BX_RET 14, _, implicit %r0 ... @@ -89,10 +90,10 @@ body: | bb.0: liveins: %r0 - %0(s16) = COPY %r0 + %0(s16) = G_CONSTANT i16 42 %1(s32) = G_ZEXT %0 ; G_ZEXT with s16 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s32) = G_ZEXT {{%[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_ZEXT {{%[0-9]+}} %r0 = COPY %1(s32) BX_RET 14, _, implicit %r0 ... @@ -108,18 +109,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 + %0(s8) = G_CONSTANT i8 12 + %1(s8) = G_CONSTANT i8 30 %2(s8) = G_ADD %0, %1 ; G_ADD with s8 should widen - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_ADD {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_ADD {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_ADD {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s8) + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_ADD {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_ADD {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_ADD {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s8) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... --- @@ -134,18 +137,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 + %0(s16) = G_CONSTANT i16 32 + %1(s16) = G_CONSTANT i16 10 %2(s16) = G_ADD %0, %1 ; G_ADD with s16 should widen - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_ADD {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_ADD {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_ADD {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s16) + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_ADD {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_ADD {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_ADD {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s16) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... 
@@ -169,7 +174,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_ADD %0, %1 ; G_ADD with s32 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s32) = G_ADD {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_ADD {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 @@ -186,18 +191,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 + %0(s8) = G_CONSTANT i8 48 + %1(s8) = G_CONSTANT i8 6 %2(s8) = G_SUB %0, %1 ; G_SUB with s8 should widen - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_SUB {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_SUB {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_SUB {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s8) + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_SUB {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_SUB {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_SUB {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s8) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... --- @@ -212,18 +219,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 + %0(s16) = G_CONSTANT i16 58 + %1(s16) = G_CONSTANT i16 16 %2(s16) = G_SUB %0, %1 ; G_SUB with s16 should widen - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_SUB {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_SUB {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_SUB {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s16) + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_SUB {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_SUB {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_SUB {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s16) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... @@ -247,7 +256,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_SUB %0, %1 ; G_SUB with s32 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s32) = G_SUB {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_SUB {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 @@ -264,18 +273,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 + %0(s8) = G_CONSTANT i8 7 + %1(s8) = G_CONSTANT i8 6 %2(s8) = G_MUL %0, %1 ; G_MUL with s8 should widen - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_MUL {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_MUL {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_MUL {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s8) + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_MUL {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_MUL {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_MUL {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s8) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... 
--- @@ -290,18 +301,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 + %0(s16) = G_CONSTANT i16 3 + %1(s16) = G_CONSTANT i16 14 %2(s16) = G_MUL %0, %1 ; G_MUL with s16 should widen - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_MUL {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_MUL {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_MUL {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s16) + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_MUL {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_MUL {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_MUL {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s16) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... @@ -325,7 +338,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_MUL %0, %1 ; G_MUL with s32 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s32) = G_MUL {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_MUL {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 @@ -342,18 +355,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 + %0(s8) = G_CONSTANT i8 46 + %1(s8) = G_CONSTANT i8 58 %2(s8) = G_AND %0, %1 ; G_AND with s8 should widen - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_AND {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_AND {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s8) + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_AND {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_AND {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_AND {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s8) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... --- @@ -368,18 +383,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 + %0(s16) = G_CONSTANT i16 43 + %1(s16) = G_CONSTANT i16 106 %2(s16) = G_AND %0, %1 ; G_AND with s16 should widen - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_AND {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_AND {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s16) + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_AND {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_AND {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_AND {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s16) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... 
@@ -403,7 +420,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_AND %0, %1 ; G_AND with s32 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s32) = G_AND {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_AND {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 @@ -420,18 +437,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 + %0(s8) = G_CONSTANT i8 32 + %1(s8) = G_CONSTANT i8 10 %2(s8) = G_OR %0, %1 ; G_OR with s8 should widen - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_OR {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_OR {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s8) + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_OR {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_OR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_OR {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s8) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... --- @@ -446,18 +465,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 + %0(s16) = G_CONSTANT i16 34 + %1(s16) = G_CONSTANT i16 10 %2(s16) = G_OR %0, %1 ; G_OR with s16 should widen - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_OR {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_OR {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s16) + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_OR {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_OR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_OR {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s16) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... @@ -481,7 +502,7 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_OR %0, %1 ; G_OR with s32 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s32) = G_OR {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_OR {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 @@ -498,18 +519,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 + %0(s8) = G_CONSTANT i8 10 + %1(s8) = G_CONSTANT i8 32 %2(s8) = G_XOR %0, %1 ; G_XOR with s8 should widen - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_XOR {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s8) = G_XOR {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s8) + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_XOR {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_XOR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s8) = G_XOR {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s8) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... 
--- @@ -524,18 +547,20 @@ registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } + - { id: 3, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 + %0(s16) = G_CONSTANT i16 40 + %1(s16) = G_CONSTANT i16 2 %2(s16) = G_XOR %0, %1 ; G_XOR with s16 should widen - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_XOR {{%[0-9]+, %[0-9]+}} - ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s16) = G_XOR {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s16) + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_XOR {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_XOR {{%[0-9]+, %[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s16) = G_XOR {{%[0-9]+, %[0-9]+}} + %3(s32) = G_SEXT %2(s16) + %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... @@ -559,7 +584,82 @@ body: | %1(s32) = COPY %r1 %2(s32) = G_XOR %0, %1 ; G_XOR with s32 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s32) = G_XOR {{%[0-9]+, %[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_XOR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_lshr_s32 +# CHECK-LABEL: name: test_lshr_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_LSHR %0, %1 + ; G_LSHR with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}:_(s32) = G_LSHR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... +--- +name: test_ashr_s32 +# CHECK-LABEL: name: test_ashr_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_ASHR %0, %1 + ; G_ASHR with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}:_(s32) = G_ASHR {{%[0-9]+, %[0-9]+}} + %r0 = COPY %2(s32) + BX_RET 14, _, implicit %r0 + +... 
+--- +name: test_shl_s32 +# CHECK-LABEL: name: test_shl_s32 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.0: + liveins: %r0, %r1 + + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_SHL %0, %1 + ; G_SHL with s32 is legal, so we should find it unchanged in the output + ; CHECK: {{%[0-9]+}}:_(s32) = G_SHL {{%[0-9]+, %[0-9]+}} %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 @@ -587,8 +687,8 @@ body: | liveins: %r0, %r1, %r2, %r3 ; This is legal, so we should find it unchanged in the output - ; CHECK: [[FIVREG:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[FRAME_INDEX]] - ; CHECK: {{%[0-9]+}}(s32) = G_LOAD [[FIVREG]](p0) :: (load 4) + ; CHECK: [[FIVREG:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[FRAME_INDEX]] + ; CHECK: {{%[0-9]+}}:_(s32) = G_LOAD [[FIVREG]](p0) :: (load 4) %0(p0) = G_FRAME_INDEX %fixed-stack.2 %1(s32) = G_LOAD %0(p0) :: (load 4) BX_RET 14, _ @@ -614,12 +714,12 @@ body: | liveins: %r0, %r1, %r2, %r3 ; These are all legal, so we should find them unchanged in the output - ; CHECK-DAG: {{%[0-9]+}}(s64) = G_LOAD %0 - ; CHECK-DAG: {{%[0-9]+}}(s32) = G_LOAD %0 - ; CHECK-DAG: {{%[0-9]+}}(s16) = G_LOAD %0 - ; CHECK-DAG: {{%[0-9]+}}(s8) = G_LOAD %0 - ; CHECK-DAG: {{%[0-9]+}}(s1) = G_LOAD %0 - ; CHECK-DAG: {{%[0-9]+}}(p0) = G_LOAD %0 + ; CHECK-DAG: {{%[0-9]+}}:_(s64) = G_LOAD %0 + ; CHECK-DAG: {{%[0-9]+}}:_(s32) = G_LOAD %0 + ; CHECK-DAG: {{%[0-9]+}}:_(s16) = G_LOAD %0 + ; CHECK-DAG: {{%[0-9]+}}:_(s8) = G_LOAD %0 + ; CHECK-DAG: {{%[0-9]+}}:_(s1) = G_LOAD %0 + ; CHECK-DAG: {{%[0-9]+}}:_(p0) = G_LOAD %0 %0(p0) = COPY %r0 %1(s32) = G_LOAD %0(p0) :: (load 4) %2(s16) = G_LOAD %0(p0) :: (load 2) @@ -661,11 +761,11 @@ body: | G_STORE %1(s64), %0(p0) :: (store 8) %2(s32) = COPY %r2 G_STORE %2(s32), %0(p0) :: (store 4) - %3(s16) = COPY %r3 + %3(s16) = G_CONSTANT i16 42 G_STORE %3(s16), %0(p0) :: (store 2) - %4(s8) = COPY %r4 + %4(s8) = G_CONSTANT i8 21 G_STORE %4(s8), %0(p0) :: (store 1) - %5(s1) = COPY %r5 + %5(s1) = G_CONSTANT i1 1 G_STORE %5(s1), %0(p0) :: (store 1) %6(p0) = COPY %r6 G_STORE %6(p0), %0(p0) :: (store 4) @@ -690,7 +790,7 @@ body: | %0(p0) = COPY %r0 %1(s32) = COPY %r1 - ; CHECK: {{%[0-9]+}}(p0) = G_GEP {{%[0-9]+}}, {{%[0-9]+}}(s32) + ; CHECK: {{%[0-9]+}}:_(p0) = G_GEP {{%[0-9]+}}, {{%[0-9]+}}(s32) %2(p0) = G_GEP %0, %1(s32) %r0 = COPY %2(p0) @@ -712,24 +812,24 @@ registers: body: | bb.0: %0(s32) = G_CONSTANT 42 - ; CHECK: {{%[0-9]+}}(s32) = G_CONSTANT 42 + ; CHECK: {{%[0-9]+}}:_(s32) = G_CONSTANT 42 %1(s16) = G_CONSTANT i16 21 ; CHECK-NOT: G_CONSTANT i16 - ; CHECK: [[EXT:%[0-9]+]](s32) = G_CONSTANT i32 21 - ; CHECK: {{%[0-9]+}}(s16) = G_TRUNC [[EXT]](s32) + ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; CHECK: {{%[0-9]+}}:_(s16) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i16 %2(s8) = G_CONSTANT i8 10 ; CHECK-NOT: G_CONSTANT i8 - ; CHECK: [[EXT:%[0-9]+]](s32) = G_CONSTANT i32 10 - ; CHECK: {{%[0-9]+}}(s8) = G_TRUNC [[EXT]](s32) + ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK: {{%[0-9]+}}:_(s8) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i8 %3(s1) = G_CONSTANT i1 1 ; CHECK-NOT: G_CONSTANT i1 - ; CHECK: [[EXT:%[0-9]+]](s32) = G_CONSTANT i32 -1 - ; CHECK: {{%[0-9]+}}(s1) = G_TRUNC [[EXT]](s32) + ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK: {{%[0-9]+}}:_(s1) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i1 %r0 = COPY %0(s32) @@ -752,12 +852,12 @@ body: | bb.0: liveins: 
%r0, %r1 - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 + %0(s8) = G_CONSTANT i8 42 + %1(s8) = G_CONSTANT i8 43 %2(s1) = G_ICMP intpred(ne), %0(s8), %1 ; G_ICMP with s8 should widen - ; CHECK: {{%[0-9]+}}(s1) = G_ICMP intpred(ne), {{%[0-9]+}}(s32), {{%[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s1) = G_ICMP intpred(ne), {{%[0-9]+}}(s8), {{%[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s1) = G_ICMP intpred(ne), {{%[0-9]+}}(s32), {{%[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s1) = G_ICMP intpred(ne), {{%[0-9]+}}(s8), {{%[0-9]+}} %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 @@ -779,12 +879,12 @@ body: | bb.0: liveins: %r0, %r1 - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 + %0(s16) = G_CONSTANT i16 42 + %1(s16) = G_CONSTANT i16 46 %2(s1) = G_ICMP intpred(slt), %0(s16), %1 ; G_ICMP with s16 should widen - ; CHECK: {{%[0-9]+}}(s1) = G_ICMP intpred(slt), {{%[0-9]+}}(s32), {{%[0-9]+}} - ; CHECK-NOT: {{%[0-9]+}}(s1) = G_ICMP intpred(slt), {{%[0-9]+}}(s16), {{%[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s1) = G_ICMP intpred(slt), {{%[0-9]+}}(s32), {{%[0-9]+}} + ; CHECK-NOT: {{%[0-9]+}}:_(s1) = G_ICMP intpred(slt), {{%[0-9]+}}(s16), {{%[0-9]+}} %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 @@ -810,7 +910,7 @@ body: | %1(s32) = COPY %r1 %2(s1) = G_ICMP intpred(eq), %0(s32), %1 ; G_ICMP with s32 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s1) = G_ICMP intpred(eq), {{%[0-9]+}}(s32), {{%[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s1) = G_ICMP intpred(eq), {{%[0-9]+}}(s32), {{%[0-9]+}} %3(s32) = G_ZEXT %2(s1) %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 @@ -834,10 +934,10 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - %2(s1) = COPY %r2 + %2(s1) = G_CONSTANT i1 1 %3(s32) = G_SELECT %2(s1), %0, %1 ; G_SELECT with s32 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s32) = G_SELECT {{%[0-9]+}}(s1), {{%[0-9]+}}, {{%[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(s32) = G_SELECT {{%[0-9]+}}(s1), {{%[0-9]+}}, {{%[0-9]+}} %r0 = COPY %3(s32) BX_RET 14, _, implicit %r0 ... @@ -860,10 +960,10 @@ body: | %0(p0) = COPY %r0 %1(p0) = COPY %r1 - %2(s1) = COPY %r2 + %2(s1) = G_CONSTANT i1 0 %3(p0) = G_SELECT %2(s1), %0, %1 ; G_SELECT with p0 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(p0) = G_SELECT {{%[0-9]+}}(s1), {{%[0-9]+}}, {{%[0-9]+}} + ; CHECK: {{%[0-9]+}}:_(p0) = G_SELECT {{%[0-9]+}}(s1), {{%[0-9]+}}, {{%[0-9]+}} %r0 = COPY %3(p0) BX_RET 14, _, implicit %r0 ... @@ -900,56 +1000,6 @@ body: | %r0 = COPY %0(s32) BX_RET 14, _, implicit %r0 -... ---- -name: test_fadd_s32 -# CHECK-LABEL: name: test_fadd_s32 -legalized: false -# CHECK: legalized: true -regBankSelected: false -selected: false -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.0: - liveins: %r0, %r1 - - %0(s32) = COPY %r0 - %1(s32) = COPY %r1 - %2(s32) = G_FADD %0, %1 - ; G_FADD with s32 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s32) = G_FADD {{%[0-9]+, %[0-9]+}} - %r0 = COPY %2(s32) - BX_RET 14, _, implicit %r0 - -... 
---- -name: test_fadd_s64 -# CHECK-LABEL: name: test_fadd_s64 -legalized: false -# CHECK: legalized: true -regBankSelected: false -selected: false -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.0: - liveins: %d0, %d1 - - %0(s64) = COPY %d0 - %1(s64) = COPY %d1 - %2(s64) = G_FADD %0, %1 - ; G_FADD with s64 is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(s64) = G_FADD {{%[0-9]+, %[0-9]+}} - %d0 = COPY %2(s64) - BX_RET 14, _, implicit %d0 - ... --- name: test_global_variable @@ -969,7 +1019,7 @@ body: | %0(s32) = COPY %r0 %1(p0) = G_GLOBAL_VALUE @a_global ; G_GLOBAL_VALUE is legal, so we should find it unchanged in the output - ; CHECK: {{%[0-9]+}}(p0) = G_GLOBAL_VALUE @a_global + ; CHECK: {{%[0-9]+}}:_(p0) = G_GLOBAL_VALUE @a_global %r0 = COPY %1(p0) BX_RET 14, _, implicit %r0 diff --git a/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll b/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll new file mode 100644 index 0000000000000..92c4e2905d88a --- /dev/null +++ b/test/CodeGen/ARM/GlobalISel/arm-param-lowering.ll @@ -0,0 +1,476 @@ +; RUN: llc -mtriple arm-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LITTLE +; RUN: llc -mtriple armeb-unknown -mattr=+vfp2,+v4t -global-isel -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=BIG + +declare arm_aapcscc i32* @simple_reg_params_target(i32, i32*) + +define arm_aapcscc i32* @test_call_simple_reg_params(i32 *%a, i32 %b) { +; CHECK-LABEL: name: test_call_simple_reg_params +; CHECK-DAG: [[AVREG:%[0-9]+]]:_(p0) = COPY %r0 +; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK-DAG: %r0 = COPY [[BVREG]] +; CHECK-DAG: %r1 = COPY [[AVREG]] +; CHECK: BL @simple_reg_params_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0 +; CHECK: [[RVREG:%[0-9]+]]:_(p0) = COPY %r0 +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: %r0 = COPY [[RVREG]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %r = notail call arm_aapcscc i32 *@simple_reg_params_target(i32 %b, i32 *%a) + ret i32 *%r +} + +declare arm_aapcscc i32* @simple_stack_params_target(i32, i32*, i32, i32*, i32, i32*) + +define arm_aapcscc i32* @test_call_simple_stack_params(i32 *%a, i32 %b) { +; CHECK-LABEL: name: test_call_simple_stack_params +; CHECK-DAG: [[AVREG:%[0-9]+]]:_(p0) = COPY %r0 +; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: ADJCALLSTACKDOWN 8, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK-DAG: %r0 = COPY [[BVREG]] +; CHECK-DAG: %r1 = COPY [[AVREG]] +; CHECK-DAG: %r2 = COPY [[BVREG]] +; CHECK-DAG: %r3 = COPY [[AVREG]] +; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_GEP [[SP1]], [[OFF1]](s32) +; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store 4 +; CHECK: [[SP2:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 +; CHECK: [[FI2:%[0-9]+]]:_(p0) = G_GEP [[SP2]], [[OFF2]](s32) +; CHECK: G_STORE [[AVREG]](p0), [[FI2]](p0){{.*}}store 4 +; CHECK: BL @simple_stack_params_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 +; CHECK: [[RVREG:%[0-9]+]]:_(p0) = COPY %r0 +; CHECK: ADJCALLSTACKUP 8, 0, 14, _, 
implicit-def %sp, implicit %sp +; CHECK: %r0 = COPY [[RVREG]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %r = notail call arm_aapcscc i32 *@simple_stack_params_target(i32 %b, i32 *%a, i32 %b, i32 *%a, i32 %b, i32 *%a) + ret i32 *%r +} + +declare arm_aapcscc signext i16 @ext_target(i8 signext, i8 zeroext, i16 signext, i16 zeroext, i8 signext, i8 zeroext, i16 signext, i16 zeroext, i1 zeroext) + +define arm_aapcscc signext i16 @test_call_ext_params(i8 %a, i16 %b, i1 %c) { +; CHECK-LABEL: name: test_call_ext_params +; CHECK-DAG: [[R0VREG:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[AVREG:%[0-9]+]]:_(s8) = G_TRUNC [[R0VREG]] +; CHECK-DAG: [[R1VREG:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s16) = G_TRUNC [[R1VREG]] +; CHECK-DAG: [[R2VREG:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK-DAG: [[CVREG:%[0-9]+]]:_(s1) = G_TRUNC [[R2VREG]] +; CHECK: ADJCALLSTACKDOWN 20, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[SEXTA:%[0-9]+]]:_(s32) = G_SEXT [[AVREG]](s8) +; CHECK: %r0 = COPY [[SEXTA]] +; CHECK: [[ZEXTA:%[0-9]+]]:_(s32) = G_ZEXT [[AVREG]](s8) +; CHECK: %r1 = COPY [[ZEXTA]] +; CHECK: [[SEXTB:%[0-9]+]]:_(s32) = G_SEXT [[BVREG]](s16) +; CHECK: %r2 = COPY [[SEXTB]] +; CHECK: [[ZEXTB:%[0-9]+]]:_(s32) = G_ZEXT [[BVREG]](s16) +; CHECK: %r3 = COPY [[ZEXTB]] +; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_GEP [[SP1]], [[OFF1]](s32) +; CHECK: [[SEXTA2:%[0-9]+]]:_(s32) = G_SEXT [[AVREG]] +; CHECK: G_STORE [[SEXTA2]](s32), [[FI1]](p0){{.*}}store 4 +; CHECK: [[SP2:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 +; CHECK: [[FI2:%[0-9]+]]:_(p0) = G_GEP [[SP2]], [[OFF2]](s32) +; CHECK: [[ZEXTA2:%[0-9]+]]:_(s32) = G_ZEXT [[AVREG]] +; CHECK: G_STORE [[ZEXTA2]](s32), [[FI2]](p0){{.*}}store 4 +; CHECK: [[SP3:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 +; CHECK: [[FI3:%[0-9]+]]:_(p0) = G_GEP [[SP3]], [[OFF3]](s32) +; CHECK: [[SEXTB2:%[0-9]+]]:_(s32) = G_SEXT [[BVREG]] +; CHECK: G_STORE [[SEXTB2]](s32), [[FI3]](p0){{.*}}store 4 +; CHECK: [[SP4:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 +; CHECK: [[FI4:%[0-9]+]]:_(p0) = G_GEP [[SP4]], [[OFF4]](s32) +; CHECK: [[ZEXTB2:%[0-9]+]]:_(s32) = G_ZEXT [[BVREG]] +; CHECK: G_STORE [[ZEXTB2]](s32), [[FI4]](p0){{.*}}store 4 +; CHECK: [[SP5:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 +; CHECK: [[FI5:%[0-9]+]]:_(p0) = G_GEP [[SP5]], [[OFF5]](s32) +; CHECK: [[ZEXTC:%[0-9]+]]:_(s32) = G_ZEXT [[CVREG]] +; CHECK: G_STORE [[ZEXTC]](s32), [[FI5]](p0){{.*}}store 4 +; CHECK: BL @ext_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0 +; CHECK: [[R0VREG:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[RVREG:%[0-9]+]]:_(s16) = G_TRUNC [[R0VREG]] +; CHECK: ADJCALLSTACKUP 20, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[RExtVREG:%[0-9]+]]:_(s32) = G_SEXT [[RVREG]] +; CHECK: %r0 = COPY [[RExtVREG]] +; CHECK: BX_RET 14, _, implicit %r0 +entry: + %r = notail call arm_aapcscc signext i16 @ext_target(i8 signext %a, i8 zeroext %a, i16 signext %b, i16 zeroext %b, i8 signext %a, i8 zeroext %a, i16 signext %b, i16 zeroext %b, i1 zeroext %c) + ret i16 %r +} + +declare arm_aapcs_vfpcc double @vfpcc_fp_target(float, double) + +define arm_aapcs_vfpcc double @test_call_vfpcc_fp_params(double %a, float %b) { +; CHECK-LABEL: name: test_call_vfpcc_fp_params +; CHECK-DAG: [[AVREG:%[0-9]+]]:_(s64) = 
COPY %d0 +; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s32) = COPY %s2 +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK-DAG: %s0 = COPY [[BVREG]] +; CHECK-DAG: %d1 = COPY [[AVREG]] +; CHECK: BL @vfpcc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %s0, implicit %d1, implicit-def %d0 +; CHECK: [[RVREG:%[0-9]+]]:_(s64) = COPY %d0 +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: %d0 = COPY [[RVREG]] +; CHECK: BX_RET 14, _, implicit %d0 +entry: + %r = notail call arm_aapcs_vfpcc double @vfpcc_fp_target(float %b, double %a) + ret double %r +} + +declare arm_aapcscc double @aapcscc_fp_target(float, double, float, double) + +define arm_aapcscc double @test_call_aapcs_fp_params(double %a, float %b) { +; CHECK-LABEL: name: test_call_aapcs_fp_params +; CHECK-DAG: [[A1:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[A2:%[0-9]+]]:_(s32) = COPY %r1 +; LITTLE-DAG: [[AVREG:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[A1]](s32), [[A2]](s32) +; BIG-DAG: [[AVREG:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[A2]](s32), [[A1]](s32) +; CHECK-DAG: [[BVREG:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK: ADJCALLSTACKDOWN 16, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK-DAG: %r0 = COPY [[BVREG]] +; CHECK-DAG: [[A1:%[0-9]+]]:_(s32), [[A2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AVREG]](s64) +; LITTLE-DAG: %r2 = COPY [[A1]] +; LITTLE-DAG: %r3 = COPY [[A2]] +; BIG-DAG: %r2 = COPY [[A2]] +; BIG-DAG: %r3 = COPY [[A1]] +; CHECK: [[SP1:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[FI1:%[0-9]+]]:_(p0) = G_GEP [[SP1]], [[OFF1]](s32) +; CHECK: G_STORE [[BVREG]](s32), [[FI1]](p0){{.*}}store 4 +; CHECK: [[SP2:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 +; CHECK: [[FI2:%[0-9]+]]:_(p0) = G_GEP [[SP2]], [[OFF2]](s32) +; CHECK: G_STORE [[AVREG]](s64), [[FI2]](p0){{.*}}store 8 +; CHECK: BL @aapcscc_fp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 +; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[R2:%[0-9]+]]:_(s32) = COPY %r1 +; LITTLE: [[RVREG:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R1]](s32), [[R2]](s32) +; BIG: [[RVREG:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R2]](s32), [[R1]](s32) +; CHECK: ADJCALLSTACKUP 16, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[RVREG]](s64) +; LITTLE-DAG: %r0 = COPY [[R1]] +; LITTLE-DAG: %r1 = COPY [[R2]] +; BIG-DAG: %r0 = COPY [[R2]] +; BIG-DAG: %r1 = COPY [[R1]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +entry: + %r = notail call arm_aapcscc double @aapcscc_fp_target(float %b, double %a, float %b, double %a) + ret double %r +} + +declare arm_aapcscc float @different_call_conv_target(float) + +define arm_aapcs_vfpcc float @test_call_different_call_conv(float %x) { +; CHECK-LABEL: name: test_call_different_call_conv +; CHECK: [[X:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: %r0 = COPY [[X]] +; CHECK: BL @different_call_conv_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit-def %r0 +; CHECK: [[R:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: %s0 = COPY [[R]] +; CHECK: BX_RET 14, _, implicit %s0 +entry: + %r = notail call arm_aapcscc float @different_call_conv_target(float %x) + ret float %r +} + +declare arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32]) + +define 
arm_aapcscc [3 x i32] @test_tiny_int_arrays([2 x i32] %arr) { +; CHECK-LABEL: name: test_tiny_int_arrays +; CHECK: liveins: %r0, %r1 +; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: [[ARG_ARR:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s64) +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: BL @tiny_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1 +; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: [[R2:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK: [[RES_ARR:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32) +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[RES_ARR]](s96) +; FIXME: This doesn't seem correct with regard to the AAPCS docs (which say +; that composite types larger than 4 bytes should be passed through memory), +; but it's what DAGISel does. We should fix it in the common code for both. +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: %r2 = COPY [[R2]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1, implicit %r2 +entry: + %r = notail call arm_aapcscc [3 x i32] @tiny_int_arrays_target([2 x i32] %arr) + ret [3 x i32] %r +} + +declare arm_aapcscc void @multiple_int_arrays_target([2 x i32], [2 x i32]) + +define arm_aapcscc void @test_multiple_int_arrays([2 x i32] %arr0, [2 x i32] %arr1) { +; CHECK-LABEL: name: test_multiple_int_arrays +; CHECK: liveins: %r0, %r1 +; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: [[R2:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK: [[R3:%[0-9]+]]:_(s32) = COPY %r3 +; CHECK: [[ARG_ARR0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) +; CHECK: [[ARG_ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R2]](s32), [[R3]](s32) +; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR0]](s64) +; CHECK: [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR1]](s64) +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: %r2 = COPY [[R2]] +; CHECK: %r3 = COPY [[R3]] +; CHECK: BL @multiple_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3 +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: BX_RET 14, _ +entry: + notail call arm_aapcscc void @multiple_int_arrays_target([2 x i32] %arr0, [2 x i32] %arr1) + ret void +} + +declare arm_aapcscc void @large_int_arrays_target([20 x i32]) + +define arm_aapcscc void @test_large_int_arrays([20 x i32] %arr) { +; CHECK-LABEL: name: test_large_int_arrays +; CHECK: fixedStack: +; The parameters live in separate stack locations, one for each element that +; doesn't fit in the registers. 
+; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4, +; CHECK-DAG: id: [[LAST_STACK_ID:[0-9]+]], type: default, offset: 60, size: 4 +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK-DAG: [[R0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK-DAG: [[R2:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK-DAG: [[R3:%[0-9]+]]:_(s32) = COPY %r3 +; CHECK: [[FIRST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[FIRST_STACK_ID]] +; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]] +; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]] +; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]] +; CHECK: [[ARG_ARR:%[0-9]+]]:_(s640) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32) +; CHECK: ADJCALLSTACKDOWN 64, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s640) +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: %r2 = COPY [[R2]] +; CHECK: %r3 = COPY [[R3]] +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF_FIRST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[FIRST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF_FIRST_ELEMENT]](s32) +; CHECK: G_STORE [[FIRST_STACK_ELEMENT]](s32), [[FIRST_STACK_ARG_ADDR]]{{.*}}store 4 +; Match the second-to-last offset, so we can get the correct SP for the last element +; CHECK: G_CONSTANT i32 56 +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 +; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF_LAST_ELEMENT]](s32) +; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store 4 +; CHECK: BL @large_int_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3 +; CHECK: ADJCALLSTACKUP 64, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: BX_RET 14, _ +entry: + notail call arm_aapcscc void @large_int_arrays_target([20 x i32] %arr) + ret void +} + +declare arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double]) + +define arm_aapcscc [2 x float] @test_fp_arrays_aapcs([3 x double] %arr) { +; CHECK-LABEL: name: test_fp_arrays_aapcs +; CHECK: fixedStack: +; CHECK: id: [[ARR2_ID:[0-9]+]], type: default, offset: 0, size: 8, +; CHECK: liveins: %r0, %r1, %r2, %r3 +; CHECK: [[ARR0_0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[ARR0_1:%[0-9]+]]:_(s32) = COPY %r1 +; LITTLE: [[ARR0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ARR0_0]](s32), [[ARR0_1]](s32) +; BIG: [[ARR0:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ARR0_1]](s32), [[ARR0_0]](s32) +; CHECK: [[ARR1_0:%[0-9]+]]:_(s32) = COPY %r2 +; CHECK: [[ARR1_1:%[0-9]+]]:_(s32) = COPY %r3 +; LITTLE: [[ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ARR1_0]](s32), [[ARR1_1]](s32) +; BIG: [[ARR1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ARR1_1]](s32), [[ARR1_0]](s32) +; CHECK: [[ARR2_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[ARR2_ID]] +; CHECK: [[ARR2:%[0-9]+]]:_(s64) = G_LOAD [[ARR2_FI]]{{.*}}load 8 from %fixed-stack.[[ARR2_ID]] +; CHECK: [[ARR_MERGED:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[ARR0]](s64), [[ARR1]](s64), [[ARR2]](s64)
+; CHECK: ADJCALLSTACKDOWN 8, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[ARR0:%[0-9]+]]:_(s64), [[ARR1:%[0-9]+]]:_(s64), [[ARR2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[ARR_MERGED]](s192) +; CHECK: [[ARR0_0:%[0-9]+]]:_(s32), [[ARR0_1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARR0]](s64) +; LITTLE: %r0 = COPY [[ARR0_0]](s32) +; LITTLE: %r1 = COPY [[ARR0_1]](s32) +; BIG: %r0 = COPY [[ARR0_1]](s32) +; BIG: %r1 = COPY [[ARR0_0]](s32) +; CHECK: [[ARR1_0:%[0-9]+]]:_(s32), [[ARR1_1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARR1]](s64) +; LITTLE: %r2 = COPY [[ARR1_0]](s32) +; LITTLE: %r3 = COPY [[ARR1_1]](s32) +; BIG: %r2 = COPY [[ARR1_1]](s32) +; BIG: %r3 = COPY [[ARR1_0]](s32) +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[ARR2_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[ARR2_ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[ARR2_OFFSET]](s32) +; CHECK: G_STORE [[ARR2]](s64), [[ARR2_ADDR]](p0){{.*}}store 8 +; CHECK: BL @fp_arrays_aapcs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1 +; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: [[R_MERGED:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) +; CHECK: ADJCALLSTACKUP 8, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R_MERGED]](s64) +; CHECK: %r0 = COPY [[R0]] +; CHECK: %r1 = COPY [[R1]] +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 +entry: + %r = notail call arm_aapcscc [2 x float] @fp_arrays_aapcs_target([3 x double] %arr) + ret [2 x float] %r +} + +declare arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double], [3 x float], [4 x double]) + +define arm_aapcs_vfpcc [4 x float] @test_fp_arrays_aapcs_vfp([3 x double] %x, [3 x float] %y, [4 x double] %z) { +; CHECK-LABEL: name: test_fp_arrays_aapcs_vfp +; CHECK: fixedStack: +; CHECK-DAG: id: [[Z0_ID:[0-9]+]], type: default, offset: 0, size: 8, +; CHECK-DAG: id: [[Z1_ID:[0-9]+]], type: default, offset: 8, size: 8, +; CHECK-DAG: id: [[Z2_ID:[0-9]+]], type: default, offset: 16, size: 8, +; CHECK-DAG: id: [[Z3_ID:[0-9]+]], type: default, offset: 24, size: 8, +; CHECK: liveins: %d0, %d1, %d2, %s6, %s7, %s8 +; CHECK: [[X0:%[0-9]+]]:_(s64) = COPY %d0 +; CHECK: [[X1:%[0-9]+]]:_(s64) = COPY %d1 +; CHECK: [[X2:%[0-9]+]]:_(s64) = COPY %d2 +; CHECK: [[Y0:%[0-9]+]]:_(s32) = COPY %s6 +; CHECK: [[Y1:%[0-9]+]]:_(s32) = COPY %s7 +; CHECK: [[Y2:%[0-9]+]]:_(s32) = COPY %s8 +; CHECK: [[Z0_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Z0_ID]] +; CHECK: [[Z0:%[0-9]+]]:_(s64) = G_LOAD [[Z0_FI]]{{.*}}load 8 +; CHECK: [[Z1_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Z1_ID]] +; CHECK: [[Z1:%[0-9]+]]:_(s64) = G_LOAD [[Z1_FI]]{{.*}}load 8 +; CHECK: [[Z2_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Z2_ID]] +; CHECK: [[Z2:%[0-9]+]]:_(s64) = G_LOAD [[Z2_FI]]{{.*}}load 8 +; CHECK: [[Z3_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[Z3_ID]] +; CHECK: [[Z3:%[0-9]+]]:_(s64) = G_LOAD [[Z3_FI]]{{.*}}load 8 +; CHECK: [[X_ARR:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[X0]](s64), [[X1]](s64), [[X2]](s64) +; CHECK: [[Y_ARR:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[Y0]](s32), [[Y1]](s32), [[Y2]](s32) +; CHECK: [[Z_ARR:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[Z0]](s64), [[Z1]](s64), [[Z2]](s64), [[Z3]](s64) +; CHECK: ADJCALLSTACKDOWN 32, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[X0:%[0-9]+]]:_(s64), [[X1:%[0-9]+]]:_(s64), [[X2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[X_ARR]](s192) +; CHECK: 
[[Y0:%[0-9]+]]:_(s32), [[Y1:%[0-9]+]]:_(s32), [[Y2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[Y_ARR]](s96) +; CHECK: [[Z0:%[0-9]+]]:_(s64), [[Z1:%[0-9]+]]:_(s64), [[Z2:%[0-9]+]]:_(s64), [[Z3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[Z_ARR]](s256) +; CHECK: %d0 = COPY [[X0]](s64) +; CHECK: %d1 = COPY [[X1]](s64) +; CHECK: %d2 = COPY [[X2]](s64) +; CHECK: %s6 = COPY [[Y0]](s32) +; CHECK: %s7 = COPY [[Y1]](s32) +; CHECK: %s8 = COPY [[Y2]](s32) +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[Z0_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: [[Z0_ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[Z0_OFFSET]](s32) +; CHECK: G_STORE [[Z0]](s64), [[Z0_ADDR]](p0){{.*}}store 8 +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[Z1_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 +; CHECK: [[Z1_ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[Z1_OFFSET]](s32) +; CHECK: G_STORE [[Z1]](s64), [[Z1_ADDR]](p0){{.*}}store 8 +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[Z2_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 +; CHECK: [[Z2_ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[Z2_OFFSET]](s32) +; CHECK: G_STORE [[Z2]](s64), [[Z2_ADDR]](p0){{.*}}store 8 +; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp +; CHECK: [[Z3_OFFSET:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 +; CHECK: [[Z3_ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[Z3_OFFSET]](s32) +; CHECK: G_STORE [[Z3]](s64), [[Z3_ADDR]](p0){{.*}}store 8 +; CHECK: BL @fp_arrays_aapcs_vfp_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %d0, implicit %d1, implicit %d2, implicit %s6, implicit %s7, implicit %s8, implicit-def %s0, implicit-def %s1, implicit-def %s2, implicit-def %s3 +; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY %s0 +; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %s1 +; CHECK: [[R2:%[0-9]+]]:_(s32) = COPY %s2 +; CHECK: [[R3:%[0-9]+]]:_(s32) = COPY %s3 +; CHECK: [[R_MERGED:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32) +; CHECK: ADJCALLSTACKUP 32, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R_MERGED]](s128) +; CHECK: %s0 = COPY [[R0]] +; CHECK: %s1 = COPY [[R1]] +; CHECK: %s2 = COPY [[R2]] +; CHECK: %s3 = COPY [[R3]] +; CHECK: BX_RET 14, _, implicit %s0, implicit %s1, implicit %s2, implicit %s3 +entry: + %r = notail call arm_aapcs_vfpcc [4 x float] @fp_arrays_aapcs_vfp_target([3 x double] %x, [3 x float] %y, [4 x double] %z) + ret [4 x float] %r +} + +declare arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr) + +define arm_aapcscc [2 x i32*] @test_tough_arrays([6 x [4 x i32]] %arr) { +; CHECK-LABEL: name: test_tough_arrays +; CHECK: fixedStack: +; The parameters live in separate stack locations, one for each element that +; doesn't fit in the registers. 
+; CHECK-DAG: id: [[FIRST_STACK_ID:[0-9]+]], type: default, offset: 0, size: 4,
+; CHECK-DAG: id: [[LAST_STACK_ID:[0-9]+]], type: default, offset: 76, size: 4
+; CHECK: liveins: %r0, %r1, %r2, %r3
+; CHECK-DAG: [[R0:%[0-9]+]]:_(s32) = COPY %r0
+; CHECK-DAG: [[R1:%[0-9]+]]:_(s32) = COPY %r1
+; CHECK-DAG: [[R2:%[0-9]+]]:_(s32) = COPY %r2
+; CHECK-DAG: [[R3:%[0-9]+]]:_(s32) = COPY %r3
+; CHECK: [[FIRST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[FIRST_STACK_ID]]
+; CHECK: [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[FIRST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[FIRST_STACK_ID]]
+; CHECK: [[LAST_STACK_ELEMENT_FI:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[LAST_STACK_ID]]
+; CHECK: [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_LOAD [[LAST_STACK_ELEMENT_FI]]{{.*}}load 4 from %fixed-stack.[[LAST_STACK_ID]]
+; CHECK: [[ARG_ARR:%[0-9]+]]:_(s768) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32), [[R2]](s32), [[R3]](s32), [[FIRST_STACK_ELEMENT]](s32), {{.*}}, [[LAST_STACK_ELEMENT]](s32)
+; CHECK: ADJCALLSTACKDOWN 80, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32), [[R2:%[0-9]+]]:_(s32), [[R3:%[0-9]+]]:_(s32), [[FIRST_STACK_ELEMENT:%[0-9]+]]:_(s32), {{.*}}, [[LAST_STACK_ELEMENT:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ARG_ARR]](s768)
+; CHECK: %r0 = COPY [[R0]]
+; CHECK: %r1 = COPY [[R1]]
+; CHECK: %r2 = COPY [[R2]]
+; CHECK: %r3 = COPY [[R3]]
+; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp
+; CHECK: [[OFF_FIRST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+; CHECK: [[FIRST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF_FIRST_ELEMENT]](s32)
+; CHECK: G_STORE [[FIRST_STACK_ELEMENT]](s32), [[FIRST_STACK_ARG_ADDR]]{{.*}}store 4
+; Match the second-to-last offset, so we can get the correct SP for the last element
+; CHECK: G_CONSTANT i32 72
+; CHECK: [[SP:%[0-9]+]]:_(p0) = COPY %sp
+; CHECK: [[OFF_LAST_ELEMENT:%[0-9]+]]:_(s32) = G_CONSTANT i32 76
+; CHECK: [[LAST_STACK_ARG_ADDR:%[0-9]+]]:_(p0) = G_GEP [[SP]], [[OFF_LAST_ELEMENT]](s32)
+; CHECK: G_STORE [[LAST_STACK_ELEMENT]](s32), [[LAST_STACK_ARG_ADDR]]{{.*}}store 4
+; CHECK: BL @tough_arrays_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
+; CHECK: [[R0:%[0-9]+]]:_(s32) = COPY %r0
+; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1
+; CHECK: [[RES_ARR:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32)
+; CHECK: ADJCALLSTACKUP 80, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[RES_ARR]](s64)
+; CHECK: %r0 = COPY [[R0]]
+; CHECK: %r1 = COPY [[R1]]
+; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
+entry:
+  %r = notail call arm_aapcscc [2 x i32*] @tough_arrays_target([6 x [4 x i32]] %arr)
+  ret [2 x i32*] %r
+}
+
+declare arm_aapcscc {i32, i32} @structs_target({i32, i32})
+
+define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x) {
+; CHECK-LABEL: test_structs
+; CHECK: liveins: %r0, %r1
+; CHECK-DAG: [[X0:%[0-9]+]]:_(s32) = COPY %r0
+; CHECK-DAG: [[X1:%[0-9]+]]:_(s32) = COPY %r1
+; CHECK: [[X:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[X0]](s32), [[X1]](s32)
+; CHECK: ADJCALLSTACKDOWN 0, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: [[X0:%[0-9]+]]:_(s32), [[X1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[X]](s64)
+; CHECK-DAG: %r0 = COPY [[X0]](s32)
+; CHECK-DAG: %r1 = COPY [[X1]](s32)
+; CHECK: BL @structs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit-def %r0, implicit-def %r1
+; CHECK: 
[[R0:%[0-9]+]]:_(s32) = COPY %r0 +; CHECK: [[R1:%[0-9]+]]:_(s32) = COPY %r1 +; CHECK: [[R:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[R0]](s32), [[R1]](s32) +; CHECK: ADJCALLSTACKUP 0, 0, 14, _, implicit-def %sp, implicit %sp +; CHECK: [[R0:%[0-9]+]]:_(s32), [[R1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[R]](s64) +; CHECK: %r0 = COPY [[R0]](s32) +; CHECK: %r1 = COPY [[R1]](s32) +; CHECK: BX_RET 14, _, implicit %r0, implicit %r1 + %r = notail call arm_aapcscc {i32, i32} @structs_target({i32, i32} %x) + ret {i32, i32} %r +} diff --git a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir index eb6aabb63e0f7..6a0d85737e3b2 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -1,17 +1,8 @@ # RUN: llc -mtriple arm-- -global-isel -run-pass=regbankselect %s -o - | FileCheck %s --- | define void @test_add_s32() { ret void } - define void @test_add_s16() { ret void } - define void @test_add_s8() { ret void } - define void @test_add_s1() { ret void } - define void @test_sub_s32() { ret void } - define void @test_sub_s16() { ret void } - define void @test_sub_s8() { ret void } - define void @test_mul_s32() { ret void } - define void @test_mul_s16() { ret void } - define void @test_mul_s8() { ret void } define void @test_sdiv_s32() #1 { ret void } define void @test_udiv_s32() #1 { ret void } @@ -20,6 +11,10 @@ define void @test_or_s32() { ret void} define void @test_xor_s32() { ret void} + define void @test_lshr_s32() { ret void } + define void @test_ashr_s32() { ret void } + define void @test_shl_s32() { ret void } + define void @test_loads() #0 { ret void } define void @test_stores() #0 { ret void } @@ -78,111 +73,6 @@ body: | %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 -... ---- -name: test_add_s16 -# CHECK-LABEL: name: test_add_s16 -legalized: true -regBankSelected: false -selected: false -# CHECK: registers: -# CHECK: - { id: 0, class: gprb, preferred-register: '' } -# CHECK: - { id: 1, class: gprb, preferred-register: '' } -# CHECK: - { id: 2, class: gprb, preferred-register: '' } -# CHECK: - { id: 3, class: gprb, preferred-register: '' } -# CHECK: - { id: 4, class: gprb, preferred-register: '' } -# CHECK: - { id: 5, class: gprb, preferred-register: '' } - -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } - - { id: 5, class: _ } -body: | - bb.0: - liveins: %r0, %r1 - - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 - %2(s32) = G_ANYEXT %0(s16) - %3(s32) = G_ANYEXT %1(s16) - %4(s32) = G_ADD %2, %3 - %5(s16) = G_TRUNC %4(s32) - %r0 = COPY %5(s16) - BX_RET 14, _, implicit %r0 - -... 
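+# The s16, s8 and s1 variants of the add/sub/mul tests are deleted: narrow
+# values no longer reach this pass as direct copies from core registers.
+# Judging by the rewritten tests later in this file, they are instead
+# produced by an explicit G_TRUNC of an s32 COPY, so only the s32 form of
+# each operation still needs a bank assignment here. Illustrative contrast
+# (the first line is the shape the deleted tests used):
+#
+#   old: %0(s16) = COPY %r0
+#   new: %0(s32) = COPY %r0
+#        %1(s16) = G_TRUNC %0(s32)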
---- -name: test_add_s8 -# CHECK-LABEL: name: test_add_s8 -legalized: true -regBankSelected: false -selected: false -# CHECK: registers: -# CHECK: - { id: 0, class: gprb, preferred-register: '' } -# CHECK: - { id: 1, class: gprb, preferred-register: '' } -# CHECK: - { id: 2, class: gprb, preferred-register: '' } -# CHECK: - { id: 3, class: gprb, preferred-register: '' } -# CHECK: - { id: 4, class: gprb, preferred-register: '' } -# CHECK: - { id: 5, class: gprb, preferred-register: '' } - -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } - - { id: 5, class: _ } -body: | - bb.0: - liveins: %r0, %r1 - - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 - %2(s32) = G_ANYEXT %0(s8) - %3(s32) = G_ANYEXT %1(s8) - %4(s32) = G_ADD %2, %3 - %5(s8) = G_TRUNC %4(s32) - %r0 = COPY %5(s8) - BX_RET 14, _, implicit %r0 - -... ---- -name: test_add_s1 -# CHECK-LABEL: name: test_add_s1 -legalized: true -regBankSelected: false -selected: false -# CHECK: registers: -# CHECK: - { id: 0, class: gprb, preferred-register: '' } -# CHECK: - { id: 1, class: gprb, preferred-register: '' } -# CHECK: - { id: 2, class: gprb, preferred-register: '' } -# CHECK: - { id: 3, class: gprb, preferred-register: '' } -# CHECK: - { id: 4, class: gprb, preferred-register: '' } -# CHECK: - { id: 5, class: gprb, preferred-register: '' } - -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } - - { id: 5, class: _ } -body: | - bb.0: - liveins: %r0, %r1 - - %0(s1) = COPY %r0 - %1(s1) = COPY %r1 - %2(s32) = G_ANYEXT %0(s1) - %3(s32) = G_ANYEXT %1(s1) - %4(s32) = G_ADD %2, %3 - %5(s1) = G_TRUNC %4(s32) - %r0 = COPY %5(s1) - BX_RET 14, _, implicit %r0 - ... --- name: test_sub_s32 @@ -211,43 +101,8 @@ body: | ... --- -name: test_sub_s16 -# CHECK-LABEL: name: test_sub_s16 -legalized: true -regBankSelected: false -selected: false -# CHECK: registers: -# CHECK: - { id: 0, class: gprb, preferred-register: '' } -# CHECK: - { id: 1, class: gprb, preferred-register: '' } -# CHECK: - { id: 2, class: gprb, preferred-register: '' } -# CHECK: - { id: 3, class: gprb, preferred-register: '' } -# CHECK: - { id: 4, class: gprb, preferred-register: '' } -# CHECK: - { id: 5, class: gprb, preferred-register: '' } - -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } - - { id: 5, class: _ } -body: | - bb.0: - liveins: %r0, %r1 - - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 - %2(s32) = G_ANYEXT %0(s16) - %3(s32) = G_ANYEXT %1(s16) - %4(s32) = G_SUB %2, %3 - %5(s16) = G_TRUNC %4(s32) - %r0 = COPY %5(s16) - BX_RET 14, _, implicit %r0 - -... 
---- -name: test_sub_s8 -# CHECK-LABEL: name: test_sub_s8 +name: test_mul_s32 +# CHECK-LABEL: name: test_mul_s32 legalized: true regBankSelected: false selected: false @@ -255,34 +110,25 @@ selected: false # CHECK: - { id: 0, class: gprb, preferred-register: '' } # CHECK: - { id: 1, class: gprb, preferred-register: '' } # CHECK: - { id: 2, class: gprb, preferred-register: '' } -# CHECK: - { id: 3, class: gprb, preferred-register: '' } -# CHECK: - { id: 4, class: gprb, preferred-register: '' } -# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } - - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 - %2(s32) = G_ANYEXT %0(s8) - %3(s32) = G_ANYEXT %1(s8) - %4(s32) = G_SUB %2, %3 - %5(s8) = G_TRUNC %4(s32) - %r0 = COPY %5(s8) + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_MUL %0, %1 + %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 ... --- -name: test_mul_s32 -# CHECK-LABEL: name: test_mul_s32 +name: test_sdiv_s32 +# CHECK-LABEL: name: test_sdiv_s32 legalized: true regBankSelected: false selected: false @@ -301,14 +147,14 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - %2(s32) = G_MUL %0, %1 + %2(s32) = G_SDIV %0, %1 %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 ... --- -name: test_mul_s16 -# CHECK-LABEL: name: test_mul_s16 +name: test_udiv_s32 +# CHECK-LABEL: name: test_udiv_s32 legalized: true regBankSelected: false selected: false @@ -316,34 +162,25 @@ selected: false # CHECK: - { id: 0, class: gprb, preferred-register: '' } # CHECK: - { id: 1, class: gprb, preferred-register: '' } # CHECK: - { id: 2, class: gprb, preferred-register: '' } -# CHECK: - { id: 3, class: gprb, preferred-register: '' } -# CHECK: - { id: 4, class: gprb, preferred-register: '' } -# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } - - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s16) = COPY %r0 - %1(s16) = COPY %r1 - %2(s32) = G_ANYEXT %0(s16) - %3(s32) = G_ANYEXT %1(s16) - %4(s32) = G_MUL %2, %3 - %5(s16) = G_TRUNC %4(s32) - %r0 = COPY %5(s16) + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_UDIV %0, %1 + %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 ... --- -name: test_mul_s8 -# CHECK-LABEL: name: test_mul_s8 +name: test_and_s32 +# CHECK-LABEL: name: test_and_s32 legalized: true regBankSelected: false selected: false @@ -351,34 +188,25 @@ selected: false # CHECK: - { id: 0, class: gprb, preferred-register: '' } # CHECK: - { id: 1, class: gprb, preferred-register: '' } # CHECK: - { id: 2, class: gprb, preferred-register: '' } -# CHECK: - { id: 3, class: gprb, preferred-register: '' } -# CHECK: - { id: 4, class: gprb, preferred-register: '' } -# CHECK: - { id: 5, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } - - { id: 3, class: _ } - - { id: 4, class: _ } - - { id: 5, class: _ } body: | bb.0: liveins: %r0, %r1 - %0(s8) = COPY %r0 - %1(s8) = COPY %r1 - %2(s32) = G_ANYEXT %0(s8) - %3(s32) = G_ANYEXT %1(s8) - %4(s32) = G_MUL %2, %3 - %5(s8) = G_TRUNC %4(s32) - %r0 = COPY %5(s8) + %0(s32) = COPY %r0 + %1(s32) = COPY %r1 + %2(s32) = G_AND %0, %1 + %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 ... 
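+# All of the remaining s32 binary-operation tests share one shape: two COPYs
+# from core registers, the operation itself, and a COPY back to %r0, with
+# every virtual register expected to land on gprb. A generic sketch
+# (illustrative; substitute G_MUL, G_SDIV, G_UDIV, G_AND, G_OR, G_XOR,
+# G_LSHR, G_ASHR or G_SHL for <OP>):
+#
+#   %0(s32) = COPY %r0
+#   %1(s32) = COPY %r1
+#   %2(s32) = <OP> %0, %1       ; regbankselect maps %0, %1 and %2 to gprb
+#   %r0 = COPY %2(s32)
+#   BX_RET 14, _, implicit %r0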
--- -name: test_sdiv_s32 -# CHECK-LABEL: name: test_sdiv_s32 +name: test_or_s32 +# CHECK-LABEL: name: test_or_s32 legalized: true regBankSelected: false selected: false @@ -397,14 +225,14 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - %2(s32) = G_SDIV %0, %1 + %2(s32) = G_OR %0, %1 %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 ... --- -name: test_udiv_s32 -# CHECK-LABEL: name: test_udiv_s32 +name: test_xor_s32 +# CHECK-LABEL: name: test_xor_s32 legalized: true regBankSelected: false selected: false @@ -423,14 +251,14 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - %2(s32) = G_UDIV %0, %1 + %2(s32) = G_XOR %0, %1 %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 ... --- -name: test_and_s32 -# CHECK-LABEL: name: test_and_s32 +name: test_lshr_s32 +# CHECK-LABEL: name: test_lshr_s32 legalized: true regBankSelected: false selected: false @@ -449,14 +277,14 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - %2(s32) = G_AND %0, %1 + %2(s32) = G_LSHR %0, %1 %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 ... --- -name: test_or_s32 -# CHECK-LABEL: name: test_or_s32 +name: test_ashr_s32 +# CHECK-LABEL: name: test_ashr_s32 legalized: true regBankSelected: false selected: false @@ -475,14 +303,14 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - %2(s32) = G_OR %0, %1 + %2(s32) = G_ASHR %0, %1 %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 ... --- -name: test_xor_s32 -# CHECK-LABEL: name: test_xor_s32 +name: test_shl_s32 +# CHECK-LABEL: name: test_shl_s32 legalized: true regBankSelected: false selected: false @@ -501,7 +329,7 @@ body: | %0(s32) = COPY %r0 %1(s32) = COPY %r1 - %2(s32) = G_XOR %0, %1 + %2(s32) = G_SHL %0, %1 %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 @@ -567,15 +395,15 @@ registers: - { id: 6, class: _ } body: | bb.0: - liveins: %r0, %r1, %r2, %r3, %r4, %r5, %d6 + liveins: %r0, %r1, %r5, %d6 %0(p0) = COPY %r0 %1(s32) = COPY %r1 G_STORE %1(s32), %0 :: (store 4) - %2(s16) = COPY %r2 + %2(s16) = G_TRUNC %1(s32) G_STORE %2(s16), %0 :: (store 2) - %3(s8) = COPY %r3 + %3(s8) = G_TRUNC %1(s32) G_STORE %3(s8), %0 :: (store 1) - %4(s1) = COPY %r4 + %4(s1) = G_TRUNC %1(s32) G_STORE %4(s1), %0 :: (store 1) %5(p0) = COPY %r5 G_STORE %5(p0), %0 :: (store 4) @@ -683,16 +511,19 @@ selected: false # CHECK: registers: # CHECK: - { id: 0, class: gprb, preferred-register: '' } # CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } + - { id: 2, class: _ } body: | bb.0: liveins: %r0 - %0(s8) = COPY %r0 - %1(s32) = G_ANYEXT %0(s8) - %r0 = COPY %1(s32) + %0(s32) = COPY %r0 + %1(s8) = G_TRUNC %0(s32) + %2(s32) = G_ANYEXT %1(s8) + %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 ... --- @@ -704,16 +535,19 @@ selected: false # CHECK: registers: # CHECK: - { id: 0, class: gprb, preferred-register: '' } # CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } + - { id: 2, class: _ } body: | bb.0: liveins: %r0 - %0(s16) = COPY %r0 - %1(s32) = G_ANYEXT %0(s16) - %r0 = COPY %1(s32) + %0(s32) = COPY %r0 + %1(s16) = G_TRUNC %0(s32) + %2(s32) = G_ANYEXT %1(s16) + %r0 = COPY %2(s32) BX_RET 14, _, implicit %r0 ... 
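+# Note the bookkeeping in the rewritten extension tests: starting from an
+# s32 COPY adds one virtual register per test (the G_TRUNC result), which
+# is why the expected register lists grow an "id: 2" entry. Sketch of the
+# per-vreg mapping this pass is expected to produce (illustrative):
+#
+#   %0(s32) = COPY %r0          ; id 0 -> gprb
+#   %1(s16) = G_TRUNC %0(s32)   ; id 1 -> gprb
+#   %2(s32) = G_ANYEXT %1(s16)  ; id 2 -> gprb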
--- @@ -725,17 +559,20 @@ selected: false # CHECK: registers: # CHECK: - { id: 0, class: gprb, preferred-register: '' } # CHECK: - { id: 1, class: gprb, preferred-register: '' } +# CHECK: - { id: 2, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } + - { id: 2, class: _ } body: | bb.0: - liveins: %r0 + liveins: %r0, %r1 %0(s32) = COPY %r0 + %2(p0) = COPY %r1 %1(s16) = G_TRUNC %0(s32) - %r0 = COPY %1(s16) - BX_RET 14, _, implicit %r0 + G_STORE %1(s16), %2 :: (store 2) + BX_RET 14, _ ... --- name: test_icmp_eq_s32 @@ -747,6 +584,7 @@ selected: false # CHECK: - { id: 0, class: gprb, preferred-register: '' } # CHECK: - { id: 1, class: gprb, preferred-register: '' } # CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -775,6 +613,7 @@ selected: false # CHECK: - { id: 0, class: fprb, preferred-register: '' } # CHECK: - { id: 1, class: fprb, preferred-register: '' } # CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -803,6 +642,7 @@ selected: false # CHECK: - { id: 0, class: fprb, preferred-register: '' } # CHECK: - { id: 1, class: fprb, preferred-register: '' } # CHECK: - { id: 2, class: gprb, preferred-register: '' } +# CHECK: - { id: 3, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -832,21 +672,24 @@ selected: false # CHECK: - { id: 1, class: gprb, preferred-register: '' } # CHECK: - { id: 2, class: gprb, preferred-register: '' } # CHECK: - { id: 3, class: gprb, preferred-register: '' } +# CHECK: - { id: 4, class: gprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } - { id: 2, class: _ } - { id: 3, class: _ } + - { id: 4, class: _ } body: | bb.0: liveins: %r0, %r1, %r2 %0(s32) = COPY %r0 %1(s32) = COPY %r1 - %2(s1) = COPY %r2 - %3(s32) = G_SELECT %2(s1), %0, %1 - %r0 = COPY %3(s32) + %2(s32) = COPY %r2 + %3(s1) = G_TRUNC %2(s32) + %4(s32) = G_SELECT %3(s1), %0, %1 + %r0 = COPY %4(s32) BX_RET 14, _, implicit %r0 ... @@ -859,7 +702,9 @@ regBankSelected: false selected: false registers: - { id: 0, class: _ } + - { id: 1, class: _ } # CHECK: { id: 0, class: gprb, preferred-register: '' } +# CHECK: { id: 1, class: gprb, preferred-register: '' } # Check that we map the condition of the G_BRCOND into the GPR. # For the G_BR, there are no registers to map, but make sure we don't crash. 
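+# A sketch of what the body below expects (illustrative; both vregs on gprb):
+#
+#   %0(s32) = COPY %r0          ; id 0 -> gprb
+#   %1(s1) = G_TRUNC %0(s32)    ; id 1 -> gprb
+#   G_BRCOND %1(s1), %bb.1
+#   G_BR %bb.2                  ; nothing to map for the G_BR itself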
body: | @@ -867,8 +712,9 @@ body: | successors: %bb.1(0x40000000), %bb.2(0x40000000) liveins: %r0 - %0(s1) = COPY %r0 - G_BRCOND %0(s1), %bb.1 + %0(s32) = COPY %r0 + %1(s1) = G_TRUNC %0(s32) + G_BRCOND %1(s1), %bb.1 G_BR %bb.2 bb.1: diff --git a/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir b/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir index dacd227df62b1..3ef1b61211c0a 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-select-globals-pic.mir @@ -28,12 +28,12 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @internal_global - ; DARWIN-MOVT: [[G:%[0-9]+]] = MOV_ga_pcrel {{.*}}@internal_global - ; DARWIN-NOMOVT: [[G:%[0-9]+]] = LDRLIT_ga_pcrel {{.*}}@internal_global - ; ELF: [[G:%[0-9]+]] = LDRLIT_ga_pcrel {{.*}}@internal_global + ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel {{.*}}@internal_global + ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel {{.*}}@internal_global + ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel {{.*}}@internal_global %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) - ; CHECK: [[V:%[0-9]+]] = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_global) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] @@ -54,12 +54,12 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_global - ; DARWIN-MOVT: [[G:%[0-9]+]] = MOV_ga_pcrel_ldr {{.*}} @external_global :: (load 4 from got) - ; DARWIN-NOMOVT: [[G:%[0-9]+]] = LDRLIT_ga_pcrel_ldr {{.*}}@external_global :: (load 4 from got) - ; ELF: [[G:%[0-9]+]] = LDRLIT_ga_pcrel_ldr @external_global :: (load 4 from got) + ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel_ldr {{.*}} @external_global :: (load 4 from got) + ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr {{.*}}@external_global :: (load 4 from got) + ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr @external_global :: (load 4 from got) %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) - ; CHECK: [[V:%[0-9]+]] = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_global) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] @@ -80,12 +80,12 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @internal_constant - ; DARWIN-MOVT: [[G:%[0-9]+]] = MOV_ga_pcrel {{.*}}@internal_constant - ; DARWIN-NOMOVT: [[G:%[0-9]+]] = LDRLIT_ga_pcrel {{.*}}@internal_constant - ; ELF: [[G:%[0-9]+]] = LDRLIT_ga_pcrel {{.*}}@internal_constant + ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel {{.*}}@internal_constant + ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel {{.*}}@internal_constant + ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel {{.*}}@internal_constant %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_constant) - ; CHECK: [[V:%[0-9]+]] = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_constant) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] @@ -106,12 +106,12 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_constant - ; DARWIN-MOVT: [[G:%[0-9]+]] = MOV_ga_pcrel_ldr {{.*}} @external_constant :: (load 4 from got) - ; DARWIN-NOMOVT: [[G:%[0-9]+]] = LDRLIT_ga_pcrel_ldr {{.*}}@external_constant :: (load 4 from got) - ; ELF: [[G:%[0-9]+]] = LDRLIT_ga_pcrel_ldr @external_constant :: (load 4 from got) + ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel_ldr {{.*}} @external_constant :: (load 4 from got) + ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr 
= LDRLIT_ga_pcrel_ldr {{.*}}@external_constant :: (load 4 from got) + ; ELF: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel_ldr @external_constant :: (load 4 from got) %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_constant) - ; CHECK: [[V:%[0-9]+]] = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_constant) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] diff --git a/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir b/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir index c31893cf22994..e80700317e004 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-select-globals-ropi-rwpi.mir @@ -36,14 +36,14 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @internal_global - ; RW-DEFAULT-MOVT: [[G:%[0-9]+]] = MOVi32imm @internal_global - ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]] = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) - ; RWPI-MOVT: [[OFF:%[0-9]+]] = MOVi32imm {{.*}} @internal_global - ; RWPI-NOMOVT: [[OFF:%[0-9]+]] = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) - ; RWPI: [[G:%[0-9]+]] = ADDrr %r9, [[OFF]], 14, _, _ + ; RW-DEFAULT-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @internal_global + ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; RWPI-MOVT: [[OFF:%[0-9]+]]:gpr = MOVi32imm {{.*}} @internal_global + ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; RWPI: [[G:%[0-9]+]]:gpr = ADDrr %r9, [[OFF]], 14, _, _ %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) - ; CHECK: [[V:%[0-9]+]] = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_global) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] @@ -70,14 +70,14 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_global - ; RW-DEFAULT-MOVT: [[G:%[0-9]+]] = MOVi32imm @external_global - ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]] = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) - ; RWPI-MOVT: [[OFF:%[0-9]+]] = MOVi32imm {{.*}} @external_global - ; RWPI-NOMOVT: [[OFF:%[0-9]+]] = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) - ; RWPI: [[G:%[0-9]+]] = ADDrr %r9, [[OFF]], 14, _, _ + ; RW-DEFAULT-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_global + ; RW-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; RWPI-MOVT: [[OFF:%[0-9]+]]:gpr = MOVi32imm {{.*}} @external_global + ; RWPI-NOMOVT: [[OFF:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; RWPI: [[G:%[0-9]+]]:gpr = ADDrr %r9, [[OFF]], 14, _, _ %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) - ; CHECK: [[V:%[0-9]+]] = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_global) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_global) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] @@ -101,13 +101,13 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @internal_constant - ; ROPI-MOVT: [[G:%[0-9]+]] = MOV_ga_pcrel @internal_constant - ; ROPI-NOMOVT: [[G:%[0-9]+]] = LDRLIT_ga_pcrel @internal_constant - ; RO-DEFAULT-MOVT: [[G:%[0-9]+]] = MOVi32imm @internal_constant - ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]] = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; ROPI-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel @internal_constant + ; ROPI-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel @internal_constant + ; RO-DEFAULT-MOVT: 
[[G:%[0-9]+]]:gpr = MOVi32imm @internal_constant + ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_constant) - ; CHECK: [[V:%[0-9]+]] = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @internal_constant) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] @@ -131,13 +131,13 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_constant - ; ROPI-MOVT: [[G:%[0-9]+]] = MOV_ga_pcrel @external_constant - ; ROPI-NOMOVT: [[G:%[0-9]+]] = LDRLIT_ga_pcrel @external_constant - ; RO-DEFAULT-MOVT: [[G:%[0-9]+]] = MOVi32imm @external_constant - ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]] = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; ROPI-MOVT: [[G:%[0-9]+]]:gpr = MOV_ga_pcrel @external_constant + ; ROPI-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_pcrel @external_constant + ; RO-DEFAULT-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_constant + ; RO-DEFAULT-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_constant) - ; CHECK: [[V:%[0-9]+]] = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_constant) + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ :: (load 4 from @external_constant) %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] diff --git a/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir b/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir index 9cb402df30a4d..034b88296dc12 100644 --- a/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir +++ b/test/CodeGen/ARM/GlobalISel/arm-select-globals-static.mir @@ -25,13 +25,13 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @internal_global - ; ELF-MOVT: [[G:%[0-9]+]] = MOVi32imm @internal_global - ; ELF-NOMOVT: [[G:%[0-9]+]] = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) - ; DARWIN-MOVT: [[G:%[0-9]+]] = MOVi32imm @internal_global - ; DARWIN-NOMOVT: [[G:%[0-9]+]] = LDRLIT_ga_abs @internal_global + ; ELF-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @internal_global + ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @internal_global + ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_abs @internal_global %1(s32) = G_LOAD %0(p0) :: (load 4 from @internal_global) - ; CHECK: [[V:%[0-9]+]] = LDRi12 [[G]], 0, 14, _ + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] @@ -55,13 +55,13 @@ registers: body: | bb.0: %0(p0) = G_GLOBAL_VALUE @external_global - ; ELF-MOVT: [[G:%[0-9]+]] = MOVi32imm @external_global - ; ELF-NOMOVT: [[G:%[0-9]+]] = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) - ; DARWIN-MOVT: [[G:%[0-9]+]] = MOVi32imm @external_global - ; DARWIN-NOMOVT: [[G:%[0-9]+]] = LDRLIT_ga_abs @external_global + ; ELF-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_global + ; ELF-NOMOVT: [[G:%[0-9]+]]:gpr = LDRi12 %const.0, 0, 14, _ :: (load 4 from constant-pool) + ; DARWIN-MOVT: [[G:%[0-9]+]]:gpr = MOVi32imm @external_global + ; DARWIN-NOMOVT: [[G:%[0-9]+]]:gpr = LDRLIT_ga_abs @external_global %1(s32) = G_LOAD %0(p0) :: (load 4 from @external_global) - ; CHECK: [[V:%[0-9]+]] = LDRi12 [[G]], 0, 14, _ + ; CHECK: [[V:%[0-9]+]]:gpr = LDRi12 [[G]], 0, 14, _ %r0 = COPY %1(s32) ; CHECK: %r0 = COPY [[V]] diff --git a/test/CodeGen/ARM/cmp.ll b/test/CodeGen/ARM/cmp.ll new file mode 100644 index 0000000000000..e12a096b8c0b1 --- /dev/null +++ 
b/test/CodeGen/ARM/cmp.ll @@ -0,0 +1,154 @@ +; RUN: llc -mtriple=armv7 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s --check-prefix=CHECK-T2 + +define i1 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: mov r2, #0 +; CHECK: cmp r0, r1 +; CHECK: movwne r2, #1 +; CHECK: mov r0, r2 +; CHECK-T2: mov{{.*}} r2, #0 +; CHECK-T2: cmp r0, r1 +; CHECK-T2: movne r2, #1 +; CHECK-T2: mov r0, r2 + %tmp = icmp ne i32 %a, %b + ret i1 %tmp +} + +define i1 @f2(i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: mov r2, #0 +; CHECK: cmp r0, r1 +; CHECK: movweq r2, #1 +; CHECK: mov r0, r2 +; CHECK-T2: mov{{.*}} r2, #0 +; CHECK-T2: cmp r0, r1 +; CHECK-T2: moveq r2, #1 +; CHECK-T2: mov r0, r2 + %tmp = icmp eq i32 %a, %b + ret i1 %tmp +} + +define i1 @f6(i32 %a, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: mov r2, #0 +; CHECK: cmp {{.*}}, r1, lsl #5 +; CHECK: movweq r2, #1 +; CHECK: mov r0, r2 +; CHECK-T2: mov{{.*}} r2, #0 +; CHECK-T2: cmp.w r0, r1, lsl #5 +; CHECK-T2: moveq r2, #1 +; CHECK-T2: mov r0, r2 + %tmp = shl i32 %b, 5 + %tmp1 = icmp eq i32 %a, %tmp + ret i1 %tmp1 +} + +define i1 @f7(i32 %a, i32 %b) { +; CHECK-LABEL: f7: +; CHECK: mov r2, #0 +; CHECK: cmp r0, r1, lsr #6 +; CHECK: movwne r2, #1 +; CHECK: mov r0, r2 +; CHECK-T2: mov{{.*}} r2, #0 +; CHECK-T2: cmp.w r0, r1, lsr #6 +; CHECK-T2: movne r2, #1 +; CHECK-T2: mov r0, r2 + %tmp = lshr i32 %b, 6 + %tmp1 = icmp ne i32 %a, %tmp + ret i1 %tmp1 +} + +define i1 @f8(i32 %a, i32 %b) { +; CHECK-LABEL: f8: +; CHECK: mov r2, #0 +; CHECK: cmp r0, r1, asr #7 +; CHECK: movweq r2, #1 +; CHECK: mov r0, r2 +; CHECK-T2: mov{{.*}} r2, #0 +; CHECK-T2: cmp.w r0, r1, asr #7 +; CHECK-T2: moveq r2, #1 +; CHECK-T2: mov r0, r2 + %tmp = ashr i32 %b, 7 + %tmp1 = icmp eq i32 %a, %tmp + ret i1 %tmp1 +} + +define i1 @f9(i32 %a) { +; CHECK-LABEL: f9: +; CHECK: mov r1, #0 +; CHECK: cmp r0, r0, ror #8 +; CHECK: movwne r1, #1 +; CHECK: mov r0, r1 +; CHECK-T2: mov{{.*}} r1, #0 +; CHECK-T2: cmp.w r0, r0, ror #8 +; CHECK-T2: movne r1, #1 +; CHECK-T2: mov r0, r1 + %l8 = shl i32 %a, 24 + %r8 = lshr i32 %a, 8 + %tmp = or i32 %l8, %r8 + %tmp1 = icmp ne i32 %a, %tmp + ret i1 %tmp1 +} + +; CHECK-LABEL: swap_cmp_shl +; CHECK: mov r2, #0 +; CHECK: cmp r1, r0, lsl #11 +; CHECK: movwlt r2, #1 +; CHECK-T2: mov{{.*}} r2, #0 +; CHECK-T2: cmp.w r1, r0, lsl #11 +; CHECK-T2: movlt r2, #1 +define arm_aapcscc i32 @swap_cmp_shl(i32 %a, i32 %b) { +entry: + %shift = shl i32 %a, 11 + %cmp = icmp sgt i32 %shift, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: swap_cmp_lshr +; CHECK: mov r2, #0 +; CHECK: cmp r1, r0, lsr #11 +; CHECK: movwhi r2, #1 +; CHECK-T2: mov{{.*}} r2, #0 +; CHECK-T2: cmp.w r1, r0, lsr #11 +; CHECK-T2: movhi r2, #1 +define arm_aapcscc i32 @swap_cmp_lshr(i32 %a, i32 %b) { +entry: + %shift = lshr i32 %a, 11 + %cmp = icmp ult i32 %shift, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: swap_cmp_ashr +; CHECK: mov r2, #0 +; CHECK: cmp r1, r0, asr #11 +; CHECK: movwle r2, #1 +; CHECK-T2: mov{{.*}} r2, #0 +; CHECK-T2: cmp.w r1, r0, asr #11 +; CHECK-T2: movle r2, #1 +define arm_aapcscc i32 @swap_cmp_ashr(i32 %a, i32 %b) { +entry: + %shift = ashr i32 %a, 11 + %cmp = icmp sge i32 %shift, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +; CHECK-LABEL: swap_cmp_rotr +; CHECK: mov r2, #0 +; CHECK: cmp r1, r0, ror #11 +; CHECK: movwls r2, #1 +; CHECK-T2: mov{{.*}} r2, #0 +; CHECK-T2: cmp.w r1, r0, ror #11 +; CHECK-T2: movls r2, #1 +define arm_aapcscc i32 @swap_cmp_rotr(i32 %a, i32 %b) { +entry: 
+ %lsr = lshr i32 %a, 11 + %lsl = shl i32 %a, 21 + %ror = or i32 %lsr, %lsl + %cmp = icmp uge i32 %ror, %b + %conv = zext i1 %cmp to i32 + ret i32 %conv +} diff --git a/test/CodeGen/ARM/cortex-a57-misched-alu.ll b/test/CodeGen/ARM/cortex-a57-misched-alu.ll index 960ee87532b0b..2ced60fbf0d31 100644 --- a/test/CodeGen/ARM/cortex-a57-misched-alu.ll +++ b/test/CodeGen/ARM/cortex-a57-misched-alu.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts ; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=+use-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=POST-MISCHED ; Check the latency for ALU shifted operand variants. ; @@ -60,6 +61,8 @@ ; CHECK: Ready ; CHECK-NEXT: A57UnitI +; Check that post RA MI scheduler is invoked with +use-misched +; POST-MISCHED: Before post-MI-sched target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv8r-arm-none-eabi" diff --git a/test/CodeGen/ARM/imm-peephole-arm.mir b/test/CodeGen/ARM/imm-peephole-arm.mir index cd30bdb74d571..95ae58ff9bdb6 100644 --- a/test/CodeGen/ARM/imm-peephole-arm.mir +++ b/test/CodeGen/ARM/imm-peephole-arm.mir @@ -1,17 +1,17 @@ # RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s -# CHECK: [[IN:%.*]] = COPY %r0 -# CHECK: [[SUM1TMP:%.*]] = ADDri [[IN]], 133 -# CHECK: [[SUM1:%.*]] = ADDri killed [[SUM1TMP]], 25600 +# CHECK: [[IN:%.*]]:gprnopc = COPY %r0 +# CHECK: [[SUM1TMP:%.*]]:rgpr = ADDri [[IN]], 133 +# CHECK: [[SUM1:%.*]]:rgpr = ADDri killed [[SUM1TMP]], 25600 -# CHECK: [[SUM2TMP:%.*]] = SUBri [[IN]], 133 -# CHECK: [[SUM2:%.*]] = SUBri killed [[SUM2TMP]], 25600 +# CHECK: [[SUM2TMP:%.*]]:rgpr = SUBri [[IN]], 133 +# CHECK: [[SUM2:%.*]]:rgpr = SUBri killed [[SUM2TMP]], 25600 -# CHECK: [[SUM3TMP:%.*]] = SUBri [[IN]], 133 -# CHECK: [[SUM3:%.*]] = SUBri killed [[SUM3TMP]], 25600 +# CHECK: [[SUM3TMP:%.*]]:rgpr = SUBri [[IN]], 133 +# CHECK: [[SUM3:%.*]]:rgpr = SUBri killed [[SUM3TMP]], 25600 -# CHECK: [[SUM4TMP:%.*]] = ADDri killed [[IN]], 133 -# CHECK: [[SUM4:%.*]] = ADDri killed [[SUM4TMP]], 25600 +# CHECK: [[SUM4TMP:%.*]]:rgpr = ADDri killed [[IN]], 133 +# CHECK: [[SUM4:%.*]]:rgpr = ADDri killed [[SUM4TMP]], 25600 --- | @@ -57,4 +57,3 @@ body: | BX_RET 14, _, implicit %r0 ... 
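+# The split immediates checked above add up to the original constant:
+# 133 + 25600 = 25733, which is not encodable as a single ARM modified
+# immediate (an 8-bit value rotated right by an even amount), while both
+# halves are; presumably that is why the peephole rewrites one ADDri/SUBri
+# into two. Sanity check of the encoding claim (illustrative):
+#
+#   25733 = 0x6485       ; significant bits span bit 0 to bit 14 (> 8 bits)
+#   133   = 0x85         ; fits in 8 bits with no rotation
+#   25600 = 0x64 << 8    ; the 8-bit value 0x64 rotated right by 24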
- diff --git a/test/CodeGen/ARM/imm-peephole-thumb.mir b/test/CodeGen/ARM/imm-peephole-thumb.mir index 3d342902d80d1..553717ba74ac5 100644 --- a/test/CodeGen/ARM/imm-peephole-thumb.mir +++ b/test/CodeGen/ARM/imm-peephole-thumb.mir @@ -1,17 +1,17 @@ # RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s -# CHECK: [[IN:%.*]] = COPY %r0 -# CHECK: [[SUM1TMP:%.*]] = t2ADDri [[IN]], 25600 -# CHECK: [[SUM1:%.*]] = t2ADDri killed [[SUM1TMP]], 133 +# CHECK: [[IN:%.*]]:gprnopc = COPY %r0 +# CHECK: [[SUM1TMP:%.*]]:rgpr = t2ADDri [[IN]], 25600 +# CHECK: [[SUM1:%.*]]:rgpr = t2ADDri killed [[SUM1TMP]], 133 -# CHECK: [[SUM2TMP:%.*]] = t2SUBri [[IN]], 25600 -# CHECK: [[SUM2:%.*]] = t2SUBri killed [[SUM2TMP]], 133 +# CHECK: [[SUM2TMP:%.*]]:rgpr = t2SUBri [[IN]], 25600 +# CHECK: [[SUM2:%.*]]:rgpr = t2SUBri killed [[SUM2TMP]], 133 -# CHECK: [[SUM3TMP:%.*]] = t2SUBri [[IN]], 25600 -# CHECK: [[SUM3:%.*]] = t2SUBri killed [[SUM3TMP]], 133 +# CHECK: [[SUM3TMP:%.*]]:rgpr = t2SUBri [[IN]], 25600 +# CHECK: [[SUM3:%.*]]:rgpr = t2SUBri killed [[SUM3TMP]], 133 -# CHECK: [[SUM4TMP:%.*]] = t2ADDri killed [[IN]], 25600 -# CHECK: [[SUM4:%.*]] = t2ADDri killed [[SUM4TMP]], 133 +# CHECK: [[SUM4TMP:%.*]]:rgpr = t2ADDri killed [[IN]], 25600 +# CHECK: [[SUM4:%.*]]:rgpr = t2ADDri killed [[SUM4TMP]], 133 --- | @@ -56,4 +56,3 @@ body: | tBX_RET 14, _, implicit %r0 ... - diff --git a/test/CodeGen/ARM/sched-it-debug-nodes.mir b/test/CodeGen/ARM/sched-it-debug-nodes.mir index 74ea7c63d4b70..c055508e6c7ec 100644 --- a/test/CodeGen/ARM/sched-it-debug-nodes.mir +++ b/test/CodeGen/ARM/sched-it-debug-nodes.mir @@ -125,7 +125,7 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false stack: - - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '%lr' } + - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, callee-saved-register: '%lr', callee-saved-restored: false } - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, callee-saved-register: '%r7' } body: | bb.0.entry: diff --git a/test/CodeGen/ARM/setjmp_longjmp.ll b/test/CodeGen/ARM/setjmp_longjmp.ll index 7100175a97a4e..37ef1f435196b 100644 --- a/test/CodeGen/ARM/setjmp_longjmp.ll +++ b/test/CodeGen/ARM/setjmp_longjmp.ll @@ -1,4 +1,6 @@ ; RUN: llc %s -o - | FileCheck %s +; RUN: llc -mtriple=armv7-linux -exception-model sjlj %s -o - | FileCheck %s -check-prefix CHECK-LINUX +; RUN: llc -mtriple=thumbv7-win32 -exception-model sjlj %s -o - | FileCheck %s -check-prefix CHECK-WIN32 target triple = "armv7-apple-ios" declare i32 @llvm.eh.sjlj.setjmp(i8*) @@ -28,6 +30,16 @@ declare i8* @llvm.stacksave() ; CHECK-NEXT: ldr [[DESTREG:r[0-9]+]], {{\[}}[[BUFREG]], #4] ; CHECK-NEXT: ldr r7, {{\[}}[[BUFREG]]{{\]}} ; CHECK-NEXT: bx [[DESTREG]] + +; CHECK-LINUX: ldr sp, [{{\s*}}[[BUFREG:r[0-9]+]], #8] +; CHECK-LINUX-NEXT: ldr [[DESTREG:r[0-9]+]], {{\[}}[[BUFREG]], #4] +; CHECK-LINUX-NEXT: ldr r7, {{\[}}[[BUFREG]]{{\]}} +; CHECK-LINUX-NEXT: ldr r11, {{\[}}[[BUFREG]]{{\]}} +; CHECK-LINUX-NEXT: bx [[DESTREG]] + +; CHECK-WIN32: ldr.w r11, [{{\s*}}[[BUFREG:r[0-9]+]]] +; CHECK-WIN32-NEXT: ldr.w sp, {{\[}}[[BUFREG]], #8] +; CHECK-WIN32-NEXT: ldr.w pc, {{\[}}[[BUFREG]], #4] define void @foobar() { entry: %buf = alloca [5 x i8*], align 4 diff --git a/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll b/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll index 323d5037138ec..a2b986effba98 100644 --- a/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll +++ b/test/CodeGen/ARM/sjljehprepare-lower-empty-struct.ll @@ -4,6 +4,7 @@ ; RUN: llc 
-mtriple=armv7-apple-ios -O3 < %s | FileCheck %s ; RUN: llc -mtriple=armv7-apple-watchos -O3 < %s | FileCheck %s ; RUN: llc -mtriple=armv7k-apple-ios < %s | FileCheck %s --check-prefix=CHECK-WATCH +; RUN: llc -mtriple=armv7-linux -exception-model sjlj -O3 < %s | FileCheck %s --check-prefix=CHECK-LINUX ; SjLjEHPrepare shouldn't crash when lowering empty structs. ; @@ -17,6 +18,12 @@ entry: ; CHECK: bl __Unwind_SjLj_Register ; CHECK-NEXT: {{[A-Z][a-zA-Z0-9]*}}: ; CHECK-NEXT: bl _bar +; CHECK: bl __Unwind_SjLj_Resume + +; CHECK-LINUX: bl _Unwind_SjLj_Register +; CHECK-LINUX-NEXT: .{{[A-Z][a-zA-Z0-9]*}}: +; CHECK-LINUX-NEXT: bl bar +; CHECK-LINUX: bl _Unwind_SjLj_Resume ; CHECK-WATCH-NOT: bl __Unwind_SjLj_Register diff --git a/test/CodeGen/ARM/thumb1_return_sequence.ll b/test/CodeGen/ARM/thumb1_return_sequence.ll index 67d1cad2cf68f..c54712efb39be 100644 --- a/test/CodeGen/ARM/thumb1_return_sequence.ll +++ b/test/CodeGen/ARM/thumb1_return_sequence.ll @@ -9,6 +9,8 @@ entry: ; -------- ; CHECK-V4T: push {[[SAVED:(r[4567](, )?)+]], lr} ; CHECK-V4T: sub sp, +; Stack is realigned because of the <6 x i32> type +; CHECK-V4T: mov sp, r4 ; CHECK-V5T: push {[[SAVED:(r[4567](, )?)+]], lr} %b = alloca <6 x i32>, align 16 @@ -21,7 +23,8 @@ entry: ; Epilogue ; -------- -; CHECK-V4T: add sp, +; Stack realignment means sp is restored from frame pointer +; CHECK-V4T: mov sp ; CHECK-V4T-NEXT: pop {[[SAVED]]} ; The ISA for v4 does not support pop pc, so make sure we do not emit ; one even when we do not need to update SP. @@ -70,8 +73,9 @@ entry: ; CHECK-V4T-NEXT: mov lr, [[POP_REG]] ; CHECK-V4T-NEXT: mov [[POP_REG]], r12 ; CHECK-V4T: bx lr -; CHECK-V5T: add sp, -; CHECK-V5T-NEXT: pop {[[SAVED]]} +; CHECK-V5T: lsls r4 +; CHECK-V5T-NEXT: mov sp, r4 +; CHECK-V5T: pop {[[SAVED]]} ; CHECK-V5T-NEXT: mov r12, [[POP_REG:r[0-7]]] ; CHECK-V5T-NEXT: pop {[[POP_REG]]} ; CHECK-V5T-NEXT: add sp, diff --git a/test/CodeGen/AVR/atomics/load16.ll b/test/CodeGen/AVR/atomics/load16.ll index ea021c0724b97..2b51afe45f4fa 100644 --- a/test/CodeGen/AVR/atomics/load16.ll +++ b/test/CodeGen/AVR/atomics/load16.ll @@ -3,8 +3,8 @@ ; CHECK-LABEL: atomic_load16 ; CHECK: in r0, 63 ; CHECK-NEXT: cli +; CHECK-NEXT: ld [[RR:r[0-9]+]], [[RD:(X|Y|Z)]]+ ; CHECK-NEXT: ld [[RR:r[0-9]+]], [[RD:(X|Y|Z)]] -; CHECK-NEXT: ldd [[RR:r[0-9]+]], [[RD:(X|Y|Z)]]+ ; CHECK-NEXT: out 63, r0 define i16 @atomic_load16(i16* %foo) { %val = load atomic i16, i16* %foo unordered, align 2 @@ -29,8 +29,8 @@ define i16 @atomic_load_cmp_swap16(i16* %foo) { ; CHECK-LABEL: atomic_load_add16 ; CHECK: in r0, 63 ; CHECK-NEXT: cli -; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]] -; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]]+ +; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]]+ +; CHECK-NEXT: ld [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]] ; CHECK-NEXT: add [[RR1]], [[TMP:r[0-9]+]] ; CHECK-NEXT: adc [[RR2]], [[TMP:r[0-9]+]] ; CHECK-NEXT: st [[RD1]], [[RR1]] @@ -44,8 +44,8 @@ define i16 @atomic_load_add16(i16* %foo) { ; CHECK-LABEL: atomic_load_sub16 ; CHECK: in r0, 63 ; CHECK-NEXT: cli -; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]] -; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]]+ +; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]]+ +; CHECK-NEXT: ld [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]] ; CHECK-NEXT: sub [[RR1]], [[TMP:r[0-9]+]] ; CHECK-NEXT: sbc [[RR2]], [[TMP:r[0-9]+]] ; CHECK-NEXT: st [[RD1]], [[RR1]] @@ -59,8 +59,8 @@ define i16 @atomic_load_sub16(i16* %foo) { ; CHECK-LABEL: atomic_load_and16 ; CHECK: in r0, 63 ; CHECK-NEXT: cli -; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]] 
-; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]]+ +; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]]+ +; CHECK-NEXT: ld [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]] ; CHECK-NEXT: and [[RR1]], [[TMP:r[0-9]+]] ; CHECK-NEXT: and [[RR2]], [[TMP:r[0-9]+]] ; CHECK-NEXT: st [[RD1]], [[RR1]] @@ -74,8 +74,8 @@ define i16 @atomic_load_and16(i16* %foo) { ; CHECK-LABEL: atomic_load_or16 ; CHECK: in r0, 63 ; CHECK-NEXT: cli -; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]] -; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]]+ +; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]]+ +; CHECK-NEXT: ld [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]] ; CHECK-NEXT: or [[RR1]], [[TMP:r[0-9]+]] ; CHECK-NEXT: or [[RR2]], [[TMP:r[0-9]+]] ; CHECK-NEXT: st [[RD1]], [[RR1]] @@ -89,8 +89,8 @@ define i16 @atomic_load_or16(i16* %foo) { ; CHECK-LABEL: atomic_load_xor16 ; CHECK: in r0, 63 ; CHECK-NEXT: cli -; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]] -; CHECK-NEXT: ldd [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]]+ +; CHECK-NEXT: ld [[RR1:r[0-9]+]], [[RD1:(X|Y|Z)]]+ +; CHECK-NEXT: ld [[RR2:r[0-9]+]], [[RD2:(X|Y|Z)]] ; CHECK-NEXT: eor [[RR1]], [[TMP:r[0-9]+]] ; CHECK-NEXT: eor [[RR2]], [[TMP:r[0-9]+]] ; CHECK-NEXT: st [[RD1]], [[RR1]] diff --git a/test/CodeGen/AVR/branch-relaxation-long.ll b/test/CodeGen/AVR/branch-relaxation-long.ll new file mode 100644 index 0000000000000..2cfc7e812ebcc --- /dev/null +++ b/test/CodeGen/AVR/branch-relaxation-long.ll @@ -0,0 +1,4137 @@ +; RUN: llc < %s -march=avr | FileCheck %s + +; CHECK-LABEL: relax_to_jmp: +; CHECK: cpi r{{[0-9]+}}, 0 +; CHECK: brne [[BB1:LBB[0-9]+_[0-9]+]] +; CHECK: jmp [[BB2:LBB[0-9]+_[0-9]+]] +; CHECK: [[BB1]]: +; CHECK: nop +; CHECK: [[BB2]]: +define i8 @relax_to_jmp(i1 %a) { +entry-block: + br i1 %a, label %hello, label %finished +hello: + ; with >4 kB of instructions (2050 NOPs), this requires a long jump (jmp), + ; versus a relative one (rjmp). 
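+  ; For reference: an AVR NOP is 2 bytes and RJMP takes a 12-bit signed
+  ; word offset, giving it a reach of roughly -4096..+4094 bytes. 2050
+  ; NOPs occupy 2050 * 2 = 4100 bytes, just past that limit, which is
+  ; presumably why relaxation has to use the 4-byte JMP here.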
+ call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + 
call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call 
void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void 
asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + br label %finished +finished: + ret i8 3 +} + +; CHECK-LABEL: relax_to_jmp_backwards: +; CHECK: [[BB1:LBB[0-9]+_[0-9]+]] +; CHECK: nop +; CHECK: cpi r{{[0-9]+}}, 0 +; CHECK: breq [[BB2:LBB[0-9]+_[0-9]+]] +; CHECK: jmp [[BB1]] +; CHECK: [[BB2]]: +define i8 @relax_to_jmp_backwards(i1 %a) { +entry-block: + br label %hello +hello: + ; with >4 kB of instructions (2050 NOPs), this requires a long jump (jmp), + ; versus a relative one (rjmp). 
+  call void asm sideeffect "nop", ""()
[... the NOP call above repeats until this block contains the 2050 NOPs referenced in the comment; the run is collapsed here ...]
+  call void asm
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm 
sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + call void asm sideeffect "nop", ""() + br i1 %a, label %hello, label %finished +finished: + ret i8 3 +} diff --git a/test/CodeGen/AVR/load.ll b/test/CodeGen/AVR/load.ll index f5b15d56e6146..73568b5409656 100644 --- a/test/CodeGen/AVR/load.ll +++ b/test/CodeGen/AVR/load.ll @@ -9,8 +9,8 @@ define i8 @load8(i8* %x) { define i16 @load16(i16* %x) { ; CHECK-LABEL: load16: -; CHECK: ld r24, {{[YZ]}} -; CHECK: ldd r25, {{[YZ]}}+1 +; CHECK: ld r24, {{[XYZ]}}+ +; CHECK: ld r25, {{[XYZ]}} %1 = load i16, i16* %x ret i16 %1 } @@ -45,11 +45,11 @@ define i16 @load16disp(i16* %x) { define i16 @load16nodisp(i16* %x) { ; CHECK-LABEL: load16nodisp: -; CHECK: movw r30, r24 -; CHECK: subi r30, 192 -; CHECK: sbci r31, 255 -; CHECK: ld r24, {{[YZ]}} -; CHECK: ldd r25, {{[YZ]}}+1 +; CHECK: movw r26, r24 +; CHECK: subi r26, 192 +; CHECK: sbci r27, 255 +; CHECK: ld r24, {{[XYZ]}}+ +; CHECK: ld r25, {{[XYZ]}} %1 = getelementptr inbounds i16, i16* %x, i64 32 %2 = load i16, i16* %1 ret i16 %2 diff --git a/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir b/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir index 3e7fdcd400d21..82f1a9a832fcb 100644 --- a/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir +++ b/test/CodeGen/AVR/pseudo/LDWRdPtr-same-src-dst.mir @@ -18,9 +18,9 @@ body: | ; CHECK-LABEL: test_ldwrdptr - ; CHECK: ld [[SCRATCH:r[0-9]+]], Z + ; CHECK: ld [[SCRATCH:r[0-9]+]], Z+ ; CHECK-NEXT: push [[SCRATCH]] - ; CHECK-NEXT: ldd [[SCRATCH]], Z+1 + ; CHECK-NEXT: ld [[SCRATCH]], Z ; CHECK-NEXT: mov r31, [[SCRATCH]] ; CHECK-NEXT: pop r30 diff --git a/test/CodeGen/AVR/pseudo/LDWRdPtr.mir b/test/CodeGen/AVR/pseudo/LDWRdPtr.mir index 6db615878b95f..3a3ec3c2657fe 100644 --- a/test/CodeGen/AVR/pseudo/LDWRdPtr.mir +++ 
b/test/CodeGen/AVR/pseudo/LDWRdPtr.mir @@ -17,8 +17,8 @@ body: | ; CHECK-LABEL: test_ldwrdptr - ; CHECK: %r0 = LDRdPtr %r31r30 - ; CHECK-NEXT: early-clobber %r1 = LDDRdPtrQ %r31r30, 1 + ; CHECK: %r0, %r31r30 = LDRdPtrPi %r31r30 + ; CHECK-NEXT: %r1 = LDRdPtr %r31r30 %r1r0 = LDWRdPtr %r31r30 ... diff --git a/test/CodeGen/AVR/pseudo/LDWRdPtrPd.mir b/test/CodeGen/AVR/pseudo/LDWRdPtrPd.mir index eb65c6538d110..0c065f83d9a69 100644 --- a/test/CodeGen/AVR/pseudo/LDWRdPtrPd.mir +++ b/test/CodeGen/AVR/pseudo/LDWRdPtrPd.mir @@ -17,8 +17,8 @@ body: | ; CHECK-LABEL: test_ldwrdptrpd - ; CHECK: early-clobber %r1, early-clobber %r31r30 = LDRdPtrPd killed %r31r30 - ; CHECK-NEXT: early-clobber %r0, early-clobber %r31r30 = LDRdPtrPd killed %r31r30 + ; CHECK: early-clobber %r1, %r31r30 = LDRdPtrPd killed %r31r30 + ; CHECK-NEXT: early-clobber %r0, %r31r30 = LDRdPtrPd killed %r31r30 %r1r0, %r31r30 = LDWRdPtrPd %r31r30 ... diff --git a/test/CodeGen/AVR/pseudo/LDWRdPtrPi.mir b/test/CodeGen/AVR/pseudo/LDWRdPtrPi.mir index 50bad2a4c7653..a947d48d0bad7 100644 --- a/test/CodeGen/AVR/pseudo/LDWRdPtrPi.mir +++ b/test/CodeGen/AVR/pseudo/LDWRdPtrPi.mir @@ -17,8 +17,8 @@ body: | ; CHECK-LABEL: test_ldwrdptrpi - ; CHECK: early-clobber %r0, early-clobber %r31r30 = LDRdPtrPi killed %r31r30 - ; CHECK-NEXT: early-clobber %r1, early-clobber %r31r30 = LDRdPtrPi killed %r31r30 + ; CHECK: early-clobber %r0, %r31r30 = LDRdPtrPi killed %r31r30 + ; CHECK-NEXT: early-clobber %r1, %r31r30 = LDRdPtrPi killed %r31r30 %r1r0, %r31r30 = LDWRdPtrPi %r31r30 ... diff --git a/test/CodeGen/AVR/std-ldd-immediate-overflow.ll b/test/CodeGen/AVR/std-ldd-immediate-overflow.ll new file mode 100644 index 0000000000000..290e349c5342f --- /dev/null +++ b/test/CodeGen/AVR/std-ldd-immediate-overflow.ll @@ -0,0 +1,18 @@ +; RUN: llc -O0 < %s -march=avr | FileCheck %s + +define i32 @std_ldd_overflow() { + %src = alloca [4 x i8] + %dst = alloca [4 x i8] + %buf = alloca [28 x i16] + %1 = bitcast [4 x i8]* %src to i32* + store i32 0, i32 *%1 + %2 = bitcast [4 x i8]* %dst to i8* + %3 = bitcast [4 x i8]* %src to i8* + call void @llvm.memcpy.p0i8.p0i8.i16(i8* %2, i8* %3, i16 4, i32 1, i1 false) +; CHECK-NOT: std {{[XYZ]}}+64, {{r[0-9]+}} +; CHECK-NOT: ldd {{r[0-9]+}}, {{[XYZ]}}+64 + + ret i32 0 +} + +declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i32, i1) diff --git a/test/CodeGen/BPF/select_ri.ll b/test/CodeGen/BPF/select_ri.ll index b802b64b7281d..7b1f852ca7966 100644 --- a/test/CodeGen/BPF/select_ri.ll +++ b/test/CodeGen/BPF/select_ri.ll @@ -25,3 +25,38 @@ entry: } attributes #0 = { norecurse nounwind readonly } + +; test immediate out of 32-bit range +; Source file: + +; unsigned long long +; load_word(void *buf, unsigned long long off) +; asm("llvm.bpf.load.word"); +; +; int +; foo(void *buf) +; { +; unsigned long long sum = 0; +; +; sum += load_word(buf, 100); +; sum += load_word(buf, 104); +; +; if (sum != 0x1ffffffffULL) +; return ~0U; +; +; return 0; +;} + +; Function Attrs: nounwind readonly +define i32 @foo(i8*) local_unnamed_addr #0 { + %2 = tail call i64 @llvm.bpf.load.word(i8* %0, i64 100) + %3 = tail call i64 @llvm.bpf.load.word(i8* %0, i64 104) + %4 = add i64 %3, %2 + %5 = icmp ne i64 %4, 8589934591 +; CHECK: r{{[0-9]+}} = 8589934591 ll + %6 = sext i1 %5 to i32 + ret i32 %6 +} + +; Function Attrs: nounwind readonly +declare i64 @llvm.bpf.load.word(i8*, i64) #1 diff --git a/test/CodeGen/Generic/MachineBranchProb.ll b/test/CodeGen/Generic/MachineBranchProb.ll index 804e5b0ce9fca..8207fa8ce0f12 100644 --- 
a/test/CodeGen/Generic/MachineBranchProb.ll +++ b/test/CodeGen/Generic/MachineBranchProb.ll @@ -7,6 +7,8 @@ ; Bug: PR31899 ; XFAIL: avr +declare void @foo() + ; Make sure we have the correct weight attached to each successor. define i32 @test2(i32 %x) nounwind uwtable readnone ssp { ; CHECK-LABEL: Machine code for function test2: @@ -26,6 +28,8 @@ entry: ; CHECK: Successors according to CFG: BB#1({{[0-9a-fx/= ]+}}36.36%) BB#3({{[0-9a-fx/= ]+}}63.64%) sw.bb: +; this call will prevent simplifyCFG from optimizing the block away in ARM/AArch64. + tail call void @foo() br label %return sw.bb1: diff --git a/test/CodeGen/Hexagon/PR33749.ll b/test/CodeGen/Hexagon/PR33749.ll new file mode 100644 index 0000000000000..7f8533054e88c --- /dev/null +++ b/test/CodeGen/Hexagon/PR33749.ll @@ -0,0 +1,50 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; This testcase used to fail with "cannot select 'i1 = add x, y'". +; Check for some sane output: +; CHECK: xor(p{{[0-3]}},p{{[0-3]}}) + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define void @foo(i32* nocapture %a0) local_unnamed_addr #0 { +b1: + %v2 = getelementptr inbounds i32, i32* %a0, i32 26 + %v3 = load i32, i32* %v2, align 4 + %v4 = add nsw i32 %v3, 1 + %v5 = load i32, i32* %a0, align 4 + br label %b6 + +b6: ; preds = %b28, %b1 + %v7 = phi i32 [ %v29, %b28 ], [ %v5, %b1 ] + %v8 = mul nsw i32 %v4, %v7 + %v9 = add nsw i32 %v8, %v7 + %v10 = mul i32 %v7, %v7 + %v11 = mul i32 %v10, %v9 + %v12 = add nsw i32 %v11, 1 + %v13 = mul nsw i32 %v12, %v7 + %v14 = add nsw i32 %v13, %v7 + %v15 = mul i32 %v10, %v14 + %v16 = and i32 %v15, 1 + %v17 = add nsw i32 %v16, -1 + %v18 = mul i32 %v10, %v7 + %v19 = mul i32 %v18, %v11 + %v20 = mul i32 %v19, %v17 + %v21 = and i32 %v20, 1 + %v22 = add nsw i32 %v21, -1 + %v23 = mul nsw i32 %v22, %v3 + %v24 = sub nsw i32 %v7, %v23 + %v25 = mul i32 %v10, %v24 + %v26 = sub i32 0, %v7 + %v27 = icmp eq i32 %v25, %v26 + br i1 %v27, label %b30, label %b28 + +b28: ; preds = %b6 + %v29 = add nsw i32 %v3, %v7 + store i32 %v29, i32* %a0, align 4 + br label %b6 + +b30: ; preds = %b6 + ret void +} + +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" } diff --git a/test/CodeGen/Hexagon/SUnit-boundary-prob.ll b/test/CodeGen/Hexagon/SUnit-boundary-prob.ll index 9df178f9907cd..badab1686fcf1 100644 --- a/test/CodeGen/Hexagon/SUnit-boundary-prob.ll +++ b/test/CodeGen/Hexagon/SUnit-boundary-prob.ll @@ -187,7 +187,7 @@ entry: } attributes #0 = { nounwind readnone } -attributes #1 = { "target-cpu"="hexagonv60" "target-features"="+hvx" } +attributes #1 = { "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } attributes #2 = { nounwind } !llvm.module.flags = !{!0} diff --git a/test/CodeGen/Hexagon/addaddi.ll b/test/CodeGen/Hexagon/addaddi.ll new file mode 100644 index 0000000000000..6510858f1bd85 --- /dev/null +++ b/test/CodeGen/Hexagon/addaddi.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; Check for S4_addaddi: +; CHECK: r{{[0-9]+}} = add(r{{[0-9]+}},add(r{{[0-9]+}},#2)) + +define i32 @fred(i32 %a0, i32 %a1, i32* nocapture %a2) #0 { +b3: + %v4 = add nsw i32 %a0, 2 + %v5 = add nsw i32 %v4, %a1 + store i32 %v5, i32* %a2, align 4 + ret i32 undef +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/Hexagon/addrmode-indoff.ll b/test/CodeGen/Hexagon/addrmode-indoff.ll index 6ea2b3d95daf7..274add33898b4 100644 --- 
a/test/CodeGen/Hexagon/addrmode-indoff.ll +++ b/test/CodeGen/Hexagon/addrmode-indoff.ll @@ -3,72 +3,90 @@ ; Bug 6840. Use absolute+index addressing. @ga = common global [1024 x i8] zeroinitializer, align 8 -@gb = common global [1024 x i8] zeroinitializer, align 8 -; CHECK: memub(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##ga) -define zeroext i8 @lf2(i32 %i) nounwind readonly { +; CHECK-LABEL: test0 +; CHECK: memub(r{{[0-9]+}}+##ga) +define zeroext i8 @test0(i32 %i) nounwind readonly { entry: - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i - %0 = load i8, i8* %arrayidx, align 1 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i + %0 = load i8, i8* %t, align 1 ret i8 %0 } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##gb) -define signext i8 @lf2s(i32 %i) nounwind readonly { +; CHECK-LABEL: test1 +; CHECK: memb(r{{[0-9]+}}+##ga) +define signext i8 @test1(i32 %i) nounwind readonly { entry: - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %i - %0 = load i8, i8* %arrayidx, align 1 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i + %0 = load i8, i8* %t, align 1 ret i8 %0 } -; CHECK: memub(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##ga) -define zeroext i8 @lf3(i32 %i) nounwind readonly { +; CHECK-LABEL: test2 +; CHECK: memub(r{{[0-9]+}}<<#1+##ga) +define zeroext i8 @test2(i32 %i) nounwind readonly { entry: - %mul = shl nsw i32 %i, 2 - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %mul - %0 = load i8, i8* %arrayidx, align 1 + %j = shl nsw i32 %i, 1 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + %0 = load i8, i8* %t, align 1 ret i8 %0 } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##gb) -define signext i8 @lf3s(i32 %i) nounwind readonly { +; CHECK-LABEL: test3 +; CHECK: memb(r{{[0-9]+}}<<#1+##ga) +define signext i8 @test3(i32 %i) nounwind readonly { entry: - %mul = shl nsw i32 %i, 2 - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %mul - %0 = load i8, i8* %arrayidx, align 1 + %j = shl nsw i32 %i, 1 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + %0 = load i8, i8* %t, align 1 ret i8 %0 } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##ga) -define void @sf4(i32 %i, i8 zeroext %j) nounwind { +; CHECK-LABEL: test4 +; CHECK: memub(r{{[0-9]+}}<<#2+##ga) +define zeroext i8 @test4(i32 %i) nounwind readonly { entry: - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i - store i8 %j, i8* %arrayidx, align 1 - ret void + %j = shl nsw i32 %i, 2 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + %0 = load i8, i8* %t, align 1 + ret i8 %0 +} + +; CHECK-LABEL: test5 +; CHECK: memb(r{{[0-9]+}}<<#2+##ga) +define signext i8 @test5(i32 %i) nounwind readonly { +entry: + %j = shl nsw i32 %i, 2 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + %0 = load i8, i8* %t, align 1 + ret i8 %0 } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##gb) -define void @sf4s(i32 %i, i8 signext %j) nounwind { +; CHECK-LABEL: test10 +; CHECK: memb(r{{[0-9]+}}+##ga) +define void @test10(i32 %i, i8 zeroext %v) nounwind { entry: - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %i - store i8 %j, i8* %arrayidx, align 1 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i + store i8 %v, i8* %t, align 1 ret void } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ 
*}}#2{{ *}}+{{ *}}##ga) -define void @sf5(i32 %i, i8 zeroext %j) nounwind { +; CHECK-LABEL: test11 +; CHECK: memb(r{{[0-9]+}}<<#1+##ga) +define void @test11(i32 %i, i8 signext %v) nounwind { entry: - %mul = shl nsw i32 %i, 2 - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %mul - store i8 %j, i8* %arrayidx, align 1 + %j = shl nsw i32 %i, 1 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + store i8 %v, i8* %t, align 1 ret void } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##gb) -define void @sf5s(i32 %i, i8 signext %j) nounwind { +; CHECK-LABEL: test12 +; CHECK: memb(r{{[0-9]+}}<<#2+##ga) +define void @test12(i32 %i, i8 zeroext %v) nounwind { entry: - %mul = shl nsw i32 %i, 2 - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %mul - store i8 %j, i8* %arrayidx, align 1 + %j = shl nsw i32 %i, 2 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + store i8 %v, i8* %t, align 1 ret void } diff --git a/test/CodeGen/Hexagon/addrmode-rr-to-io.mir b/test/CodeGen/Hexagon/addrmode-rr-to-io.mir new file mode 100644 index 0000000000000..75eb0d3844035 --- /dev/null +++ b/test/CodeGen/Hexagon/addrmode-rr-to-io.mir @@ -0,0 +1,22 @@ +# RUN: llc -march=hexagon -run-pass amode-opt %s -o - | FileCheck %s + +# This testcase used to crash. +# CHECK: S2_storerb_io killed %r0, @var_i8, killed %r2 + +--- | + define void @fred() { ret void } + @var_i8 = global [10 x i8] zeroinitializer, align 8 +... + +--- +name: fred +tracksRegLiveness: true +body: | + bb.0: + liveins: %r0 + %r1 = A2_tfrsi @var_i8 + %r2 = A2_tfrsi 255 + S4_storerb_rr killed %r0, killed %r1, 0, killed %r2 + PS_jmpret %r31, implicit-def %pc +... + diff --git a/test/CodeGen/Hexagon/bit-bitsplit-at.ll b/test/CodeGen/Hexagon/bit-bitsplit-at.ll index 87d535fd0f22a..30d18b7724e18 100644 --- a/test/CodeGen/Hexagon/bit-bitsplit-at.ll +++ b/test/CodeGen/Hexagon/bit-bitsplit-at.ll @@ -30,4 +30,4 @@ b9: ; preds = %b6, %b4 ret i32 %v10 } -attributes #0 = { nounwind optsize "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" } +attributes #0 = { nounwind optsize "target-cpu"="hexagonv60" "target-features"="-hvxv60,-long-calls" } diff --git a/test/CodeGen/Hexagon/bit-bitsplit-src.ll b/test/CodeGen/Hexagon/bit-bitsplit-src.ll index 2d1c71c709f47..edac4cb34b6e8 100644 --- a/test/CodeGen/Hexagon/bit-bitsplit-src.ll +++ b/test/CodeGen/Hexagon/bit-bitsplit-src.ll @@ -32,4 +32,4 @@ b0: ; Function Attrs: nounwind declare void @printf(i8* nocapture readonly, ...) 
local_unnamed_addr #0 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } diff --git a/test/CodeGen/Hexagon/bit-bitsplit.ll b/test/CodeGen/Hexagon/bit-bitsplit.ll index 4ae2e4e665083..52ae69af994b6 100644 --- a/test/CodeGen/Hexagon/bit-bitsplit.ll +++ b/test/CodeGen/Hexagon/bit-bitsplit.ll @@ -14,4 +14,4 @@ entry: ret i32 %and2 } -attributes #0 = { norecurse nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double" } +attributes #0 = { norecurse nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx" } diff --git a/test/CodeGen/Hexagon/bit-ext-sat.ll b/test/CodeGen/Hexagon/bit-ext-sat.ll index 47c49c2364b7e..713e3988457e1 100644 --- a/test/CodeGen/Hexagon/bit-ext-sat.ll +++ b/test/CodeGen/Hexagon/bit-ext-sat.ll @@ -53,5 +53,5 @@ declare i32 @llvm.hexagon.A2.sath(i32) #1 declare i32 @llvm.hexagon.A2.satub(i32) #1 declare i32 @llvm.hexagon.A2.satuh(i32) #1 -attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/bit-extract-off.ll b/test/CodeGen/Hexagon/bit-extract-off.ll index 183435ab7b23a..4086ca34bbbcf 100644 --- a/test/CodeGen/Hexagon/bit-extract-off.ll +++ b/test/CodeGen/Hexagon/bit-extract-off.ll @@ -19,5 +19,5 @@ b5: ; preds = %b5, %b4 declare double @fabs(double) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" } -attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } +attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } diff --git a/test/CodeGen/Hexagon/bit-extract.ll b/test/CodeGen/Hexagon/bit-extract.ll index ad7d05d2c235b..33fa50c14f39b 100644 --- a/test/CodeGen/Hexagon/bit-extract.ll +++ b/test/CodeGen/Hexagon/bit-extract.ll @@ -72,4 +72,4 @@ entry: ret i32 %bf.ashr } -attributes #0 = { noinline norecurse nounwind readnone "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { noinline norecurse nounwind readnone "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } diff --git a/test/CodeGen/Hexagon/bit-has.ll b/test/CodeGen/Hexagon/bit-has.ll index 9022de3918682..5bb0f2f60b0a9 100644 --- a/test/CodeGen/Hexagon/bit-has.ll +++ b/test/CodeGen/Hexagon/bit-has.ll @@ -60,5 +60,5 @@ b23: ; preds = %b21 declare i32 @llvm.hexagon.A2.sath(i32) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv5" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { nounwind "target-cpu"="hexagonv5" "target-features"="-hvx,-long-calls" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/bit-loop-rc-mismatch.ll b/test/CodeGen/Hexagon/bit-loop-rc-mismatch.ll index db57998aeb666..e7dd87c1da14d 100644 --- a/test/CodeGen/Hexagon/bit-loop-rc-mismatch.ll +++ b/test/CodeGen/Hexagon/bit-loop-rc-mismatch.ll @@ -24,7 +24,7 @@ for.end: ; preds = %for.body, %entry declare hidden i64 @danny(i32*, i32* nocapture readonly dereferenceable(4)) #1 align 2 declare hidden i32 @sammy(i32* nocapture, i32) #0 align 2 -attributes #0 = { nounwind optsize "disable-tail-calls"="false" 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind optsize readonly "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind optsize readonly "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { optsize } diff --git a/test/CodeGen/Hexagon/bit-rie.ll b/test/CodeGen/Hexagon/bit-rie.ll index 302382a1ade47..a090a668d9f3a 100644 --- a/test/CodeGen/Hexagon/bit-rie.ll +++ b/test/CodeGen/Hexagon/bit-rie.ll @@ -190,7 +190,7 @@ declare i64 @llvm.hexagon.M2.mpyd.ll.s1(i32, i32) #2 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 -attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { argmemonly nounwind } attributes #2 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/bitconvert-vector.ll b/test/CodeGen/Hexagon/bitconvert-vector.ll index c090721b8fffb..a89a15c22d221 100644 --- a/test/CodeGen/Hexagon/bitconvert-vector.ll +++ b/test/CodeGen/Hexagon/bitconvert-vector.ll @@ -24,4 +24,4 @@ entry: attributes #0 = { nounwind readnone } -attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } diff --git a/test/CodeGen/Hexagon/block-addr.ll b/test/CodeGen/Hexagon/block-addr.ll index 5af3a69f8aab1..bd59e59033110 100644 --- a/test/CodeGen/Hexagon/block-addr.ll +++ b/test/CodeGen/Hexagon/block-addr.ll @@ -1,7 +1,6 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s -; CHECK: .LJTI -; CHECK-DAG: r[[REG:[0-9]+]] = memw(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+<<#[0-9]+}}) +; CHECK-DAG: r[[REG:[0-9]+]] = memw(r{{[0-9]+<<#[0-9]+}}+##.LJTI{{.*}}) ; CHECK-DAG: jumpr r[[REG]] define void @main() #0 { diff --git a/test/CodeGen/Hexagon/build-vector-shuffle.ll b/test/CodeGen/Hexagon/build-vector-shuffle.ll index 1d06953ddf32d..7efc38f15b333 100644 --- a/test/CodeGen/Hexagon/build-vector-shuffle.ll +++ b/test/CodeGen/Hexagon/build-vector-shuffle.ll @@ -17,5 +17,5 @@ entry: ; Function Attrs: nounwind readnone declare <16 x i32> @llvm.hexagon.V6.vshuffh(<16 x i32>) 
#1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/builtin-expect.ll b/test/CodeGen/Hexagon/builtin-expect.ll index 9945da1782b2f..9fed28760ade1 100644 --- a/test/CodeGen/Hexagon/builtin-expect.ll +++ b/test/CodeGen/Hexagon/builtin-expect.ll @@ -39,6 +39,6 @@ b14: ; preds = %b13, %b10 declare i32 @bar(i32) local_unnamed_addr #0 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double,-long-calls" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b,-long-calls" } !0 = !{!"branch_weights", i32 1, i32 2000} diff --git a/test/CodeGen/Hexagon/call-ret-i1.ll b/test/CodeGen/Hexagon/call-ret-i1.ll new file mode 100644 index 0000000000000..3838e8a6e88fe --- /dev/null +++ b/test/CodeGen/Hexagon/call-ret-i1.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=hexagon < %s +; REQUIRES: asserts + +; Test that the compiler does not assert because the DAG is not correct. +; CHECK: call foo + +%returntype = type { i1, i32 } + +define i32 @test(i32* %a0, i32* %a1, i32* %a2) #0 { +b3: + br i1 undef, label %b6, label %b4 + +b4: ; preds = %b3 + %v5 = call %returntype @foo(i32* nonnull undef, i32* %a2, i32* %a0) #0 + ret i32 1 + +b6: ; preds = %b3 + unreachable +} + +declare %returntype @foo(i32*, i32*, i32*) #0 + +attributes #0 = { nounwind } diff --git a/test/CodeGen/Hexagon/cext-opt-basic.mir b/test/CodeGen/Hexagon/cext-opt-basic.mir new file mode 100644 index 0000000000000..63530c88c1e2d --- /dev/null +++ b/test/CodeGen/Hexagon/cext-opt-basic.mir @@ -0,0 +1,74 @@ +# RUN: llc -march=hexagon -run-pass hexagon-cext-opt -hexagon-cext-threshold=3 %s -o - | FileCheck %s + +--- | + define void @test0() { ret void } + define void @test1() { ret void } + define void @test2() { ret void } + @global_address = global [1024 x i32] zeroinitializer, align 8 +... + +# CHECK-LABEL: name: test0 +# CHECK: [[B:%[0-9]+]]:intregs = A2_tfrsi @global_address +# CHECK: L2_loadri_io [[B]], 0 +# CHECK: L2_loadri_io [[B]], 4 +# CHECK: L2_loadri_io [[B]], 8 +--- +name: test0 +registers: + - { id: 0, class: intregs } + - { id: 1, class: intregs } + - { id: 2, class: intregs } +body: | + bb.0: + %0 = PS_loadriabs @global_address + %1 = PS_loadriabs @global_address+4 + %2 = PS_loadriabs @global_address+8 +... + +# CHECK-LABEL: name: test1 +# CHECK: [[C:%[0-9]+]]:intregs = COPY %r0 +# CHECK: [[B:%[0-9]+]]:intregs = A2_addi [[C]], @global_address +# CHECK: L2_loadri_io [[B]], 0 +# CHECK: L2_loadri_io [[B]], 4 +# CHECK: L2_loadri_io [[B]], 8 +--- +name: test1 +registers: + - { id: 0, class: intregs } + - { id: 1, class: intregs } + - { id: 2, class: intregs } + - { id: 3, class: intregs } +body: | + bb.0: + liveins: %r0 + %0 = COPY %r0 + %1 = L4_loadri_ur %0, 0, @global_address + %2 = L4_loadri_ur %0, 0, @global_address+4 + %3 = L4_loadri_ur %0, 0, @global_address+8 +... 
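+# In test1 just above, all three extended loads share both the base register
+# and the global: the pass materializes @global_address once with A2_addi on
+# the incoming base, and the L4_loadri_ur accesses then become plain
+# L2_loadri_io loads at offsets 0, 4, and 8, as the CHECK lines verify.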
+ +# CHECK-LABEL: name: test2 +# CHECK: [[C:%[0-9]+]]:intregs = COPY %r0 +# CHECK: [[B:%[0-9]+]]:intregs = A2_tfrsi @global_address + 4 +# CHECK: [[T0:%[0-9]+]]:intregs = A2_addi [[B]], -4 +# CHECK: %r0 = COPY [[T0]] +# CHECK: [[T1:%[0-9]+]]:intregs = A2_addi [[B]], -2 +# CHECK: %r1 = COPY [[T1]] +# CHECK: L4_loadri_rr [[B]], [[C]], 0 +--- +name: test2 +registers: + - { id: 0, class: intregs } + - { id: 1, class: intregs } + - { id: 2, class: intregs } + - { id: 3, class: intregs } +body: | + bb.0: + liveins: %r0 + %0 = COPY %r0 + %1 = A2_tfrsi @global_address + %r0 = COPY %1 + %2 = A2_tfrsi @global_address+2 + %r1 = COPY %2 + %3 = L4_loadri_ur %0, 0, @global_address+4 +... diff --git a/test/CodeGen/Hexagon/cext-opt-range-offset.mir b/test/CodeGen/Hexagon/cext-opt-range-offset.mir new file mode 100644 index 0000000000000..6fb53489a4597 --- /dev/null +++ b/test/CodeGen/Hexagon/cext-opt-range-offset.mir @@ -0,0 +1,43 @@ +# RUN: llc -march=hexagon -run-pass hexagon-cext-opt %s -o - | FileCheck %s + +# Check that this testcase does not crash. +# CHECK: L4_and_memopw_io + +--- +name: fred +tracksRegLiveness: true +registers: + - { id: 0, class: intregs } + - { id: 1, class: intregs } + - { id: 2, class: intregs } + - { id: 3, class: intregs } + - { id: 4, class: predregs } + - { id: 5, class: intregs } + - { id: 6, class: intregs } +body: | + bb.0: + successors: %bb.1 + %0 = A2_tfrsi -360184608 + %1 = L2_loadri_io %0, -1024 + + bb.1: + successors: %bb.2 + %2 = A2_tfrsi -234944641 + %3 = A2_tfrsi -360185632 + L4_and_memopw_io %3, 0, %2 + + bb.2: + successors: %bb.3, %bb.4 + %4 = IMPLICIT_DEF + J2_jumpt %4, %bb.4, implicit-def %pc + J2_jump %bb.3, implicit-def %pc + + bb.3: + successors: %bb.4 + + bb.4: + successors: %bb.4 + %5 = A2_tfrsi -234944521 + %6 = A2_tfrsi -360185632 + L4_and_memopw_io %6, 0, %5 +... 
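For context, the constant-extender optimization exercised by the two cext-opt tests above typically fires on input like the following IR-level sketch (illustrative only; the function name and body are invented, though the global matches the one declared in cext-opt-basic.mir). Each load references @global_address plus a small offset, so without the pass each would need its own 32-bit constant extender:

; Hypothetical source pattern behind the PS_loadriabs sequence in test0.
@global_address = global [1024 x i32] zeroinitializer, align 8

define i32 @cext_sketch() {
entry:
  ; Three loads whose addresses share the base @global_address.
  %p0 = getelementptr inbounds [1024 x i32], [1024 x i32]* @global_address, i32 0, i32 0
  %p1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @global_address, i32 0, i32 1
  %p2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @global_address, i32 0, i32 2
  %v0 = load i32, i32* %p0, align 4
  %v1 = load i32, i32* %p1, align 4
  %v2 = load i32, i32* %p2, align 4
  %s0 = add i32 %v0, %v1
  %s1 = add i32 %s0, %v2
  ret i32 %s1
}

The CHECK lines in test0 above show the intended rewrite of such input: one A2_tfrsi materializing the base, followed by three L2_loadri_io loads at offsets 0, 4, and 8.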
diff --git a/test/CodeGen/Hexagon/cfgopt-fall-through.ll b/test/CodeGen/Hexagon/cfgopt-fall-through.ll
index be234aafc0bb1..2d65a5c5848fd 100644
--- a/test/CodeGen/Hexagon/cfgopt-fall-through.ll
+++ b/test/CodeGen/Hexagon/cfgopt-fall-through.ll
@@ -68,4 +68,4 @@ b19: ; preds = %b4
 unreachable
 }
-attributes #0 = { nounwind "target-cpu"="hexagonv55" "target-features"="-hvx,-hvx-double,-long-calls" }
+attributes #0 = { nounwind "target-cpu"="hexagonv55" "target-features"="-hvx,-long-calls" }
diff --git a/test/CodeGen/Hexagon/cfi-offset.ll b/test/CodeGen/Hexagon/cfi-offset.ll
index 100034a0c6c4a..c7d447d168c8b 100644
--- a/test/CodeGen/Hexagon/cfi-offset.ll
+++ b/test/CodeGen/Hexagon/cfi-offset.ll
@@ -39,5 +39,5 @@ declare i8* @__cxa_begin_catch(i8*)
 declare void @__cxa_end_catch()
-attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind }
diff --git a/test/CodeGen/Hexagon/cmpb-dec-imm.ll b/test/CodeGen/Hexagon/cmpb-dec-imm.ll
new file mode 100644
index 0000000000000..d3b48e6b294e3
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmpb-dec-imm.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=hexagon -debug-only=isel < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; Check that we generate the 'cmpb.gtu' instruction for a byte comparison.
+; The "Optimized Lowered Selection" converts the "ugt with #40" to
+; "ult with #41". The immediate value should be decremented back to #40
+; when the cmpb.gtu pattern is selected.
+; CHECK: setcc{{.*}}41{{.*}}setult
+; CHECK: A4_cmpbgtui{{.*}}40
+
+@glob = common global i8 0, align 1
+
+define i32 @cmpgtudec(i32 %a0, i32 %a1) #0 {
+b2:
+  %v3 = xor i32 %a1, %a0
+  %v4 = and i32 %v3, 255
+  %v5 = icmp ugt i32 %v4, 40
+  br i1 %v5, label %b6, label %b8
+
+b6: ; preds = %b2
+  %v7 = trunc i32 %a0 to i8
+  store i8 %v7, i8* @glob, align 1
+  br label %b8
+
+b8: ; preds = %b6, %b2
+  %v9 = phi i32 [ 1, %b6 ], [ 0, %b2 ]
+  ret i32 %v9
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/Hexagon/cmph-gtu.ll b/test/CodeGen/Hexagon/cmph-gtu.ll
new file mode 100644
index 0000000000000..f5feb7bc6fb15
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmph-gtu.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Check that we generate the 'cmph.gtu' instruction.
+; CHECK-LABEL: @cmphgtu
+; CHECK: cmph.gtu
+
+@glob = common global i8 0, align 1
+
+define i32 @cmphgtu(i32 %a0, i32 %a1) #0 {
+b2:
+  %v3 = xor i32 %a1, %a0
+  %v4 = and i32 %v3, 65535
+  %v5 = icmp ugt i32 %v4, 40
+  br i1 %v5, label %b6, label %b8
+
+b6: ; preds = %b2
+  %v7 = trunc i32 %a0 to i8
+  store i8 %v7, i8* @glob, align 1
+  br label %b8
+
+b8: ; preds = %b6, %b2
+  %v9 = phi i32 [ 1, %b6 ], [ 0, %b2 ]
+  ret i32 %v9
+}
+
+; With zxtb, we must not generate a cmph.gtu instruction.
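+; (Contrast with @cmphgtu above: there the operand is masked with 65535, a
+; full halfword, while here the mask is 255, so only a byte is significant
+; and the halfword compare should not be selected.)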
+; CHECK-LABEL: @nocmphgtu +; CHECK-NOT: cmph.gtu +define i32 @nocmphgtu(i32 %a0, i32 %a1) #0 { +b2: + %v3 = xor i32 %a1, %a0 + %v4 = and i32 %v3, 255 + %v5 = icmp ugt i32 %v4, 40 + br i1 %v5, label %b6, label %b8 + +b6: ; preds = %b2 + %v7 = trunc i32 %a0 to i8 + store i8 %v7, i8* @glob, align 1 + br label %b8 + +b8: ; preds = %b6, %b2 + %v9 = phi i32 [ 1, %b6 ], [ 0, %b2 ] + ret i32 %v9 +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/Hexagon/common-gep-inbounds.ll b/test/CodeGen/Hexagon/common-gep-inbounds.ll index a8b75725a0b89..ddc73c284bc8d 100644 --- a/test/CodeGen/Hexagon/common-gep-inbounds.ll +++ b/test/CodeGen/Hexagon/common-gep-inbounds.ll @@ -17,4 +17,4 @@ entry: ret i16 %a } -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } diff --git a/test/CodeGen/Hexagon/const-pool-tf.ll b/test/CodeGen/Hexagon/const-pool-tf.ll index 9a4569b1e4de2..e67892537ef6e 100644 --- a/test/CodeGen/Hexagon/const-pool-tf.ll +++ b/test/CodeGen/Hexagon/const-pool-tf.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv60 -relocation-model pic < %s | FileCheck %s +; RUN: opt -relocation-model pic -march=hexagon -mcpu=hexagonv60 -O2 -S < %s | llc -march=hexagon -mcpu=hexagonv60 -relocation-model pic -; CHECK: @PCREL +; CHECK: jumpr target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" target triple = "hexagon-unknown--elf" diff --git a/test/CodeGen/Hexagon/convert-to-dot-old.ll b/test/CodeGen/Hexagon/convert-to-dot-old.ll index b793fa0c22cd5..c4e67f3db6108 100644 --- a/test/CodeGen/Hexagon/convert-to-dot-old.ll +++ b/test/CodeGen/Hexagon/convert-to-dot-old.ll @@ -103,8 +103,8 @@ declare i32 @llvm.hexagon.S2.asr.r.r.sat(i32, i32) #2 declare i32 @llvm.hexagon.A2.aslh(i32) #2 declare void @foo(i16*, i32*, i16*, i16 signext, i16 signext, i16 signext) local_unnamed_addr #3 -attributes #0 = { nounwind optsize "target-cpu"="hexagonv55" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { nounwind optsize "target-cpu"="hexagonv55" "target-features"="-hvx,-long-calls" } attributes #1 = { argmemonly nounwind } attributes #2 = { nounwind readnone } -attributes #3 = { optsize "target-cpu"="hexagonv55" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #3 = { optsize "target-cpu"="hexagonv55" "target-features"="-hvx,-long-calls" } attributes #4 = { nounwind optsize } diff --git a/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll b/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll index 35c12f1d88b7f..62beeee19ff19 100644 --- a/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll +++ b/test/CodeGen/Hexagon/convert_const_i1_to_i8.ll @@ -14,4 +14,4 @@ entry: declare <32 x i32> @llvm.hexagon.V6.vrdelta.128B(<32 x i32>, <32 x i32>) declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" } diff --git a/test/CodeGen/Hexagon/dead-store-stack.ll b/test/CodeGen/Hexagon/dead-store-stack.ll index 0d8124e76b903..532c2b2ee8c99 100644 --- a/test/CodeGen/Hexagon/dead-store-stack.ll +++ b/test/CodeGen/Hexagon/dead-store-stack.ll @@ -1,4 +1,4 @@ -; RUN: llc -O2 -march=hexagon < %s | FileCheck %s +; RUN: llc -O2 -march=hexagon 
-mcpu=hexagonv62 < %s | FileCheck %s ; CHECK: ParseFunc: ; CHECK: r[[ARG0:[0-9]+]] = memuh(r[[ARG1:[0-9]+]]+#[[OFFSET:[0-9]+]]) ; CHECK: memw(r[[ARG1]]+#[[OFFSET]]) = r[[ARG0]] @@ -126,6 +126,7 @@ sw.epilog: ; Function Attrs: nounwind declare void @snprintf(i8* nocapture, i32, i8* nocapture readonly, ...) local_unnamed_addr #1 -attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+hvx" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-features"="+hvx" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv62" "target-features"="+hvx,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv62" "target-features"="+hvx,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { nounwind } + diff --git a/test/CodeGen/Hexagon/early-if-debug.mir b/test/CodeGen/Hexagon/early-if-debug.mir index 39b5036f81065..7c8fb0aee10de 100644 --- a/test/CodeGen/Hexagon/early-if-debug.mir +++ b/test/CodeGen/Hexagon/early-if-debug.mir @@ -3,16 +3,16 @@ # if-converted.
# CHECK-LABEL: bb.0: -# CHECK: %0 = COPY %r0 -# CHECK: %1 = C2_cmpeqi %0, 0 -# CHECK: %2 = A2_tfrsi 123 +# CHECK: %0:intregs = COPY %r0 +# CHECK: %1:predregs = C2_cmpeqi %0, 0 +# CHECK: %2:intregs = A2_tfrsi 123 # CHECK: DBG_VALUE debug-use %0, debug-use _ # CHECK: DBG_VALUE debug-use %0, debug-use _ # CHECK: DBG_VALUE debug-use %0, debug-use _ # CHECK: DBG_VALUE debug-use %0, debug-use _ # CHECK: DBG_VALUE debug-use %0, debug-use _ -# CHECK: %3 = A2_tfrsi 321 -# CHECK: %5 = C2_mux %1, %2, %3 +# CHECK: %3:intregs = A2_tfrsi 321 +# CHECK: %5:intregs = C2_mux %1, %2, %3 --- | define void @foo() { diff --git a/test/CodeGen/Hexagon/early-if-merge-loop.ll b/test/CodeGen/Hexagon/early-if-merge-loop.ll index f45058f029dd0..ab8b00d6c909b 100644 --- a/test/CodeGen/Hexagon/early-if-merge-loop.ll +++ b/test/CodeGen/Hexagon/early-if-merge-loop.ll @@ -82,7 +82,7 @@ declare i64 @llvm.hexagon.A2.addp(i64, i64) #1 declare i64 @llvm.hexagon.A2.subp(i64, i64) #1 declare i64 @llvm.hexagon.A2.combinew(i32, i32) #1 -attributes #0 = { nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } attributes #1 = { nounwind readnone } !0 = !{!1, !1, i64 0} diff --git a/test/CodeGen/Hexagon/early-if-vecpi.ll b/test/CodeGen/Hexagon/early-if-vecpi.ll index 6f3ec2d5a51da..6fd2aa134807c 100644 --- a/test/CodeGen/Hexagon/early-if-vecpi.ll +++ b/test/CodeGen/Hexagon/early-if-vecpi.ll @@ -66,4 +66,4 @@ for.end: ; preds = %if.end ret void } -attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } diff --git a/test/CodeGen/Hexagon/early-if-vecpred.ll b/test/CodeGen/Hexagon/early-if-vecpred.ll index ca119e1d1dec3..05074338cffb3 100644 --- a/test/CodeGen/Hexagon/early-if-vecpred.ll +++ b/test/CodeGen/Hexagon/early-if-vecpred.ll @@ -31,7 +31,7 @@ b5: ; preds = %b3, %b1 declare <1024 x i1> @llvm.hexagon.V6.pred.scalar2.128B(i32) #1 declare <1024 x i1> @llvm.hexagon.V6.pred.not.128B(<1024 x i1>) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } diff --git a/test/CodeGen/Hexagon/eliminate-pred-spill.ll b/test/CodeGen/Hexagon/eliminate-pred-spill.ll index b3a4a2f425249..4c93ab201e3b4 100644 --- a/test/CodeGen/Hexagon/eliminate-pred-spill.ll +++ b/test/CodeGen/Hexagon/eliminate-pred-spill.ll @@ -139,5 +139,5 @@ declare <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32>, <32 x i32>, i32) declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/expand-condsets-dead-bad.ll b/test/CodeGen/Hexagon/expand-condsets-dead-bad.ll index ce7f5e0ce12fe..350b0edec85d5 100644 --- a/test/CodeGen/Hexagon/expand-condsets-dead-bad.ll +++ b/test/CodeGen/Hexagon/expand-condsets-dead-bad.ll @@ -51,4 +51,4 @@ b23: ; preds = %b0 ret void } -attributes #0 = { nounwind "target-cpu"="hexagonv5" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { nounwind "target-cpu"="hexagonv5" 
"target-features"="-hvx,-long-calls" } diff --git a/test/CodeGen/Hexagon/expand-condsets-dead-pred.ll b/test/CodeGen/Hexagon/expand-condsets-dead-pred.ll index ecec83625e1c2..dbcba1aa7d02a 100644 --- a/test/CodeGen/Hexagon/expand-condsets-dead-pred.ll +++ b/test/CodeGen/Hexagon/expand-condsets-dead-pred.ll @@ -42,4 +42,4 @@ b20: ; preds = %b2 br label %b1 } -attributes #0 = { nounwind "target-cpu"="hexagonv55" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { nounwind "target-cpu"="hexagonv55" "target-features"="-hvx,-long-calls" } diff --git a/test/CodeGen/Hexagon/expand-condsets-def-undef.mir b/test/CodeGen/Hexagon/expand-condsets-def-undef.mir index 44da969bf29b2..702099a44531b 100644 --- a/test/CodeGen/Hexagon/expand-condsets-def-undef.mir +++ b/test/CodeGen/Hexagon/expand-condsets-def-undef.mir @@ -32,10 +32,9 @@ body: | %1 = COPY %r0 %2 = COPY %d0 ; Check that this instruction is unchanged (remains unpredicated) - ; CHECK: %3 = A2_addi %2.isub_hi, 1 + ; CHECK: %3:intregs = A2_addi %2.isub_hi, 1 %3 = A2_addi %2.isub_hi, 1 undef %2.isub_lo = C2_mux %0, %2.isub_lo, %1 %2.isub_hi = C2_muxir %0, %3, 0 ... - diff --git a/test/CodeGen/Hexagon/expand-condsets-imm.mir b/test/CodeGen/Hexagon/expand-condsets-imm.mir index 1b0988393b7cd..141db6453299e 100644 --- a/test/CodeGen/Hexagon/expand-condsets-imm.mir +++ b/test/CodeGen/Hexagon/expand-condsets-imm.mir @@ -1,6 +1,6 @@ # RUN: llc -march=hexagon -run-pass expand-condsets %s -o - | FileCheck %s # Check that we can expand a mux with a global as an immediate operand. -# CHECK: C2_cmoveif undef %0, @G +# CHECK: C2_cmoveif undef %0:predregs, @G --- | @G = global i32 0, align 4 @@ -19,4 +19,3 @@ body: | %1 = C2_muxir undef %0, %1, @G %r0 = COPY %1 ... - diff --git a/test/CodeGen/Hexagon/expand-condsets-impuse.mir b/test/CodeGen/Hexagon/expand-condsets-impuse.mir index 08b6798aa2fb9..725e414f52186 100644 --- a/test/CodeGen/Hexagon/expand-condsets-impuse.mir +++ b/test/CodeGen/Hexagon/expand-condsets-impuse.mir @@ -53,7 +53,7 @@ body: | %7 = L2_loadrb_io %99, 12 %8 = C2_cmpeqi %7, 9 %9 = A2_tfrsi -999 - ; CHECK: %10 = C2_cmoveit killed %8, -999, implicit %10 + ; CHECK: %10:intregs = C2_cmoveit killed %8, -999, implicit %10 %10 = C2_mux %8, %9, %1 J2_jumpr %10, implicit-def %pc diff --git a/test/CodeGen/Hexagon/expand-condsets-rm-reg.mir b/test/CodeGen/Hexagon/expand-condsets-rm-reg.mir index f3d105f75da27..e4c54c4b9888d 100644 --- a/test/CodeGen/Hexagon/expand-condsets-rm-reg.mir +++ b/test/CodeGen/Hexagon/expand-condsets-rm-reg.mir @@ -39,8 +39,8 @@ body: | %1 = COPY %r1 %2 = COPY %p0 ; Check that %3 was coalesced into %4. 
- ; CHECK: %4 = A2_abs %1 - ; CHECK: %4 = A2_tfrt killed %2, killed %0, implicit %4 + ; CHECK: %4:intregs = A2_abs %1 + ; CHECK: %4:intregs = A2_tfrt killed %2, killed %0, implicit %4 %3 = A2_abs %1 %4 = C2_mux %2, %0, %3 %r0 = COPY %4 diff --git a/test/CodeGen/Hexagon/expand-vselect-kill.ll b/test/CodeGen/Hexagon/expand-vselect-kill.ll index 1d07859665c07..a5769dbddd64a 100644 --- a/test/CodeGen/Hexagon/expand-vselect-kill.ll +++ b/test/CodeGen/Hexagon/expand-vselect-kill.ll @@ -48,6 +48,6 @@ declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #2 declare <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32>) #2 declare <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32>, <32 x i32>, i32) #2 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } -attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } +attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" } attributes #2 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/expand-vstorerw-undef.ll b/test/CodeGen/Hexagon/expand-vstorerw-undef.ll index 8524bf33de188..88eaec938fd36 100644 --- a/test/CodeGen/Hexagon/expand-vstorerw-undef.ll +++ b/test/CodeGen/Hexagon/expand-vstorerw-undef.ll @@ -91,5 +91,5 @@ b22: ; preds = %b22, %b18 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } -attributes #2 = { nounwind "reciprocal-estimates"="none" "target-cpu"="hexagonv60" "target-features"="+hvx-double" } +attributes #2 = { nounwind "reciprocal-estimates"="none" "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" } attributes #3 = { nobuiltin nounwind } diff --git a/test/CodeGen/Hexagon/expand-vstorerw-undef2.ll b/test/CodeGen/Hexagon/expand-vstorerw-undef2.ll index 4f2bb86f0842b..641d53c87837b 100644 --- a/test/CodeGen/Hexagon/expand-vstorerw-undef2.ll +++ b/test/CodeGen/Hexagon/expand-vstorerw-undef2.ll @@ -210,7 +210,7 @@ b34: ; preds = %b34, %b24 br i1 %v146, label %b33, label %b34 } -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } attributes #3 = { nobuiltin nounwind } diff --git a/test/CodeGen/Hexagon/find-loop-instr.ll b/test/CodeGen/Hexagon/find-loop-instr.ll index 1234baf17f528..b9743ad33aad4 100644 --- a/test/CodeGen/Hexagon/find-loop-instr.ll +++ b/test/CodeGen/Hexagon/find-loop-instr.ll @@ -76,4 +76,4 @@ b21: ; preds = %b20, %b19, %b16, %b br i1 %v23, label %b13, label %b10 } -attributes #0 = { norecurse "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { norecurse "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } diff --git a/test/CodeGen/Hexagon/fminmax.ll b/test/CodeGen/Hexagon/fminmax.ll index 7c1a9fb42f233..cf1dc6cdf61be 100644 --- a/test/CodeGen/Hexagon/fminmax.ll +++ b/test/CodeGen/Hexagon/fminmax.ll @@ -22,6 +22,6 @@ entry: declare float @fminf(float, float) #0 declare float @fmaxf(float, float) #0 -attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" "unsafe-fp-math"="false"
"use-soft-float"="false" } +attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/frame-offset-overflow.ll b/test/CodeGen/Hexagon/frame-offset-overflow.ll index 43d5fd5ad0f05..88d4e287fc038 100644 --- a/test/CodeGen/Hexagon/frame-offset-overflow.ll +++ b/test/CodeGen/Hexagon/frame-offset-overflow.ll @@ -156,7 +156,7 @@ declare <32 x i32> @llvm.hexagon.V6.vmpahb.acc(<32 x i32>, <32 x i32>, i32) #0 declare <32 x i32> @llvm.hexagon.V6.vmpyhsat.acc(<32 x i32>, <16 x i32>, i32) #0 attributes #0 = { nounwind readnone } -attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } !1 = !{!2, !2, i64 0} !2 = !{!"omnipotent char", !3, i64 0} diff --git a/test/CodeGen/Hexagon/hasfp-crash1.ll b/test/CodeGen/Hexagon/hasfp-crash1.ll index 1154a7117a70a..f96eafe15024f 100644 --- a/test/CodeGen/Hexagon/hasfp-crash1.ll +++ b/test/CodeGen/Hexagon/hasfp-crash1.ll @@ -18,7 +18,7 @@ entry: ; Function Attrs: nounwind readnone speculatable declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 -attributes #0 = { nounwind "disable-tail-calls"="true" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv5" "target-features"="-hvx-double,-long-calls" } +attributes #0 = { nounwind "disable-tail-calls"="true" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv5" "target-features"="-hvx,-long-calls" } attributes #1 = { nounwind readnone speculatable } !llvm.dbg.cu = !{!0} diff --git a/test/CodeGen/Hexagon/hasfp-crash2.ll b/test/CodeGen/Hexagon/hasfp-crash2.ll index c8b49948ce74e..c454a9fcd9b67 100644 --- a/test/CodeGen/Hexagon/hasfp-crash2.ll +++ b/test/CodeGen/Hexagon/hasfp-crash2.ll @@ -19,7 +19,7 @@ entry: ; Function Attrs: nounwind readnone speculatable declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1 -attributes #0 = { nounwind "disable-tail-calls"="true" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv5" "target-features"="-hvx-double,-long-calls" } +attributes #0 = { nounwind "disable-tail-calls"="true" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv5" "target-features"="-hvx,-long-calls" } attributes #1 = { nounwind readnone speculatable } !llvm.dbg.cu = !{!0} diff --git a/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse.ll b/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse.ll index 1719003bb8027..ca1ba2fe1a267 100644 --- a/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse.ll +++ b/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse.ll @@ -73,7 +73,7 @@ declare <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32>, <32 x i32>) #1 ; Function Attrs: nounwind readnone declare <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32>, <32 x i32>, i32) #1 -attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false"
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } !llvm.ident = !{!0} diff --git a/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse_constant.ll b/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse_constant.ll new file mode 100644 index 0000000000000..8fb62b3fa5aee --- /dev/null +++ b/test/CodeGen/Hexagon/hexagon_vector_loop_carried_reuse_constant.ll @@ -0,0 +1,86 @@ +; RUN: opt < %s -hexagon-vlcr -adce -S | FileCheck %s + +; CHECK-NOT: %.hexagon.vlcr +; ModuleID = 'hexagon_vector_loop_carried_reuse.c' +source_filename = "hexagon_vector_loop_carried_reuse.c" +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +@W = external local_unnamed_addr global i32, align 4 + +; Function Attrs: nounwind +define void @foo(i8* noalias nocapture readonly %src, i8* noalias nocapture %dst, i32 %stride) local_unnamed_addr #0 { +entry: + %add.ptr = getelementptr inbounds i8, i8* %src, i32 %stride + %mul = mul nsw i32 %stride, 2 + %add.ptr1 = getelementptr inbounds i8, i8* %src, i32 %mul + %0 = load i32, i32* @W, align 4, !tbaa !1 + %cmp55 = icmp sgt i32 %0, 0 + br i1 %cmp55, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + %1 = bitcast i8* %add.ptr1 to <32 x i32>* + %2 = load <32 x i32>, <32 x i32>* %1, align 128, !tbaa !5 + %incdec.ptr4 = getelementptr inbounds i8, i8* %add.ptr1, i32 128 + %3 = bitcast i8* %incdec.ptr4 to <32 x i32>* + %4 = bitcast i8* %add.ptr to <32 x i32>* + %5 = load <32 x i32>, <32 x i32>* %4, align 128, !tbaa !5 + %incdec.ptr2 = getelementptr inbounds i8, i8* %add.ptr, i32 128 + %6 = bitcast i8* %incdec.ptr2 to <32 x i32>* + %7 = bitcast i8* %src to <32 x i32>* + %8 = load <32 x i32>, <32 x i32>* %7, align 128, !tbaa !5 + %incdec.ptr = getelementptr inbounds i8, i8* %src, i32 128 + %9 = bitcast i8* %incdec.ptr to <32 x i32>* + %10 = bitcast i8* %dst to <32 x i32>* + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %out.063 = phi <32 x i32>* [ %10, %for.body.lr.ph ], [ %incdec.ptr18, %for.body ] + %p2.062 = phi <32 x i32>* [ %3, %for.body.lr.ph ], [ %incdec.ptr10, %for.body ] + %p1.061 = phi <32 x i32>* [ %6, %for.body.lr.ph ], [ %incdec.ptr8, %for.body ] + %p0.060 = phi <32 x i32>* [ %9, %for.body.lr.ph ], [ %incdec.ptr6, %for.body ] + %i.059 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %a.sroa.0.058 = phi <32 x i32> [ %8, %for.body.lr.ph ], [ %11, %for.body ] + %b.sroa.0.057 = phi <32 x i32> [ %5, %for.body.lr.ph ], [ %12, %for.body ] + %c.sroa.0.056 = phi <32 x i32> [ %2, %for.body.lr.ph ], [ %13, %for.body ] + %incdec.ptr6 = 
getelementptr inbounds <32 x i32>, <32 x i32>* %p0.060, i32 1 + %11 = load <32 x i32>, <32 x i32>* %p0.060, align 128, !tbaa !5 + %incdec.ptr8 = getelementptr inbounds <32 x i32>, <32 x i32>* %p1.061, i32 1 + %12 = load <32 x i32>, <32 x i32>* %p1.061, align 128, !tbaa !5 + %incdec.ptr10 = getelementptr inbounds <32 x i32>, <32 x i32>* %p2.062, i32 1 + %13 = load <32 x i32>, <32 x i32>* %p2.062, align 128, !tbaa !5 + %14 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> %a.sroa.0.058, <32 x i32> %b.sroa.0.057, i32 4) + %15 = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32> %14, <32 x i32> %c.sroa.0.056) + %16 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> %11, <32 x i32> %12, i32 5) + %17 = tail call <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32> %16, <32 x i32> %13) + %18 = tail call <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32> %17, <32 x i32> %15, i32 1) + %incdec.ptr18 = getelementptr inbounds <32 x i32>, <32 x i32>* %out.063, i32 1 + store <32 x i32> %18, <32 x i32>* %out.063, align 128, !tbaa !5 + %add = add nuw nsw i32 %i.059, 128 + %cmp = icmp slt i32 %add, %0 + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +; Function Attrs: nounwind readnone +declare <32 x i32> @llvm.hexagon.V6.vmaxub.128B(<32 x i32>, <32 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x i32>, <32 x i32>, i32) #1 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.ident = !{!0} + +!0 = !{!"QuIC LLVM Hexagon Clang version hexagon-clang-82-2622 (based on LLVM 5.0.0)"} +!1 = !{!2, !2, i64 0} +!2 = !{!"int", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!3, !3, i64 0} diff --git a/test/CodeGen/Hexagon/hvx-nontemporal.ll b/test/CodeGen/Hexagon/hvx-nontemporal.ll index 98c5ef4809b08..38e597df1ba8e 100644 --- a/test/CodeGen/Hexagon/hvx-nontemporal.ll +++ b/test/CodeGen/Hexagon/hvx-nontemporal.ll @@ -20,7 +20,7 @@ entry: ret void } -attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" } !1 = !{!2, !2, i64 0} !2 = !{!"omnipotent char", !3, i64 0} diff --git a/test/CodeGen/Hexagon/hwloop-loop1.ll b/test/CodeGen/Hexagon/hwloop-loop1.ll index 427efdc2c1110..af908b602297a 100644 --- a/test/CodeGen/Hexagon/hwloop-loop1.ll +++ b/test/CodeGen/Hexagon/hwloop-loop1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner=0 < %s | FileCheck %s ; ; Generate loop1 instruction for double loop sequence. 
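"Double loop sequence" here refers to a counted loop nest in which the inner loop becomes a loop0 hardware loop and the outer loop becomes loop1; the -enable-pipeliner=0 flag added to the RUN line presumably keeps the software pipeliner from restructuring the inner loop before hardware-loop generation, so the loop1 check stays deterministic. A minimal LLVM IR sketch of such a nest follows; the function name, array shape, and trip counts are invented for illustration, not taken from the test.

define void @nested_sketch([64 x i32]* %a) {
entry:
  br label %outer
outer:                                            ; candidate for loop1
  %i = phi i32 [ 0, %entry ], [ %i.next, %outer.latch ]
  br label %inner
inner:                                            ; candidate for loop0
  %j = phi i32 [ 0, %outer ], [ %j.next, %inner ]
  %p = getelementptr [64 x i32], [64 x i32]* %a, i32 %i, i32 %j
  store i32 0, i32* %p
  %j.next = add nuw nsw i32 %j, 1
  %j.done = icmp eq i32 %j.next, 64
  br i1 %j.done, label %outer.latch, label %inner
outer.latch:
  %i.next = add nuw nsw i32 %i, 1
  %i.done = icmp eq i32 %i.next, 64
  br i1 %i.done, label %exit, label %outer
exit:
  ret void
}

Both trip counts are compile-time constants, so both levels satisfy the basic hardware-loop requirement of a trip count computable before the loop is entered.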
diff --git a/test/CodeGen/Hexagon/hwloop-noreturn-call.ll b/test/CodeGen/Hexagon/hwloop-noreturn-call.ll index 1045e2ed80a79..accf6fd83c6e6 100644 --- a/test/CodeGen/Hexagon/hwloop-noreturn-call.ll +++ b/test/CodeGen/Hexagon/hwloop-noreturn-call.ll @@ -58,6 +58,6 @@ noret: declare void @trap() #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" } attributes #1 = { nounwind noreturn } diff --git a/test/CodeGen/Hexagon/hwloop-preh.ll b/test/CodeGen/Hexagon/hwloop-preh.ll index e92461f43da58..fb7e76848660b 100644 --- a/test/CodeGen/Hexagon/hwloop-preh.ll +++ b/test/CodeGen/Hexagon/hwloop-preh.ll @@ -41,4 +41,4 @@ return: ; preds = %return.loopexit, %f !1 = !{!"omnipotent char", !2} !2 = !{!"Simple C/C++ TBAA"} -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx" } diff --git a/test/CodeGen/Hexagon/hwloop-redef-imm.mir b/test/CodeGen/Hexagon/hwloop-redef-imm.mir new file mode 100644 index 0000000000000..014908e20a7fb --- /dev/null +++ b/test/CodeGen/Hexagon/hwloop-redef-imm.mir @@ -0,0 +1,63 @@ +# RUN: llc -march=hexagon -run-pass hwloops %s -o - | FileCheck %s + +# Normally, if the registers holding the induction variable's bounds +# are redefined inside of the loop's body, the loop cannot be converted +# to a hardware loop. However, if the redefining instruction is actually +# loading an immediate value into the register, this conversion is both +# possible and legal (since the immediate itself will be used in the +# loop setup in the preheader). + +# CHECK: [[R0:%[0-9]+]]:intregs = A2_tfrsi 1920 +# CHECK: J2_loop0r %bb.1.b1, [[R0]] +# +# CHECK: bb.1.b1 (address-taken): +# CHECK: ENDLOOP0 %bb.1.b1 + + +--- | + define void @fred() { + b0: + br label %b1 + b1: + br label %b2 + b2: + ret void + } +... + +--- +name: fred +tracksRegLiveness: true +registers: + - { id: 0, class: intregs } + - { id: 1, class: intregs } + - { id: 2, class: intregs } + - { id: 3, class: intregs } + - { id: 4, class: intregs } + - { id: 5, class: intregs } + - { id: 6, class: intregs } + - { id: 7, class: intregs } + - { id: 8, class: predregs } +body: | + bb.0.b0: + liveins: %r0 + successors: %bb.1 + %0 = A2_tfrsi 0 + %1 = A2_tfrsi 0 + %2 = COPY %r0 + + bb.1.b1: + successors: %bb.1, %bb.2 + %3 = PHI %0, %bb.0, %6, %bb.1 + %4 = PHI %1, %bb.0, %5, %bb.1 + S4_storerh_rr %2, %4, 0, %3 + %5 = A2_addi %4, 2 + %6 = A2_addi %3, 1 + ; This definition of %7 should not prevent conversion to hardware loop. + %7 = A2_tfrsi 3840 + %8 = C2_cmpeq %5, %7 + J2_jumpf %8, %bb.1, implicit-def %pc + J2_jump %bb.2, implicit-def %pc + + bb.2.b2: +... 
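At the IR level, the loop in fred is an ordinary counted loop: the bound (3840, stepped by 2, i.e. 1920 iterations) is a compile-time constant, so the trip count can be computed in the preheader even though an A2_tfrsi re-materializes the bound inside the body; this is what the expected A2_tfrsi 1920 / J2_loop0r sequence in the CHECK lines reflects. A hypothetical IR equivalent is sketched below; the function name and the pointer parameter are invented.

define void @fred_sketch(i16* %base) {
entry:
  br label %loop
loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %off = phi i32 [ 0, %entry ], [ %off.next, %loop ]
  ; store halfwords at increasing offsets, as the MIR body above does
  %p = getelementptr i16, i16* %base, i32 %i
  store i16 0, i16* %p
  %off.next = add nuw nsw i32 %off, 2
  %i.next = add nuw nsw i32 %i, 1
  %done = icmp eq i32 %off.next, 3840
  br i1 %done, label %exit, label %loop
exit:
  ret void
}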
diff --git a/test/CodeGen/Hexagon/ifcvt-diamond-bug-2016-08-26.ll b/test/CodeGen/Hexagon/ifcvt-diamond-bug-2016-08-26.ll index 91b9aaa9cb4ea..19eb2d1fc6747 100644 --- a/test/CodeGen/Hexagon/ifcvt-diamond-bug-2016-08-26.ll +++ b/test/CodeGen/Hexagon/ifcvt-diamond-bug-2016-08-26.ll @@ -1,31 +1,34 @@ -; RUN: llc -march=hexagon -hexagon-eif=0 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-eif=0 -disable-machine-sink < %s | FileCheck %s target triple = "hexagon" %struct.0 = type { i16, i16 } @t = external local_unnamed_addr global %struct.0, align 2 -define void @foo(i32 %p) local_unnamed_addr #0 { +define void @foo(i32 %p, i16 %x, i16 %y, i16 %z) local_unnamed_addr #0 { entry: %conv90 = trunc i32 %p to i16 %call105 = call signext i16 @bar(i16 signext 16384, i16 signext undef) #0 %call175 = call signext i16 @bar(i16 signext %conv90, i16 signext 4) #0 %call197 = call signext i16 @bar(i16 signext %conv90, i16 signext 4) #0 + %x1 = add i16 %x, 1 + %z1 = add i16 %z, 1 %cmp199 = icmp eq i16 %call197, 0 br i1 %cmp199, label %if.then200, label %if.else201 -; CHECK-DAG: [[R4:r[0-9]+]] = #4 +; CHECK-DAG: [[R4:r[0-9]+]] = add ; CHECK: p0 = cmp.eq(r0,#0) -; CHECK: if (!p0.new) [[R3:r[0-9]+]] = #3 +; CHECK: if (!p0) [[R3:r[0-9]+]] = add(r{{[0-9]+}},#3) ; CHECK-DAG: if (!p0) memh(##t) = [[R3]] ; CHECK-DAG: if (p0) memh(##t) = [[R4]] if.then200: ; preds = %entry - store i16 4, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 0), align 2 - store i16 0, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 1), align 2 + store i16 %x1, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 0), align 2 + store i16 %z1, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 1), align 2 br label %if.end202 if.else201: ; preds = %entry - store i16 3, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 0), align 2 + %y1 = add i16 %y, 3 + store i16 %y1, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 0), align 2 br label %if.end202 if.end202: ; preds = %if.else201, %if.then200 @@ -34,4 +37,4 @@ if.end202: ; preds = %if.else201, %if.the declare signext i16 @bar(i16 signext, i16 signext) local_unnamed_addr #0 -attributes #0 = { optsize "target-cpu"="hexagonv55" } +attributes #0 = { "target-cpu"="hexagonv55" } diff --git a/test/CodeGen/Hexagon/inline-asm-bad-constraint.ll b/test/CodeGen/Hexagon/inline-asm-bad-constraint.ll new file mode 100644 index 0000000000000..2c4e3f4ae4a0a --- /dev/null +++ b/test/CodeGen/Hexagon/inline-asm-bad-constraint.ll @@ -0,0 +1,16 @@ +; RUN: not llc -march=hexagon < %s 2>&1 | FileCheck %s + +; CHECK: error: couldn't allocate output register for constraint 'r' + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define void @fred() #0 { +entry: + %a0 = alloca <16 x i32>, align 64 + %0 = call <16 x i32> asm sideeffect "$0 = vmem(r0)", "=r"() + store <16 x i32> %0, <16 x i32>* %a0, align 64 + ret void +} + +attributes #0 = { noinline nounwind } diff --git a/test/CodeGen/Hexagon/inline-asm-qv.ll b/test/CodeGen/Hexagon/inline-asm-qv.ll index 2563421703130..d540c09c1dde0 100644 --- a/test/CodeGen/Hexagon/inline-asm-qv.ll +++ b/test/CodeGen/Hexagon/inline-asm-qv.ll @@ -15,5 +15,5 @@ entry: ret void } -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" 
"target-features"="+hvxv60,+hvx-length64b" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/inline-asm-vecpred128.ll b/test/CodeGen/Hexagon/inline-asm-vecpred128.ll index 234f5a0b79260..7d2f50ed58a4b 100644 --- a/test/CodeGen/Hexagon/inline-asm-vecpred128.ll +++ b/test/CodeGen/Hexagon/inline-asm-vecpred128.ll @@ -12,4 +12,4 @@ define void @fred() #0 { ret void } -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } diff --git a/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll b/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll index 2a54bfef0ad7a..3b853ebb444bc 100644 --- a/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll +++ b/test/CodeGen/Hexagon/intrinsics/byte-store-double.ll @@ -1,4 +1,4 @@ -; RUN: llc -mattr=+hvx-double -march=hexagon -O2 < %s | FileCheck %s +; RUN: llc -mattr=+hvxv60,hvx-length128b -march=hexagon -O2 < %s | FileCheck %s ; CHECK-LABEL: V6_vmaskedstoreq_128B ; CHECK: if (q{{[0-3]+}}) vmem(r{{[0-9]+}}+#0) = v{{[0-9]+}} diff --git a/test/CodeGen/Hexagon/intrinsics/byte-store.ll b/test/CodeGen/Hexagon/intrinsics/byte-store.ll index 208c15fec9804..5ff6722245292 100644 --- a/test/CodeGen/Hexagon/intrinsics/byte-store.ll +++ b/test/CodeGen/Hexagon/intrinsics/byte-store.ll @@ -1,4 +1,4 @@ -; RUN: llc -mattr=+hvx -march=hexagon -O2 < %s | FileCheck %s +; RUN: llc -mattr=+hvxv60,hvx-length64b -march=hexagon -O2 < %s | FileCheck %s ; CHECK-LABEL: V6_vmaskedstoreq ; CHECK: if (q{{[0-3]+}}) vmem(r{{[0-9]+}}+#0) = v{{[0-9]+}} diff --git a/test/CodeGen/Hexagon/intrinsics/system_user.ll b/test/CodeGen/Hexagon/intrinsics/system_user.ll index 23473c92da911..1a5fd138e0ff9 100644 --- a/test/CodeGen/Hexagon/intrinsics/system_user.ll +++ b/test/CodeGen/Hexagon/intrinsics/system_user.ll @@ -65,7 +65,7 @@ declare void @llvm.hexagon.Y2.dczeroa(i8* nocapture) #3 declare void @llvm.hexagon.Y4.l2fetch(i8* nocapture readonly, i32) #2 declare void @llvm.hexagon.Y5.l2fetch(i8* nocapture readonly, i64) #2 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } attributes #1 = { inaccessiblemem_or_argmemonly nounwind } attributes #2 = { nounwind } attributes #3 = { argmemonly nounwind writeonly } diff --git a/test/CodeGen/Hexagon/jt-in-text.ll b/test/CodeGen/Hexagon/jt-in-text.ll index 62b5caef6aaa1..7389c960b9ec3 100644 --- a/test/CodeGen/Hexagon/jt-in-text.ll +++ b/test/CodeGen/Hexagon/jt-in-text.ll @@ -54,4 +54,4 @@ sw.epilog: ; preds = %entry, %sw.bb4, %sw ret void } -attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { noinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" 
"no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Hexagon/loop-idiom/pmpy-infinite-loop.ll b/test/CodeGen/Hexagon/loop-idiom/pmpy-infinite-loop.ll index f738282c0f1bc..92f3b6048bfb1 100644 --- a/test/CodeGen/Hexagon/loop-idiom/pmpy-infinite-loop.ll +++ b/test/CodeGen/Hexagon/loop-idiom/pmpy-infinite-loop.ll @@ -80,4 +80,4 @@ if.end437: ; preds = %if.then409, %for.bo br label %for.body405 } -attributes #0 = { noinline nounwind "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" } +attributes #0 = { noinline nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } diff --git a/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll b/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll index 9907ae71c9921..3e1e39b9d0944 100644 --- a/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll +++ b/test/CodeGen/Hexagon/loop-idiom/pmpy-mod.ll @@ -81,4 +81,4 @@ b46: ; preds = %b3 ret i16 %v5 } -attributes #0 = { noinline nounwind "target-cpu"="hexagonv5" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { noinline nounwind "target-cpu"="hexagonv5" "target-features"="-hvx,-long-calls" } diff --git a/test/CodeGen/Hexagon/loop-prefetch.ll b/test/CodeGen/Hexagon/loop-prefetch.ll index 0c6e4581a71ff..24518421c4452 100644 --- a/test/CodeGen/Hexagon/loop-prefetch.ll +++ b/test/CodeGen/Hexagon/loop-prefetch.ll @@ -24,4 +24,4 @@ while.end: ; preds = %while.body, %entry ret void } -attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="-hvx" } diff --git a/test/CodeGen/Hexagon/lower-extract-subvector.ll b/test/CodeGen/Hexagon/lower-extract-subvector.ll index ba67de9e00a4c..09ca465c6716b 100644 --- a/test/CodeGen/Hexagon/lower-extract-subvector.ll +++ b/test/CodeGen/Hexagon/lower-extract-subvector.ll @@ -43,5 +43,5 @@ if.then.i164: ; preds = %"consume denoised" ; Function Attrs: nounwind readnone declare <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32>, <32 x i32>, i32) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } -attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } +attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } diff --git a/test/CodeGen/Hexagon/memops-stack.ll b/test/CodeGen/Hexagon/memops-stack.ll index 1aa2e30ea25b6..9da319f443bb2 100644 --- a/test/CodeGen/Hexagon/memops-stack.ll +++ b/test/CodeGen/Hexagon/memops-stack.ll @@ -136,9 +136,9 @@ declare void @foo(i32*) #2 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" 
"stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { argmemonly nounwind } -attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #3 = { nounwind } !1 = !{!2, !2, i64 0} diff --git a/test/CodeGen/Hexagon/misaligned_double_vector_store_not_fast.ll b/test/CodeGen/Hexagon/misaligned_double_vector_store_not_fast.ll index 25cb14e8514e1..c147282407434 100644 --- a/test/CodeGen/Hexagon/misaligned_double_vector_store_not_fast.ll +++ b/test/CodeGen/Hexagon/misaligned_double_vector_store_not_fast.ll @@ -42,6 +42,6 @@ entry: declare <64 x i32> @llvm.hexagon.V6.vshuffvdd.128B(<32 x i32>, <32 x i32>, i32) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } -attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } +attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } diff --git a/test/CodeGen/Hexagon/multi-cycle.ll b/test/CodeGen/Hexagon/multi-cycle.ll index fc021821af388..b8caef90397d4 100644 --- a/test/CodeGen/Hexagon/multi-cycle.ll +++ b/test/CodeGen/Hexagon/multi-cycle.ll @@ -95,7 +95,7 @@ declare <16 x i32> @llvm.hexagon.V6.valignb(<16 x i32>, <16 x i32>, i32) #1 declare <16 x i32> @llvm.hexagon.V6.vabsdiffh(<16 x i32>, <16 x i32>) #1 declare <16 x i32> @llvm.hexagon.V6.vabsh(<16 x i32>) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } attributes #1 = { nounwind readnone } !1 = !{!2, !2, i64 0} diff --git a/test/CodeGen/Hexagon/newify-crash.ll b/test/CodeGen/Hexagon/newify-crash.ll index 705170b13a593..bb29954291271 100644 --- a/test/CodeGen/Hexagon/newify-crash.ll +++ b/test/CodeGen/Hexagon/newify-crash.ll @@ -40,5 +40,5 @@ b18: ; preds = %b7 declare <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32>, <32 x i32>) #1 declare void @f0() #0 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/newvaluejump3.ll b/test/CodeGen/Hexagon/newvaluejump3.ll index 1e2e6c28c849f..93479666ad53d 100644 --- a/test/CodeGen/Hexagon/newvaluejump3.ll +++ b/test/CodeGen/Hexagon/newvaluejump3.ll @@ -74,6 +74,6 @@ b24: ; preds = %b20, %b16, %b9, %b2 } attributes #0 = { argmemonly nounwind } -attributes #1 = { nounwind readonly "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double,-long-calls" } -attributes #2 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double,-long-calls" } 
+attributes #1 = { nounwind readonly "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b,-long-calls" } +attributes #2 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b,-long-calls" } diff --git a/test/CodeGen/Hexagon/packetize-load-store-aliasing.mir b/test/CodeGen/Hexagon/packetize-load-store-aliasing.mir new file mode 100644 index 0000000000000..03835d69ba8fd --- /dev/null +++ b/test/CodeGen/Hexagon/packetize-load-store-aliasing.mir @@ -0,0 +1,41 @@ +# RUN: llc -march=hexagon -mcpu=hexagonv60 -run-pass hexagon-packetizer %s -o - | FileCheck %s + +# Check that a store can be packetized with a load that happens later +# if these instructions are not aliased (the load will actually execute +# first). +# CHECK-LABEL: name: danny +# CHECK: BUNDLE + +--- +name: danny +tracksRegLiveness: true +stack: + - { id: 0, type: default, size: 4, alignment: 4 } + - { id: 1, type: default, size: 4, alignment: 4 } +body: | + bb.0: + liveins: %r0 + S2_storeri_io %r29, 0, %r0 :: (store 4 into %stack.0) + %r1 = L2_loadri_io %r29, 4 :: (load 4 from %stack.1) +... + + +# Check that a store cannot be packetized with a load that happens later +# if these instructions are aliased. +# CHECK-LABEL: name: sammy +# CHECK-NOT: BUNDLE +# CHECK: S2_storeri_io %r29, 0, %r0 +# CHECK: %r1 = L2_loadri_io %r29, 0 + +--- +name: sammy +tracksRegLiveness: true +stack: + - { id: 0, type: default, size: 4, alignment: 4 } +body: | + bb.0: + liveins: %r0 + S2_storeri_io %r29, 0, %r0 :: (store 4 into %stack.0) + %r1 = L2_loadri_io %r29, 0 :: (load 4 from %stack.0) +... + diff --git a/test/CodeGen/Hexagon/packetize-nvj-no-prune.mir b/test/CodeGen/Hexagon/packetize-nvj-no-prune.mir new file mode 100644 index 0000000000000..7047968ab6f94 --- /dev/null +++ b/test/CodeGen/Hexagon/packetize-nvj-no-prune.mir @@ -0,0 +1,31 @@ +# RUN: llc -march=hexagon -run-pass hexagon-packetizer %s -o - | FileCheck %s + +# Make sure that the new-value jump is packetized with the producer. In this +# case, the loads could be packetized together (updating the offset in +# the second load), but then it would not be possible to put the new-value +# jump in the same packet. + +# CHECK-LABEL: name: fred +# CHECK: BUNDLE +# CHECK-NEXT: %r3 = L2_loadri_io %r1, 0 +# CHECK-NEXT: J4_cmpgtu_f_jumpnv_t internal killed %r3 + + +--- | + define void @fred() { ret void } + @array = external global [256 x i32], align 8 +... + +--- +name: fred +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1 + %r1 = A2_tfrsi @array + %r2, %r1 = L2_loadri_pi %r1, 4 + %r3 = L2_loadri_io %r1, 0 + J4_cmpgtu_f_jumpnv_t killed %r3, killed %r2, %bb.1, implicit-def %pc + + bb.1: +...
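The packetizer's aliasing decisions in danny and sammy come straight from the memory operands attached to the instructions (store 4 into %stack.0 versus load 4 from %stack.1 or %stack.0). As a rough IR analogue of the bundle-friendly case (a hypothetical function, not part of the tests): when the store and the load touch provably distinct stack slots, the two can legally share a packet even though the load appears later in program order, because within a Hexagon packet loads read the pre-packet memory state, so the load still executes first.

define i32 @independent_slots(i32 %v, i32 %w) {
  %slot0 = alloca i32, align 4
  %slot1 = alloca i32, align 4
  store i32 %w, i32* %slot1, align 4
  ; this store and the following load do not alias (distinct allocas), so
  ; they correspond to the bundle-able store/load pair in @danny above
  store i32 %v, i32* %slot0, align 4
  %r = load i32, i32* %slot1, align 4
  ret i32 %r
}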
diff --git a/test/CodeGen/Hexagon/peephole-kill-flags.ll b/test/CodeGen/Hexagon/peephole-kill-flags.ll index 03de15323528f..4a24ea62af4e0 100644 --- a/test/CodeGen/Hexagon/peephole-kill-flags.ll +++ b/test/CodeGen/Hexagon/peephole-kill-flags.ll @@ -23,5 +23,5 @@ for.end13: ; preds = %for.cond ret void } -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" } diff --git a/test/CodeGen/Hexagon/plt-rel.ll b/test/CodeGen/Hexagon/plt-rel.ll index 1d38cf32b8860..d1d97a62263cd 100644 --- a/test/CodeGen/Hexagon/plt-rel.ll +++ b/test/CodeGen/Hexagon/plt-rel.ll @@ -34,4 +34,4 @@ return: ; preds = %entry, %if.then ret i1 %.sink } -attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } diff --git a/test/CodeGen/Hexagon/post-inc-aa-metadata.ll b/test/CodeGen/Hexagon/post-inc-aa-metadata.ll index fb2f038e6e592..673a9b41ff22a 100644 --- a/test/CodeGen/Hexagon/post-inc-aa-metadata.ll +++ b/test/CodeGen/Hexagon/post-inc-aa-metadata.ll @@ -29,7 +29,7 @@ while.end: ; preds = %while.body, %entry ret void } -attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" } !1 = !{!2, !2, i64 0} diff --git a/test/CodeGen/Hexagon/propagate-vcombine.ll b/test/CodeGen/Hexagon/propagate-vcombine.ll index 4948a89b73e8e..989322a0fea09 100644 --- a/test/CodeGen/Hexagon/propagate-vcombine.ll +++ b/test/CodeGen/Hexagon/propagate-vcombine.ll @@ -42,7 +42,7 @@ declare <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32>, <32 x i32>) #3 declare <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32>) #3 declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #3 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } -attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } -attributes #2 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx" } -attributes #3 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } +attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" } +attributes #2 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } +attributes #3 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" } diff --git a/test/CodeGen/Hexagon/rdf-def-mask.ll b/test/CodeGen/Hexagon/rdf-def-mask.ll index 3d65968911ed3..91aec7750dbc9 100644 --- a/test/CodeGen/Hexagon/rdf-def-mask.ll +++ b/test/CodeGen/Hexagon/rdf-def-mask.ll @@ -48,5 +48,5 @@ declare i32 @llvm.hexagon.S2.clb(i32) #1 declare i32 @llvm.hexagon.S2.asl.r.r(i32, i32) #1 declare i32 @llvm.hexagon.M2.mpyu.nac.ll.s0(i32, i32, i32) #1 -attributes #0 = { nounwind readnone "target-cpu"="hexagonv55" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { nounwind readnone "target-cpu"="hexagonv55" "target-features"="-hvx,-long-calls" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/rdf-inline-asm-fixed.ll b/test/CodeGen/Hexagon/rdf-inline-asm-fixed.ll index 222d8a2b2e147..d06da9346786b 100644 --- 
a/test/CodeGen/Hexagon/rdf-inline-asm-fixed.ll +++ b/test/CodeGen/Hexagon/rdf-inline-asm-fixed.ll @@ -26,7 +26,7 @@ declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1 ; Function Attrs: argmemonly nounwind declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1 -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv5" "target-features"="-hvx,-hvx-double" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv5" "target-features"="-hvx" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { argmemonly nounwind } attributes #2 = { nounwind } diff --git a/test/CodeGen/Hexagon/rdf-inline-asm.ll b/test/CodeGen/Hexagon/rdf-inline-asm.ll index ae09062638dcd..2661f8c0d0dd1 100644 --- a/test/CodeGen/Hexagon/rdf-inline-asm.ll +++ b/test/CodeGen/Hexagon/rdf-inline-asm.ll @@ -24,7 +24,7 @@ if.end: ; preds = %if.then, %entry ret i32 %retval1.0 } -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } !1 = !{i32 155} diff --git a/test/CodeGen/Hexagon/reg-scavengebug-3.ll b/test/CodeGen/Hexagon/reg-scavengebug-3.ll index db9ed55d2da66..c73d4c7bc01f5 100644 --- a/test/CodeGen/Hexagon/reg-scavengebug-3.ll +++ b/test/CodeGen/Hexagon/reg-scavengebug-3.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -march=hexagon -mcpu=hexagonv60 < %s | FileCheck %s +; RUN: llc -O0 -march=hexagon -mcpu=hexagonv60 -mattr=+hvxv60,hvx-length64b < %s | FileCheck %s ; CHECK: vmem diff --git a/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll b/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll index 78c4b989b7ac2..bc878e09ef942 100644 --- a/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll +++ b/test/CodeGen/Hexagon/reg-scavenger-valid-slot.ll @@ -95,6 +95,6 @@ entry: ret void } -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } diff --git a/test/CodeGen/Hexagon/regalloc-bad-undef.mir b/test/CodeGen/Hexagon/regalloc-bad-undef.mir index a541e766f593c..7e18011a523a9 100644 --- a/test/CodeGen/Hexagon/regalloc-bad-undef.mir +++ b/test/CodeGen/Hexagon/regalloc-bad-undef.mir @@ -58,7 +58,7 @@ declare i32 @lrand48() #0 declare i64 @llvm.hexagon.S2.extractup(i64, i32, i32) #1 - attributes #0 = { nounwind optsize "target-cpu"="hexagonv55" "target-features"="-hvx,-hvx-double" } + attributes #0 = { nounwind 
optsize "target-cpu"="hexagonv55" "target-features"="-hvx" } attributes #1 = { nounwind readnone } ... diff --git a/test/CodeGen/Hexagon/regalloc-block-overlap.ll b/test/CodeGen/Hexagon/regalloc-block-overlap.ll index c98fcb6a9f04b..2dc9a7a5153cd 100644 --- a/test/CodeGen/Hexagon/regalloc-block-overlap.ll +++ b/test/CodeGen/Hexagon/regalloc-block-overlap.ll @@ -138,6 +138,6 @@ b42: ; preds = %b40 br label %b39 } -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind } diff --git a/test/CodeGen/Hexagon/regalloc-liveout-undef.mir b/test/CodeGen/Hexagon/regalloc-liveout-undef.mir index 6a41514b060e0..a6a398f0cdff4 100644 --- a/test/CodeGen/Hexagon/regalloc-liveout-undef.mir +++ b/test/CodeGen/Hexagon/regalloc-liveout-undef.mir @@ -6,7 +6,7 @@ # cover live intervals as well. # # Make sure that this compiles successfully. -# CHECK: undef %1.isub_lo = A2_addi %1.isub_lo, 1 +# CHECK: undef %1.isub_lo:doubleregs = A2_addi %1.isub_lo, 1 --- name: fred @@ -32,4 +32,3 @@ body: | undef %1.isub_lo = A2_addi %1.isub_lo, 1 J2_jump %bb.1, implicit-def %pc ... - diff --git a/test/CodeGen/Hexagon/sdata-array.ll b/test/CodeGen/Hexagon/sdata-array.ll index 89ef46079f7c9..cea86bd426d95 100644 --- a/test/CodeGen/Hexagon/sdata-array.ll +++ b/test/CodeGen/Hexagon/sdata-array.ll @@ -5,9 +5,9 @@ @foo = common global [4 x i8] zeroinitializer, align 1 -define void @set() nounwind { +define void @set(i8 %x) nounwind { entry: - store i8 0, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @foo, i32 0, i32 0), align 1 + store i8 %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @foo, i32 0, i32 0), align 1 ret void } diff --git a/test/CodeGen/Hexagon/select-instr-align.ll b/test/CodeGen/Hexagon/select-instr-align.ll index e3b2929d52f16..368ee3c5726ad 100644 --- a/test/CodeGen/Hexagon/select-instr-align.ll +++ b/test/CodeGen/Hexagon/select-instr-align.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -enable-hexagon-hvx < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv60 -mattr=+hvxv60,hvx-length64b < %s | FileCheck %s ; CHECK-LABEL: aligned_load: ; CHECK: = vmem({{.*}}) ; CHECK-LABEL: aligned_store: diff --git a/test/CodeGen/Hexagon/stack-align-reset.ll b/test/CodeGen/Hexagon/stack-align-reset.ll index 0d028fb95b248..f7639c728624b 100644 --- a/test/CodeGen/Hexagon/stack-align-reset.ll +++ b/test/CodeGen/Hexagon/stack-align-reset.ll @@ -47,5 +47,5 @@ b11: ; preds = %b11, %b7 declare i32 @llvm.hexagon.V6.extractw(<16 x i32>, i32) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/store-imm-amode.ll b/test/CodeGen/Hexagon/store-imm-amode.ll new file mode 100644 index 0000000000000..463559ad63fc1 --- /dev/null +++ b/test/CodeGen/Hexagon/store-imm-amode.ll @@ -0,0 +1,97 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that a store with a proper addressing mode is selected for various +; cases of storing an immediate value. 
+ + +@var_i8 = global [10 x i8] zeroinitializer, align 8 + +; CHECK-LABEL: store_imm_i8: +; CHECK: memb(r0+#0) = #-1 +define void @store_imm_i8(i8* %p) nounwind { + store i8 255, i8* %p, align 4 + ret void +} + +; CHECK-LABEL: store_rr_i8: +; CHECK: [[RV:r[0-9]+]] = #255 +; CHECK: memb(r0+r1<<#0) = [[RV]] +define void @store_rr_i8(i8* %p, i32 %x) nounwind { + %t0 = getelementptr i8, i8* %p, i32 %x + store i8 255, i8* %t0, align 4 + ret void +} + +; CHECK-LABEL: store_io_i8: +; CHECK: [[RV:r[0-9]+]] = #255 +; CHECK: memb(r0+##var_i8) = [[RV]] +define void @store_io_i8(i32 %x) nounwind { + %t0 = getelementptr [10 x i8], [10 x i8]* @var_i8, i32 0, i32 %x + store i8 255, i8* %t0, align 4 + ret void +} + +; CHECK-LABEL: store_ur_i8: +; CHECK: [[RV:r[0-9]+]] = #255 +; CHECK: memb(r0<<#2+##var_i8) = [[RV]] +define void @store_ur_i8(i32 %x) nounwind { + %t0 = shl i32 %x, 2 + %t1 = getelementptr [10 x i8], [10 x i8]* @var_i8, i32 0, i32 %t0 + store i8 255, i8* %t1, align 4 + ret void +} + +@var_i16 = global [10 x i16] zeroinitializer, align 8 + +; CHECK-LABEL: store_imm_i16: +; CHECK: memh(r0+#0) = #-1 +define void @store_imm_i16(i16* %p) nounwind { + store i16 65535, i16* %p, align 4 + ret void +} + +; CHECK-LABEL: store_rr_i16: +; CHECK: [[RV:r[0-9]+]] = ##65535 +; CHECK: memh(r0+r1<<#1) = [[RV]] +define void @store_rr_i16(i16* %p, i32 %x) nounwind { + %t0 = getelementptr i16, i16* %p, i32 %x + store i16 65535, i16* %t0, align 4 + ret void +} + +; CHECK-LABEL: store_ur_i16: +; CHECK: [[RV:r[0-9]+]] = ##65535 +; CHECK: memh(r0<<#1+##var_i16) = [[RV]] +define void @store_ur_i16(i32 %x) nounwind { + %t0 = getelementptr [10 x i16], [10 x i16]* @var_i16, i32 0, i32 %x + store i16 65535, i16* %t0, align 4 + ret void +} + +@var_i32 = global [10 x i32] zeroinitializer, align 8 + +; CHECK-LABEL: store_imm_i32: +; CHECK: memw(r0+#0) = #-1 +define void @store_imm_i32(i32* %p) nounwind { + store i32 4294967295, i32* %p, align 4 + ret void +} + +; CHECK-LABEL: store_rr_i32: +; CHECK: [[RV:r[0-9]+]] = #-1 +; CHECK: memw(r0+r1<<#2) = [[RV]] +define void @store_rr_i32(i32* %p, i32 %x) nounwind { + %t0 = getelementptr i32, i32* %p, i32 %x + store i32 4294967295, i32* %t0, align 4 + ret void +} + +; CHECK-LABEL: store_ur_i32: +; CHECK: [[RV:r[0-9]+]] = #-1 +; CHECK: memw(r0<<#2+##var_i32) = [[RV]] +define void @store_ur_i32(i32 %x) nounwind { + %t0 = getelementptr [10 x i32], [10 x i32]* @var_i32, i32 0, i32 %x + store i32 4294967295, i32* %t0, align 4 + ret void +} + diff --git a/test/CodeGen/Hexagon/store-imm-stack-object.ll b/test/CodeGen/Hexagon/store-imm-stack-object.ll index 8de310953aee0..c0eaea26cc245 100644 --- a/test/CodeGen/Hexagon/store-imm-stack-object.ll +++ b/test/CodeGen/Hexagon/store-imm-stack-object.ll @@ -3,8 +3,7 @@ target triple = "hexagon" ; CHECK-LABEL: test1: -; CHECK: [[REG1:(r[0-9]+)]] = ##875770417 -; CHECK-DAG: memw(r29+#4) = [[REG1]] +; CHECK-DAG: memw(r29+#4) = ##875770417 ; CHECK-DAG: memw(r29+#8) = #51 ; CHECK-DAG: memh(r29+#12) = #50 ; CHECK-DAG: memb(r29+#15) = #49 diff --git a/test/CodeGen/Hexagon/store-shift.ll b/test/CodeGen/Hexagon/store-shift.ll index 981071a0181e7..f92e23f4bc41b 100644 --- a/test/CodeGen/Hexagon/store-shift.ll +++ b/test/CodeGen/Hexagon/store-shift.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s -; CHECK-DAG: r[[BASE:[0-9]+]] += add +; CHECK-DAG: r[[BASE:[0-9]+]] = add(r1,#1000) ; CHECK-DAG: r[[IDX0:[0-9]+]] = add(r2,#5) ; CHECK-DAG: r[[IDX1:[0-9]+]] = add(r2,#6) ; CHECK-DAG: memw(r0+r[[IDX0]]<<#2) = r3 @@ -42,7 +42,7 @@ entry: ret void } 
-attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" } !1 = !{!2, !2, i64 0} !2 = !{!"int", !3, i64 0} diff --git a/test/CodeGen/Hexagon/switch-lut-explicit-section.ll b/test/CodeGen/Hexagon/switch-lut-explicit-section.ll index 6c67a0dab1a8c..b80e8e33bf8b4 100644 --- a/test/CodeGen/Hexagon/switch-lut-explicit-section.ll +++ b/test/CodeGen/Hexagon/switch-lut-explicit-section.ll @@ -29,4 +29,4 @@ return: ; preds = %entry ret i32 19 } -attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Hexagon/switch-lut-function-section.ll b/test/CodeGen/Hexagon/switch-lut-function-section.ll index bb2b1e798c8ab..542bfbb6d6678 100644 --- a/test/CodeGen/Hexagon/switch-lut-function-section.ll +++ b/test/CodeGen/Hexagon/switch-lut-function-section.ll @@ -27,4 +27,4 @@ return: ; preds = %entry ret i32 19 } -attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Hexagon/switch-lut-multiple-functions.ll b/test/CodeGen/Hexagon/switch-lut-multiple-functions.ll index 57fdfbf33abce..22b61f0c92ba7 100644 --- a/test/CodeGen/Hexagon/switch-lut-multiple-functions.ll +++ b/test/CodeGen/Hexagon/switch-lut-multiple-functions.ll @@ -39,4 +39,4 @@ return: ; preds = %entry ret i32 19 } -attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" 
"no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Hexagon/switch-lut-text-section.ll b/test/CodeGen/Hexagon/switch-lut-text-section.ll index b4d3e898d1034..203ea4abd9467 100644 --- a/test/CodeGen/Hexagon/switch-lut-text-section.ll +++ b/test/CodeGen/Hexagon/switch-lut-text-section.ll @@ -24,4 +24,4 @@ return: ; preds = %entry ret i32 19 } -attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx-double,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { norecurse nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/Hexagon/swp-order-copies.ll b/test/CodeGen/Hexagon/swp-order-copies.ll new file mode 100644 index 0000000000000..5de0717654ffa --- /dev/null +++ b/test/CodeGen/Hexagon/swp-order-copies.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Test that the instruction ordering code in the pipeliner fixes up dependences +; between post-increment register definitions and uses so that the register +; allocator does not allocate an additional register. The following test case +; should generate a single packet. 
+ +; CHECK: loop0(.LBB0_[[LOOP:.]], +; CHECK: .LBB0_[[LOOP]]: +; CHECK: { +; CHECK-NOT: { +; CHECK: :endloop0 + +define void @test(i64* nocapture %v1, i64 %v2, i32 %len) local_unnamed_addr #0 { +entry: + %cmp7 = icmp sgt i32 %len, 0 + br i1 %cmp7, label %for.body, label %for.end + +for.body: + %arrayidx.phi = phi i64* [ %arrayidx.inc, %for.body ], [ %v1, %entry ] + %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %0 = load i64, i64* %arrayidx.phi, align 8 + %1 = tail call i64 @llvm.hexagon.M2.mmpyul.rs1(i64 %0, i64 %v2) + store i64 %1, i64* %arrayidx.phi, align 8 + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, %len + %arrayidx.inc = getelementptr i64, i64* %arrayidx.phi, i32 1 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare i64 @llvm.hexagon.M2.mmpyul.rs1(i64, i64) #1 + +attributes #0 = { nounwind "target-cpu"="hexagonv60" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/tfr-to-combine.ll b/test/CodeGen/Hexagon/tfr-to-combine.ll index 50879ffe582dd..86801dbc71f18 100644 --- a/test/CodeGen/Hexagon/tfr-to-combine.ll +++ b/test/CodeGen/Hexagon/tfr-to-combine.ll @@ -6,30 +6,33 @@ @b = external global i16 @c = external global i16 -; Function Attrs: nounwind -define i64 @test1() #0 { +declare void @test0a(i32, i32) #0 +declare void @test0b(i32, i32, i32, i32) #0 + +; CHECK-LABEL: test1: ; CHECK: combine(#10,#0) +define i32 @test1() #0 { entry: - store i16 0, i16* @a, align 2 - store i16 10, i16* @b, align 2 - ret i64 10 + call void @test0a(i32 0, i32 10) #0 + ret i32 10 } -; Function Attrs: nounwind -define i64 @test2() #0 { +; CHECK-LABEL: test2: ; CHECK: combine(#0,r{{[0-9]+}}) +define i32 @test2() #0 { entry: - store i16 0, i16* @a, align 2 - %0 = load i16, i16* @c, align 2 - %conv2 = zext i16 %0 to i64 - ret i64 %conv2 + %t0 = load i16, i16* @c, align 2 + %t1 = zext i16 %t0 to i32 + call void @test0b(i32 %t1, i32 0, i32 %t1, i32 0) + ret i32 0 } -; Function Attrs: nounwind -define i64 @test4() #0 { +; CHECK-LABEL: test3: ; CHECK: combine(#0,#100) +define i32 @test3() #0 { entry: - store i16 100, i16* @b, align 2 - store i16 0, i16* @a, align 2 - ret i64 0 + call void @test0a(i32 100, i32 0) + ret i32 0 } + +attributes #0 = { nounwind } diff --git a/test/CodeGen/Hexagon/tls_pic.ll b/test/CodeGen/Hexagon/tls_pic.ll index 2c2be0dc384af..c6e5f5af582fb 100644 --- a/test/CodeGen/Hexagon/tls_pic.ll +++ b/test/CodeGen/Hexagon/tls_pic.ll @@ -5,8 +5,8 @@ ; CHECK-LABEL: test_initial_exec ; CHECK-DAG: = add(pc,##_GLOBAL_OFFSET_TABLE_@PCREL) -; CHECK-DAG: = ##src_ie@IEGOT -; CHECK-DAG: = ##dst_ie@IEGOT +; CHECK-DAG: ##src_ie@IEGOT +; CHECK-DAG: ##dst_ie@IEGOT ; CHECK-NOT: call define i32 @test_initial_exec() nounwind { entry: @@ -23,8 +23,8 @@ entry: ; CHECK-LABEL: test_dynamic ; CHECK-DAG: = add(pc,##_GLOBAL_OFFSET_TABLE_@PCREL) -; CHECK-DAG: = ##src_gd@GDGOT -; CHECK-DAG: = ##dst_gd@GDGOT +; CHECK-DAG: ##src_gd@GDGOT +; CHECK-DAG: ##dst_gd@GDGOT ; CHECK-DAG: call src_gd@GDPLT ; CHECK-DAG: call dst_gd@GDPLT diff --git a/test/CodeGen/Hexagon/tls_static.ll b/test/CodeGen/Hexagon/tls_static.ll index dbd3bd7b4ba80..f4e882b4ff285 100644 --- a/test/CodeGen/Hexagon/tls_static.ll +++ b/test/CodeGen/Hexagon/tls_static.ll @@ -4,8 +4,8 @@ @src_le = thread_local global i32 0, align 4 ; CHECK-LABEL: test_local_exec -; CHECK-DAG: = ##src_le@TPREL -; CHECK-DAG: = ##dst_le@TPREL +; CHECK-DAG: ##src_le@TPREL +; CHECK-DAG: ##dst_le@TPREL define i32 @test_local_exec() nounwind { entry: %0 = load i32, i32* @src_le, align 4 diff --git 
a/test/CodeGen/Hexagon/undo-dag-shift.ll b/test/CodeGen/Hexagon/undo-dag-shift.ll index c1ab5d73f5c38..5aa7f39121d83 100644 --- a/test/CodeGen/Hexagon/undo-dag-shift.ll +++ b/test/CodeGen/Hexagon/undo-dag-shift.ll @@ -54,6 +54,6 @@ entry: ret void } -attributes #0 = { norecurse nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" } -attributes #1 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-hvx-double,-long-calls" } +attributes #0 = { norecurse nounwind readonly "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } +attributes #1 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="-hvx,-long-calls" } diff --git a/test/CodeGen/Hexagon/unreachable-mbb-phi-subreg.mir b/test/CodeGen/Hexagon/unreachable-mbb-phi-subreg.mir index 6d6549201abf1..a8c342f296c1f 100644 --- a/test/CodeGen/Hexagon/unreachable-mbb-phi-subreg.mir +++ b/test/CodeGen/Hexagon/unreachable-mbb-phi-subreg.mir @@ -17,9 +17,8 @@ body: | bb.2: ; Make sure that the subregister from the PHI operand is preserved. - ; CHECK: %[[REG:[0-9]+]] = COPY %0.isub_lo + ; CHECK: %[[REG:[0-9]+]]:intregs = COPY %0.isub_lo ; CHECK: %r0 = COPY %[[REG]] %1 : intregs = PHI %0.isub_lo, %bb.0, %0.isub_hi, %bb.1 %r0 = COPY %1 ... - diff --git a/test/CodeGen/Hexagon/v60-cur.ll b/test/CodeGen/Hexagon/v60-cur.ll index a7d4f6d310e47..26d40c9a69756 100644 --- a/test/CodeGen/Hexagon/v60-cur.ll +++ b/test/CodeGen/Hexagon/v60-cur.ll @@ -54,7 +54,7 @@ declare <16 x i32> @llvm.hexagon.V6.vasrwh(<16 x i32>, <16 x i32>, i32) #1 declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1 declare <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32>, <16 x i32>) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } attributes #1 = { nounwind readnone } !1 = !{!2, !2, i64 0} diff --git a/test/CodeGen/Hexagon/v60-vsel1.ll b/test/CodeGen/Hexagon/v60-vsel1.ll index e673145c9d14c..71d112cc7357e 100644 --- a/test/CodeGen/Hexagon/v60-vsel1.ll +++ b/test/CodeGen/Hexagon/v60-vsel1.ll @@ -65,5 +65,5 @@ declare <16 x i32> @llvm.hexagon.V6.vandqrt(<512 x i1>, i32) #1 declare <16 x i32> @llvm.hexagon.V6.vnot(<16 x i32>) #1 declare <16 x i32> @llvm.hexagon.V6.vand(<16 x i32>, <16 x i32>) #1 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/v60Intrins.ll b/test/CodeGen/Hexagon/v60Intrins.ll index d0064c50e71d3..980d870138268 100644 --- a/test/CodeGen/Hexagon/v60Intrins.ll +++ b/test/CodeGen/Hexagon/v60Intrins.ll @@ -2555,5 +2555,5 @@ declare <32 x i32> @llvm.hexagon.V6.vunpackh(<16 x i32>) #1 ; Function Attrs: nounwind readnone declare <32 x i32> @llvm.hexagon.V6.vunpackoh(<32 x i32>, <16 x i32>) #1 -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" 
"target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/v60Vasr.ll b/test/CodeGen/Hexagon/v60Vasr.ll index fb177f614f72d..dd309f6764615 100644 --- a/test/CodeGen/Hexagon/v60Vasr.ll +++ b/test/CodeGen/Hexagon/v60Vasr.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -O2 -mcpu=hexagonv60 < %s | FileCheck %s +; RUN: llc -march=hexagon -O2 -mcpu=hexagonv60 -mattr=+hvxv60,hvx-length64b < %s | FileCheck %s ; CHECK: vasr(v{{[0-9]+}}.h,v{{[0-9]+}}.h,r{{[0-7]+}}):sat diff --git a/test/CodeGen/Hexagon/v60small.ll b/test/CodeGen/Hexagon/v60small.ll index 8a6a6155a3998..efa726e2c6b1c 100644 --- a/test/CodeGen/Hexagon/v60small.ll +++ b/test/CodeGen/Hexagon/v60small.ll @@ -47,5 +47,5 @@ declare <512 x i1> @llvm.hexagon.V6.pred.and(<512 x i1>, <512 x i1>) #1 ; Function Attrs: nounwind readnone declare <512 x i1> @llvm.hexagon.V6.pred.and.n(<512 x i1>, <512 x i1>) #1 -attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvx" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/Hexagon/v6vec-vprint.ll b/test/CodeGen/Hexagon/v6vec-vprint.ll index 24daeac3fb5de..18c2cf65f727e 100644 --- a/test/CodeGen/Hexagon/v6vec-vprint.ll +++ b/test/CodeGen/Hexagon/v6vec-vprint.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-hexagon-hvx -disable-hexagon-shuffle=0 -O2 -enable-hexagon-vector-print < %s | FileCheck %s -; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-hexagon-hvx -disable-hexagon-shuffle=0 -O2 -enable-hexagon-vector-print -trace-hex-vector-stores-only < %s | FileCheck --check-prefix=VSTPRINT %s +; RUN: llc -march=hexagon -mcpu=hexagonv60 -mattr=+hvxv60,hvx-length64b -disable-hexagon-shuffle=0 -O2 -enable-hexagon-vector-print < %s | FileCheck --check-prefix=CHECK %s +; RUN: llc -march=hexagon -mcpu=hexagonv60 -mattr=+hvxv60,hvx-length64b -disable-hexagon-shuffle=0 -O2 -enable-hexagon-vector-print -trace-hex-vector-stores-only < %s | FileCheck --check-prefix=VSTPRINT %s ; generate .long XXXX which is a vector debug print instruction. 
; CHECK: .long 0x1dffe0 ; CHECK: .long 0x1dffe0 diff --git a/test/CodeGen/Hexagon/vassign-to-combine.ll b/test/CodeGen/Hexagon/vassign-to-combine.ll index a9a0d51e43b6e..0facdc335554d 100644 --- a/test/CodeGen/Hexagon/vassign-to-combine.ll +++ b/test/CodeGen/Hexagon/vassign-to-combine.ll @@ -52,5 +52,5 @@ b2: ; preds = %b1 } attributes #0 = { nounwind readnone } -attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } diff --git a/test/CodeGen/Hexagon/vdmpy-halide-test.ll b/test/CodeGen/Hexagon/vdmpy-halide-test.ll index 7e41bd4d20d41..352398e7bbeaf 100644 --- a/test/CodeGen/Hexagon/vdmpy-halide-test.ll +++ b/test/CodeGen/Hexagon/vdmpy-halide-test.ll @@ -155,8 +155,8 @@ destructor_block: ; preds = %"for testOne.s0.x.x ; Function Attrs: nounwind readnone declare <16 x i32> @llvm.hexagon.V6.vdmpyhvsat(<16 x i32>, <16 x i32>) #1 -attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } -attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } +attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } !5 = !{!6, !6, i64 0} !6 = !{!"inputOne", !7} diff --git a/test/CodeGen/Hexagon/vec-pred-spill1.ll b/test/CodeGen/Hexagon/vec-pred-spill1.ll index d120295fa52cc..40b4a819ad615 100644 --- a/test/CodeGen/Hexagon/vec-pred-spill1.ll +++ b/test/CodeGen/Hexagon/vec-pred-spill1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv60 -O2 -enable-hexagon-hvx < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv60 -O2 -mattr=+hvxv60,hvx-length64b < %s | FileCheck %s ; CHECK: vmem(r{{[0-9]+}}+#3) = v{{[0-9]+}} ; CHECK: call puts diff --git a/test/CodeGen/Hexagon/vec-vararg-align.ll b/test/CodeGen/Hexagon/vec-vararg-align.ll index d4c6bd3ef61b8..0101c1ffa8a0e 100644 --- a/test/CodeGen/Hexagon/vec-vararg-align.ll +++ b/test/CodeGen/Hexagon/vec-vararg-align.ll @@ -27,4 +27,4 @@ b0: declare i32 @printf(i8*, ...) #0 declare void @VarVec1(i8*, i32, ...) 
#0 -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx" } +attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } diff --git a/test/CodeGen/Hexagon/vect/vect-extract-i1.ll b/test/CodeGen/Hexagon/vect/vect-extract-i1.ll new file mode 100644 index 0000000000000..8bcf1768b8825 --- /dev/null +++ b/test/CodeGen/Hexagon/vect/vect-extract-i1.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=hexagon < %s + +define i1 @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind { +entry: + %0 = add <4 x i8> %a, %b + %1 = bitcast <4 x i8> %0 to <32 x i1> + %2 = extractelement <32 x i1> %1, i32 0 + ret i1 %2 +} diff --git a/test/CodeGen/Hexagon/vect/vect-load-1.ll b/test/CodeGen/Hexagon/vect/vect-load-1.ll index fbaf61d545dac..0c3aaefa4ff59 100644 --- a/test/CodeGen/Hexagon/vect/vect-load-1.ll +++ b/test/CodeGen/Hexagon/vect/vect-load-1.ll @@ -1,11 +1,10 @@ ; RUN: llc -march=hexagon < %s -; Used to fail with "Cannot select: v2i32,ch = load 0x16c5890, 0x16f76e0, 0x16f76e0", 0x16c5890, 0x16f76e0, 0x16f76e0" +; +; Used to fail with "Cannot select: v2i32,ch = load 0x16c5890, 0x16f76e0, 0x16f76e0" -; ModuleID = 'bugpoint-reduced-simplified.bc' -target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32" target triple = "hexagon-unknown-linux-gnu" -define void @foo() nounwind { +define void @foo(<2 x i8>* %p) nounwind { entry: br label %polly.loop_header @@ -17,7 +16,7 @@ polly.loop_header: ; preds = %polly.loop_body, %e br i1 %0, label %polly.loop_body, label %polly.loop_after polly.loop_body: ; preds = %polly.loop_header - %_p_vec_full = load <2 x i8>, <2 x i8>* undef, align 8 + %_p_vec_full = load <2 x i8>, <2 x i8>* %p, align 8 %1 = sext <2 x i8> %_p_vec_full to <2 x i32> %p_vec = mul <2 x i32> %1, %mulp_vec = add <2 x i32> %p_vec, diff --git a/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll b/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll index d60d014607854..5ebc33726bbb1 100644 --- a/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll +++ b/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s -; CHECK: vmpybsu +; CHECK: vmpybu ; CHECK: vtrunehb define <4 x i8> @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind { diff --git a/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll b/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll index a84cd00234ea4..aee0437effd76 100644 --- a/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll +++ b/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s -; CHECK: vmpybsu -; CHECK: vmpybsu +; CHECK: vmpybu +; CHECK: vmpybu define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind { entry: diff --git a/test/CodeGen/Hexagon/vector-align.ll b/test/CodeGen/Hexagon/vector-align.ll index 557ee3f97f2e7..043839c704ae9 100644 --- a/test/CodeGen/Hexagon/vector-align.ll +++ b/test/CodeGen/Hexagon/vector-align.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-hexagon-hvx < %s \ +; RUN: llc -march=hexagon -mcpu=hexagonv60 -mattr=+hvxv60,hvx-length64b < %s \ ; RUN: | FileCheck %s ; Check that the store to Q6VecPredResult does not get expanded into multiple diff --git a/test/CodeGen/Hexagon/vload-postinc-sel.ll b/test/CodeGen/Hexagon/vload-postinc-sel.ll index 70ed3a9b1e8db..a3bed31071d18 100644 --- a/test/CodeGen/Hexagon/vload-postinc-sel.ll +++ b/test/CodeGen/Hexagon/vload-postinc-sel.ll @@ -49,4 +49,4 @@ call_destructor.exit: ; preds = %entry declare <32 x i32> @llvm.hexagon.V6.valignbi.128B(<32 x 
i32>, <32 x i32>, i32) #0 attributes #0 = { nounwind readnone } -attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" } +attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" } diff --git a/test/CodeGen/Hexagon/vmpa-halide-test.ll b/test/CodeGen/Hexagon/vmpa-halide-test.ll index 9c359900ba422..8b207ba4f2389 100644 --- a/test/CodeGen/Hexagon/vmpa-halide-test.ll +++ b/test/CodeGen/Hexagon/vmpa-halide-test.ll @@ -133,8 +133,8 @@ destructor_block: ; preds = %"for testOne.s0.x.x ; Function Attrs: nounwind readnone declare <32 x i32> @llvm.hexagon.V6.vmpabuuv(<32 x i32>, <32 x i32>) #1 -attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } -attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } +attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } !5 = !{!6, !6, i64 0} !6 = !{!"inputOne", !7} diff --git a/test/CodeGen/Hexagon/vpack_eo.ll b/test/CodeGen/Hexagon/vpack_eo.ll index 7238ca84a42e0..cf8619c0f0a5d 100644 --- a/test/CodeGen/Hexagon/vpack_eo.ll +++ b/test/CodeGen/Hexagon/vpack_eo.ll @@ -61,8 +61,8 @@ entry: ; Function Attrs: nounwind readnone declare <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32>, <16 x i32>) #1 -attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } -attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" } +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } +attributes #1 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length64b" } !4 = !{!5, !5, i64 0} !5 = !{!"InputOne", !6} diff --git a/test/CodeGen/Hexagon/vselect-pseudo.ll b/test/CodeGen/Hexagon/vselect-pseudo.ll index ef86e47e3959f..e6be3ee69c040 100644 --- a/test/CodeGen/Hexagon/vselect-pseudo.ll +++ b/test/CodeGen/Hexagon/vselect-pseudo.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon < %s +; RUN: llc -march=hexagon -mattr="+hvxv60,+hvx-length64b" < %s ; REQUIRES: asserts target triple = "hexagon" diff --git a/test/CodeGen/Hexagon/zextloadi1.ll b/test/CodeGen/Hexagon/zextloadi1.ll index 582120d0f355f..29ebf2e09275d 100644 --- a/test/CodeGen/Hexagon/zextloadi1.ll +++ b/test/CodeGen/Hexagon/zextloadi1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-cext=0 < %s | FileCheck %s @i65_l = external global i65 @i65_s = external global i65 diff --git a/test/CodeGen/Lanai/peephole-compare.mir b/test/CodeGen/Lanai/peephole-compare.mir index 51133b5e58e3f..a65660cbee445 100644 --- a/test/CodeGen/Lanai/peephole-compare.mir +++ b/test/CodeGen/Lanai/peephole-compare.mir @@ -11,23 +11,23 @@ # been sub.f %r3, 0, %r0 then it would have matched. 
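#
# (The CHECK patterns below spell this out: each test that matches expects a
# single flag-setting SUB_F_R feeding implicit-def %sr, with no separate
# compare of the subtraction result.)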
# CHECK-LABEL: name: test1a -# CHECK: [[IN1:%.*]] = COPY %r7 -# CHECK: [[IN2:%.*]] = COPY %r6 +# CHECK: [[IN1:%.*]]:gpr = COPY %r7 +# CHECK: [[IN2:%.*]]:gpr = COPY %r6 # CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr # CHECK-LABEL: name: test1b -# CHECK: [[IN1:%.*]] = COPY %r7 -# CHECK: [[IN2:%.*]] = COPY %r6 +# CHECK: [[IN1:%.*]]:gpr = COPY %r7 +# CHECK: [[IN2:%.*]]:gpr = COPY %r6 # CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr # CHECK-LABEL: name: test2a -# CHECK: [[IN1:%.*]] = COPY %r7 -# CHECK: [[IN2:%.*]] = COPY %r6 +# CHECK: [[IN1:%.*]]:gpr = COPY %r7 +# CHECK: [[IN2:%.*]]:gpr = COPY %r6 # CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr # CHECK-LABEL: name: test2b -# CHECK: [[IN1:%.*]] = COPY %r7 -# CHECK: [[IN2:%.*]] = COPY %r6 +# CHECK: [[IN1:%.*]]:gpr = COPY %r7 +# CHECK: [[IN2:%.*]]:gpr = COPY %r6 # CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr # CHECK-LABEL: name: test3 @@ -38,10 +38,10 @@ --- | target datalayout = "E-m:e-p:32:32-i64:64-a:0:32-n32-S64" target triple = "lanai-unknown-unknown" - + @a = global i32 -1, align 4 @b = global i32 0, align 4 - + define i32 @test0a(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { entry: %sub = sub i32 %b, %a @@ -49,14 +49,14 @@ %cond = select i1 %cmp, i32 %c, i32 %sub ret i32 %cond } - + define i32 @test0b(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { entry: %cmp = icmp eq i32 %b, %a %cond = select i1 %cmp, i32 %c, i32 %b ret i32 %cond } - + define i32 @test1a(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { entry: %sub = sub i32 %b, %a @@ -64,7 +64,7 @@ %cond = select i1 %cmp, i32 %c, i32 %d ret i32 %cond } - + define i32 @test1b(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { entry: %sub = sub i32 %b, %a @@ -72,7 +72,7 @@ %cond = select i1 %cmp, i32 %c, i32 %d ret i32 %cond } - + define i32 @test2a(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { entry: %sub = sub i32 %b, %a @@ -80,7 +80,7 @@ %cond = select i1 %cmp, i32 %c, i32 %d ret i32 %cond } - + define i32 @test2b(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { entry: %sub = sub i32 %b, %a @@ -88,7 +88,7 @@ %cond = select i1 %cmp, i32 %c, i32 %d ret i32 %cond } - + define i32 @test3(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { entry: %sub = sub i32 %b, %a @@ -96,38 +96,38 @@ %cond = select i1 %cmp, i32 %c, i32 %d ret i32 %cond } - + define i32 @test4(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { entry: %cmp = icmp ne i32 %a, 0 %cmp1 = icmp ult i32 %a, %b %or.cond = and i1 %cmp, %cmp1 br i1 %or.cond, label %return, label %if.end - + if.end: ; preds = %entry %cmp2 = icmp ne i32 %b, 0 %cmp4 = icmp ult i32 %b, %c %or.cond29 = and i1 %cmp2, %cmp4 br i1 %or.cond29, label %return, label %if.end6 - + if.end6: ; preds = %if.end %cmp7 = icmp ne i32 %c, 0 %cmp9 = icmp ult i32 %c, %d %or.cond30 = and i1 %cmp7, %cmp9 br i1 %or.cond30, label %return, label %if.end11 - + if.end11: ; preds = %if.end6 %cmp12 = icmp ne i32 %d, 0 %cmp14 = icmp ult i32 %d, %a %or.cond31 = and i1 %cmp12, %cmp14 %b. = select i1 %or.cond31, i32 %b, i32 21 ret i32 %b. 
- + return: ; preds = %if.end6, %if.end, %entry %retval.0 = phi i32 [ %c, %entry ], [ %d, %if.end ], [ %a, %if.end6 ] ret i32 %retval.0 } - + define void @testBB() { entry: %0 = load i32, i32* @a, align 4, !tbaa !0 @@ -135,36 +135,36 @@ %sub.i = sub i32 %1, %0 %tobool = icmp sgt i32 %sub.i, -1 br i1 %tobool, label %if.end, label %if.then - + if.then: ; preds = %entry %call1 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() br label %while.body - + while.body: ; preds = %while.body, %if.then br label %while.body - + if.end: ; preds = %entry %cmp.i = icmp slt i32 %sub.i, 1 br i1 %cmp.i, label %if.then4, label %if.end7 - + if.then4: ; preds = %if.end %call5 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() br label %while.body6 - + while.body6: ; preds = %while.body6, %if.then4 br label %while.body6 - + if.end7: ; preds = %if.end ret void } - + declare i32 @g(...) - + ; Function Attrs: nounwind declare void @llvm.stackprotector(i8*, i8**) #0 - + attributes #0 = { nounwind } - + !0 = !{!1, !1, i64 0} !1 = !{!"int", !2, i64 0} !2 = !{!"omnipotent char", !3, i64 0} @@ -176,18 +176,18 @@ name: test0a alignment: 2 exposesReturnsTwice: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } - { id: 4, class: gpr } - { id: 5, class: gpr } -liveins: +liveins: - { reg: '%r6', virtual-reg: '%0' } - { reg: '%r7', virtual-reg: '%1' } - { reg: '%r18', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -204,7 +204,7 @@ frameInfo: body: | bb.0.entry: liveins: %r6, %r7, %r18 - + %2 = COPY %r18 %1 = COPY %r7 %0 = COPY %r6 @@ -220,17 +220,17 @@ name: test0b alignment: 2 exposesReturnsTwice: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } - { id: 4, class: gpr } -liveins: +liveins: - { reg: '%r6', virtual-reg: '%0' } - { reg: '%r7', virtual-reg: '%1' } - { reg: '%r18', virtual-reg: '%2' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -247,7 +247,7 @@ frameInfo: body: | bb.0.entry: liveins: %r6, %r7, %r18 - + %2 = COPY %r18 %1 = COPY %r7 %0 = COPY %r6 @@ -262,19 +262,19 @@ name: test1a alignment: 2 exposesReturnsTwice: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } - { id: 4, class: gpr } - { id: 5, class: gpr } -liveins: +liveins: - { reg: '%r6', virtual-reg: '%0' } - { reg: '%r7', virtual-reg: '%1' } - { reg: '%r18', virtual-reg: '%2' } - { reg: '%r19', virtual-reg: '%3' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -291,7 +291,7 @@ frameInfo: body: | bb.0.entry: liveins: %r6, %r7, %r18, %r19 - + %3 = COPY %r19 %2 = COPY %r18 %1 = COPY %r7 @@ -308,19 +308,19 @@ name: test1b alignment: 2 exposesReturnsTwice: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } - { id: 4, class: gpr } - { id: 5, class: gpr } -liveins: +liveins: - { reg: '%r6', virtual-reg: '%0' } - { reg: '%r7', virtual-reg: '%1' } - { reg: '%r18', virtual-reg: '%2' } - { reg: '%r19', virtual-reg: '%3' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -337,7 +337,7 @@ frameInfo: body: | bb.0.entry: liveins: %r6, %r7, 
%r18, %r19 - + %3 = COPY %r19 %2 = COPY %r18 %1 = COPY %r7 @@ -354,19 +354,19 @@ name: test2a alignment: 2 exposesReturnsTwice: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } - { id: 4, class: gpr } - { id: 5, class: gpr } -liveins: +liveins: - { reg: '%r6', virtual-reg: '%0' } - { reg: '%r7', virtual-reg: '%1' } - { reg: '%r18', virtual-reg: '%2' } - { reg: '%r19', virtual-reg: '%3' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -383,7 +383,7 @@ frameInfo: body: | bb.0.entry: liveins: %r6, %r7, %r18, %r19 - + %3 = COPY %r19 %2 = COPY %r18 %1 = COPY %r7 @@ -400,19 +400,19 @@ name: test2b alignment: 2 exposesReturnsTwice: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } - { id: 4, class: gpr } - { id: 5, class: gpr } -liveins: +liveins: - { reg: '%r6', virtual-reg: '%0' } - { reg: '%r7', virtual-reg: '%1' } - { reg: '%r18', virtual-reg: '%2' } - { reg: '%r19', virtual-reg: '%3' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -429,7 +429,7 @@ frameInfo: body: | bb.0.entry: liveins: %r6, %r7, %r18, %r19 - + %3 = COPY %r19 %2 = COPY %r18 %1 = COPY %r7 @@ -446,19 +446,19 @@ name: test3 alignment: 2 exposesReturnsTwice: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } - { id: 3, class: gpr } - { id: 4, class: gpr } - { id: 5, class: gpr } -liveins: +liveins: - { reg: '%r6', virtual-reg: '%0' } - { reg: '%r7', virtual-reg: '%1' } - { reg: '%r18', virtual-reg: '%2' } - { reg: '%r19', virtual-reg: '%3' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -475,7 +475,7 @@ frameInfo: body: | bb.0.entry: liveins: %r6, %r7, %r18, %r19 - + %3 = COPY %r19 %2 = COPY %r18 %1 = COPY %r7 @@ -492,7 +492,7 @@ name: test4 alignment: 2 exposesReturnsTwice: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } @@ -516,12 +516,12 @@ registers: - { id: 20, class: gpr } - { id: 21, class: gpr } - { id: 22, class: gpr } -liveins: +liveins: - { reg: '%r6', virtual-reg: '%1' } - { reg: '%r7', virtual-reg: '%2' } - { reg: '%r18', virtual-reg: '%3' } - { reg: '%r19', virtual-reg: '%4' } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -539,7 +539,7 @@ body: | bb.0.entry: successors: %bb.4.return, %bb.1.if.end liveins: %r6, %r7, %r18, %r19 - + %4 = COPY %r19 %3 = COPY %r18 %2 = COPY %r7 @@ -554,10 +554,10 @@ body: | SFSUB_F_RI_LO killed %9, 0, implicit-def %sr BRCC %bb.4.return, 6, implicit %sr BT %bb.1.if.end - + bb.1.if.end: successors: %bb.4.return, %bb.2.if.end6 - + SFSUB_F_RI_LO %2, 0, implicit-def %sr %10 = SCC 6, implicit %sr SFSUB_F_RR %2, %3, implicit-def %sr @@ -567,10 +567,10 @@ body: | SFSUB_F_RI_LO killed %14, 0, implicit-def %sr BRCC %bb.4.return, 6, implicit %sr BT %bb.2.if.end6 - + bb.2.if.end6: successors: %bb.4.return, %bb.3.if.end11 - + SFSUB_F_RI_LO %3, 0, implicit-def %sr %15 = SCC 6, implicit %sr SFSUB_F_RR %3, %4, implicit-def %sr @@ -581,7 +581,7 @@ body: | SFSUB_F_RI_LO killed %19, 0, implicit-def %sr BRCC %bb.4.return, 6, implicit %sr BT %bb.3.if.end11 - + bb.3.if.end11: %20 = SLI 21 SFSUB_F_RR %4, %1, implicit-def %sr @@ -590,7 +590,7 @@ 
body: | %22 = SELECT killed %21, %20, 6, implicit %sr %rv = COPY %22 RET implicit %rca, implicit %rv - + bb.4.return: %0 = PHI %3, %bb.0.entry, %4, %bb.1.if.end, %1, %bb.2.if.end6 %rv = COPY %0 @@ -602,7 +602,7 @@ name: testBB alignment: 2 exposesReturnsTwice: false tracksRegLiveness: true -registers: +registers: - { id: 0, class: gpr } - { id: 1, class: gpr } - { id: 2, class: gpr } @@ -612,7 +612,7 @@ registers: - { id: 6, class: gpr } - { id: 7, class: gpr } - { id: 8, class: gpr } -frameInfo: +frameInfo: isFrameAddressTaken: false isReturnAddressTaken: false hasStackMap: false @@ -629,7 +629,7 @@ frameInfo: body: | bb.0.entry: successors: %bb.3.if.end, %bb.1.if.then - + %1 = MOVHI target-flags(lanai-hi) @a %2 = OR_I_LO killed %1, target-flags(lanai-lo) @a %3 = LDW_RI killed %2, 0, 0 :: (load 4 from @a, !tbaa !0) @@ -640,38 +640,38 @@ body: | SFSUB_F_RI_LO %0, 0, implicit-def %sr BRCC %bb.3.if.end, 10, implicit %sr BT %bb.1.if.then - + bb.1.if.then: successors: %bb.2.while.body - + ADJCALLSTACKDOWN 0, 0, implicit-def dead %sp, implicit %sp CALL @g, csr, implicit-def dead %rca, implicit %sp, implicit-def %sp, implicit-def %rv ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp - + bb.2.while.body: successors: %bb.2.while.body - + BT %bb.2.while.body - + bb.3.if.end: successors: %bb.4.if.then4, %bb.6.if.end7 liveins: %sr - + BRCC %bb.6.if.end7, 14, implicit %sr BT %bb.4.if.then4 - + bb.4.if.then4: successors: %bb.5.while.body6 - + ADJCALLSTACKDOWN 0, 0, implicit-def dead %sp, implicit %sp CALL @g, csr, implicit-def dead %rca, implicit %sp, implicit-def %sp, implicit-def %rv ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp - + bb.5.while.body6: successors: %bb.5.while.body6 - + BT %bb.5.while.body6 - + bb.6.if.end7: RET implicit %rca diff --git a/test/CodeGen/MIR/AArch64/atomic-memoperands.mir b/test/CodeGen/MIR/AArch64/atomic-memoperands.mir index 1c81f580bee53..2dfb61c53d5c8 100644 --- a/test/CodeGen/MIR/AArch64/atomic-memoperands.mir +++ b/test/CodeGen/MIR/AArch64/atomic-memoperands.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass none -o - %s | FileCheck %s --- | @@ -8,17 +9,19 @@ ... 
--- -# CHECK-LABEL: name: atomic_memoperands -# CHECK: %1(s64) = G_LOAD %0(p0) :: (load unordered 8) -# CHECK: %2(s32) = G_LOAD %0(p0) :: (load monotonic 4) -# CHECK: %3(s16) = G_LOAD %0(p0) :: (load acquire 2) -# CHECK: G_STORE %3(s16), %0(p0) :: (store release 2) -# CHECK: G_STORE %2(s32), %0(p0) :: (store acq_rel 4) -# CHECK: G_STORE %1(s64), %0(p0) :: (store syncscope("singlethread") seq_cst 8) name: atomic_memoperands body: | bb.0: + ; CHECK-LABEL: name: atomic_memoperands + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: (load unordered 8) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load monotonic 4) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load acquire 2) + ; CHECK: G_STORE [[LOAD2]](s16), [[COPY]](p0) :: (store release 2) + ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p0) :: (store acq_rel 4) + ; CHECK: G_STORE [[LOAD]](s64), [[COPY]](p0) :: (store syncscope("singlethread") seq_cst 8) + ; CHECK: RET_ReallyLR %0:_(p0) = COPY %x0 %1:_(s64) = G_LOAD %0(p0) :: (load unordered 8) %2:_(s32) = G_LOAD %0(p0) :: (load monotonic 4) diff --git a/test/CodeGen/MIR/AArch64/spill-fold.mir b/test/CodeGen/MIR/AArch64/spill-fold.mir index 05e7f7521ed53..f812bc710aaf2 100644 --- a/test/CodeGen/MIR/AArch64/spill-fold.mir +++ b/test/CodeGen/MIR/AArch64/spill-fold.mir @@ -59,7 +59,7 @@ body: | bb.0: %0 = COPY %wzr INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp - ; CHECK: undef %1.sub_32 = LDRWui %stack.0, 0 :: (load 4 from %stack.0) + ; CHECK: undef %1.sub_32:gpr64 = LDRWui %stack.0, 0 :: (load 4 from %stack.0) undef %1.sub_32 = COPY %0 %x0 = COPY %1 RET_ReallyLR implicit %x0 @@ -75,7 +75,7 @@ body: | bb.0: %0 = COPY %wzr INLINEASM $nop, 1, 12, implicit-def dead %x0, 12, implicit-def dead %x1, 12, implicit-def dead %x2, 12, implicit-def dead %x3, 12, implicit-def dead %x4, 12, implicit-def dead %x5, 12, implicit-def dead %x6, 12, implicit-def dead %x7, 12, implicit-def dead %x8, 12, implicit-def dead %x9, 12, implicit-def dead %x10, 12, implicit-def dead %x11, 12, implicit-def dead %x12, 12, implicit-def dead %x13, 12, implicit-def dead %x14, 12, implicit-def dead %x15, 12, implicit-def dead %x16, 12, implicit-def dead %x17, 12, implicit-def dead %x18, 12, implicit-def dead %x19, 12, implicit-def dead %x20, 12, implicit-def dead %x21, 12, implicit-def dead %x22, 12, implicit-def dead %x23, 12, implicit-def dead %x24, 12, implicit-def dead %x25, 12, implicit-def dead %x26, 12, implicit-def dead %x27, 12, implicit-def dead %x28, 12, implicit-def dead %fp, 12, implicit-def dead %lr, 12, implicit-def %sp - ; CHECK: undef %1.ssub = LDRSui %stack.0, 0 :: (load 4 from %stack.0) + ; CHECK: 
undef %1.ssub:fpr64 = LDRSui %stack.0, 0 :: (load 4 from %stack.0) undef %1.ssub = COPY %0 %d0 = COPY %1 RET_ReallyLR implicit %d0 diff --git a/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir b/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir index 06e0c8014b54d..6fc92e7358420 100644 --- a/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir +++ b/test/CodeGen/MIR/AArch64/stack-object-local-offset.mir @@ -26,8 +26,8 @@ frameInfo: # CHECK-LABEL: stack_local # CHECK: stack: # CHECK: - { id: 0, name: local_var, type: default, offset: 0, size: 8, alignment: 8, -# CHECK-NEXT: stack-id: 0, callee-saved-register: '', local-offset: -8, di-variable: '', -# CHECK-NEXT: di-expression: '', di-location: '' } +# CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: local-offset: -8, di-variable: '', di-expression: '', di-location: '' } stack: - { id: 0,name: local_var,offset: 0,size: 8,alignment: 8, local-offset: -8 } body: | diff --git a/test/CodeGen/MIR/AArch64/target-memoperands.mir b/test/CodeGen/MIR/AArch64/target-memoperands.mir index c71302d97e2e4..a3442f251359f 100644 --- a/test/CodeGen/MIR/AArch64/target-memoperands.mir +++ b/test/CodeGen/MIR/AArch64/target-memoperands.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass none -o - %s | FileCheck %s --- | @@ -8,15 +9,17 @@ ... --- -# CHECK-LABEL: name: target_memoperands -# CHECK: %1(s64) = G_LOAD %0(p0) :: ("aarch64-suppress-pair" load 8) -# CHECK: %2(s32) = G_LOAD %0(p0) :: ("aarch64-strided-access" load 4) -# CHECK: G_STORE %1(s64), %0(p0) :: ("aarch64-suppress-pair" store 8) -# CHECK: G_STORE %2(s32), %0(p0) :: ("aarch64-strided-access" store 4) name: target_memoperands body: | bb.0: + ; CHECK-LABEL: name: target_memoperands + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY %x0 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p0) :: ("aarch64-suppress-pair" load 8) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: ("aarch64-strided-access" load 4) + ; CHECK: G_STORE [[LOAD]](s64), [[COPY]](p0) :: ("aarch64-suppress-pair" store 8) + ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p0) :: ("aarch64-strided-access" store 4) + ; CHECK: RET_ReallyLR %0:_(p0) = COPY %x0 %1:_(s64) = G_LOAD %0(p0) :: ("aarch64-suppress-pair" load 8) %2:_(s32) = G_LOAD %0(p0) :: ("aarch64-strided-access" load 4) diff --git a/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir b/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir index c0251232fd5c7..cae8ed80d1654 100644 --- a/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir +++ b/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir @@ -111,7 +111,7 @@ # literal constant. 
# CHECK-LABEL: name: add_f32_1.0_one_f16_use -# CHECK: %13 = V_ADD_F16_e32 1065353216, killed %11, implicit %exec +# CHECK: %13:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit %exec name: add_f32_1.0_one_f16_use alignment: 0 @@ -170,9 +170,9 @@ body: | # operands # CHECK-LABEL: name: add_f32_1.0_multi_f16_use -# CHECK: %13 = V_MOV_B32_e32 1065353216, implicit %exec -# CHECK: %14 = V_ADD_F16_e32 killed %11, %13, implicit %exec -# CHECK: %15 = V_ADD_F16_e32 killed %12, killed %13, implicit %exec +# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1065353216, implicit %exec +# CHECK: %14:vgpr_32 = V_ADD_F16_e32 killed %11, %13, implicit %exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 killed %12, killed %13, implicit %exec name: add_f32_1.0_multi_f16_use @@ -238,8 +238,8 @@ body: | # immediate, and folded into the single f16 use as a literal constant # CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use -# CHECK: %15 = V_ADD_F16_e32 1065353216, %11, implicit %exec -# CHECK: %16 = V_ADD_F32_e32 1065353216, killed %13, implicit %exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit %exec +# CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit %exec name: add_f32_1.0_one_f32_use_one_f16_use alignment: 0 @@ -306,10 +306,10 @@ body: | # constant, and not folded as a multi-use literal for the f16 cases # CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use -# CHECK: %14 = V_MOV_B32_e32 1065353216, implicit %exec -# CHECK: %15 = V_ADD_F16_e32 %11, %14, implicit %exec -# CHECK: %16 = V_ADD_F16_e32 %12, %14, implicit %exec -# CHECK: %17 = V_ADD_F32_e32 1065353216, killed %13, implicit %exec +# CHECK: %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit %exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %11, %14, implicit %exec +# CHECK: %16:vgpr_32 = V_ADD_F16_e32 %12, %14, implicit %exec +# CHECK: %17:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit %exec name: add_f32_1.0_one_f32_use_multi_f16_use alignment: 0 @@ -375,9 +375,9 @@ body: | ... 
--- # CHECK-LABEL: name: add_i32_1_multi_f16_use -# CHECK: %13 = V_MOV_B32_e32 1, implicit %exec -# CHECK: %14 = V_ADD_F16_e32 1, killed %11, implicit %exec -# CHECK: %15 = V_ADD_F16_e32 1, killed %12, implicit %exec +# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1, implicit %exec +# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit %exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit %exec name: add_i32_1_multi_f16_use @@ -440,10 +440,10 @@ body: | --- # CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use -# CHECK: %14 = V_MOV_B32_e32 -2, implicit %exec -# CHECK: %15 = V_ADD_F16_e32 -2, %11, implicit %exec -# CHECK: %16 = V_ADD_F16_e32 -2, %12, implicit %exec -# CHECK: %17 = V_ADD_F32_e32 -2, killed %13, implicit %exec +# CHECK: %14:vgpr_32 = V_MOV_B32_e32 -2, implicit %exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit %exec +# CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit %exec +# CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit %exec name: add_i32_m2_one_f32_use_multi_f16_use alignment: 0 @@ -513,9 +513,9 @@ body: | # constant, and not folded as a multi-use literal for the f16 cases # CHECK-LABEL: name: add_f16_1.0_multi_f32_use -# CHECK: %13 = V_MOV_B32_e32 15360, implicit %exec -# CHECK: %14 = V_ADD_F32_e32 %11, %13, implicit %exec -# CHECK: %15 = V_ADD_F32_e32 %12, %13, implicit %exec +# CHECK: %13:vgpr_32 = V_MOV_B32_e32 15360, implicit %exec +# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit %exec +# CHECK: %15:vgpr_32 = V_ADD_F32_e32 %12, %13, implicit %exec name: add_f16_1.0_multi_f32_use alignment: 0 @@ -580,9 +580,9 @@ body: | # FIXME: Should be able to fold this # CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use -# CHECK: %13 = V_MOV_B32_e32 80886784, implicit %exec -# CHECK: %14 = V_ADD_F16_e32 %11, %13, implicit %exec -# CHECK: %15 = V_ADD_F16_e32 %12, %13, implicit %exec +# CHECK: %13:vgpr_32 = V_MOV_B32_e32 80886784, implicit %exec +# CHECK: %14:vgpr_32 = V_ADD_F16_e32 %11, %13, implicit %exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit %exec name: add_f16_1.0_other_high_bits_multi_f16_use alignment: 0 @@ -647,9 +647,9 @@ body: | # f32 instruction. # CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32 -# CHECK: %13 = V_MOV_B32_e32 305413120, implicit %exec -# CHECK: %14 = V_ADD_F32_e32 %11, %13, implicit %exec -# CHECK: %15 = V_ADD_F16_e32 %12, %13, implicit %exec +# CHECK: %13:vgpr_32 = V_MOV_B32_e32 305413120, implicit %exec +# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit %exec +# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit %exec name: add_f16_1.0_other_high_bits_use_f16_f32 alignment: 0 exposesReturnsTwice: false diff --git a/test/CodeGen/MIR/AMDGPU/fold-multiple.mir b/test/CodeGen/MIR/AMDGPU/fold-multiple.mir index a5da33a997d39..b9b6ee6887b64 100644 --- a/test/CodeGen/MIR/AMDGPU/fold-multiple.mir +++ b/test/CodeGen/MIR/AMDGPU/fold-multiple.mir @@ -14,8 +14,8 @@ # being processed twice. 
# CHECK-LABEL: name: test -# CHECK: %2 = V_LSHLREV_B32_e32 2, killed %0, implicit %exec -# CHECK: %4 = V_AND_B32_e32 8, killed %2, implicit %exec +# CHECK: %2:vgpr_32 = V_LSHLREV_B32_e32 2, killed %0, implicit %exec +# CHECK: %4:vgpr_32 = V_AND_B32_e32 8, killed %2, implicit %exec name: test tracksRegLiveness: true diff --git a/test/CodeGen/MIR/AMDGPU/intrinsics.mir b/test/CodeGen/MIR/AMDGPU/intrinsics.mir index cb6e6190990b0..52d3135261a64 100644 --- a/test/CodeGen/MIR/AMDGPU/intrinsics.mir +++ b/test/CodeGen/MIR/AMDGPU/intrinsics.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -run-pass none -o - %s | FileCheck %s --- | @@ -9,11 +10,12 @@ ... --- # Completely invalid code, but it checks that intrinsics round-trip properly. -# CHECK: %0(s64) = COPY intrinsic(@llvm.amdgcn.sbfe) name: use_intrin registers: - { id: 0, class: _ } body: | bb.0: + ; CHECK-LABEL: name: use_intrin + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY intrinsic(@llvm.amdgcn.sbfe) %0(s64) = COPY intrinsic(@llvm.amdgcn.sbfe.i32) ... diff --git a/test/CodeGen/MIR/AMDGPU/target-flags.mir b/test/CodeGen/MIR/AMDGPU/target-flags.mir index 7d288dd1b0450..e69a94b59ea72 100644 --- a/test/CodeGen/MIR/AMDGPU/target-flags.mir +++ b/test/CodeGen/MIR/AMDGPU/target-flags.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -run-pass none -o - %s | FileCheck %s --- | define amdgpu_kernel void @flags() { @@ -8,8 +9,6 @@ ... --- -# CHECK: SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead %scc -# CHECK: %1 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo name: flags liveins: @@ -22,6 +21,10 @@ registers: body: | bb.0: liveins: %sgpr0_sgpr1 + ; CHECK-LABEL: name: flags + ; CHECK: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead %scc + ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo + ; CHECK: S_ENDPGM %0 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead %scc %1 = S_MOV_B64 target-flags(amdgpu-gotprel) @foo diff --git a/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir index 312bf004a9cef..71d232b58cf84 100644 --- a/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir +++ b/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir @@ -43,11 +43,11 @@ body: | %0 = LD_f32_avar 0, 4, 1, 2, 32, $test_param_0 %1 = CVT_f64_f32 %0, 0 %2 = LD_i32_avar 0, 4, 1, 0, 32, $test_param_1 - ; CHECK: %3 = FADD_rnf64ri %1, double 3.250000e+00 + ; CHECK: %3:float64regs = FADD_rnf64ri %1, double 3.250000e+00 %3 = FADD_rnf64ri %1, double 3.250000e+00 %4 = CVT_f32_f64 %3, 5 %5 = CVT_f32_s32 %2, 5 - ; CHECK: %6 = FADD_rnf32ri %5, float 6.250000e+00 + ; CHECK: %6:float32regs = FADD_rnf32ri %5, float 6.250000e+00 %6 = FADD_rnf32ri %5, float 6.250000e+00 %7 = FMUL_rnf32rr %6, %4 StoreRetvalF32 %7, 0 @@ -69,11 +69,11 @@ body: | %0 = LD_f32_avar 0, 4, 1, 2, 32, $test2_param_0 %1 = CVT_f64_f32 %0, 0 %2 = LD_i32_avar 0, 4, 1, 0, 32, $test2_param_1 - ; CHECK: %3 = FADD_rnf64ri %1, double 0x7FF8000000000000 + ; CHECK: %3:float64regs = FADD_rnf64ri %1, double 0x7FF8000000000000 %3 = FADD_rnf64ri %1, double 0x7FF8000000000000 %4 = CVT_f32_f64 %3, 5 %5 = CVT_f32_s32 %2, 
5 - ; CHECK: %6 = FADD_rnf32ri %5, float 0x7FF8000000000000 + ; CHECK: %6:float32regs = FADD_rnf32ri %5, float 0x7FF8000000000000 %6 = FADD_rnf32ri %5, float 0x7FF8000000000000 %7 = FMUL_rnf32rr %6, %4 StoreRetvalF32 %7, 0 diff --git a/test/CodeGen/MIR/X86/callee-saved-info.mir b/test/CodeGen/MIR/X86/callee-saved-info.mir index 2a62b4e4f48bf..886465148aee5 100644 --- a/test/CodeGen/MIR/X86/callee-saved-info.mir +++ b/test/CodeGen/MIR/X86/callee-saved-info.mir @@ -50,15 +50,16 @@ frameInfo: adjustsStack: true hasCalls: true # CHECK: fixedStack: -# CHECK: callee-saved-register: '%rbx' } +# CHECK: callee-saved-register: '%rbx', callee-saved-restored: true } fixedStack: - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, callee-saved-register: '%rbx' } # CHECK: stack: # CHECK-NEXT: - { id: 0 -# CHECK: callee-saved-register: '%edi' +# CHECK: callee-saved-register: '%edi', callee-saved-restored: false stack: - { id: 0, name: b, offset: -20, size: 4, alignment: 4 } - - { id: 1, offset: -24, size: 4, alignment: 4, callee-saved-register: '%edi' } + - { id: 1, offset: -24, size: 4, alignment: 4, callee-saved-register: '%edi', + callee-saved-restored: false } body: | bb.0.entry: successors: %bb.1.check diff --git a/test/CodeGen/MIR/X86/generic-instr-type.mir b/test/CodeGen/MIR/X86/generic-instr-type.mir index 78951de70a3cc..c9835923c441b 100644 --- a/test/CodeGen/MIR/X86/generic-instr-type.mir +++ b/test/CodeGen/MIR/X86/generic-instr-type.mir @@ -37,18 +37,18 @@ registers: body: | bb.0: liveins: %edi, %xmm0 - ; CHECK: %1(s32) = G_ADD %0 + ; CHECK: %1:_(s32) = G_ADD %0 %0(s32) = COPY %edi %6(<2 x s32>) = COPY %xmm0 %7(s64) = COPY %rdi %1(s32) = G_ADD %0, %0 - ; CHECK: %2(<2 x s32>) = G_ADD %6, %6 + ; CHECK: %2:_(<2 x s32>) = G_ADD %6, %6 %2(<2 x s32>) = G_ADD %6, %6 - ; CHECK: %3(s64) = G_ADD %7, %7 + ; CHECK: %3:_(s64) = G_ADD %7, %7 %3(s64) = G_ADD %7, %7 - ; CHECK: %5(s48) = G_ADD %8, %8 + ; CHECK: %5:_(s48) = G_ADD %8, %8 %8(s48) = G_TRUNC %7 %5(s48) = G_ADD %8, %8 ... 
diff --git a/test/CodeGen/MIR/X86/metadata-operands.mir b/test/CodeGen/MIR/X86/metadata-operands.mir index 758f3031465bc..501d0c58a635e 100644 --- a/test/CodeGen/MIR/X86/metadata-operands.mir +++ b/test/CodeGen/MIR/X86/metadata-operands.mir @@ -50,7 +50,7 @@ stack: body: | bb.0.entry: liveins: %edi - ; CHECK: %0 = COPY %edi + ; CHECK: %0:gr32 = COPY %edi ; CHECK-NEXT: DBG_VALUE _, 0, !11, !DIExpression() %0 = COPY %edi DBG_VALUE _, 0, !12, !DIExpression() diff --git a/test/CodeGen/MIR/X86/roundtrip.mir b/test/CodeGen/MIR/X86/roundtrip.mir index c697f73060416..9679b52f2bac3 100644 --- a/test/CodeGen/MIR/X86/roundtrip.mir +++ b/test/CodeGen/MIR/X86/roundtrip.mir @@ -6,8 +6,8 @@ # CHECK: - { id: 1, class: gr32, preferred-register: '' } # CHECK: body: | # CHECK: bb.0: -# CHECK: %0 = MOV32r0 implicit-def %eflags -# CHECK: dead %1 = COPY %0 +# CHECK: %0:gr32 = MOV32r0 implicit-def %eflags +# CHECK: dead %1:gr32 = COPY %0 # CHECK: MOV32mr undef %rcx, 1, _, 0, _, killed %0 :: (volatile store 4) # CHECK: RETQ undef %eax name: func0 diff --git a/test/CodeGen/MIR/X86/simple-register-allocation-read-undef.mir b/test/CodeGen/MIR/X86/simple-register-allocation-read-undef.mir new file mode 100644 index 0000000000000..ff8fbe297c98b --- /dev/null +++ b/test/CodeGen/MIR/X86/simple-register-allocation-read-undef.mir @@ -0,0 +1,30 @@ +# RUN: llc -mtriple=x86_64-- %s -o - -run-pass=simple-register-coalescing | FileCheck %s +--- +name: f +body: | + bb.0: + JB_1 %bb.2, undef implicit killed %eflags + JMP_1 %bb.1 + + bb.1: + %0 : gr64 = IMPLICIT_DEF + NOOP implicit-def undef %1.sub_32bit : gr64 + NOOP implicit-def %1.sub_16bit : gr64 + JMP_1 %bb.3 + + bb.2: + NOOP implicit-def %0 + %1 = COPY %0 + + bb.3: + NOOP implicit killed %0 + NOOP implicit killed %1 +... + +# We should have a setting of both sub_32bit and sub_16bit. The first one +# should be undef and not dead, and the second should not be undef. 
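+#
+# Rationale: the write to sub_32bit is the first def of any part of %1, so
+# there is no earlier value to preserve and the operand should be undef (but
+# not dead, because %1 is still read in bb.3); the later write to sub_16bit
+# leaves the sub_32bit value live, so it must not be undef.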
+ +# CHECK-NOT: dead +# CHECK: NOOP implicit-def undef %1.sub_32bit +# CHECK-NOT: undef +# CHECK-NEXT: NOOP implicit-def %1.sub_16bit diff --git a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir index 86e735e616e50..b292a023d5217 100644 --- a/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir +++ b/test/CodeGen/MIR/X86/spill-slot-fixed-stack-objects.mir @@ -20,7 +20,7 @@ frameInfo: maxAlignment: 4 # CHECK: fixedStack: # CHECK-NEXT: - { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4, stack-id: 0, -# CHECK-NEXT: callee-saved-register: '' } +# CHECK-NEXT: callee-saved-register: '', callee-saved-restored: true } fixedStack: - { id: 0, type: spill-slot, offset: 0, size: 4, alignment: 4 } stack: diff --git a/test/CodeGen/MIR/X86/stack-object-debug-info.mir b/test/CodeGen/MIR/X86/stack-object-debug-info.mir index 5c70582233e52..554d73b909de2 100644 --- a/test/CodeGen/MIR/X86/stack-object-debug-info.mir +++ b/test/CodeGen/MIR/X86/stack-object-debug-info.mir @@ -51,8 +51,8 @@ frameInfo: # CHECK-LABEL: foo # CHECK: stack: # CHECK: - { id: 0, name: y.i, type: default, offset: 0, size: 256, alignment: 16, -# CHECK-NEXT: callee-saved-register: '', di-variable: '!4', di-expression: '!DIExpression()', -# CHECK-NEXT: di-location: '!10' } +# CHECK-NEXT: callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: di-variable: '!4', di-expression: '!DIExpression()', di-location: '!10' } stack: - { id: 0, name: y.i, offset: 0, size: 256, alignment: 16, di-variable: '!4', di-expression: '!DIExpression()', di-location: '!7' } diff --git a/test/CodeGen/MIR/X86/stack-object-operands.mir b/test/CodeGen/MIR/X86/stack-object-operands.mir index 1c5208ee30eaf..262b6dcb3993d 100644 --- a/test/CodeGen/MIR/X86/stack-object-operands.mir +++ b/test/CodeGen/MIR/X86/stack-object-operands.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=x86 -run-pass none -o - %s | FileCheck %s # This test ensures that the MIR parser parses stack object machine operands # correctly. 
@@ -29,12 +30,14 @@ stack: - { id: 0, name: b, size: 4, alignment: 4 } - { id: 1, size: 4, alignment: 4 } body: | - ; CHECK: bb.0.entry: - ; CHECK-NEXT: %0 = MOV32rm %fixed-stack.0, 1, _, 0, _ - ; CHECK-NEXT: MOV32mr %stack.0.b, 1, _, 0, _, %0 - ; CHECK-NEXT: MOV32mi %stack.1, 1, _, 0, _, 2 - ; CHECK-NEXT: %1 = MOV32rm %stack.0.b, 1, _, 0, _ bb.0.entry: + ; CHECK-LABEL: name: test + ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, _, 0, _ + ; CHECK: MOV32mr %stack.0.b, 1, _, 0, _, [[MOV32rm]] + ; CHECK: MOV32mi %stack.1, 1, _, 0, _, 2 + ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.0.b, 1, _, 0, _ + ; CHECK: %eax = COPY [[MOV32rm1]] + ; CHECK: RETL %eax %0 = MOV32rm %fixed-stack.0, 1, _, 0, _ MOV32mr %stack.0.b, 1, _, 0, _, %0 MOV32mi %stack.1, 1, _, 0, _, 2 diff --git a/test/CodeGen/MIR/X86/stack-objects.mir b/test/CodeGen/MIR/X86/stack-objects.mir index ea3e8410df436..a8492a82fe5e4 100644 --- a/test/CodeGen/MIR/X86/stack-objects.mir +++ b/test/CodeGen/MIR/X86/stack-objects.mir @@ -22,14 +22,14 @@ frameInfo: maxAlignment: 8 # CHECK: stack: # CHECK-NEXT: - { id: 0, name: b, type: default, offset: -12, size: 4, alignment: 4, -# CHECK-NEXT: stack-id: 0, callee-saved-register: '', di-variable: '', di-expression: '', -# CHECK-NEXT: di-location: '' } +# CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: di-variable: '', di-expression: '', di-location: '' } # CHECK-NEXT: - { id: 1, name: x, type: default, offset: -24, size: 8, alignment: 8, -# CHECK-NEXT: stack-id: 0, callee-saved-register: '', di-variable: '', di-expression: '', -# CHECK-NEXT: di-location: '' } +# CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: di-variable: '', di-expression: '', di-location: '' } # CHECK-NEXT: - { id: 2, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4, -# CHECK-NEXT: stack-id: 0, callee-saved-register: '', di-variable: '', di-expression: '', -# CHECK-NEXT: di-location: '' } +# CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: di-variable: '', di-expression: '', di-location: '' } stack: - { id: 0, name: b, offset: -12, size: 4, alignment: 4 } - { id: 1, name: x, offset: -24, size: 8, alignment: 8 } diff --git a/test/CodeGen/MIR/X86/subregister-index-operands.mir b/test/CodeGen/MIR/X86/subregister-index-operands.mir index e6c7c6e2e4ce6..e3c5b9d17eecf 100644 --- a/test/CodeGen/MIR/X86/subregister-index-operands.mir +++ b/test/CodeGen/MIR/X86/subregister-index-operands.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=x86-64 -run-pass none -o - %s | FileCheck %s # This test ensures that the MIR parser parses and prints subregisters index # operands correctly. @@ -11,10 +12,6 @@ ... 
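The assertions regenerated in these files lean on FileCheck pattern variables, so a short reminder of the syntax may help (the capture names are whatever update_mir_test_checks.py derives from the defining opcode; the sample lines are lifted from the regenerated checks below):

    ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY %edi               ; bind the matched vreg number to COPY
    ; CHECK: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit  ; reuse the binding on a later line

[[NAME:regex]] defines a variable from the matched text and [[NAME]] matches that value afterwards, which keeps the tests stable even when virtual register numbering shifts.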
--- -# CHECK-LABEL: name: t -# CHECK: %0 = INSERT_SUBREG %edi, %al, {{[0-9]+}} -# CHECK: %1 = EXTRACT_SUBREG %eax, {{[0-9]+}} -# CHECK: %ax = REG_SEQUENCE %1, {{[0-9]+}}, %1, {{[0-9]+}} name: t tracksRegLiveness: true registers: @@ -23,6 +20,12 @@ registers: body: | bb.0.entry: liveins: %edi, %eax + ; CHECK-LABEL: name: t + ; CHECK: liveins: %edi, %eax + ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:gr32 = INSERT_SUBREG %edi, %al, 1 + ; CHECK: [[EXTRACT_SUBREG:%[0-9]+]]:gr8 = EXTRACT_SUBREG %eax, 2 + ; CHECK: %ax = REG_SEQUENCE [[EXTRACT_SUBREG]], 1, [[EXTRACT_SUBREG]], 2 + ; CHECK: RETQ %ax %0 = INSERT_SUBREG %edi, %al, %subreg.sub_8bit %1 = EXTRACT_SUBREG %eax, %subreg.sub_8bit_hi %ax = REG_SEQUENCE %1, %subreg.sub_8bit, %1, %subreg.sub_8bit_hi diff --git a/test/CodeGen/MIR/X86/subregister-operands.mir b/test/CodeGen/MIR/X86/subregister-operands.mir index 6dd44aec07a98..caf342e26716b 100644 --- a/test/CodeGen/MIR/X86/subregister-operands.mir +++ b/test/CodeGen/MIR/X86/subregister-operands.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=x86-64 -run-pass none -o - %s | FileCheck %s # This test ensures that the MIR parser parses subregisters in register operands # correctly. @@ -20,8 +21,13 @@ registers: body: | bb.0.entry: liveins: %edi - ; CHECK: %0 = COPY %edi - ; CHECK-NEXT: %1 = COPY %0.sub_8bit + ; CHECK-LABEL: name: t + ; CHECK: liveins: %edi + ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY %edi + ; CHECK: [[COPY1:%[0-9]+]]:gr8 = COPY [[COPY]].sub_8bit + ; CHECK: [[AND8ri:%[0-9]+]]:gr8 = AND8ri [[COPY1]], 1, implicit-def %eflags + ; CHECK: %al = COPY [[AND8ri]] + ; CHECK: RETQ %al %0 = COPY %edi %1 = COPY %0.sub_8bit %2 = AND8ri %1, 1, implicit-def %eflags diff --git a/test/CodeGen/MIR/X86/unreachable-mbb-undef-phi.mir b/test/CodeGen/MIR/X86/unreachable-mbb-undef-phi.mir new file mode 100644 index 0000000000000..52867e5744570 --- /dev/null +++ b/test/CodeGen/MIR/X86/unreachable-mbb-undef-phi.mir @@ -0,0 +1,38 @@ +# RUN: llc -march=x86-64 %s -o - -run-pass=processimpdefs -run-pass=unreachable-mbb-elimination | FileCheck %s +--- +name: f +tracksRegLiveness: true +registers: + - { id: 0, class: gr32, preferred-register: '' } + - { id: 1, class: gr32, preferred-register: '' } + - { id: 2, class: gr32, preferred-register: '' } +body: | + bb.0: + %0 = IMPLICIT_DEF + JMP_1 %bb.1 + + bb.1: + %1 = PHI %0, %bb.0, %2, %bb.2 + %2 = ADD32ri8 killed %1, 1, implicit-def %eflags + JMP_1 %bb.3 + + bb.2: + JMP_1 %bb.1 + + bb.3: +... + +# bb2 above is dead and should be removed and the PHI should be replaced with a +# COPY from an undef value since the bb0 value in the PHI is undef. 
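A rough before/after sketch of what the checks below expect (condensed from the MIR above, using the passes named in the RUN line): once unreachable-mbb-elimination deletes the dead bb.2, the PHI in bb.1 is left with only its bb.0 input, and since that input is an IMPLICIT_DEF the PHI degenerates into a copy of an undef value.

    ; before: %1 = PHI %0, %bb.0, %2, %bb.2   (%0 is an IMPLICIT_DEF; bb.2 is unreachable)
    ; after:  %1:gr32 = COPY undef %0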
+ +# CHECK: bb.0: +# CHECK: successors: %bb.1 +# CHECK: JMP_1 %bb.1 + +# CHECK: bb.1: +# CHECK: successors: %bb.2 +# CHECK: [[TMP1:%[0-9]+]]:gr32 = COPY undef %{{[0-9]+}} +# CHECK: %{{[0-9]+}}:gr32 = ADD32ri8 killed [[TMP1]], 1 +# CHECK: JMP_1 %bb.2 + +# CHECK: bb.2: diff --git a/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir b/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir index 726ea87fb4402..e3c331a780a1f 100644 --- a/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir +++ b/test/CodeGen/MIR/X86/variable-sized-stack-objects.mir @@ -25,11 +25,11 @@ frameInfo: adjustsStack: true # CHECK: stack: # CHECK-NEXT: - { id: 0, name: '', type: default, offset: -20, size: 4, alignment: 4, -# CHECK-NEXT: stack-id: 0, callee-saved-register: '', di-variable: '', di-expression: '', -# CHECK-NEXT: di-location: '' } +# CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: di-variable: '', di-expression: '', di-location: '' } # CHECK-NEXT: - { id: 1, name: '', type: default, offset: -32, size: 8, alignment: 8, -# CHECK-NEXT: stack-id: 0, callee-saved-register: '', di-variable: '', di-expression: '', -# CHECK-NEXT: di-location: '' } +# CHECK-NEXT: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: di-variable: '', di-expression: '', di-location: '' } # CHECK-NEXT: - { id: 2, name: y, type: variable-sized, offset: -32, alignment: 1, stack: - { id: 0, offset: -20, size: 4, alignment: 4 } diff --git a/test/CodeGen/MIR/X86/virtual-registers.mir b/test/CodeGen/MIR/X86/virtual-registers.mir index 0d181f895aa9b..6e298910dcb62 100644 --- a/test/CodeGen/MIR/X86/virtual-registers.mir +++ b/test/CodeGen/MIR/X86/virtual-registers.mir @@ -44,15 +44,15 @@ body: | bb.0.entry: successors: %bb.2.exit, %bb.1.less liveins: %edi - ; CHECK: %0 = COPY %edi - ; CHECK-NEXT: %1 = SUB32ri8 %0, 10 + ; CHECK: %0:gr32 = COPY %edi + ; CHECK-NEXT: %1:gr32 = SUB32ri8 %0, 10 %0 = COPY %edi %1 = SUB32ri8 %0, 10, implicit-def %eflags JG_1 %bb.2.exit, implicit %eflags JMP_1 %bb.1.less bb.1.less: - ; CHECK: %2 = MOV32r0 + ; CHECK: %2:gr32 = MOV32r0 ; CHECK-NEXT: %eax = COPY %2 %2 = MOV32r0 implicit-def %eflags %eax = COPY %2 @@ -78,15 +78,15 @@ body: | bb.0.entry: successors: %bb.2.exit, %bb.1.less liveins: %edi - ; CHECK: %0 = COPY %edi - ; CHECK-NEXT: %1 = SUB32ri8 %0, 10 + ; CHECK: %0:gr32 = COPY %edi + ; CHECK-NEXT: %1:gr32 = SUB32ri8 %0, 10 %2 = COPY %edi %0 = SUB32ri8 %2, 10, implicit-def %eflags JG_1 %bb.2.exit, implicit %eflags JMP_1 %bb.1.less bb.1.less: - ; CHECK: %2 = MOV32r0 + ; CHECK: %2:gr32 = MOV32r0 ; CHECK-NEXT: %eax = COPY %2 %10 = MOV32r0 implicit-def %eflags %eax = COPY %10 diff --git a/test/CodeGen/Mips/cstmaterialization/constMaterialization.ll b/test/CodeGen/Mips/cstmaterialization/constMaterialization.ll new file mode 100644 index 0000000000000..f34c70efa7a80 --- /dev/null +++ b/test/CodeGen/Mips/cstmaterialization/constMaterialization.ll @@ -0,0 +1,136 @@ +; RUN: llc -march=mips < %s | FileCheck %s -check-prefixes=ALL,MIPS +; RUN: llc -march=mips < %s -mattr=+micromips | FileCheck %s -check-prefixes=ALL,MM + +; Test the patterns used for constant materialization. 
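The boundary values exercised below follow from the immediate encodings, summarized here as a sketch of the selection rules the test encodes (not normative documentation): addiu sign-extends a 16-bit immediate, ori zero-extends one, lui writes only the upper halfword, anything else takes a lui/ori pair, and microMIPS adds a 16-bit li16 form whose boundaries checked here are -1 and 126.

    addiu $2, $zero, imm    ; imm in [-32768, 32767]
    ori   $2, $zero, imm    ; imm in [32768, 65535] (low halfword only)
    lui   $2, 1             ; 65536  = 0x00010000
    lui   $1, 1
    ori   $2, $1, 1         ; 65537  = 0x00010001 needs both halves
    lui   $1, 65535
    ori   $2, $1, 32767     ; -32769 = 0xffff7fff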
+ +; Constants generated using li16 +define i32 @Li16LowBoundary() { +entry: + ; ALL-LABEL: Li16LowBoundary: + ; MIPS: addiu $2, $zero, -1 + ; MM: li16 $2, -1 + ; ALL-NOT: lui + ; ALL-NOT: ori + ; MIPS-NOT: li16 + ; MM-NOT: addiu + + ret i32 -1 +} + +define i32 @Li16HighBoundary() { +entry: + ; ALL-LABEL: Li16HighBoundary: + ; MIPS: addiu $2, $zero, 126 + ; MM: li16 $2, 126 + ; ALL-NOT: lui + ; ALL-NOT: ori + ; MM-NOT: addiu + ; MIPS-NOT: li16 + + ret i32 126 +} + +; Constants generated using addiu +define i32 @AddiuLowBoundary() { +entry: + ; ALL-LABEL: AddiuLowBoundary: + ; ALL: addiu $2, $zero, -32768 + ; ALL-NOT: lui + ; ALL-NOT: ori + ; ALL-NOT: li16 + + ret i32 -32768 +} + +define i32 @AddiuZero() { +entry: + ; ALL-LABEL: AddiuZero: + ; MIPS: addiu $2, $zero, 0 + ; MM: li16 $2, 0 + ; ALL-NOT: lui + ; ALL-NOT: ori + ; MIPS-NOT: li16 + ; MM-NOT: addiu + + ret i32 0 +} + +define i32 @AddiuHighBoundary() { +entry: + ; ALL-LABEL: AddiuHighBoundary: + ; ALL: addiu $2, $zero, 32767 + ; ALL-NOT: lui + ; ALL-NOT: ori + ; ALL-NOT: li16 + + ret i32 32767 +} + +; Constants generated using ori +define i32 @OriLowBoundary() { +entry: + ; ALL-LABEL: OriLowBoundary: + ; ALL: ori $2, $zero, 32768 + ; ALL-NOT: addiu + ; ALL-NOT: lui + ; ALL-NOT: li16 + + ret i32 32768 +} + +define i32 @OriHighBoundary() { +entry: + ; ALL-LABEL: OriHighBoundary: + ; ALL: ori $2, $zero, 65535 + ; ALL-NOT: addiu + ; ALL-NOT: lui + ; ALL-NOT: li16 + + ret i32 65535 +} + +; Constants generated using lui +define i32 @LuiPositive() { +entry: + ; ALL-LABEL: LuiPositive: + ; ALL: lui $2, 1 + ; ALL-NOT: addiu + ; ALL-NOT: ori + ; ALL-NOT: li16 + + ret i32 65536 +} + +define i32 @LuiNegative() { +entry: + ; ALL-LABEL: LuiNegative: + ; ALL: lui $2, 65535 + ; ALL-NOT: addiu + ; ALL-NOT: ori + ; ALL-NOT: li16 + + ret i32 -65536 +} + +; Constants generated using a combination of lui and ori +define i32 @LuiWithLowBitsSet() { +entry: + ; ALL-LABEL: LuiWithLowBitsSet: + ; ALL: lui $1, 1 + ; ALL: ori $2, $1, 1 + ; ALL-NOT: addiu + ; ALL-NOT: li16 + + ret i32 65537 +} + +define i32 @BelowAddiuLowBoundary() { +entry: + ; ALL-LABEL: BelowAddiuLowBoundary: + ; ALL: lui $1, 65535 + ; ALL: ori $2, $1, 32767 + ; ALL-NOT: addiu + ; ALL-NOT: li16 + + ret i32 -32769 +} diff --git a/test/CodeGen/Mips/dsp-spill-reload.ll b/test/CodeGen/Mips/dsp-spill-reload.ll new file mode 100644 index 0000000000000..871a450171582 --- /dev/null +++ b/test/CodeGen/Mips/dsp-spill-reload.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=mips -mattr=+dsp < %s -asm-show-inst -O0 | FileCheck %s \ +; RUN: --check-prefixes=ASM,ALL +; RUN: llc -march=mips -mattr=+dsp,+micromips < %s -O0 -filetype=obj | \ +; RUN: llvm-objdump -d - | FileCheck %s --check-prefixes=MM-OBJ,ALL + +; Test that spill and reloads use the dsp "variant" instructions. We use -O0 +; to use the simple register allocator. + +; To test the micromips output, we have to take a round trip through the +; object file encoder/decoder as the instruction mapping tables are used to +; support micromips. + +; FIXME: We should be able to get rid of those instructions with the variable +; value registers. 
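To make the round-trip concrete, the two pipelines the RUN lines above set up, restated (flags exactly as in the test): the first inspects llc's textual assembly, where -asm-show-inst exposes the DSP spill/reload opcode names (SWDSP/LWDSP) in the instruction comments, while the second encodes to an object file and disassembles it, after which only the plain sw/lw mnemonics remain visible.

    llc -march=mips -mattr=+dsp < %s -asm-show-inst -O0                                # ASM prefix: opcode names visible
    llc -march=mips -mattr=+dsp,+micromips < %s -O0 -filetype=obj | llvm-objdump -d -  # MM-OBJ prefix: plain sw/lw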
+ + ; ALL-LABEL: spill_reload: + + define <4 x i8> @spill_reload(<4 x i8> %a, <4 x i8> %b, i32 %g) { + entry: + %c = tail call <4 x i8> @llvm.mips.addu.qb(<4 x i8> %a, <4 x i8> %b) + %cond = icmp eq i32 %g, 0 + br i1 %cond, label %true, label %end + + ; ASM: SWDSP + ; ASM: SWDSP + ; ASM: SWDSP + + ; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp) + ; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp) + ; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp) + ; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp) + + true: + ret <4 x i8> %c + + ; ASM: LWDSP + + ; MM-OBJ: lw ${{[0-9]+}}, {{[0-9]+}}($sp) + + end: + %d = tail call <4 x i8> @llvm.mips.addu.qb(<4 x i8> %c, <4 x i8> %a) + ret <4 x i8> %d + + ; ASM: LWDSP + ; ASM: LWDSP + + ; MM-OBJ: lw ${{[0-9]+}}, {{[0-9]+}}($sp) + ; MM-OBJ: lw ${{[0-9]+}}, {{[0-9]+}}($sp) + + } + + declare <4 x i8> @llvm.mips.addu.qb(<4 x i8>, <4 x i8>) nounwind diff --git a/test/CodeGen/Mips/llvm-ir/and.ll b/test/CodeGen/Mips/llvm-ir/and.ll index c26b60d0ff9a5..18d7a439f62ae 100644 --- a/test/CodeGen/Mips/llvm-ir/and.ll +++ b/test/CodeGen/Mips/llvm-ir/and.ll @@ -37,10 +37,12 @@ entry: ; GP32: and $2, $4, $5 - ; GP64: and $2, $4, $5 + ; GP64: and $1, $4, $5 + + ; MM32: and16 $[[T0:[0-9]+]], $5 + ; MM32: move $2, $[[T0]] - ; MM: and16 $[[T0:[0-9]+]], $5 - ; MM: move $2, $[[T0]] + ; MM64: and $1, $4, $5 %r = and i1 %a, %b ret i1 %r @@ -52,10 +54,12 @@ entry: ; GP32: and $2, $4, $5 - ; GP64: and $2, $4, $5 + ; GP64: and $1, $4, $5 - ; MM: and16 $[[T0:[0-9]+]], $5 - ; MM: move $2, $[[T0]] + ; MM32: and16 $[[T0:[0-9]+]], $5 + ; MM32: move $2, $[[T0]] + + ; MM64: and $1, $4, $5 %r = and i8 %a, %b ret i8 %r @@ -67,10 +71,12 @@ entry: ; GP32: and $2, $4, $5 - ; GP64: and $2, $4, $5 + ; GP64: and $1, $4, $5 + + ; MM32: and16 $[[T0:[0-9]+]], $5 + ; MM32: move $2, $[[T0]] - ; MM: and16 $[[T0:[0-9]+]], $5 - ; MM: move $2, $[[T0]] + ; MM64: and $1, $4, $5 %r = and i16 %a, %b ret i16 %r diff --git a/test/CodeGen/Mips/llvm-ir/not.ll b/test/CodeGen/Mips/llvm-ir/not.ll index 914b6164ad00a..ab7a3c4613a26 100644 --- a/test/CodeGen/Mips/llvm-ir/not.ll +++ b/test/CodeGen/Mips/llvm-ir/not.ll @@ -135,7 +135,10 @@ define signext i1 @nor_i1(i1 signext %a, i1 signext %b) { entry: ; ALL-LABEL: nor_i1: - ; ALL: nor $2, $5, $4 + ; GP32: nor $2, $5, $4 + ; GP64: or $1, $5, $4 + ; MM32: nor $2, $5, $4 + ; MM64: or $1, $5, $4 %or = or i1 %b, %a %r = xor i1 %or, -1 @@ -146,7 +149,10 @@ define signext i8 @nor_i8(i8 signext %a, i8 signext %b) { entry: ; ALL-LABEL: nor_i8: - ; ALL: nor $2, $5, $4 + ; GP32: nor $2, $5, $4 + ; GP64: or $1, $5, $4 + ; MM32: nor $2, $5, $4 + ; MM64: or $1, $5, $4 %or = or i8 %b, %a %r = xor i8 %or, -1 @@ -157,7 +163,10 @@ define signext i16 @nor_i16(i16 signext %a, i16 signext %b) { entry: ; ALL-LABEL: nor_i16: - ; ALL: nor $2, $5, $4 + ; GP32: nor $2, $5, $4 + ; GP64: or $1, $5, $4 + ; MM32: nor $2, $5, $4 + ; MM64: or $1, $5, $4 %or = or i16 %b, %a %r = xor i16 %or, -1 diff --git a/test/CodeGen/Mips/llvm-ir/or.ll b/test/CodeGen/Mips/llvm-ir/or.ll index c7f89ef5d2262..609cf0210c38b 100644 --- a/test/CodeGen/Mips/llvm-ir/or.ll +++ b/test/CodeGen/Mips/llvm-ir/or.ll @@ -24,10 +24,12 @@ entry: ; GP32: or $2, $4, $5 - ; GP64: or $2, $4, $5 + ; GP64: or $1, $4, $5 + + ; MM32: or16 $[[T0:[0-9]+]], $5 + ; MM32: move $2, $[[T0]] - ; MM: or16 $[[T0:[0-9]+]], $5 - ; MM: move $2, $[[T0]] + ; MM64: or $1, $4, $5 %r = or i1 %a, %b ret i1 %r @@ -39,10 +41,12 @@ entry: ; GP32: or $2, $4, $5 - ; GP64: or $2, $4, $5 + ; GP64: or $1, $4, $5 - ; MM: or16 $[[T0:[0-9]+]], $5 - ; MM: move $2, $[[T0]] + ; MM32: or16 $[[T0:[0-9]+]], $5 + ; MM32: move $2, 
$[[T0]] + + ; MM64: or $1, $4, $5 %r = or i8 %a, %b ret i8 %r @@ -54,10 +58,12 @@ entry: ; GP32: or $2, $4, $5 - ; GP64: or $2, $4, $5 + ; GP64: or $1, $4, $5 + + ; MM32: or16 $[[T0:[0-9]+]], $5 + ; MM32: move $2, $[[T0]] - ; MM: or16 $[[T0:[0-9]+]], $5 - ; MM: move $2, $[[T0]] + ; MM64: or $1, $4, $5 %r = or i16 %a, %b ret i16 %r diff --git a/test/CodeGen/Mips/llvm-ir/xor.ll b/test/CodeGen/Mips/llvm-ir/xor.ll index 1d45e200a2edd..068d390839de9 100644 --- a/test/CodeGen/Mips/llvm-ir/xor.ll +++ b/test/CodeGen/Mips/llvm-ir/xor.ll @@ -35,10 +35,12 @@ entry: ; GP32: xor $2, $4, $5 - ; GP64: xor $2, $4, $5 + ; GP64: xor $1, $4, $5 + + ; MM32: xor16 $[[T0:[0-9]+]], $5 + ; MM32: move $2, $[[T0]] - ; MM: xor16 $[[T0:[0-9]+]], $5 - ; MM: move $2, $[[T0]] + ; MM64: xor $1, $4, $5 %r = xor i1 %a, %b ret i1 %r @@ -50,10 +52,12 @@ entry: ; GP32: xor $2, $4, $5 - ; GP64: xor $2, $4, $5 + ; GP64: xor $1, $4, $5 + + ; MM32: xor16 $[[T0:[0-9]+]], $5 + ; MM32: move $2, $[[T0]] - ; MM: xor16 $[[T0:[0-9]+]], $5 - ; MM: move $2, $[[T0]] + ; MM64: xor $1, $4, $5 %r = xor i8 %a, %b ret i8 %r @@ -65,10 +69,12 @@ entry: ; GP32: xor $2, $4, $5 - ; GP64: xor $2, $4, $5 + ; GP64: xor $1, $4, $5 + + ; MM32: xor16 $[[T0:[0-9]+]], $5 + ; MM32: move $2, $[[T0]] - ; MM: xor16 $[[T0:[0-9]+]], $5 - ; MM: move $2, $[[T0]] + ; MM64: xor $1, $4, $5 %r = xor i16 %a, %b ret i16 %r diff --git a/test/CodeGen/Mips/mirparser/target-flags-pic-mxgot-tls.mir b/test/CodeGen/Mips/mirparser/target-flags-pic-mxgot-tls.mir new file mode 100644 index 0000000000000..05923377ec6f9 --- /dev/null +++ b/test/CodeGen/Mips/mirparser/target-flags-pic-mxgot-tls.mir @@ -0,0 +1,275 @@ +# RUN: llc -march=mips64 -target-abi n64 -start-before=expand-isel-pseudos \ +# RUN: -stop-after=expand-isel-pseudos -relocation-model=pic -mxgot \ +# RUN: -o /dev/null %s + +# A simple test to show that we can parse the target-specific flags: gpoff-hi, +# gpoff-lo, tlsgd, tlsldm, dtprel-hi, dtprel-lo, got-hi, got-lo, call-hi, +# call-lo. + +--- | + @v = global i32 0, align 4 + @k = thread_local global i32 0, align 4 + @j = external thread_local global i32, align 4 + @__tls_guard = internal thread_local global i1 false, align 1 + declare extern_weak void @_ZTH1j() + + declare i32 @_Z1gi(i32 signext) + + define i32 @_Z2k1i(i32 signext %asd) { + entry: + %call = tail call i32 @_Z1gi(i32 signext %asd) + %add = add nsw i32 %call, %asd + %0 = load i32, i32* @v, align 4 + %add1 = add nsw i32 %add, %0 + %.b.i.i = load i1, i1* @__tls_guard, align 1 + br i1 %.b.i.i, label %entry._ZTW1k.exit_crit_edge, label %init.i.i + + entry._ZTW1k.exit_crit_edge: + %.pre = load i32, i32* @k, align 4 + br label %_ZTW1k.exit + + init.i.i: + store i1 true, i1* @__tls_guard, align 1 + %call.i.i.i = tail call i32 @_Z1gi(i32 signext 3) + store i32 %call.i.i.i, i32* @k, align 4 + br label %_ZTW1k.exit + + _ZTW1k.exit: + %1 = phi i32 [ %.pre, %entry._ZTW1k.exit_crit_edge ], [ %call.i.i.i, %init.i.i ] + %add2 = add nsw i32 %add1, %1 + br i1 icmp ne (void ()* @_ZTH1j, void ()* null), label %2, label %_ZTW1j.exit + + ;