diff --git a/debian/patches/julia/llvm-D27629-AArch64-large_model_4.0.patch b/debian/patches/julia/llvm-D27629-AArch64-large_model_4.0.patch deleted file mode 100644 index 17b2d416..00000000 --- a/debian/patches/julia/llvm-D27629-AArch64-large_model_4.0.patch +++ /dev/null @@ -1,72 +0,0 @@ -From 6e7b660ee185445640110c80d80aafd436682fca Mon Sep 17 00:00:00 2001 -From: Yichao Yu -Date: Fri, 9 Dec 2016 15:59:46 -0500 -Subject: [PATCH] Fix unwind info relocation with large code model on AArch64 - ---- - lib/MC/MCObjectFileInfo.cpp | 2 ++ - .../AArch64/ELF_ARM64_BE-large-relocations.s | 18 ++++++++++++++++++ - .../RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s | 18 ++++++++++++++++++ - 3 files changed, 38 insertions(+) - create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s - create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s - -Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp -+++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp -@@ -328,6 +328,8 @@ void MCObjectFileInfo::initELFMCObjectFi - dwarf::DW_EH_PE_sdata4 - : dwarf::DW_EH_PE_absptr; - break; -+ case Triple::aarch64: -+ case Triple::aarch64_be: - case Triple::x86_64: - if (PositionIndependent) { - PersonalityEncoding = -Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s -=================================================================== ---- /dev/null -+++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s -@@ -0,0 +1,18 @@ -+# RUN: llvm-mc -triple=aarch64_be-none-linux-gnu -code-model=large -filetype=obj -o %T/be-large-reloc.o %s -+# RUN: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section be-large-reloc.o,.eh_frame=0x10000 -map-section be-large-reloc.o,.text=0xffff000000000000 -check=%s %T/be-large-reloc.o -+ -+ .text -+ .globl g -+ .p2align 2 -+ .type g,@function -+g: -+ .cfi_startproc -+ mov x0, xzr -+ ret -+ .Lfunc_end0: -+ .size g, .Lfunc_end0-g -+ .cfi_endproc -+ -+# Skip the CIE and load the 8 bytes PC begin pointer. -+# Assuming the CIE and the FDE length are both 4 bytes. -+# rtdyld-check: *{8}(section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc) -Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s -=================================================================== ---- /dev/null -+++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s -@@ -0,0 +1,18 @@ -+# RUN: llvm-mc -triple=arm64-none-linux-gnu -code-model=large -filetype=obj -o %T/large-reloc.o %s -+# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o -+ -+ .text -+ .globl g -+ .p2align 2 -+ .type g,@function -+g: -+ .cfi_startproc -+ mov x0, xzr -+ ret -+ .Lfunc_end0: -+ .size g, .Lfunc_end0-g -+ .cfi_endproc -+ -+# Skip the CIE and load the 8 bytes PC begin pointer. -+# Assuming the CIE and the FDE length are both 4 bytes. -+# rtdyld-check: *{8}(section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) diff --git a/debian/patches/julia/llvm-D27629-AArch64-large_model_6.0.1.patch b/debian/patches/julia/llvm-D27629-AArch64-large_model_6.0.1.patch deleted file mode 100644 index 1b815a2b..00000000 --- a/debian/patches/julia/llvm-D27629-AArch64-large_model_6.0.1.patch +++ /dev/null @@ -1,24 +0,0 @@ -From f76abe65e6d07fea5e838c4f8c9a9421c16debb0 Mon Sep 17 00:00:00 2001 -From: Valentin Churavy -Date: Thu, 5 Jul 2018 12:37:50 -0400 -Subject: [PATCH] Fix unwind info relocation with large code model on AArch64 - ---- - lib/MC/MCObjectFileInfo.cpp | 2 ++ - .../AArch64/ELF_ARM64_large-relocations.s | 20 +++++++++++++++++++ - 2 files changed, 22 insertions(+) - create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s - -Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp -+++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp -@@ -291,6 +291,8 @@ void MCObjectFileInfo::initELFMCObjectFi - break; - case Triple::ppc64: - case Triple::ppc64le: -+ case Triple::aarch64: -+ case Triple::aarch64_be: - case Triple::x86_64: - FDECFIEncoding = dwarf::DW_EH_PE_pcrel | - (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); diff --git a/debian/patches/julia/llvm-D34078-vectorize-fdiv.patch b/debian/patches/julia/llvm-D34078-vectorize-fdiv.patch deleted file mode 100644 index cd33b4a8..00000000 --- a/debian/patches/julia/llvm-D34078-vectorize-fdiv.patch +++ /dev/null @@ -1,53 +0,0 @@ -From f94d12b6108b944199b715f31f25a022f75d2feb Mon Sep 17 00:00:00 2001 -From: Yichao Yu -Date: Sat, 10 Jun 2017 08:45:13 -0400 -Subject: [PATCH 4/4] Enable support for floating-point division reductions - -Similar to fsub, fdiv can also be vectorized using fmul. ---- - lib/Transforms/Utils/LoopUtils.cpp | 1 + - test/Transforms/LoopVectorize/float-reduction.ll | 22 ++++++++++++++++++++++ - 2 files changed, 23 insertions(+) - -Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Utils/LoopUtils.cpp -+++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp -@@ -513,6 +513,7 @@ RecurrenceDescriptor::isRecurrenceInstr( - return InstDesc(Kind == RK_IntegerOr, I); - case Instruction::Xor: - return InstDesc(Kind == RK_IntegerXor, I); -+ case Instruction::FDiv: - case Instruction::FMul: - return InstDesc(Kind == RK_FloatMult, I, UAI); - case Instruction::FSub: -Index: llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/LoopVectorize/float-reduction.ll -+++ llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll -@@ -44,3 +44,25 @@ for.body: - for.end: ; preds = %for.body - ret float %sub - } -+ -+;CHECK-LABEL: @foodiv( -+;CHECK: fdiv fast <4 x float> -+;CHECK: ret -+define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp { -+entry: -+ br label %for.body -+ -+for.body: ; preds = %for.body, %entry -+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] -+ %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ] -+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv -+ %0 = load float, float* %arrayidx, align 4 -+ %sub = fdiv fast float %sum.04, %0 -+ %indvars.iv.next = add i64 %indvars.iv, 1 -+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 -+ %exitcond = icmp eq i32 %lftr.wideiv, 200 -+ br i1 %exitcond, label %for.end, label %for.body -+ -+for.end: ; preds = %for.body -+ ret float %sub -+} diff --git a/debian/patches/julia/llvm-D42262-jumpthreading-not-i1.patch b/debian/patches/julia/llvm-D42262-jumpthreading-not-i1.patch deleted file mode 100644 index 93643ff8..00000000 --- a/debian/patches/julia/llvm-D42262-jumpthreading-not-i1.patch +++ /dev/null @@ -1,82 +0,0 @@ -commit 6a311a7a804831fea43cfb2f61322adcb407a1af -Author: Keno Fischer -Date: Thu Jan 18 15:57:05 2018 -0500 - - [JumpThreading] Don't restrict cast-traversal to i1 - - Summary: - In D17663, JumpThreading learned to look trough simple cast instructions, - but only if the source of those cast instructions was a phi/cmp i1 - (in an effort to limit compile time effects). I think this condition - is too restrictive. For switches with limited value range, InstCombine - will readily introduce an extra `trunc` instruction to a smaller - integer type (e.g. from i8 to i2), leaving us in the somewhat perverse - situation that jump-threading would work before running instcombine, - but not after. Since instcombine produces this pattern, I think we - need to consider it canonical and support it in JumpThreading. - In general, for limiting recursion, I think the existing restriction - to phi and cmp nodes should be sufficient to avoid looking through - unprofitable chains of instructions. - - Reviewers: haicheng, gberry, bmakam, mcrosier - - Subscribers: llvm-commits - - Differential Revision: https://reviews.llvm.org/D42262 - -Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Scalar/JumpThreading.cpp -+++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp -@@ -656,11 +656,9 @@ bool JumpThreadingPass::ComputeValueKnow - } - - // Handle Cast instructions. Only see through Cast when the source operand is -- // PHI or Cmp and the source type is i1 to save the compilation time. -+ // PHI or Cmp to save the compilation time. - if (CastInst *CI = dyn_cast(I)) { - Value *Source = CI->getOperand(0); -- if (!Source->getType()->isIntegerTy(1)) -- return false; - if (!isa(Source) && !isa(Source)) - return false; - ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI); -Index: llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/JumpThreading/basic.ll -+++ llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll -@@ -547,6 +547,34 @@ l5: - ; CHECK: } - } - -+define i1 @trunc_switch(i1 %arg) { -+; CHECK-LABEL: @trunc_switch -+top: -+; CHECK: br i1 %arg, label %exitA, label %exitB -+ br i1 %arg, label %common, label %B -+ -+B: -+ br label %common -+ -+common: -+ %phi = phi i8 [ 2, %B ], [ 1, %top ] -+ %trunc = trunc i8 %phi to i2 -+; CHECK-NOT: switch -+ switch i2 %trunc, label %unreach [ -+ i2 1, label %exitA -+ i2 -2, label %exitB -+ ] -+ -+unreach: -+ unreachable -+ -+exitA: -+ ret i1 true -+ -+exitB: -+ ret i1 false -+} -+ - ; CHECK-LABEL: define void @h_con(i32 %p) { - define void @h_con(i32 %p) { - %x = icmp ult i32 %p, 5 diff --git a/debian/patches/julia/llvm-D44892-Perf-integration.patch b/debian/patches/julia/llvm-D44892-Perf-integration.patch deleted file mode 100644 index e849bcd3..00000000 --- a/debian/patches/julia/llvm-D44892-Perf-integration.patch +++ /dev/null @@ -1,677 +0,0 @@ -From 45bc0f0badbdbabaed7d204757c2aad7ab49a3fe Mon Sep 17 00:00:00 2001 -From: DokFaust -Date: Mon, 11 Jun 2018 12:59:42 +0200 -Subject: [PATCH] PerfJITEventListener integration, requires compile flag - LLVM_USE_PERF - ---- - CMakeLists.txt | 13 + - include/llvm/Config/config.h.cmake | 3 + - include/llvm/Config/llvm-config.h.cmake | 3 + - .../llvm/ExecutionEngine/JITEventListener.h | 9 + - lib/ExecutionEngine/CMakeLists.txt | 4 + - lib/ExecutionEngine/LLVMBuild.txt | 2 +- - lib/ExecutionEngine/Orc/LLVMBuild.txt | 2 +- - .../PerfJITEvents/CMakeLists.txt | 5 + - .../PerfJITEvents/LLVMBuild.txt | 23 + - .../PerfJITEvents/PerfJITEventListener.cpp | 492 ++++++++++++++++++ - 10 files changed, 554 insertions(+), 2 deletions(-) - create mode 100644 lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt - create mode 100644 lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt - create mode 100644 lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index f8da6cf9211..fb92c825a46 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -426,6 +426,16 @@ if( LLVM_USE_OPROFILE ) - endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) - endif( LLVM_USE_OPROFILE ) - -+option(LLVM_USE_PERF -+ "Use perf JIT interface to inform perf about JIT code" OFF) -+ -+# If enabled, verify we are on a platform that supports perf. -+if( LLVM_USE_PERF ) -+ if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) -+ message(FATAL_ERROR "perf support is available on Linux only.") -+ endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) -+endif( LLVM_USE_PERF ) -+ - set(LLVM_USE_SANITIZER "" CACHE STRING - "Define the sanitizer used to build binaries and tests.") - set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH -@@ -634,6 +644,9 @@ endif (LLVM_USE_INTEL_JITEVENTS) - if (LLVM_USE_OPROFILE) - set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT) - endif (LLVM_USE_OPROFILE) -+if (LLVM_USE_PERF) -+ set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents) -+endif (LLVM_USE_PERF) - - message(STATUS "Constructing LLVMBuild project information") - execute_process( -diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake -index 940f8420304..17787ed779b 100644 ---- a/include/llvm/Config/config.h.cmake -+++ b/include/llvm/Config/config.h.cmake -@@ -377,6 +377,9 @@ - /* Define if we have the oprofile JIT-support library */ - #cmakedefine01 LLVM_USE_OPROFILE - -+/* Define if we have the perf JIT-support library */ -+#cmakedefine01 LLVM_USE_PERF -+ - /* LLVM version information */ - #cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}" - -diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake -index 4daa00f3bc4..8d9c3b24d52 100644 ---- a/include/llvm/Config/llvm-config.h.cmake -+++ b/include/llvm/Config/llvm-config.h.cmake -@@ -65,6 +65,9 @@ - /* Define if we have the oprofile JIT-support library */ - #cmakedefine01 LLVM_USE_OPROFILE - -+/* Define if we have the perf JIT-support library */ -+#cmakedefine01 LLVM_USE_PERF -+ - /* Major version of the LLVM API */ - #define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR} - -diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h -index ff7840f00a4..1cc2c423a8b 100644 ---- a/include/llvm/ExecutionEngine/JITEventListener.h -+++ b/include/llvm/ExecutionEngine/JITEventListener.h -@@ -115,6 +115,15 @@ public: - } - #endif // USE_OPROFILE - -+#ifdef LLVM_USE_PERF -+ static JITEventListener *createPerfJITEventListener(); -+#else -+ static JITEventListener *createPerfJITEventListener() -+ { -+ return nullptr; -+ } -+#endif //USE_PERF -+ - private: - virtual void anchor(); - }; -diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt -index 84b34919e44..893d113a685 100644 ---- a/lib/ExecutionEngine/CMakeLists.txt -+++ b/lib/ExecutionEngine/CMakeLists.txt -@@ -30,3 +30,7 @@ endif( LLVM_USE_OPROFILE ) - if( LLVM_USE_INTEL_JITEVENTS ) - add_subdirectory(IntelJITEvents) - endif( LLVM_USE_INTEL_JITEVENTS ) -+ -+if( LLVM_USE_PERF ) -+ add_subdirectory(PerfJITEvents) -+endif( LLVM_USE_PERF ) -diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt -index 9d29a41f504..b6e1bda6a51 100644 ---- a/lib/ExecutionEngine/LLVMBuild.txt -+++ b/lib/ExecutionEngine/LLVMBuild.txt -@@ -16,7 +16,7 @@ - ;===------------------------------------------------------------------------===; - - [common] --subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc -+subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents - - [component_0] - type = Library -diff --git a/lib/ExecutionEngine/Orc/LLVMBuild.txt b/lib/ExecutionEngine/Orc/LLVMBuild.txt -index 8f05172e77a..ef4ae64e823 100644 ---- a/lib/ExecutionEngine/Orc/LLVMBuild.txt -+++ b/lib/ExecutionEngine/Orc/LLVMBuild.txt -@@ -19,4 +19,4 @@ - type = Library - name = OrcJIT - parent = ExecutionEngine --required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils -+required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils -diff --git a/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt -new file mode 100644 -index 00000000000..136cc429d02 ---- /dev/null -+++ b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt -@@ -0,0 +1,5 @@ -+add_llvm_library(LLVMPerfJITEvents -+ PerfJITEventListener.cpp -+ ) -+ -+add_dependencies(LLVMPerfJITEvents LLVMCodeGen) -diff --git a/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt -new file mode 100644 -index 00000000000..b1958a69260 ---- /dev/null -+++ b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt -@@ -0,0 +1,23 @@ -+;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===; -+; -+; The LLVM Compiler Infrastructure -+; -+; This file is distributed under the University of Illinois Open Source -+; License. See LICENSE.TXT for details. -+; -+;===------------------------------------------------------------------------===; -+; -+; This is an LLVMBuild description file for the components in this subdirectory. -+; -+; For more information on the LLVMBuild system, please see: -+; -+; http://llvm.org/docs/LLVMBuild.html -+; -+;===------------------------------------------------------------------------===; -+ -+[component_0] -+type = OptionalLibrary -+name = PerfJITEvents -+parent = ExecutionEngine -+required_libraries = CodeGen Core DebugInfoDWARF ExecutionEngine Object Support TransformUtils -+ -diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp -new file mode 100644 -index 00000000000..c2b97dd59f3 ---- /dev/null -+++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp -@@ -0,0 +1,492 @@ -+//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This file defines a JITEventListener object that tells perf about JITted -+// functions, including source line information. -+// -+// Documentation for perf jit integration is available at: -+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt -+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt -+// -+//===----------------------------------------------------------------------===// -+ -+#include "llvm/ADT/Twine.h" -+#include "llvm/Config/config.h" -+#include "llvm/DebugInfo/DWARF/DWARFContext.h" -+#include "llvm/ExecutionEngine/JITEventListener.h" -+#include "llvm/Object/ObjectFile.h" -+#include "llvm/Object/SymbolSize.h" -+#include "llvm/Support/Debug.h" -+#include "llvm/Support/Errno.h" -+#include "llvm/Support/FileSystem.h" -+#include "llvm/Support/MemoryBuffer.h" -+#include "llvm/Support/Mutex.h" -+#include "llvm/Support/MutexGuard.h" -+#include "llvm/Support/Path.h" -+#include "llvm/Support/Process.h" -+#include "llvm/Support/Threading.h" -+#include "llvm/Support/raw_ostream.h" -+ -+#include // mmap() -+#include // getpid() -+#include // clock_gettime(), time(), localtime_r() */ -+#include // for getpid(), read(), close() -+ -+using namespace llvm; -+using namespace llvm::object; -+typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; -+ -+namespace { -+ -+// language identifier (XXX: should we generate something better from debug -+// info?) -+#define JIT_LANG "llvm-IR" -+#define LLVM_PERF_JIT_MAGIC \ -+ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ -+ (uint32_t)'D') -+#define LLVM_PERF_JIT_VERSION 1 -+ -+// bit 0: set if the jitdump file is using an architecture-specific timestamp -+// clock source -+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0) -+ -+struct LLVMPerfJitHeader; -+ -+class PerfJITEventListener : public JITEventListener { -+public: -+ PerfJITEventListener(); -+ ~PerfJITEventListener() { -+ if (MarkerAddr) -+ CloseMarker(); -+ } -+ -+ void NotifyObjectEmitted(const ObjectFile &Obj, -+ const RuntimeDyld::LoadedObjectInfo &L) override; -+ void NotifyFreeingObject(const ObjectFile &Obj) override; -+ -+private: -+ bool InitDebuggingDir(); -+ bool OpenMarker(); -+ void CloseMarker(); -+ static bool FillMachine(LLVMPerfJitHeader &hdr); -+ -+ void NotifyCode(Expected &Symbol, uint64_t CodeAddr, -+ uint64_t CodeSize); -+ void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines); -+ -+ // cache lookups -+ pid_t Pid; -+ -+ // base directory for output data -+ std::string JitPath; -+ -+ // output data stream, closed via Dumpstream -+ int DumpFd = -1; -+ -+ // output data stream -+ std::unique_ptr Dumpstream; -+ -+ // prevent concurrent dumps from messing up the output file -+ sys::Mutex Mutex; -+ -+ // perf mmap marker -+ void *MarkerAddr = NULL; -+ -+ // perf support ready -+ bool SuccessfullyInitialized = false; -+ -+ // identifier for functions, primarily to identify when moving them around -+ uint64_t CodeGeneration = 1; -+}; -+ -+// The following are POD struct definitions from the perf jit specification -+ -+enum LLVMPerfJitRecordType { -+ JIT_CODE_LOAD = 0, -+ JIT_CODE_MOVE = 1, // not emitted, code isn't moved -+ JIT_CODE_DEBUG_INFO = 2, -+ JIT_CODE_CLOSE = 3, // not emitted, unnecessary -+ JIT_CODE_UNWINDING_INFO = 4, // not emitted -+ -+ JIT_CODE_MAX -+}; -+ -+struct LLVMPerfJitHeader { -+ uint32_t Magic; // characters "JiTD" -+ uint32_t Version; // header version -+ uint32_t TotalSize; // total size of header -+ uint32_t ElfMach; // elf mach target -+ uint32_t Pad1; // reserved -+ uint32_t Pid; -+ uint64_t Timestamp; // timestamp -+ uint64_t Flags; // flags -+}; -+ -+// record prefix (mandatory in each record) -+struct LLVMPerfJitRecordPrefix { -+ uint32_t Id; // record type identifier -+ uint32_t TotalSize; -+ uint64_t Timestamp; -+}; -+ -+struct LLVMPerfJitRecordCodeLoad { -+ LLVMPerfJitRecordPrefix Prefix; -+ -+ uint32_t Pid; -+ uint32_t Tid; -+ uint64_t Vma; -+ uint64_t CodeAddr; -+ uint64_t CodeSize; -+ uint64_t CodeIndex; -+}; -+ -+struct LLVMPerfJitDebugEntry { -+ uint64_t Addr; -+ int Lineno; // source line number starting at 1 -+ int Discrim; // column discriminator, 0 is default -+ // followed by null terminated filename, \xff\0 if same as previous entry -+}; -+ -+struct LLVMPerfJitRecordDebugInfo { -+ LLVMPerfJitRecordPrefix Prefix; -+ -+ uint64_t CodeAddr; -+ uint64_t NrEntry; -+ // followed by NrEntry LLVMPerfJitDebugEntry records -+}; -+ -+static inline uint64_t timespec_to_ns(const struct timespec *ts) { -+ const uint64_t NanoSecPerSec = 1000000000; -+ return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec; -+} -+ -+static inline uint64_t perf_get_timestamp(void) { -+ struct timespec ts; -+ int ret; -+ -+ ret = clock_gettime(CLOCK_MONOTONIC, &ts); -+ if (ret) -+ return 0; -+ -+ return timespec_to_ns(&ts); -+} -+ -+PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) { -+ // check if clock-source is supported -+ if (!perf_get_timestamp()) { -+ errs() << "kernel does not support CLOCK_MONOTONIC\n"; -+ return; -+ } -+ -+ if (!InitDebuggingDir()) { -+ errs() << "could not initialize debugging directory\n"; -+ return; -+ } -+ -+ std::string Filename; -+ raw_string_ostream FilenameBuf(Filename); -+ FilenameBuf << JitPath << "/jit-" << Pid << ".dump"; -+ -+ // Need to open ourselves, because we need to hand the FD to OpenMarker() and -+ // raw_fd_ostream doesn't expose the FD. -+ using sys::fs::openFileForWrite; -+ if (auto EC = -+ openFileForWrite(FilenameBuf.str(), DumpFd, sys::fs::F_RW, 0666)) { -+ errs() << "could not open JIT dump file " << FilenameBuf.str() << ": " -+ << EC.message() << "\n"; -+ return; -+ } -+ -+ Dumpstream = make_unique(DumpFd, true); -+ -+ LLVMPerfJitHeader Header = {0}; -+ if (!FillMachine(Header)) -+ return; -+ -+ // signal this process emits JIT information -+ if (!OpenMarker()) -+ return; -+ -+ // emit dumpstream header -+ Header.Magic = LLVM_PERF_JIT_MAGIC; -+ Header.Version = LLVM_PERF_JIT_VERSION; -+ Header.TotalSize = sizeof(Header); -+ Header.Pid = Pid; -+ Header.Timestamp = perf_get_timestamp(); -+ Dumpstream->write(reinterpret_cast(&Header), sizeof(Header)); -+ -+ // Everything initialized, can do profiling now. -+ if (!Dumpstream->has_error()) -+ SuccessfullyInitialized = true; -+} -+ -+void PerfJITEventListener::NotifyObjectEmitted( -+ const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) { -+ -+ if (!SuccessfullyInitialized) -+ return; -+ -+ OwningBinary DebugObjOwner = L.getObjectForDebug(Obj); -+ const ObjectFile &DebugObj = *DebugObjOwner.getBinary(); -+ -+ // Get the address of the object image for use as a unique identifier -+ std::unique_ptr Context = DWARFContext::create(DebugObj); -+ -+ // Use symbol info to iterate over functions in the object. -+ for (const std::pair &P : computeSymbolSizes(DebugObj)) { -+ SymbolRef Sym = P.first; -+ std::string SourceFileName; -+ -+ Expected SymTypeOrErr = Sym.getType(); -+ if (!SymTypeOrErr) { -+ // There's not much we can with errors here -+ consumeError(SymTypeOrErr.takeError()); -+ continue; -+ } -+ SymbolRef::Type SymType = *SymTypeOrErr; -+ if (SymType != SymbolRef::ST_Function) -+ continue; -+ -+ Expected Name = Sym.getName(); -+ if (!Name) { -+ consumeError(Name.takeError()); -+ continue; -+ } -+ -+ Expected AddrOrErr = Sym.getAddress(); -+ if (!AddrOrErr) { -+ consumeError(AddrOrErr.takeError()); -+ continue; -+ } -+ uint64_t Addr = *AddrOrErr; -+ uint64_t Size = P.second; -+ -+ // According to spec debugging info has to come before loading the -+ // corresonding code load. -+ DILineInfoTable Lines = Context->getLineInfoForAddressRange( -+ Addr, Size, FileLineInfoKind::AbsoluteFilePath); -+ -+ NotifyDebug(Addr, Lines); -+ NotifyCode(Name, Addr, Size); -+ } -+ -+ Dumpstream->flush(); -+} -+ -+void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) { -+ // perf currently doesn't have an interface for unloading. But munmap()ing the -+ // code section does, so that's ok. -+} -+ -+bool PerfJITEventListener::InitDebuggingDir() { -+ time_t Time; -+ struct tm LocalTime; -+ char TimeBuffer[sizeof("YYYYMMDD")]; -+ SmallString<64> Path; -+ -+ // search for location to dump data to -+ if (const char *BaseDir = getenv("JITDUMPDIR")) -+ Path.append(BaseDir); -+ else if (!sys::path::home_directory(Path)) -+ Path = "."; -+ -+ // create debug directory -+ Path += "/.debug/jit/"; -+ if (auto EC = sys::fs::create_directories(Path)) { -+ errs() << "could not create jit cache directory " << Path << ": " -+ << EC.message() << "\n"; -+ return false; -+ } -+ -+ // create unique directory for dump data related to this process -+ time(&Time); -+ localtime_r(&Time, &LocalTime); -+ strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); -+ Path += JIT_LANG "-jit-"; -+ Path += TimeBuffer; -+ -+ SmallString<128> UniqueDebugDir; -+ -+ using sys::fs::createUniqueDirectory; -+ if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { -+ errs() << "could not create unique jit cache directory " << UniqueDebugDir -+ << ": " << EC.message() << "\n"; -+ return false; -+ } -+ -+ JitPath = UniqueDebugDir.str(); -+ -+ return true; -+} -+ -+bool PerfJITEventListener::OpenMarker() { -+ // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap -+ // is captured either live (perf record running when we mmap) or in deferred -+ // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump -+ // file for more meta data info about the jitted code. Perf report/annotate -+ // detect this special filename and process the jitdump file. -+ // -+ // Mapping must be PROT_EXEC to ensure it is captured by perf record -+ // even when not using -d option. -+ MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC, -+ MAP_PRIVATE, DumpFd, 0); -+ -+ if (MarkerAddr == MAP_FAILED) { -+ errs() << "could not mmap JIT marker\n"; -+ return false; -+ } -+ return true; -+} -+ -+void PerfJITEventListener::CloseMarker() { -+ if (!MarkerAddr) -+ return; -+ -+ munmap(MarkerAddr, sys::Process::getPageSize()); -+ MarkerAddr = nullptr; -+} -+ -+bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) { -+ char id[16]; -+ struct { -+ uint16_t e_type; -+ uint16_t e_machine; -+ } info; -+ -+ size_t RequiredMemory = sizeof(id) + sizeof(info); -+ -+ ErrorOr> MB = -+ MemoryBuffer::getFileSlice("/proc/self/exe", -+ RequiredMemory, -+ 0); -+ -+ // This'll not guarantee that enough data was actually read from the -+ // underlying file. Instead the trailing part of the buffer would be -+ // zeroed. Given the ELF signature check below that seems ok though, -+ // it's unlikely that the file ends just after that, and the -+ // consequence would just be that perf wouldn't recognize the -+ // signature. -+ if (auto EC = MB.getError()) { -+ errs() << "could not open /proc/self/exe: " << EC.message() << "\n"; -+ return false; -+ } -+ -+ memcpy(&id, (*MB)->getBufferStart(), sizeof(id)); -+ memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info)); -+ -+ // check ELF signature -+ if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') { -+ errs() << "invalid elf signature\n"; -+ return false; -+ } -+ -+ hdr.ElfMach = info.e_machine; -+ -+ return true; -+} -+ -+void PerfJITEventListener::NotifyCode(Expected &Symbol, -+ uint64_t CodeAddr, uint64_t CodeSize) { -+ assert(SuccessfullyInitialized); -+ -+ // 0 length functions can't have samples. -+ if (CodeSize == 0) -+ return; -+ -+ LLVMPerfJitRecordCodeLoad rec; -+ rec.Prefix.Id = JIT_CODE_LOAD; -+ rec.Prefix.TotalSize = sizeof(rec) + // debug record itself -+ Symbol->size() + 1 + // symbol name -+ CodeSize; // and code -+ rec.Prefix.Timestamp = perf_get_timestamp(); -+ -+ rec.CodeSize = CodeSize; -+ rec.Vma = 0; -+ rec.CodeAddr = CodeAddr; -+ rec.Pid = Pid; -+ rec.Tid = get_threadid(); -+ -+ // avoid interspersing output -+ MutexGuard Guard(Mutex); -+ -+ rec.CodeIndex = CodeGeneration++; // under lock! -+ -+ Dumpstream->write(reinterpret_cast(&rec), sizeof(rec)); -+ Dumpstream->write(Symbol->data(), Symbol->size() + 1); -+ Dumpstream->write(reinterpret_cast(CodeAddr), CodeSize); -+} -+ -+void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, -+ DILineInfoTable Lines) { -+ assert(SuccessfullyInitialized); -+ -+ // Didn't get useful debug info. -+ if (Lines.empty()) -+ return; -+ -+ LLVMPerfJitRecordDebugInfo rec; -+ rec.Prefix.Id = JIT_CODE_DEBUG_INFO; -+ rec.Prefix.TotalSize = sizeof(rec); // will be increased further -+ rec.Prefix.Timestamp = perf_get_timestamp(); -+ rec.CodeAddr = CodeAddr; -+ rec.NrEntry = Lines.size(); -+ -+ // compute total size size of record (variable due to filenames) -+ DILineInfoTable::iterator Begin = Lines.begin(); -+ DILineInfoTable::iterator End = Lines.end(); -+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) { -+ DILineInfo &line = It->second; -+ rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry); -+ rec.Prefix.TotalSize += line.FileName.size() + 1; -+ } -+ -+ // The debug_entry describes the source line information. It is defined as -+ // follows in order: -+ // * uint64_t code_addr: address of function for which the debug information -+ // is generated -+ // * uint32_t line : source file line number (starting at 1) -+ // * uint32_t discrim : column discriminator, 0 is default -+ // * char name[n] : source file name in ASCII, including null termination -+ -+ // avoid interspersing output -+ MutexGuard Guard(Mutex); -+ -+ Dumpstream->write(reinterpret_cast(&rec), sizeof(rec)); -+ -+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) { -+ LLVMPerfJitDebugEntry LineInfo; -+ DILineInfo &Line = It->second; -+ -+ LineInfo.Addr = It->first; -+ // The function re-created by perf is preceded by a elf -+ // header. Need to adjust for that, otherwise the results are -+ // wrong. -+ LineInfo.Addr += 0x40; -+ LineInfo.Lineno = Line.Line; -+ LineInfo.Discrim = Line.Discriminator; -+ -+ Dumpstream->write(reinterpret_cast(&LineInfo), -+ sizeof(LineInfo)); -+ Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1); -+ } -+} -+ -+// There should be only a single event listener per process, otherwise perf gets -+// confused. -+llvm::ManagedStatic PerfListener; -+ -+} // end anonymous namespace -+ -+namespace llvm { -+JITEventListener *JITEventListener::createPerfJITEventListener() { -+ return &*PerfListener; -+} -+ -+} // namespace llvm -+ --- -2.17.1 - diff --git a/debian/patches/julia/llvm-D50010-VNCoercion-ni.patch b/debian/patches/julia/llvm-D50010-VNCoercion-ni.patch deleted file mode 100644 index cb658d1b..00000000 --- a/debian/patches/julia/llvm-D50010-VNCoercion-ni.patch +++ /dev/null @@ -1,89 +0,0 @@ -commit 8eb2b102a203d83fb713f3bf79acf235dabdd8cd -Author: Keno Fischer -Date: Mon Jul 30 16:59:08 2018 -0400 - - [VNCoercion] Disallow coercion between different ni addrspaces - - Summary: - I'm not sure if it would be legal by the IR reference to introduce - an addrspacecast here, since the IR reference is a bit vague on - the exact semantics, but at least for our usage of it (and I - suspect for many other's usage) it is not. For us, addrspacecasts - between non-integral address spaces carry frontend information that the - optimizer cannot deduce afterwards in a generic way (though we - have frontend specific passes in our pipline that do propagate - these). In any case, I'm sure nobody is using it this way at - the moment, since it would have introduced inttoptrs, which - are definitely illegal. - - Fixes PR38375 - - Reviewers: sanjoy, reames, dberlin - - Subscribers: llvm-commits - - Differential Revision: https://reviews.llvm.org/D50010 - -diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp -index c3feea6a0a4..735d1e7b792 100644 ---- a/lib/Transforms/Utils/VNCoercion.cpp -+++ b/lib/Transforms/Utils/VNCoercion.cpp -@@ -20,14 +20,21 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, - StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy()) - return false; - -+ Type *StoredValTy = StoredVal->getType(); -+ - // The store has to be at least as big as the load. - if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy)) - return false; - -- // Don't coerce non-integral pointers to integers or vice versa. -- if (DL.isNonIntegralPointerType(StoredVal->getType()) != -- DL.isNonIntegralPointerType(LoadTy)) -+ bool StoredNI = DL.isNonIntegralPointerType(StoredValTy); -+ bool LoadNI = DL.isNonIntegralPointerType(LoadTy); -+ if (StoredNI != LoadNI) { - return false; -+ } else if (StoredNI && LoadNI && -+ cast(StoredValTy)->getAddressSpace() != -+ cast(LoadTy)->getAddressSpace()) { -+ return false; -+ } - - return true; - } -diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll -index 9ae4132231d..5217fc1a06a 100644 ---- a/test/Transforms/GVN/non-integral-pointers.ll -+++ b/test/Transforms/GVN/non-integral-pointers.ll -@@ -1,6 +1,6 @@ - ; RUN: opt -gvn -S < %s | FileCheck %s - --target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4" -+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5" - target triple = "x86_64-unknown-linux-gnu" - - define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) { -@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) { - alwaysTaken: - ret i64 42 - } -+ -+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) { -+ ; CHECK-LABEL: @multini( -+ ; CHECK-NOT: inttoptr -+ ; CHECK-NOT: ptrtoint -+ ; CHECK-NOT: addrspacecast -+ entry: -+ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc -+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken -+ -+ neverTaken: -+ %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)** -+ %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc -+ ret i8 addrspace(5)* %differentas -+ -+ alwaysTaken: -+ ret i8 addrspace(5)* null -+ } diff --git a/debian/patches/julia/llvm-D50167-scev-umin.patch b/debian/patches/julia/llvm-D50167-scev-umin.patch deleted file mode 100644 index fd3d4db2..00000000 --- a/debian/patches/julia/llvm-D50167-scev-umin.patch +++ /dev/null @@ -1,1143 +0,0 @@ -commit 556c30af1c797be294edde0ce621884f5acf11f0 -Author: Keno Fischer -Date: Wed Aug 1 20:45:11 2018 -0400 - - RFC: [SCEV] Add explicit representations of umin/smin - - Summary: - Currently we express umin as `~umax(~x, ~y)`. However, this becomes - a problem for operands in non-integral pointer spaces, because `~x` - is not something we can compute for `x` non-integral. However, since - comparisons are generally still allowed, we are actually able to - express `umin(x, y)` directly as long as we don't try to express is - as a umax. Support this by adding an explicit umin/smin representation - to SCEV. We do this by factoring the existing getUMax/getSMax functions - into a new function that does all four. The previous two functions - were largely identical, except that the SMax variant used `isKnownPredicate` - while the UMax variant used `isKnownViaNonRecursiveReasoning`. - - Trying to make the UMax variant also use `isKnownPredicate` yields to - an infinite recursion, while trying to make the `SMax` variant use - `isKnownViaNonRecursiveReasoning` causes - `Transforms/IndVarSimplify/backedge-on-min-max.ll` to fail. - - I would appreciate any insight into which predicate is correct here. - - Reviewers: reames, sanjoy - - Subscribers: javed.absar, llvm-commits - - Differential Revision: https://reviews.llvm.org/D50167 - -Index: llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolution.h -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/include/llvm/Analysis/ScalarEvolution.h -+++ llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolution.h -@@ -582,12 +582,15 @@ public: - /// \p IndexExprs The expressions for the indices. - const SCEV *getGEPExpr(GEPOperator *GEP, - const SmallVectorImpl &IndexExprs); -+ const SCEV *getUSMinMaxExpr(unsigned Kind, SmallVectorImpl &Operands); - const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); - const SCEV *getSMaxExpr(SmallVectorImpl &Operands); - const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS); - const SCEV *getUMaxExpr(SmallVectorImpl &Operands); - const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS); -+ const SCEV *getSMinExpr(SmallVectorImpl &Operands); - const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS); -+ const SCEV *getUMinExpr(SmallVectorImpl &Operands); - const SCEV *getUnknown(Value *V); - const SCEV *getCouldNotCompute(); - -Index: llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolutionExpander.h -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/include/llvm/Analysis/ScalarEvolutionExpander.h -+++ llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolutionExpander.h -@@ -367,6 +367,10 @@ namespace llvm { - - Value *visitUMaxExpr(const SCEVUMaxExpr *S); - -+ Value *visitSMinExpr(const SCEVSMinExpr *S); -+ -+ Value *visitUMinExpr(const SCEVUMinExpr *S); -+ - Value *visitUnknown(const SCEVUnknown *S) { - return S->getValue(); - } -Index: llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolutionExpressions.h -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/include/llvm/Analysis/ScalarEvolutionExpressions.h -+++ llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolutionExpressions.h -@@ -40,7 +40,7 @@ class Type; - // These should be ordered in terms of increasing complexity to make the - // folders simpler. - scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr, -- scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, -+ scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUMinExpr, scSMinExpr, - scUnknown, scCouldNotCompute - }; - -@@ -187,6 +187,8 @@ class Type; - S->getSCEVType() == scMulExpr || - S->getSCEVType() == scSMaxExpr || - S->getSCEVType() == scUMaxExpr || -+ S->getSCEVType() == scSMinExpr || -+ S->getSCEVType() == scUMinExpr || - S->getSCEVType() == scAddRecExpr; - } - }; -@@ -204,7 +206,9 @@ class Type; - return S->getSCEVType() == scAddExpr || - S->getSCEVType() == scMulExpr || - S->getSCEVType() == scSMaxExpr || -- S->getSCEVType() == scUMaxExpr; -+ S->getSCEVType() == scUMaxExpr || -+ S->getSCEVType() == scSMinExpr || -+ S->getSCEVType() == scUMinExpr; - } - - /// Set flags for a non-recurrence without clearing previously set flags. -@@ -396,6 +400,42 @@ class Type; - } - }; - -+ /// This class represents a signed minimum selection. -+ class SCEVSMinExpr : public SCEVCommutativeExpr { -+ friend class ScalarEvolution; -+ -+ SCEVSMinExpr(const FoldingSetNodeIDRef ID, -+ const SCEV *const *O, size_t N) -+ : SCEVCommutativeExpr(ID, scSMinExpr, O, N) { -+ // Min never overflows. -+ setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); -+ } -+ -+ public: -+ /// Methods for support type inquiry through isa, cast, and dyn_cast: -+ static bool classof(const SCEV *S) { -+ return S->getSCEVType() == scSMinExpr; -+ } -+ }; -+ -+ /// This class represents an unsigned minimum selection. -+ class SCEVUMinExpr : public SCEVCommutativeExpr { -+ friend class ScalarEvolution; -+ -+ SCEVUMinExpr(const FoldingSetNodeIDRef ID, -+ const SCEV *const *O, size_t N) -+ : SCEVCommutativeExpr(ID, scUMinExpr, O, N) { -+ // Min never overflows. -+ setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); -+ } -+ -+ public: -+ /// Methods for support type inquiry through isa, cast, and dyn_cast: -+ static bool classof(const SCEV *S) { -+ return S->getSCEVType() == scUMinExpr; -+ } -+ }; -+ - /// This means that we are dealing with an entirely unknown SCEV - /// value, and only represent it as its LLVM Value. This is the - /// "bottom" value for the analysis. -@@ -468,6 +508,10 @@ class Type; - return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S); - case scUMaxExpr: - return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S); -+ case scSMinExpr: -+ return ((SC*)this)->visitSMinExpr((const SCEVSMinExpr*)S); -+ case scUMinExpr: -+ return ((SC*)this)->visitUMinExpr((const SCEVUMinExpr*)S); - case scUnknown: - return ((SC*)this)->visitUnknown((const SCEVUnknown*)S); - case scCouldNotCompute: -@@ -521,6 +565,8 @@ class Type; - case scMulExpr: - case scSMaxExpr: - case scUMaxExpr: -+ case scSMinExpr: -+ case scUMinExpr: - case scAddRecExpr: - for (const auto *Op : cast(S)->operands()) - push(Op); -@@ -683,6 +729,26 @@ class Type; - return !Changed ? Expr : SE.getUMaxExpr(Operands); - } - -+ const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) { -+ SmallVector Operands; -+ bool Changed = false; -+ for (auto *Op : Expr->operands()) { -+ Operands.push_back(((SC *)this)->visit(Op)); -+ Changed |= Op != Operands.back(); -+ } -+ return !Changed ? Expr : SE.getSMinExpr(Operands); -+ } -+ -+ const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) { -+ SmallVector Operands; -+ bool Changed = false; -+ for (auto *Op : Expr->operands()) { -+ Operands.push_back(((SC*)this)->visit(Op)); -+ Changed |= Op != Operands.back(); -+ } -+ return !Changed ? Expr : SE.getUMinExpr(Operands); -+ } -+ - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - return Expr; - } -Index: llvm-toolchain-6.0-6.0.1/lib/Analysis/ScalarEvolution.cpp -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/lib/Analysis/ScalarEvolution.cpp -+++ llvm-toolchain-6.0-6.0.1/lib/Analysis/ScalarEvolution.cpp -@@ -271,7 +271,9 @@ void SCEV::print(raw_ostream &OS) const - case scAddExpr: - case scMulExpr: - case scUMaxExpr: -- case scSMaxExpr: { -+ case scSMaxExpr: -+ case scUMinExpr: -+ case scSMinExpr: { - const SCEVNAryExpr *NAry = cast(this); - const char *OpStr = nullptr; - switch (NAry->getSCEVType()) { -@@ -279,6 +281,8 @@ void SCEV::print(raw_ostream &OS) const - case scMulExpr: OpStr = " * "; break; - case scUMaxExpr: OpStr = " umax "; break; - case scSMaxExpr: OpStr = " smax "; break; -+ case scUMinExpr: OpStr = " umin "; break; -+ case scSMinExpr: OpStr = " smin "; break; - } - OS << "("; - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); -@@ -347,6 +351,8 @@ Type *SCEV::getType() const { - case scMulExpr: - case scUMaxExpr: - case scSMaxExpr: -+ case scUMinExpr: -+ case scSMinExpr: - return cast(this)->getType(); - case scAddExpr: - return cast(this)->getType(); -@@ -718,7 +724,9 @@ static int CompareSCEVComplexity( - case scAddExpr: - case scMulExpr: - case scSMaxExpr: -- case scUMaxExpr: { -+ case scUMaxExpr: -+ case scSMinExpr: -+ case scUMinExpr: { - const SCEVNAryExpr *LC = cast(LHS); - const SCEVNAryExpr *RC = cast(RHS); - -@@ -922,6 +930,8 @@ public: - void visitUDivExpr(const SCEVUDivExpr *Numerator) {} - void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} - void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} -+ void visitSMinExpr(const SCEVSMinExpr *Numerator) {} -+ void visitUMinExpr(const SCEVUMinExpr *Numerator) {} - void visitUnknown(const SCEVUnknown *Numerator) {} - void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} - -@@ -2276,6 +2286,8 @@ bool ScalarEvolution::isAvailableAtLoopE - case scMulExpr: - case scUMaxExpr: - case scSMaxExpr: -+ case scUMinExpr: -+ case scSMinExpr: - case scUDivExpr: - return true; - case scUnknown: -@@ -3405,23 +3417,20 @@ ScalarEvolution::getGEPExpr(GEPOperator - return getAddExpr(BaseExpr, TotalOffset, Wrap); - } - --const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, -- const SCEV *RHS) { -- SmallVector Ops = {LHS, RHS}; -- return getSMaxExpr(Ops); --} -- - const SCEV * --ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { -- assert(!Ops.empty() && "Cannot get empty smax!"); -+ScalarEvolution::getUSMinMaxExpr(unsigned Kind, SmallVectorImpl &Ops) { -+ assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); - if (Ops.size() == 1) return Ops[0]; - #ifndef NDEBUG - Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); - for (unsigned i = 1, e = Ops.size(); i != e; ++i) - assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && -- "SCEVSMaxExpr operand types don't match!"); -+ "Operand types don't match!"); - #endif - -+ bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr; -+ bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr; -+ - // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, &LI, DT); - -@@ -3430,61 +3439,85 @@ ScalarEvolution::getSMaxExpr(SmallVector - if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { - ++Idx; - assert(Idx < Ops.size()); -+ auto &FoldOp = -+ Kind == scSMaxExpr ? APIntOps::smax : -+ Kind == scSMinExpr ? APIntOps::smin : -+ Kind == scUMaxExpr ? APIntOps::umax : -+ APIntOps::umin; - while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { - // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get( -- getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt())); -+ getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt())); - Ops[0] = getConstant(Fold); - Ops.erase(Ops.begin()+1); // Erase the folded element - if (Ops.size() == 1) return Ops[0]; - LHSC = cast(Ops[0]); - } - -- // If we are left with a constant minimum-int, strip it off. -- if (cast(Ops[0])->getValue()->isMinValue(true)) { -- Ops.erase(Ops.begin()); -- --Idx; -- } else if (cast(Ops[0])->getValue()->isMaxValue(true)) { -- // If we have an smax with a constant maximum-int, it will always be -- // maximum-int. -- return Ops[0]; -+ if (IsMax) { -+ // If we are left with a constant minimum-int, strip it off. -+ if (cast(Ops[0])->getValue()->isMinValue(IsSigned)) { -+ Ops.erase(Ops.begin()); -+ --Idx; -+ } else if (cast(Ops[0])->getValue()->isMaxValue(IsSigned)) { -+ // If we have an smax with a constant maximum-int, it will always be -+ // maximum-int. -+ return Ops[0]; -+ } -+ } else { -+ // If we are left with a constant maximum-int, strip it off. -+ if (cast(Ops[0])->getValue()->isMaxValue(IsSigned)) { -+ Ops.erase(Ops.begin()); -+ --Idx; -+ } else if (cast(Ops[0])->getValue()->isMinValue(IsSigned)) { -+ // If we have an smax with a constant minimum-int, it will always be -+ // maximum-int. -+ return Ops[0]; -+ } - } - - if (Ops.size() == 1) return Ops[0]; - } - -- // Find the first SMax -- while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) -+ // Find the first operation of the same kind -+ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() != Kind) - ++Idx; - - // Check to see if one of the operands is an SMax. If so, expand its operands - // onto our operand list, and recurse to simplify. - if (Idx < Ops.size()) { -- bool DeletedSMax = false; -- while (const SCEVSMaxExpr *SMax = dyn_cast(Ops[Idx])) { -+ bool DeletedAny = false; -+ while (Ops[Idx]->getSCEVType() == Kind) { -+ const SCEVCommutativeExpr *SCE = cast(Ops[Idx]); - Ops.erase(Ops.begin()+Idx); -- Ops.append(SMax->op_begin(), SMax->op_end()); -- DeletedSMax = true; -+ Ops.append(SCE->op_begin(), SCE->op_end()); -+ DeletedAny = true; - } - -- if (DeletedSMax) -- return getSMaxExpr(Ops); -+ if (DeletedAny) -+ return getUSMinMaxExpr(Kind, Ops); - } - - // Okay, check to see if the same value occurs in the operand list twice. If - // so, delete one. Since we sorted the list, these values are required to - // be adjacent. -- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) -- // X smax Y smax Y --> X smax Y -- // X smax Y --> X, if X is always greater than Y -- if (Ops[i] == Ops[i+1] || -- isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { -- Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); -- --i; --e; -- } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { -- Ops.erase(Ops.begin()+i, Ops.begin()+i+1); -- --i; --e; -- } -+ llvm::CmpInst::Predicate GEPred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; -+ llvm::CmpInst::Predicate LEPred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; -+ llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred; -+ llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred; -+ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) { -+ if (Ops[i] == Ops[i+1] || -+ isKnownPredicate(FirstPred, Ops[i], Ops[i+1])) { -+ // X op Y op Y --> X op Y -+ // X op Y --> X, if we know X, Y are ordered appropriately -+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); -+ --i; --e; -+ } else if (isKnownPredicate(SecondPred, Ops[i], Ops[i+1])) { -+ // X op Y --> Y, if we know X, Y are ordered appropriately -+ Ops.erase(Ops.begin()+i, Ops.begin()+i+1); -+ --i; --e; -+ } -+ } - - if (Ops.size() == 1) return Ops[0]; - -@@ -3493,132 +3526,73 @@ ScalarEvolution::getSMaxExpr(SmallVector - // Okay, it looks like we really DO need an smax expr. Check to see if we - // already have one, otherwise create a new one. - FoldingSetNodeID ID; -- ID.AddInteger(scSMaxExpr); -+ ID.AddInteger(Kind); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - ID.AddPointer(Ops[i]); - void *IP = nullptr; - if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - const SCEV **O = SCEVAllocator.Allocate(Ops.size()); - std::uninitialized_copy(Ops.begin(), Ops.end(), O); -- SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), -- O, Ops.size()); -+ SCEV *S = nullptr; -+ -+ if (Kind == scSMaxExpr) { -+ S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), -+ O, Ops.size()); -+ } else if (Kind == scUMaxExpr) { -+ S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), -+ O, Ops.size()); -+ } else if (Kind == scSMinExpr) { -+ S = new (SCEVAllocator) SCEVSMinExpr(ID.Intern(SCEVAllocator), -+ O, Ops.size()); -+ } else { -+ assert(Kind == scUMinExpr); -+ S = new (SCEVAllocator) SCEVUMinExpr(ID.Intern(SCEVAllocator), -+ O, Ops.size()); -+ } -+ - UniqueSCEVs.InsertNode(S, IP); - addToLoopUseLists(S); - return S; - } - --const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, -+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, - const SCEV *RHS) { - SmallVector Ops = {LHS, RHS}; -- return getUMaxExpr(Ops); -+ return getSMaxExpr(Ops); - } - --const SCEV * --ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { -- assert(!Ops.empty() && "Cannot get empty umax!"); -- if (Ops.size() == 1) return Ops[0]; --#ifndef NDEBUG -- Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); -- for (unsigned i = 1, e = Ops.size(); i != e; ++i) -- assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && -- "SCEVUMaxExpr operand types don't match!"); --#endif -- -- // Sort by complexity, this groups all similar expression types together. -- GroupByComplexity(Ops, &LI, DT); -- -- // If there are any constants, fold them together. -- unsigned Idx = 0; -- if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { -- ++Idx; -- assert(Idx < Ops.size()); -- while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { -- // We found two constants, fold them together! -- ConstantInt *Fold = ConstantInt::get( -- getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt())); -- Ops[0] = getConstant(Fold); -- Ops.erase(Ops.begin()+1); // Erase the folded element -- if (Ops.size() == 1) return Ops[0]; -- LHSC = cast(Ops[0]); -- } -- -- // If we are left with a constant minimum-int, strip it off. -- if (cast(Ops[0])->getValue()->isMinValue(false)) { -- Ops.erase(Ops.begin()); -- --Idx; -- } else if (cast(Ops[0])->getValue()->isMaxValue(false)) { -- // If we have an umax with a constant maximum-int, it will always be -- // maximum-int. -- return Ops[0]; -- } -- -- if (Ops.size() == 1) return Ops[0]; -- } -- -- // Find the first UMax -- while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) -- ++Idx; -- -- // Check to see if one of the operands is a UMax. If so, expand its operands -- // onto our operand list, and recurse to simplify. -- if (Idx < Ops.size()) { -- bool DeletedUMax = false; -- while (const SCEVUMaxExpr *UMax = dyn_cast(Ops[Idx])) { -- Ops.erase(Ops.begin()+Idx); -- Ops.append(UMax->op_begin(), UMax->op_end()); -- DeletedUMax = true; -- } -- -- if (DeletedUMax) -- return getUMaxExpr(Ops); -- } -- -- // Okay, check to see if the same value occurs in the operand list twice. If -- // so, delete one. Since we sorted the list, these values are required to -- // be adjacent. -- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) -- // X umax Y umax Y --> X umax Y -- // X umax Y --> X, if X is always greater than Y -- if (Ops[i] == Ops[i+1] || -- isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { -- Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); -- --i; --e; -- } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { -- Ops.erase(Ops.begin()+i, Ops.begin()+i+1); -- --i; --e; -- } -- -- if (Ops.size() == 1) return Ops[0]; -+const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { -+ return getUSMinMaxExpr(scSMaxExpr, Ops); -+} - -- assert(!Ops.empty() && "Reduced umax down to nothing!"); -+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, -+ const SCEV *RHS) { -+ SmallVector Ops = {LHS, RHS}; -+ return getUMaxExpr(Ops); -+} - -- // Okay, it looks like we really DO need a umax expr. Check to see if we -- // already have one, otherwise create a new one. -- FoldingSetNodeID ID; -- ID.AddInteger(scUMaxExpr); -- for (unsigned i = 0, e = Ops.size(); i != e; ++i) -- ID.AddPointer(Ops[i]); -- void *IP = nullptr; -- if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; -- const SCEV **O = SCEVAllocator.Allocate(Ops.size()); -- std::uninitialized_copy(Ops.begin(), Ops.end(), O); -- SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), -- O, Ops.size()); -- UniqueSCEVs.InsertNode(S, IP); -- addToLoopUseLists(S); -- return S; -+const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { -+ return getUSMinMaxExpr(scUMaxExpr, Ops); - } - - const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, - const SCEV *RHS) { -- // ~smax(~x, ~y) == smin(x, y). -- return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); -+ SmallVector Ops = { LHS, RHS }; -+ return getSMinExpr(Ops); -+} -+ -+const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl &Ops) { -+ return getUSMinMaxExpr(scSMinExpr, Ops); - } - - const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, - const SCEV *RHS) { -- // ~umax(~x, ~y) == umin(x, y) -- return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); -+ SmallVector Ops = { LHS, RHS }; -+ return getUMinExpr(Ops); -+} -+ -+const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl &Ops) { -+ return getUSMinMaxExpr(scUMinExpr, Ops); - } - - const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { -@@ -5002,6 +4976,7 @@ static bool IsAvailableOnEntry(const Loo - switch (S->getSCEVType()) { - case scConstant: case scTruncate: case scZeroExtend: case scSignExtend: - case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: -+ case scUMinExpr: case scSMinExpr: - // These expressions are available if their operand(s) is/are. - return true; - -@@ -7885,7 +7860,9 @@ static Constant *BuildConstantFromSCEV(c - } - case scSMaxExpr: - case scUMaxExpr: -- break; // TODO: smax, umax. -+ case scSMinExpr: -+ case scUMinExpr: -+ break; // TODO: smax, umax, smin, umax. - } - return nullptr; - } -@@ -8015,6 +7992,10 @@ const SCEV *ScalarEvolution::computeSCEV - return getSMaxExpr(NewOps); - if (isa(Comm)) - return getUMaxExpr(NewOps); -+ if (isa(Comm)) -+ return getSMinExpr(NewOps); -+ if (isa(Comm)) -+ return getUMinExpr(NewOps); - llvm_unreachable("Unknown commutative SCEV type!"); - } - } -@@ -10998,7 +10979,9 @@ ScalarEvolution::computeLoopDisposition( - case scAddExpr: - case scMulExpr: - case scUMaxExpr: -- case scSMaxExpr: { -+ case scSMaxExpr: -+ case scUMinExpr: -+ case scSMinExpr: { - bool HasVarying = false; - for (auto *Op : cast(S)->operands()) { - LoopDisposition D = getLoopDisposition(Op, L); -@@ -11085,7 +11068,9 @@ ScalarEvolution::computeBlockDisposition - case scAddExpr: - case scMulExpr: - case scUMaxExpr: -- case scSMaxExpr: { -+ case scSMaxExpr: -+ case scUMinExpr: -+ case scSMinExpr: { - const SCEVNAryExpr *NAry = cast(S); - bool Proper = true; - for (const SCEV *NAryOp : NAry->operands()) { -Index: llvm-toolchain-6.0-6.0.1/lib/Analysis/ScalarEvolutionExpander.cpp -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/lib/Analysis/ScalarEvolutionExpander.cpp -+++ llvm-toolchain-6.0-6.0.1/lib/Analysis/ScalarEvolutionExpander.cpp -@@ -1629,14 +1629,15 @@ Value *SCEVExpander::visitSMaxExpr(const - for (int i = S->getNumOperands()-2; i >= 0; --i) { - // In the case of mixed integer and pointer types, do the - // rest of the comparisons as integer. -- if (S->getOperand(i)->getType() != Ty) { -+ Type *OpTy = S->getOperand(i)->getType(); -+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { - Ty = SE.getEffectiveSCEVType(Ty); - LHS = InsertNoopCastOfTo(LHS, Ty); - } - Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); - rememberInstruction(ICmp); -- Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); -+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); - rememberInstruction(Sel); - LHS = Sel; - } -@@ -1653,13 +1654,64 @@ Value *SCEVExpander::visitUMaxExpr(const - for (int i = S->getNumOperands()-2; i >= 0; --i) { - // In the case of mixed integer and pointer types, do the - // rest of the comparisons as integer. -- if (S->getOperand(i)->getType() != Ty) { -+ Type *OpTy = S->getOperand(i)->getType(); -+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { - Ty = SE.getEffectiveSCEVType(Ty); - LHS = InsertNoopCastOfTo(LHS, Ty); - } - Value *RHS = expandCodeFor(S->getOperand(i), Ty); - Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); - rememberInstruction(ICmp); -+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); -+ rememberInstruction(Sel); -+ LHS = Sel; -+ } -+ // In the case of mixed integer and pointer types, cast the -+ // final result back to the pointer type. -+ if (LHS->getType() != S->getType()) -+ LHS = InsertNoopCastOfTo(LHS, S->getType()); -+ return LHS; -+} -+ -+Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { -+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); -+ Type *Ty = LHS->getType(); -+ for (int i = S->getNumOperands()-2; i >= 0; --i) { -+ // In the case of mixed integer and pointer types, do the -+ // rest of the comparisons as integer. -+ Type *OpTy = S->getOperand(i)->getType(); -+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { -+ Ty = SE.getEffectiveSCEVType(Ty); -+ LHS = InsertNoopCastOfTo(LHS, Ty); -+ } -+ Value *RHS = expandCodeFor(S->getOperand(i), Ty); -+ Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); -+ rememberInstruction(ICmp); -+ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); -+ rememberInstruction(Sel); -+ LHS = Sel; -+ } -+ // In the case of mixed integer and pointer types, cast the -+ // final result back to the pointer type. -+ if (LHS->getType() != S->getType()) -+ LHS = InsertNoopCastOfTo(LHS, S->getType()); -+ return LHS; -+} -+ -+Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { -+ Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); -+ Type *Ty = LHS->getType(); -+ for (int i = S->getNumOperands()-2; i >= 0; --i) { -+ // In the case of mixed integer and pointer types, do the -+ // rest of the comparisons as integer. -+ Type *OpTy = S->getOperand(i)->getType(); -+ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { -+ Ty = SE.getEffectiveSCEVType(Ty); -+ LHS = InsertNoopCastOfTo(LHS, Ty); -+ } -+ Value *RHS = expandCodeFor(S->getOperand(i), Ty); -+ Value *ICmp = Builder.CreateICmpULT(LHS, RHS); -+ rememberInstruction(ICmp); - Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); - rememberInstruction(Sel); - LHS = Sel; -Index: llvm-toolchain-6.0-6.0.1/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll -=================================================================== ---- /dev/null -+++ llvm-toolchain-6.0-6.0.1/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll -@@ -0,0 +1,50 @@ -+; RUN: opt -loop-versioning -S < %s | FileCheck %s -+ -+; NB: addrspaces 10-13 are non-integral -+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" -+ -+%jl_value_t = type opaque -+%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 } -+ -+define void @"japi1_permutedims!_33509"(%jl_value_t addrspace(10)**) { -+; CHECK: [[CMP:%[^ ]*]] = icmp ult double addrspace(13)* [[A:%[^ ]*]], [[B:%[^ ]*]] -+; CHECK: [[SELECT:%[^ ]*]] = select i1 %18, double addrspace(13)* [[A]], double addrspace(13)* [[B]] -+top: -+ %1 = alloca [3 x i64], align 8 -+ %2 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8 -+ %3 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, i64 1 -+ %4 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %3, align 8 -+ %5 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 0 -+ store i64 1, i64* %5, align 8 -+ %6 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 1 -+ %7 = load i64, i64* inttoptr (i64 24 to i64*), align 8 -+ %8 = addrspacecast %jl_value_t addrspace(10)* %4 to %jl_value_t addrspace(11)* -+ %9 = bitcast %jl_value_t addrspace(11)* %8 to double addrspace(13)* addrspace(11)* -+ %10 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %9, align 8 -+ %11 = addrspacecast %jl_value_t addrspace(10)* %2 to %jl_value_t addrspace(11)* -+ %12 = bitcast %jl_value_t addrspace(11)* %11 to double addrspace(13)* addrspace(11)* -+ %13 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %12, align 8 -+ %14 = load i64, i64* %6, align 8 -+ br label %L74 -+ -+L74: -+ %value_phi20 = phi i64 [ 1, %top ], [ %22, %L74 ] -+ %value_phi21 = phi i64 [ 1, %top ], [ %23, %L74 ] -+ %value_phi22 = phi i64 [ 1, %top ], [ %25, %L74 ] -+ %15 = add i64 %value_phi21, -1 -+ %16 = getelementptr inbounds double, double addrspace(13)* %10, i64 %15 -+ %17 = bitcast double addrspace(13)* %16 to i64 addrspace(13)* -+ %18 = load i64, i64 addrspace(13)* %17, align 8 -+ %19 = add i64 %value_phi20, -1 -+ %20 = getelementptr inbounds double, double addrspace(13)* %13, i64 %19 -+ %21 = bitcast double addrspace(13)* %20 to i64 addrspace(13)* -+ store i64 %18, i64 addrspace(13)* %21, align 8 -+ %22 = add i64 %value_phi20, 1 -+ %23 = add i64 %14, %value_phi21 -+ %24 = icmp eq i64 %value_phi22, %7 -+ %25 = add i64 %value_phi22, 1 -+ br i1 %24, label %L94, label %L74 -+ -+L94: -+ ret void -+} -Index: llvm-toolchain-6.0-6.0.1/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll -+++ llvm-toolchain-6.0-6.0.1/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll -@@ -58,7 +58,7 @@ for.end: - - ; Here it is not obvious what the limits are, since 'step' could be negative. - --; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a))))) -+; CHECK: Low: ((60000 + %a) umin (60000 + (-40000 * %step) + %a)) - ; CHECK: High: (4 + ((60000 + %a) umax (60000 + (-40000 * %step) + %a))) - - define void @g(i64 %step) { -Index: llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll -+++ llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll -@@ -22,5 +22,5 @@ afterfor: ; preds = %forinc, %entry - ret i32 %j.0.lcssa - } - --; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %{{[xy]}})) smax (-1 + (-1 * %{{[xy]}})))) -+; CHECK: backedge-taken count is (-2147483633 + (-1 * (%x smin %y))) - -Index: llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/min-max-exprs.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/ScalarEvolution/min-max-exprs.ll -+++ llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/min-max-exprs.ll -@@ -33,7 +33,7 @@ bb2: - %tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6 - ; min(N, i+3) - ; CHECK: select i1 %tmp4, i64 %tmp5, i64 %tmp6 --; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64)))))) -+; CHECK-NEXT: --> ((sext i32 {3,+,1}<%bb1> to i64) smin (sext i32 %N to i64)) - %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9 - %tmp12 = load i32, i32* %tmp11, align 4 - %tmp13 = shl nsw i32 %tmp12, 1 -Index: llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/pr28705.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/ScalarEvolution/pr28705.ll -+++ llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/pr28705.ll -@@ -5,7 +5,7 @@ - ; with "%.sroa.speculated + 1". - ; - ; CHECK-LABEL: @foo( --; CHECK: %[[EXIT:.+]] = sub i32 %.sroa.speculated, -1 -+; CHECK: %[[EXIT:.+]] = add i32 %.sroa.speculated, 1 - ; CHECK: %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %[[EXIT]], %loopexit ] - ; - define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr { -Index: llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/predicated-trip-count.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/ScalarEvolution/predicated-trip-count.ll -+++ llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/predicated-trip-count.ll -@@ -80,7 +80,7 @@ return: ; preds = %bb5 - ; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32) - ; CHECK: Loop %bb3: Unpredictable backedge-taken count. - ; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. --; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M)))) -+; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (sext i16 %Start to i32) + (-1 * ((1 + (sext i16 %Start to i32)) smin %M))) - ; CHECK-NEXT: Predicates: - ; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: - -Index: llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/trip-count3.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/ScalarEvolution/trip-count3.ll -+++ llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/trip-count3.ll -@@ -4,7 +4,7 @@ - ; dividing by the stride will have a remainder. This could theoretically - ; be teaching it how to use a more elaborate trip count computation. - --; CHECK: Loop %bb3.i: backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64) -+; CHECK: Loop %bb3.i: backedge-taken count is ((63 + (-1 * (63 smin %0)) + %0) /u 64) - ; CHECK: Loop %bb3.i: max backedge-taken count is 33554431 - - %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/conjunctive-checks.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/conjunctive-checks.ll -+++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/conjunctive-checks.ll -@@ -4,16 +4,6 @@ define void @f_0(i32 *%arr, i32 *%a_len_ - ; CHECK-LABEL: @f_0( - - ; CHECK: loop.preheader: --; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n --; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len --; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] --; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] --; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] --; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 --; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 --; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] --; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit -- - ; CHECK: loop.preheader2: - ; CHECK: br label %loop - -@@ -57,14 +47,10 @@ define void @f_1( - ; CHECK-LABEL: @f_1( - - ; CHECK: loop.preheader: --; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b --; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a --; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] --; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] --; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n --; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] --; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] --; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] -+; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a -+; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a -+; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n -+; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n - ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 - ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 - -Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/decrementing-loop.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/decrementing-loop.ll -+++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/decrementing-loop.ll -@@ -28,11 +28,8 @@ define void @decrementing_loop(i32 *%arr - ret void - - ; CHECK: loop.preheader: --; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len --; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n --; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]] --; CHECK: [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]] --; CHECK: [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]] -+; CHECK: [[len_hiclamp_cmp:[^ ]+]] = icmp slt i32 %len, %n -+; CHECK: [[len_hiclamp:[^ ]+]] = select i1 [[len_hiclamp_cmp]], i32 %len, i32 %n - ; CHECK: [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0 - ; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0 - ; CHECK: %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1 -Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/multiple-access-no-preloop.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/multiple-access-no-preloop.ll -+++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/multiple-access-no-preloop.ll -@@ -37,14 +37,10 @@ define void @multiple_access_no_preloop( - ; CHECK-LABEL: @multiple_access_no_preloop( - - ; CHECK: loop.preheader: --; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b --; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a --; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] --; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] --; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n --; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] --; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] --; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] -+; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a -+; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a -+; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n -+; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n - ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 - ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 - -Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/ranges_of_different_types.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/ranges_of_different_types.ll -+++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/ranges_of_different_types.ll -@@ -22,12 +22,11 @@ define void @test_01(i32* %arr, i32* %a_ - ; CHECK-NOT: preloop - ; CHECK: entry: - ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 --; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len --; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 --; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 --; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] --; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 --; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 -+; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13 -+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 -+; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 -+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0 -+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 - ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at - ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit - ; CHECK: loop -@@ -82,13 +81,11 @@ define void @test_02(i32* %arr, i32* %a_ - ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 - ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 - ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 --; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 --; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len --; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 --; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 --; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] --; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 --; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 -+; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] -+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 -+; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101 -+; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0 -+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0 - ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader - ; CHECK: loop.preloop: - ; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ] -@@ -150,14 +147,11 @@ define void @test_03(i32* %arr, i32* %a_ - ; CHECK-NOT: preloop - ; CHECK: entry: - ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 --; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len --; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len --; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 --; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 --; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] --; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 --; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 --; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] -+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13 -+; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 -+; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] -+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 -+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101 - ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at - ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit - ; CHECK: postloop: -@@ -207,10 +201,9 @@ define void @test_04(i32* %arr, i32* %a_ - ; CHECK-LABEL: test_04( - ; CHECK: entry: - ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 --; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len --; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 --; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 --; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] -+; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13 -+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 -+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 - ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader - ; CHECK: in.bounds.preloop: - ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop -@@ -251,12 +244,11 @@ define void @test_05(i32* %arr, i32* %a_ - ; CHECK-NOT: preloop - ; CHECK: entry: - ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 --; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len --; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 --; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 --; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] --; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 --; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 -+; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13 -+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 -+; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 -+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0 -+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 - ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at - ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit - ; CHECK: loop -@@ -296,13 +288,11 @@ define void @test_06(i32* %arr, i32* %a_ - ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 - ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 - ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 --; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 --; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len --; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 --; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 --; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] --; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 --; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 -+; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] -+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 -+; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101 -+; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0 -+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0 - ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader - ; CHECK: in.bounds.preloop: - ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop -@@ -343,14 +333,11 @@ define void @test_07(i32* %arr, i32* %a_ - ; CHECK-NOT: preloop - ; CHECK: entry: - ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 --; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len --; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len --; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 --; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 --; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] --; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 --; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 --; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] -+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13 -+; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 -+; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] -+; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 -+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101 - ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at - ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit - ; CHECK: loop -@@ -387,10 +374,9 @@ define void @test_08(i32* %arr, i32* %a_ - ; CHECK-LABEL: test_08( - ; CHECK: entry: - ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 --; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len --; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 --; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 --; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] -+; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13 -+; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 -+; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 - ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader - ; CHECK: in.bounds.preloop: - ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop -Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/single-access-no-preloop.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/single-access-no-preloop.ll -+++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/single-access-no-preloop.ll -@@ -85,11 +85,9 @@ define void @single_access_no_preloop_wi - ; CHECK-LABEL: @single_access_no_preloop_with_offset( - - ; CHECK: loop.preheader: --; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n --; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len --; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] --; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] --; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] -+; CHECK: [[safe_range_end:[^ ]+]] = add i32 %len, -4 -+; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]] -+; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[safe_range_end]] - ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 - ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 - ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] -Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/single-access-with-preloop.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/single-access-with-preloop.ll -+++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/single-access-with-preloop.ll -@@ -33,11 +33,9 @@ define void @single_access_with_preloop( - ; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647 - ; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647 - ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. --; CHECK: [[not_safe_start:[^ ]+]] = add i32 [[safe_offset_preloop]], -1 --; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n --; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]] --; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]] --; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]] -+; CHECK: [[safe_start:[^ ]+]] = sub i32 0, [[safe_offset_preloop]] -+; CHECK: [[exit_preloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_start]] -+; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = select i1 [[exit_preloop_at_cond_loclamp]], i32 %n, i32 [[safe_start]] - ; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0 - ; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0 - -@@ -45,17 +43,15 @@ define void @single_access_with_preloop( - ; CHECK: [[len_minus_sint_max:[^ ]+]] = add i32 %len, -2147483647 - ; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]] - ; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]] --; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 [[safe_offset_mainloop]], -1 - ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. --; CHECK: [[not_safe_upper_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len --; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_n]] --; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_n]] -+; CHECK: [[safe_upper_end:[^ ]+]] = sub i32 %len, [[safe_offset_mainloop]] -+; CHECK: [[exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_upper_end]] -+; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_loclamp]], i32 %n, i32 [[safe_upper_end]] - ; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0 - ; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0 --; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 [[safe_offset_mainloop_2]], -2147483648 --; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_safe_lower_end]] --; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_safe_lower_end]] --; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_hiclamp]] -+; CHECK: [[safe_lower_end:[^ ]+]] = sub i32 2147483647, [[safe_offset_mainloop_2]] -+; CHECK: [[exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp slt i32 [[exit_mainloop_at_loclamp]], [[safe_lower_end]] -+; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_hiclamp]], i32 [[exit_mainloop_at_loclamp]], i32 [[safe_lower_end]] - ; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_hiclamp]], 0 - ; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_hiclamp]], i32 0 - -Index: llvm-toolchain-6.0-6.0.1/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll -+++ llvm-toolchain-6.0-6.0.1/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll -@@ -14,8 +14,6 @@ target datalayout = "e-p:64:64:64-i1:8:8 - ; current LSR cost model. - ; CHECK-NOT: = ptrtoint i8* undef to i64 - ; CHECK: .lr.ph --; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1 --; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}} - ; CHECK: ret void - define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 { - bb: diff --git a/debian/patches/julia/llvm-PPC-addrspaces.patch b/debian/patches/julia/llvm-PPC-addrspaces.patch deleted file mode 100644 index 74836576..00000000 --- a/debian/patches/julia/llvm-PPC-addrspaces.patch +++ /dev/null @@ -1,26 +0,0 @@ -From 15899eaab58e96bb7bbe7a14099674e255656a50 Mon Sep 17 00:00:00 2001 -From: Valentin Churavy -Date: Fri, 23 Feb 2018 14:41:20 -0500 -Subject: [PATCH] Make AddrSpaceCast noops on PPC - -PPC as AArch64 doesn't have address-spaces so we can drop them in the backend ---- - lib/Target/PowerPC/PPCISelLowering.h | 5 +++++ - 1 file changed, 5 insertions(+) - -Index: llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h -=================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/lib/Target/PowerPC/PPCISelLowering.h -+++ llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h -@@ -889,6 +889,11 @@ namespace llvm { - return true; - } - -+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { -+ // Addrspacecasts are always noops. -+ return true; -+ } -+ - bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, - SelectionDAG &DAG, - ISD::LoadExtType ET = ISD::NON_EXTLOAD) const; diff --git a/debian/patches/julia/llvm-rL326967-aligned-load.patch b/debian/patches/julia/llvm-rL326967-aligned-load.patch deleted file mode 100644 index 62c11230..00000000 --- a/debian/patches/julia/llvm-rL326967-aligned-load.patch +++ /dev/null @@ -1,301 +0,0 @@ -commit b398d8e1fa5a5a914957fa22d0a64db97f6c265e -Author: Craig Topper -Date: Thu Mar 8 00:21:17 2018 +0000 - - [X86] Fix some isel patterns that used aligned vector load instructions with unaligned predicates. - - These patterns weren't checking the alignment of the load, but were using the aligned instructions. This will cause a GP fault if the data isn't aligned. - - I believe these were introduced in r312450. - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326967 91177308-0d34-0410-b5e6-96231b3b80d8 - -diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td -index db3dfe56531..50c7763a2c3 100644 ---- a/lib/Target/X86/X86InstrVecCompiler.td -+++ b/lib/Target/X86/X86InstrVecCompiler.td -@@ -261,10 +261,10 @@ let Predicates = [HasVLX] in { - // will zero the upper bits. - // TODO: Is there a safe way to detect whether the producing instruction - // already zeroed the upper bits? --multiclass subvector_zero_lowering { -+multiclass subvector_zero_lowering { - def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)), - (SrcTy RC:$src), (iPTR 0))), - (SUBREG_TO_REG (i64 0), -@@ -274,91 +274,91 @@ multiclass subvector_zero_lowering("VMOV"#MoveStr#"rm") addr:$src), SubIdx)>; -+ (!cast("VMOV"#LoadStr#"rm") addr:$src), SubIdx)>; - } - - let Predicates = [HasAVX, NoVLX] in { -- defm : subvector_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, loadv2f64, -- sub_xmm>; -- defm : subvector_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, loadv4f32, -- sub_xmm>; -- defm : subvector_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, loadv2i64, -- sub_xmm>; -- defm : subvector_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, loadv2i64, -- sub_xmm>; -- defm : subvector_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, loadv2i64, -- sub_xmm>; -- defm : subvector_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, loadv2i64, -- sub_xmm>; --} -- --let Predicates = [HasVLX] in { -- defm : subvector_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, -+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v4f64, v2f64, v8i32, - loadv2f64, sub_xmm>; -- defm : subvector_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, -+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v8f32, v4f32, v8i32, - loadv4f32, sub_xmm>; -- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, -+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v4i64, v2i64, v8i32, - loadv2i64, sub_xmm>; -- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, -+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i32, v4i32, v8i32, - loadv2i64, sub_xmm>; -- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, -+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i16, v8i16, v8i32, - loadv2i64, sub_xmm>; -- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, -- loadv2i64, sub_xmm>; -- -- defm : subvector_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, -- loadv2f64, sub_xmm>; -- defm : subvector_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, -- loadv4f32, sub_xmm>; -- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, -- loadv2i64, sub_xmm>; -- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, -- loadv2i64, sub_xmm>; -- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, -- loadv2i64, sub_xmm>; -- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, -+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i8, v16i8, v8i32, - loadv2i64, sub_xmm>; -+} - -- defm : subvector_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, -- loadv4f64, sub_ymm>; -- defm : subvector_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, -- loadv8f32, sub_ymm>; -- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, -- loadv4i64, sub_ymm>; -- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, -- loadv4i64, sub_ymm>; -- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, -- loadv4i64, sub_ymm>; -- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, -- loadv4i64, sub_ymm>; -+let Predicates = [HasVLX] in { -+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v4f64, -+ v2f64, v8i32, loadv2f64, sub_xmm>; -+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v8f32, -+ v4f32, v8i32, loadv4f32, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v4i64, -+ v2i64, v8i32, loadv2i64, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i32, -+ v4i32, v8i32, loadv2i64, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i16, -+ v8i16, v8i32, loadv2i64, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i8, -+ v16i8, v8i32, loadv2i64, sub_xmm>; -+ -+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v8f64, -+ v2f64, v16i32, loadv2f64, sub_xmm>; -+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v16f32, -+ v4f32, v16i32, loadv4f32, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i64, -+ v2i64, v16i32, loadv2i64, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i32, -+ v4i32, v16i32, loadv2i64, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i16, -+ v8i16, v16i32, loadv2i64, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v64i8, -+ v16i8, v16i32, loadv2i64, sub_xmm>; -+ -+ defm : subvector_zero_lowering<"APDZ256", "UPDZ256", VR256X, v8f64, -+ v4f64, v16i32, loadv4f64, sub_ymm>; -+ defm : subvector_zero_lowering<"APSZ256", "UPDZ256", VR256X, v16f32, -+ v8f32, v16i32, loadv8f32, sub_ymm>; -+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v8i64, -+ v4i64, v16i32, loadv4i64, sub_ymm>; -+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v16i32, -+ v8i32, v16i32, loadv4i64, sub_ymm>; -+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v32i16, -+ v16i16, v16i32, loadv4i64, sub_ymm>; -+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v64i8, -+ v32i8, v16i32, loadv4i64, sub_ymm>; - } - - let Predicates = [HasAVX512, NoVLX] in { -- defm : subvector_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, loadv2f64, -- sub_xmm>; -- defm : subvector_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, loadv4f32, -- sub_xmm>; -- defm : subvector_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, loadv2i64, -- sub_xmm>; -- defm : subvector_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, loadv2i64, -- sub_xmm>; -- defm : subvector_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, loadv2i64, -- sub_xmm>; -- defm : subvector_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, loadv2i64, -- sub_xmm>; -- -- defm : subvector_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, -- loadv4f64, sub_ymm>; -- defm : subvector_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, -- loadv8f32, sub_ymm>; -- defm : subvector_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, -- loadv4i64, sub_ymm>; -- defm : subvector_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, -- loadv4i64, sub_ymm>; -- defm : subvector_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, -- loadv4i64, sub_ymm>; -- defm : subvector_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, -- loadv4i64, sub_ymm>; -+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v8f64, v2f64, -+ v16i32,loadv2f64, sub_xmm>; -+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v16f32, v4f32, -+ v16i32, loadv4f32, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i64, v2i64, -+ v16i32, loadv2i64, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i32, v4i32, -+ v16i32, loadv2i64, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i16, v8i16, -+ v16i32, loadv2i64, sub_xmm>; -+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v64i8, v16i8, -+ v16i32, loadv2i64, sub_xmm>; -+ -+ defm : subvector_zero_lowering<"APDY", "UPDY", VR256, v8f64, v4f64, -+ v16i32, loadv4f64, sub_ymm>; -+ defm : subvector_zero_lowering<"APSY", "UPSY", VR256, v16f32, v8f32, -+ v16i32, loadv8f32, sub_ymm>; -+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v8i64, v4i64, -+ v16i32, loadv4i64, sub_ymm>; -+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v16i32, v8i32, -+ v16i32, loadv4i64, sub_ymm>; -+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v32i16, v16i16, -+ v16i32, loadv4i64, sub_ymm>; -+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v64i8, v32i8, -+ v16i32, loadv4i64, sub_ymm>; - } - - // List of opcodes that guaranteed to zero the upper elements of vector regs. -diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll -index 6ecd8116443..0f2cf594b1c 100644 ---- a/test/CodeGen/X86/merge-consecutive-loads-256.ll -+++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll -@@ -28,13 +28,13 @@ define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noi - define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp { - ; AVX-LABEL: merge_4f64_2f64_2z: - ; AVX: # %bb.0: --; AVX-NEXT: vmovaps 32(%rdi), %xmm0 -+; AVX-NEXT: vmovups 32(%rdi), %xmm0 - ; AVX-NEXT: retq - ; - ; X32-AVX-LABEL: merge_4f64_2f64_2z: - ; X32-AVX: # %bb.0: - ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax --; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0 -+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0 - ; X32-AVX-NEXT: retl - %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2 - %val0 = load <2 x double>, <2 x double>* %ptr0 -@@ -109,13 +109,13 @@ define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline - define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp { - ; AVX-LABEL: merge_4f64_f64_45zz: - ; AVX: # %bb.0: --; AVX-NEXT: vmovaps 32(%rdi), %xmm0 -+; AVX-NEXT: vmovups 32(%rdi), %xmm0 - ; AVX-NEXT: retq - ; - ; X32-AVX-LABEL: merge_4f64_f64_45zz: - ; X32-AVX: # %bb.0: - ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax --; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0 -+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0 - ; X32-AVX-NEXT: retl - %ptr0 = getelementptr inbounds double, double* %ptr, i64 4 - %ptr1 = getelementptr inbounds double, double* %ptr, i64 5 -@@ -155,13 +155,13 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline - define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp { - ; AVX-LABEL: merge_4i64_2i64_3z: - ; AVX: # %bb.0: --; AVX-NEXT: vmovaps 48(%rdi), %xmm0 -+; AVX-NEXT: vmovups 48(%rdi), %xmm0 - ; AVX-NEXT: retq - ; - ; X32-AVX-LABEL: merge_4i64_2i64_3z: - ; X32-AVX: # %bb.0: - ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax --; X32-AVX-NEXT: vmovaps 48(%eax), %xmm0 -+; X32-AVX-NEXT: vmovups 48(%eax), %xmm0 - ; X32-AVX-NEXT: retl - %ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3 - %val0 = load <2 x i64>, <2 x i64>* %ptr0 -@@ -217,13 +217,13 @@ define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp { - define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp { - ; AVX-LABEL: merge_4i64_i64_23zz: - ; AVX: # %bb.0: --; AVX-NEXT: vmovaps 16(%rdi), %xmm0 -+; AVX-NEXT: vmovups 16(%rdi), %xmm0 - ; AVX-NEXT: retq - ; - ; X32-AVX-LABEL: merge_4i64_i64_23zz: - ; X32-AVX: # %bb.0: - ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax --; X32-AVX-NEXT: vmovaps 16(%eax), %xmm0 -+; X32-AVX-NEXT: vmovups 16(%eax), %xmm0 - ; X32-AVX-NEXT: retl - %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2 - %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3 -diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll -index 62102eb382c..3c6eaf65292 100644 ---- a/test/CodeGen/X86/merge-consecutive-loads-512.ll -+++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll -@@ -106,13 +106,13 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin - define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp { - ; ALL-LABEL: merge_8f64_f64_12zzuuzz: - ; ALL: # %bb.0: --; ALL-NEXT: vmovaps 8(%rdi), %xmm0 -+; ALL-NEXT: vmovups 8(%rdi), %xmm0 - ; ALL-NEXT: retq - ; - ; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz: - ; X32-AVX512F: # %bb.0: - ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax --; X32-AVX512F-NEXT: vmovaps 8(%eax), %xmm0 -+; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0 - ; X32-AVX512F-NEXT: retl - %ptr0 = getelementptr inbounds double, double* %ptr, i64 1 - %ptr1 = getelementptr inbounds double, double* %ptr, i64 2 -@@ -190,7 +190,7 @@ define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline - define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp { - ; ALL-LABEL: merge_8i64_i64_56zz9uzz: - ; ALL: # %bb.0: --; ALL-NEXT: vmovaps 40(%rdi), %xmm0 -+; ALL-NEXT: vmovups 40(%rdi), %xmm0 - ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero - ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 - ; ALL-NEXT: retq -@@ -198,7 +198,7 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s - ; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz: - ; X32-AVX512F: # %bb.0: - ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax --; X32-AVX512F-NEXT: vmovaps 40(%eax), %xmm0 -+; X32-AVX512F-NEXT: vmovups 40(%eax), %xmm0 - ; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero - ; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 - ; X32-AVX512F-NEXT: retl diff --git a/debian/patches/julia/llvm-rL327898.patch b/debian/patches/julia/llvm-rL327898.patch deleted file mode 100644 index f4d9a430..00000000 --- a/debian/patches/julia/llvm-rL327898.patch +++ /dev/null @@ -1,6131 +0,0 @@ -commit 64c3384f94a1eb3e3510d6f66c3bccdfc9d9050b -Author: Nirav Dave -Date: Thu Feb 1 16:11:59 2018 +0000 - - r327898/dependencies roll up - - This is a squash of 13 commits required in the lead up to r327898, - which fixes https://github.com/JuliaLang/julia/issues/27603. The squashed - commits are: - - 332d15e981e86b9e058087174bb288ba18a15807 - b659d3fca5d24c25ee73f979edb382f7f24e05e2 - c01d1363ea080170fc5143d72f26eecd9270f03b - eab8a177a4caef9e42ef1d2aeb4ba15dc788d3f2 - bedb1391781b009ace95f5586e7fae5f03fe0689 - 11d041a905f82ac78e7ccf2394773e80b93d147c - e1ec36c55a0127988f42a3329ca835617b30de09 - b8d2903300c13d8fd151c8e5dc71017269617539 - 00884fea345f47ab05174a8f314ecd60d1676d02 - 28ab04cec0d9888af9d29946b3a048b8340abe0f - 3dd52e62ea3087efcca63c3772183d9471abc742 - bd3649ff6d6b4d18b3c6de253179d987a120518a - aea03035b9c633e6d745b6d3fc5b6378699f576c - - Their commit messages follow below: - - [SelectionDAG] Fix UpdateChains handling of TokenFactors - - Summary: - In Instruction Selection UpdateChains replaces all matched Nodes' - chain references including interior token factors and deletes them. - This may allow nodes which depend on these interior nodes but are not - part of the set of matched nodes to be left with a dangling dependence. - Avoid this by doing the replacement for matched non-TokenFactor nodes. - - Fixes PR36164. - - Reviewers: jonpa, RKSimon, bogner - - Subscribers: llvm-commits, hiraditya - - Differential Revision: https://reviews.llvm.org/D42754 - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323977 91177308-0d34-0410-b5e6-96231b3b80d8 - - Regenerate test result for vastart-defs-eflags.ll. NFC. - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323596 91177308-0d34-0410-b5e6-96231b3b80d8 - - Regenerate test result for testb-je-fusion.ll. NFC. - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323595 91177308-0d34-0410-b5e6-96231b3b80d8 - - [X86] Avoid using high register trick for test instruction - - Summary: - It seems it's main effect is to create addition copies when values are inr register that do not support this trick, which increase register pressure and makes the code bigger. - - Reviewers: craig.topper, niravd, spatel, hfinkel - - Subscribers: llvm-commits - - Differential Revision: https://reviews.llvm.org/D42646 - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323888 91177308-0d34-0410-b5e6-96231b3b80d8 - - Add a regression test for problems caused by D42646 . NFC - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323868 91177308-0d34-0410-b5e6-96231b3b80d8 - - Add test case for truncated and promotion to test. NFC - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323663 91177308-0d34-0410-b5e6-96231b3b80d8 - - [X86] Add test case to ensure testw is generated when optimizing for size. NFC - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323687 91177308-0d34-0410-b5e6-96231b3b80d8 - - [X86] Generate testl instruction through truncates. - - Summary: - This was introduced in D42646 but ended up being reverted because the original implementation was buggy. - - Depends on D42646 - - Reviewers: craig.topper, niravd, spatel, hfinkel - - Subscribers: llvm-commits - - Differential Revision: https://reviews.llvm.org/D42741 - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323899 91177308-0d34-0410-b5e6-96231b3b80d8 - - [X86] Don't look for TEST instruction shrinking opportunities when the root node is a X86ISD::SUB. - - I don't believe we ever create an X86ISD::SUB with a 0 constant which is what the TEST handling needs. The ternary operator at the end of this code shows up as only going one way in the llvm-cov report from the bots. - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@324865 91177308-0d34-0410-b5e6-96231b3b80d8 - - [X86] Teach LowerBUILD_VECTOR to recognize pair-wise splats of 32-bit elements and use a 64-bit broadcast - - If we are splatting pairs of 32-bit elements, we can use a 64-bit broadcast to get the job done. - - We could probably could probably do this with other sizes too, for example four 16-bit elements. Or we could broadcast pairs of 16-bit elements using a 32-bit element broadcast. But I've left that as a future improvement. - - I've also restricted this to AVX2 only because we can only broadcast loads under AVX. - - Differential Revision: https://reviews.llvm.org/D42086 - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@322730 91177308-0d34-0410-b5e6-96231b3b80d8 - - [DAG, X86] Revert r327197 "Revert r327170, r327171, r327172" - - Reland ISel cycle checking improvements after simplifying node id - invariant traversal and correcting typo. - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@327898 91177308-0d34-0410-b5e6-96231b3b80d8 - - [ Modified for cherry-pick: Dropped Hexagon and SystemZ changes" - - [DAG, X86] Fix ISel-time node insertion ids - - As in SystemZ backend, correctly propagate node ids when inserting new - unselected nodes into the DAG during instruction Seleciton for X86 - target. - - Fixes PR36865. - - Reviewers: jyknight, craig.topper - - Subscribers: hiraditya, llvm-commits - - Differential Revision: https://reviews.llvm.org/D44797 - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@328233 91177308-0d34-0410-b5e6-96231b3b80d8 - - [DAG] Fix node id invalidation in Instruction Selection. - - Invalidation should be bit negation. Add missing negation. - - git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@328287 91177308-0d34-0410-b5e6-96231b3b80d8 - - Remove failing tests - - This removes tests that are failing due to codegen differences, - after the latest set of backports. Fixing thse for the backport - branch does not seem worth it. - -diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h -index de6849a1eae..e56eafc437c 100644 ---- a/include/llvm/CodeGen/SelectionDAGISel.h -+++ b/include/llvm/CodeGen/SelectionDAGISel.h -@@ -110,6 +110,11 @@ public: - CodeGenOpt::Level OptLevel, - bool IgnoreChains = false); - -+ static void InvalidateNodeId(SDNode *N); -+ static int getUninvalidatedNodeId(SDNode *N); -+ -+ static void EnforceNodeIdInvariant(SDNode *N); -+ - // Opcodes used by the DAG state machine: - enum BuiltinOpcodes { - OPC_Scope, -@@ -199,23 +204,28 @@ protected: - /// of the new node T. - void ReplaceUses(SDValue F, SDValue T) { - CurDAG->ReplaceAllUsesOfValueWith(F, T); -+ EnforceNodeIdInvariant(T.getNode()); - } - - /// ReplaceUses - replace all uses of the old nodes F with the use - /// of the new nodes T. - void ReplaceUses(const SDValue *F, const SDValue *T, unsigned Num) { - CurDAG->ReplaceAllUsesOfValuesWith(F, T, Num); -+ for (unsigned i = 0; i < Num; ++i) -+ EnforceNodeIdInvariant(T[i].getNode()); - } - - /// ReplaceUses - replace all uses of the old node F with the use - /// of the new node T. - void ReplaceUses(SDNode *F, SDNode *T) { - CurDAG->ReplaceAllUsesWith(F, T); -+ EnforceNodeIdInvariant(T); - } - - /// Replace all uses of \c F with \c T, then remove \c F from the DAG. - void ReplaceNode(SDNode *F, SDNode *T) { - CurDAG->ReplaceAllUsesWith(F, T); -+ EnforceNodeIdInvariant(T); - CurDAG->RemoveDeadNode(F); - } - -diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h -index 522c2f1b2cb..2d974234abf 100644 ---- a/include/llvm/CodeGen/SelectionDAGNodes.h -+++ b/include/llvm/CodeGen/SelectionDAGNodes.h -@@ -796,16 +796,44 @@ public: - /// searches to be performed in parallel, caching of results across - /// queries and incremental addition to Worklist. Stops early if N is - /// found but will resume. Remember to clear Visited and Worklists -- /// if DAG changes. -+ /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before -+ /// giving up. The TopologicalPrune flag signals that positive NodeIds are -+ /// topologically ordered (Operands have strictly smaller node id) and search -+ /// can be pruned leveraging this. - static bool hasPredecessorHelper(const SDNode *N, - SmallPtrSetImpl &Visited, - SmallVectorImpl &Worklist, -- unsigned int MaxSteps = 0) { -+ unsigned int MaxSteps = 0, -+ bool TopologicalPrune = false) { -+ SmallVector DeferredNodes; - if (Visited.count(N)) - return true; -+ -+ // Node Id's are assigned in three places: As a topological -+ // ordering (> 0), during legalization (results in values set to -+ // 0), new nodes (set to -1). If N has a topolgical id then we -+ // know that all nodes with ids smaller than it cannot be -+ // successors and we need not check them. Filter out all node -+ // that can't be matches. We add them to the worklist before exit -+ // in case of multiple calls. Note that during selection the topological id -+ // may be violated if a node's predecessor is selected before it. We mark -+ // this at selection negating the id of unselected successors and -+ // restricting topological pruning to positive ids. -+ -+ int NId = N->getNodeId(); -+ // If we Invalidated the Id, reconstruct original NId. -+ if (NId < -1) -+ NId = -(NId + 1); -+ -+ bool Found = false; - while (!Worklist.empty()) { - const SDNode *M = Worklist.pop_back_val(); -- bool Found = false; -+ int MId = M->getNodeId(); -+ if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) && -+ (MId > 0) && (MId < NId)) { -+ DeferredNodes.push_back(M); -+ continue; -+ } - for (const SDValue &OpV : M->op_values()) { - SDNode *Op = OpV.getNode(); - if (Visited.insert(Op).second) -@@ -814,11 +842,16 @@ public: - Found = true; - } - if (Found) -- return true; -+ break; - if (MaxSteps != 0 && Visited.size() >= MaxSteps) -- return false; -+ break; - } -- return false; -+ // Push deferred nodes back on worklist. -+ Worklist.append(DeferredNodes.begin(), DeferredNodes.end()); -+ // If we bailed early, conservatively return found. -+ if (MaxSteps != 0 && Visited.size() >= MaxSteps) -+ return true; -+ return Found; - } - - /// Return true if all the users of N are contained in Nodes. -diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp -index bd9fcfb5c1e..17e42240133 100644 ---- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp -+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp -@@ -937,6 +937,58 @@ public: - - } // end anonymous namespace - -+// This function is used to enforce the topological node id property -+// property leveraged during Instruction selection. Before selection all -+// nodes are given a non-negative id such that all nodes have a larger id than -+// their operands. As this holds transitively we can prune checks that a node N -+// is a predecessor of M another by not recursively checking through M's -+// operands if N's ID is larger than M's ID. This is significantly improves -+// performance of for various legality checks (e.g. IsLegalToFold / -+// UpdateChains). -+ -+// However, when we fuse multiple nodes into a single node -+// during selection we may induce a predecessor relationship between inputs and -+// outputs of distinct nodes being merged violating the topological property. -+// Should a fused node have a successor which has yet to be selected, our -+// legality checks would be incorrect. To avoid this we mark all unselected -+// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x => -+// (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M. -+// We use bit-negation to more clearly enforce that node id -1 can only be -+// achieved by selected nodes). As the conversion is reversable the original Id, -+// topological pruning can still be leveraged when looking for unselected nodes. -+// This method is call internally in all ISel replacement calls. -+void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) { -+ SmallVector Nodes; -+ Nodes.push_back(Node); -+ -+ while (!Nodes.empty()) { -+ SDNode *N = Nodes.pop_back_val(); -+ for (auto *U : N->uses()) { -+ auto UId = U->getNodeId(); -+ if (UId > 0) { -+ InvalidateNodeId(U); -+ Nodes.push_back(U); -+ } -+ } -+ } -+} -+ -+// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a -+// NodeId with the equivalent node id which is invalid for topological -+// pruning. -+void SelectionDAGISel::InvalidateNodeId(SDNode *N) { -+ int InvalidId = -(N->getNodeId() + 1); -+ N->setNodeId(InvalidId); -+} -+ -+// getUninvalidatedNodeId - get original uninvalidated node id. -+int SelectionDAGISel::getUninvalidatedNodeId(SDNode *N) { -+ int Id = N->getNodeId(); -+ if (Id < -1) -+ return -(Id + 1); -+ return Id; -+} -+ - void SelectionDAGISel::DoInstructionSelection() { - DEBUG(dbgs() << "===== Instruction selection begins: " - << printMBBReference(*FuncInfo->MBB) << " '" -@@ -972,6 +1024,33 @@ void SelectionDAGISel::DoInstructionSelection() { - if (Node->use_empty()) - continue; - -+#ifndef NDEBUG -+ SmallVector Nodes; -+ Nodes.push_back(Node); -+ -+ while (!Nodes.empty()) { -+ auto N = Nodes.pop_back_val(); -+ if (N->getOpcode() == ISD::TokenFactor || N->getNodeId() < 0) -+ continue; -+ for (const SDValue &Op : N->op_values()) { -+ if (Op->getOpcode() == ISD::TokenFactor) -+ Nodes.push_back(Op.getNode()); -+ else { -+ // We rely on topological ordering of node ids for checking for -+ // cycles when fusing nodes during selection. All unselected nodes -+ // successors of an already selected node should have a negative id. -+ // This assertion will catch such cases. If this assertion triggers -+ // it is likely you using DAG-level Value/Node replacement functions -+ // (versus equivalent ISEL replacement) in backend-specific -+ // selections. See comment in EnforceNodeIdInvariant for more -+ // details. -+ assert(Op->getNodeId() != -1 && -+ "Node has already selected predecessor node"); -+ } -+ } -+ } -+#endif -+ - // When we are using non-default rounding modes or FP exception behavior - // FP operations are represented by StrictFP pseudo-operations. They - // need to be simplified here so that the target-specific instruction -@@ -2134,52 +2213,44 @@ static SDNode *findGlueUse(SDNode *N) { - return nullptr; - } - --/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". --/// This function iteratively traverses up the operand chain, ignoring --/// certain nodes. --static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, -- SDNode *Root, SmallPtrSetImpl &Visited, -+/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path -+/// beyond "ImmedUse". We may ignore chains as they are checked separately. -+static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse, - bool IgnoreChains) { -- // The NodeID's are given uniques ID's where a node ID is guaranteed to be -- // greater than all of its (recursive) operands. If we scan to a point where -- // 'use' is smaller than the node we're scanning for, then we know we will -- // never find it. -- // -- // The Use may be -1 (unassigned) if it is a newly allocated node. This can -- // happen because we scan down to newly selected nodes in the case of glue -- // uses. -- std::vector WorkList; -- WorkList.push_back(Use); -- -- while (!WorkList.empty()) { -- Use = WorkList.back(); -- WorkList.pop_back(); -- if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1) -- continue; -+ SmallPtrSet Visited; -+ SmallVector WorkList; -+ // Only check if we have non-immediate uses of Def. -+ if (ImmedUse->isOnlyUserOf(Def)) -+ return false; - -- // Don't revisit nodes if we already scanned it and didn't fail, we know we -- // won't fail if we scan it again. -- if (!Visited.insert(Use).second) -+ // We don't care about paths to Def that go through ImmedUse so mark it -+ // visited and mark non-def operands as used. -+ Visited.insert(ImmedUse); -+ for (const SDValue &Op : ImmedUse->op_values()) { -+ SDNode *N = Op.getNode(); -+ // Ignore chain deps (they are validated by -+ // HandleMergeInputChains) and immediate uses -+ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def) - continue; -+ if (!Visited.insert(N).second) -+ continue; -+ WorkList.push_back(N); -+ } - -- for (const SDValue &Op : Use->op_values()) { -- // Ignore chain uses, they are validated by HandleMergeInputChains. -- if (Op.getValueType() == MVT::Other && IgnoreChains) -- continue; -- -+ // Initialize worklist to operands of Root. -+ if (Root != ImmedUse) { -+ for (const SDValue &Op : Root->op_values()) { - SDNode *N = Op.getNode(); -- if (N == Def) { -- if (Use == ImmedUse || Use == Root) -- continue; // We are not looking for immediate use. -- assert(N != Root); -- return true; -- } -- -- // Traverse up the operand chain. -+ // Ignore chains (they are validated by HandleMergeInputChains) -+ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def) -+ continue; -+ if (!Visited.insert(N).second) -+ continue; - WorkList.push_back(N); - } - } -- return false; -+ -+ return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true); - } - - /// IsProfitableToFold - Returns true if it's profitable to fold the specific -@@ -2251,13 +2322,12 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, - - // If our query node has a glue result with a use, we've walked up it. If - // the user (which has already been selected) has a chain or indirectly uses -- // the chain, our WalkChainUsers predicate will not consider it. Because of -+ // the chain, HandleMergeInputChains will not consider it. Because of - // this, we cannot ignore chains in this predicate. - IgnoreChains = false; - } - -- SmallPtrSet Visited; -- return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); -+ return !findNonImmUse(Root, N.getNode(), U, IgnoreChains); - } - - void SelectionDAGISel::Select_INLINEASM(SDNode *N) { -@@ -2360,7 +2430,8 @@ void SelectionDAGISel::UpdateChains( - std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N, - static_cast(nullptr)); - }); -- CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); -+ if (ChainNode->getOpcode() != ISD::TokenFactor) -+ ReplaceUses(ChainVal, InputChain); - - // If the node became dead and we haven't already seen it, delete it. - if (ChainNode != NodeToMatch && ChainNode->use_empty() && -@@ -2375,143 +2446,6 @@ void SelectionDAGISel::UpdateChains( - DEBUG(dbgs() << "ISEL: Match complete!\n"); - } - --enum ChainResult { -- CR_Simple, -- CR_InducesCycle, -- CR_LeadsToInteriorNode --}; -- --/// WalkChainUsers - Walk down the users of the specified chained node that is --/// part of the pattern we're matching, looking at all of the users we find. --/// This determines whether something is an interior node, whether we have a --/// non-pattern node in between two pattern nodes (which prevent folding because --/// it would induce a cycle) and whether we have a TokenFactor node sandwiched --/// between pattern nodes (in which case the TF becomes part of the pattern). --/// --/// The walk we do here is guaranteed to be small because we quickly get down to --/// already selected nodes "below" us. --static ChainResult --WalkChainUsers(const SDNode *ChainedNode, -- SmallVectorImpl &ChainedNodesInPattern, -- DenseMap &TokenFactorResult, -- SmallVectorImpl &InteriorChainedNodes) { -- ChainResult Result = CR_Simple; -- -- for (SDNode::use_iterator UI = ChainedNode->use_begin(), -- E = ChainedNode->use_end(); UI != E; ++UI) { -- // Make sure the use is of the chain, not some other value we produce. -- if (UI.getUse().getValueType() != MVT::Other) continue; -- -- SDNode *User = *UI; -- -- if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph. -- continue; -- -- // If we see an already-selected machine node, then we've gone beyond the -- // pattern that we're selecting down into the already selected chunk of the -- // DAG. -- unsigned UserOpcode = User->getOpcode(); -- if (User->isMachineOpcode() || -- UserOpcode == ISD::CopyToReg || -- UserOpcode == ISD::CopyFromReg || -- UserOpcode == ISD::INLINEASM || -- UserOpcode == ISD::EH_LABEL || -- UserOpcode == ISD::LIFETIME_START || -- UserOpcode == ISD::LIFETIME_END) { -- // If their node ID got reset to -1 then they've already been selected. -- // Treat them like a MachineOpcode. -- if (User->getNodeId() == -1) -- continue; -- } -- -- // If we have a TokenFactor, we handle it specially. -- if (User->getOpcode() != ISD::TokenFactor) { -- // If the node isn't a token factor and isn't part of our pattern, then it -- // must be a random chained node in between two nodes we're selecting. -- // This happens when we have something like: -- // x = load ptr -- // call -- // y = x+4 -- // store y -> ptr -- // Because we structurally match the load/store as a read/modify/write, -- // but the call is chained between them. We cannot fold in this case -- // because it would induce a cycle in the graph. -- if (!std::count(ChainedNodesInPattern.begin(), -- ChainedNodesInPattern.end(), User)) -- return CR_InducesCycle; -- -- // Otherwise we found a node that is part of our pattern. For example in: -- // x = load ptr -- // y = x+4 -- // store y -> ptr -- // This would happen when we're scanning down from the load and see the -- // store as a user. Record that there is a use of ChainedNode that is -- // part of the pattern and keep scanning uses. -- Result = CR_LeadsToInteriorNode; -- InteriorChainedNodes.push_back(User); -- continue; -- } -- -- // If we found a TokenFactor, there are two cases to consider: first if the -- // TokenFactor is just hanging "below" the pattern we're matching (i.e. no -- // uses of the TF are in our pattern) we just want to ignore it. Second, -- // the TokenFactor can be sandwiched in between two chained nodes, like so: -- // [Load chain] -- // ^ -- // | -- // [Load] -- // ^ ^ -- // | \ DAG's like cheese -- // / \ do you? -- // / | -- // [TokenFactor] [Op] -- // ^ ^ -- // | | -- // \ / -- // \ / -- // [Store] -- // -- // In this case, the TokenFactor becomes part of our match and we rewrite it -- // as a new TokenFactor. -- // -- // To distinguish these two cases, do a recursive walk down the uses. -- auto MemoizeResult = TokenFactorResult.find(User); -- bool Visited = MemoizeResult != TokenFactorResult.end(); -- // Recursively walk chain users only if the result is not memoized. -- if (!Visited) { -- auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult, -- InteriorChainedNodes); -- MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first; -- } -- switch (MemoizeResult->second) { -- case CR_Simple: -- // If the uses of the TokenFactor are just already-selected nodes, ignore -- // it, it is "below" our pattern. -- continue; -- case CR_InducesCycle: -- // If the uses of the TokenFactor lead to nodes that are not part of our -- // pattern that are not selected, folding would turn this into a cycle, -- // bail out now. -- return CR_InducesCycle; -- case CR_LeadsToInteriorNode: -- break; // Otherwise, keep processing. -- } -- -- // Okay, we know we're in the interesting interior case. The TokenFactor -- // is now going to be considered part of the pattern so that we rewrite its -- // uses (it may have uses that are not part of the pattern) with the -- // ultimate chain result of the generated code. We will also add its chain -- // inputs as inputs to the ultimate TokenFactor we create. -- Result = CR_LeadsToInteriorNode; -- if (!Visited) { -- ChainedNodesInPattern.push_back(User); -- InteriorChainedNodes.push_back(User); -- } -- } -- -- return Result; --} -- - /// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains - /// operation for when the pattern matched at least one node with a chains. The - /// input vector contains a list of all of the chained nodes that we match. We -@@ -2521,47 +2455,56 @@ WalkChainUsers(const SDNode *ChainedNode, - static SDValue - HandleMergeInputChains(SmallVectorImpl &ChainNodesMatched, - SelectionDAG *CurDAG) { -- // Used for memoization. Without it WalkChainUsers could take exponential -- // time to run. -- DenseMap TokenFactorResult; -- // Walk all of the chained nodes we've matched, recursively scanning down the -- // users of the chain result. This adds any TokenFactor nodes that are caught -- // in between chained nodes to the chained and interior nodes list. -- SmallVector InteriorChainedNodes; -- for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { -- if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, -- TokenFactorResult, -- InteriorChainedNodes) == CR_InducesCycle) -- return SDValue(); // Would induce a cycle. -- } - -- // Okay, we have walked all the matched nodes and collected TokenFactor nodes -- // that we are interested in. Form our input TokenFactor node. -+ SmallPtrSet Visited; -+ SmallVector Worklist; - SmallVector InputChains; -- for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { -- // Add the input chain of this node to the InputChains list (which will be -- // the operands of the generated TokenFactor) if it's not an interior node. -- SDNode *N = ChainNodesMatched[i]; -- if (N->getOpcode() != ISD::TokenFactor) { -- if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) -- continue; -+ unsigned int Max = 8192; - -- // Otherwise, add the input chain. -- SDValue InChain = ChainNodesMatched[i]->getOperand(0); -- assert(InChain.getValueType() == MVT::Other && "Not a chain"); -- InputChains.push_back(InChain); -- continue; -- } -+ // Quick exit on trivial merge. -+ if (ChainNodesMatched.size() == 1) -+ return ChainNodesMatched[0]->getOperand(0); - -- // If we have a token factor, we want to add all inputs of the token factor -- // that are not part of the pattern we're matching. -- for (const SDValue &Op : N->op_values()) { -- if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(), -- Op.getNode())) -- InputChains.push_back(Op); -- } -+ // Add chains that aren't already added (internal). Peek through -+ // token factors. -+ std::function AddChains = [&](const SDValue V) { -+ if (V.getValueType() != MVT::Other) -+ return; -+ if (V->getOpcode() == ISD::EntryToken) -+ return; -+ if (!Visited.insert(V.getNode()).second) -+ return; -+ if (V->getOpcode() == ISD::TokenFactor) { -+ for (const SDValue &Op : V->op_values()) -+ AddChains(Op); -+ } else -+ InputChains.push_back(V); -+ }; -+ -+ for (auto *N : ChainNodesMatched) { -+ Worklist.push_back(N); -+ Visited.insert(N); - } - -+ while (!Worklist.empty()) -+ AddChains(Worklist.pop_back_val()->getOperand(0)); -+ -+ // Skip the search if there are no chain dependencies. -+ if (InputChains.size() == 0) -+ return CurDAG->getEntryNode(); -+ -+ // If one of these chains is a successor of input, we must have a -+ // node that is both the predecessor and successor of the -+ // to-be-merged nodes. Fail. -+ Visited.clear(); -+ for (SDValue V : InputChains) -+ Worklist.push_back(V.getNode()); -+ -+ for (auto *N : ChainNodesMatched) -+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true)) -+ return SDValue(); -+ -+ // Return merged chain. - if (InputChains.size() == 1) - return InputChains[0]; - return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), -@@ -2606,8 +2549,8 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, - // Move the glue if needed. - if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && - (unsigned)OldGlueResultNo != ResNumResults-1) -- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo), -- SDValue(Res, ResNumResults-1)); -+ ReplaceUses(SDValue(Node, OldGlueResultNo), -+ SDValue(Res, ResNumResults - 1)); - - if ((EmitNodeInfo & OPFL_GlueOutput) != 0) - --ResNumResults; -@@ -2615,14 +2558,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, - // Move the chain reference if needed. - if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && - (unsigned)OldChainResultNo != ResNumResults-1) -- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), -- SDValue(Res, ResNumResults-1)); -+ ReplaceUses(SDValue(Node, OldChainResultNo), -+ SDValue(Res, ResNumResults - 1)); - - // Otherwise, no replacement happened because the node already exists. Replace - // Uses of the old node with the new one. - if (Res != Node) { -- CurDAG->ReplaceAllUsesWith(Node, Res); -- CurDAG->RemoveDeadNode(Node); -+ ReplaceNode(Node, Res); -+ } else { -+ EnforceNodeIdInvariant(Res); - } - - return Res; -@@ -2939,8 +2883,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, - return; - case ISD::AssertSext: - case ISD::AssertZext: -- CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), -- NodeToMatch->getOperand(0)); -+ ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); - CurDAG->RemoveDeadNode(NodeToMatch); - return; - case ISD::INLINEASM: -@@ -3702,7 +3645,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, - NodeToMatch->getValueType(i).getSizeInBits() == - Res.getValueSizeInBits()) && - "invalid replacement"); -- CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); -+ ReplaceUses(SDValue(NodeToMatch, i), Res); - } - - // Update chain uses. -@@ -3715,8 +3658,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, - if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) == - MVT::Glue && - InputGlue.getNode()) -- CurDAG->ReplaceAllUsesOfValueWith( -- SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue); -+ ReplaceUses(SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), -+ InputGlue); - - assert(NodeToMatch->use_empty() && - "Didn't replace all uses of the node?"); -diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp -index f4776adb069..be5345e422d 100644 ---- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp -+++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp -@@ -759,12 +759,11 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { - - if (ProduceCarry) { - // Replace the carry-use -- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1)); -+ ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1)); - } - - // Replace the remaining uses. -- CurDAG->ReplaceAllUsesWith(N, RegSequence); -- CurDAG->RemoveDeadNode(N); -+ ReplaceNode(N, RegSequence); - } - - void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { -diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp -index 8d32510e200..0f504718f28 100644 ---- a/lib/Target/ARM/ARMISelDAGToDAG.cpp -+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp -@@ -498,7 +498,7 @@ bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, - - void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { - CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); -- CurDAG->ReplaceAllUsesWith(N, M); -+ ReplaceUses(N, M); - } - - bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, -diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp -index a6ac4e3df74..3721856ff45 100644 ---- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp -+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp -@@ -777,7 +777,7 @@ void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) { - return; - } - -- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N,0), N->getOperand(0)); -+ ReplaceUses(SDValue(N, 0), N->getOperand(0)); - CurDAG->RemoveDeadNode(N); - } - -@@ -2182,4 +2182,3 @@ void HexagonDAGToDAGISel::rebalanceAddressTrees() { - RootHeights.clear(); - RootWeights.clear(); - } -- -diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp -index f08c5054065..0608f06ef7e 100644 ---- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp -+++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp -@@ -1914,7 +1914,6 @@ void HvxSelector::selectShuffle(SDNode *N) { - // If the mask is all -1's, generate "undef". - if (!UseLeft && !UseRight) { - ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode()); -- DAG.RemoveDeadNode(N); - return; - } - -@@ -1970,7 +1969,6 @@ void HvxSelector::selectRor(SDNode *N) { - NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV}); - - ISel.ReplaceNode(N, NewN); -- DAG.RemoveDeadNode(N); - } - - void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) { -@@ -2017,8 +2015,7 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) { - MemOp[0] = cast(N)->getMemOperand(); - cast(Result)->setMemRefs(MemOp, MemOp + 1); - -- ReplaceUses(N, Result); -- CurDAG->RemoveDeadNode(N); -+ ReplaceNode(N, Result); - } - - void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { -@@ -2056,8 +2053,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { - MemOp[0] = cast(N)->getMemOperand(); - cast(Result)->setMemRefs(MemOp, MemOp + 1); - -- ReplaceUses(N, Result); -- CurDAG->RemoveDeadNode(N); -+ ReplaceNode(N, Result); - } - - void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) { -@@ -2100,5 +2096,3 @@ void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) { - ReplaceUses(SDValue(N, 1), SDValue(Result, 1)); - CurDAG->RemoveDeadNode(N); - } -- -- -diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp -index ce6f3d37f5c..fe59d820c88 100644 ---- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp -+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp -@@ -589,10 +589,16 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr, - // The selection DAG must no longer depend on their uniqueness when this - // function is used. - static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) { -- if (N.getNode()->getNodeId() == -1 || -- N.getNode()->getNodeId() > Pos->getNodeId()) { -+ if (N->getNodeId() == -1 || -+ (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) > -+ SelectionDAGISel::getUninvalidatedNodeId(Pos))) { - DAG->RepositionNode(Pos->getIterator(), N.getNode()); -- N.getNode()->setNodeId(Pos->getNodeId()); -+ // Mark Node as invalid for pruning as after this it may be a successor to a -+ // selected node but otherwise be in the same position of Pos. -+ // Conservatively mark it with the same -abs(Id) to assure node id -+ // invariant is preserved. -+ N->setNodeId(Pos->getNodeId()); -+ SelectionDAGISel::InvalidateNodeId(N.getNode()); - } - } - -@@ -1022,8 +1028,7 @@ bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { - }; - SDValue New = convertTo( - DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops), 0)); -- ReplaceUses(N, New.getNode()); -- CurDAG->RemoveDeadNode(N); -+ ReplaceNode(N, New.getNode()); - return true; - } - -@@ -1114,8 +1119,7 @@ void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, - SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT); - SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower); - -- ReplaceUses(Node, Or.getNode()); -- CurDAG->RemoveDeadNode(Node); -+ ReplaceNode(Node, Or.getNode()); - - SelectCode(Or.getNode()); - } -diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp -index d79fd0ca4da..ee2d221e31c 100644 ---- a/lib/Target/X86/X86ISelDAGToDAG.cpp -+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp -@@ -988,10 +988,16 @@ bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM, - // IDs! The selection DAG must no longer depend on their uniqueness when this - // is used. - static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { -- if (N.getNode()->getNodeId() == -1 || -- N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) { -- DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode()); -- N.getNode()->setNodeId(Pos.getNode()->getNodeId()); -+ if (N->getNodeId() == -1 || -+ (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) > -+ SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) { -+ DAG.RepositionNode(Pos->getIterator(), N.getNode()); -+ // Mark Node as invalid for pruning as after this it may be a successor to a -+ // selected node but otherwise be in the same position of Pos. -+ // Conservatively mark it with the same -abs(Id) to assure node id -+ // invariant is preserved. -+ N->setNodeId(Pos->getNodeId()); -+ SelectionDAGISel::InvalidateNodeId(N.getNode()); - } - } - -@@ -2092,50 +2098,84 @@ static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, - LoadNode->getOffset() != StoreNode->getOffset()) - return false; - -- // Check if the chain is produced by the load or is a TokenFactor with -- // the load output chain as an operand. Return InputChain by reference. -+ bool FoundLoad = false; -+ SmallVector ChainOps; -+ SmallVector LoopWorklist; -+ SmallPtrSet Visited; -+ const unsigned int Max = 1024; -+ -+ // Visualization of Load-Op-Store fusion: -+ // ------------------------- -+ // Legend: -+ // *-lines = Chain operand dependencies. -+ // |-lines = Normal operand dependencies. -+ // Dependencies flow down and right. n-suffix references multiple nodes. -+ // -+ // C Xn C -+ // * * * -+ // * * * -+ // Xn A-LD Yn TF Yn -+ // * * \ | * | -+ // * * \ | * | -+ // * * \ | => A--LD_OP_ST -+ // * * \| \ -+ // TF OP \ -+ // * | \ Zn -+ // * | \ -+ // A-ST Zn -+ // -+ -+ // This merge induced dependences from: #1: Xn -> LD, OP, Zn -+ // #2: Yn -> LD -+ // #3: ST -> Zn -+ -+ // Ensure the transform is safe by checking for the dual -+ // dependencies to make sure we do not induce a loop. -+ -+ // As LD is a predecessor to both OP and ST we can do this by checking: -+ // a). if LD is a predecessor to a member of Xn or Yn. -+ // b). if a Zn is a predecessor to ST. -+ -+ // However, (b) can only occur through being a chain predecessor to -+ // ST, which is the same as Zn being a member or predecessor of Xn, -+ // which is a subset of LD being a predecessor of Xn. So it's -+ // subsumed by check (a). -+ - SDValue Chain = StoreNode->getChain(); - -- bool ChainCheck = false; -+ // Gather X elements in ChainOps. - if (Chain == Load.getValue(1)) { -- ChainCheck = true; -- InputChain = LoadNode->getChain(); -+ FoundLoad = true; -+ ChainOps.push_back(Load.getOperand(0)); - } else if (Chain.getOpcode() == ISD::TokenFactor) { -- SmallVector ChainOps; - for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { - SDValue Op = Chain.getOperand(i); - if (Op == Load.getValue(1)) { -- ChainCheck = true; -+ FoundLoad = true; - // Drop Load, but keep its chain. No cycle check necessary. - ChainOps.push_back(Load.getOperand(0)); - continue; - } -- -- // Make sure using Op as part of the chain would not cause a cycle here. -- // In theory, we could check whether the chain node is a predecessor of -- // the load. But that can be very expensive. Instead visit the uses and -- // make sure they all have smaller node id than the load. -- int LoadId = LoadNode->getNodeId(); -- for (SDNode::use_iterator UI = Op.getNode()->use_begin(), -- UE = UI->use_end(); UI != UE; ++UI) { -- if (UI.getUse().getResNo() != 0) -- continue; -- if (UI->getNodeId() > LoadId) -- return false; -- } -- -+ LoopWorklist.push_back(Op.getNode()); - ChainOps.push_back(Op); - } -- -- if (ChainCheck) -- // Make a new TokenFactor with all the other input chains except -- // for the load. -- InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), -- MVT::Other, ChainOps); - } -- if (!ChainCheck) -+ -+ if (!FoundLoad) -+ return false; -+ -+ // Worklist is currently Xn. Add Yn to worklist. -+ for (SDValue Op : StoredVal->ops()) -+ if (Op.getNode() != LoadNode) -+ LoopWorklist.push_back(Op.getNode()); -+ -+ // Check (a) if Load is a predecessor to Xn + Yn -+ if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max, -+ true)) - return false; - -+ InputChain = -+ CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps); - return true; - } - -@@ -2335,6 +2375,8 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { - MemOp[1] = LoadNode->getMemOperand(); - Result->setMemRefs(MemOp, MemOp + 2); - -+ // Update Load Chain uses as well. -+ ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1)); - ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); - ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); - CurDAG->RemoveDeadNode(Node); -@@ -2946,12 +2988,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { - return; - } - -- case X86ISD::CMP: -- case X86ISD::SUB: { -- // Sometimes a SUB is used to perform comparison. -- if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0)) -- // This node is not a CMP. -- break; -+ case X86ISD::CMP: { - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - -@@ -2971,95 +3008,52 @@ void X86DAGToDAGISel::Select(SDNode *Node) { - if (!C) break; - uint64_t Mask = C->getZExtValue(); - -- // For example, convert "testl %eax, $8" to "testb %al, $8" -+ MVT VT; -+ int SubRegOp; -+ unsigned Op; -+ - if (isUInt<8>(Mask) && - (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) { -- SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i8); -- SDValue Reg = N0.getOperand(0); -- -- // Extract the l-register. -- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, -- MVT::i8, Reg); -- -- // Emit a testb. -- SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, -- Subreg, Imm); -- // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has -- // one, do not call ReplaceAllUsesWith. -- ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), -- SDValue(NewNode, 0)); -- CurDAG->RemoveDeadNode(Node); -- return; -+ // For example, convert "testl %eax, $8" to "testb %al, $8" -+ VT = MVT::i8; -+ SubRegOp = X86::sub_8bit; -+ Op = X86::TEST8ri; -+ } else if (OptForMinSize && isUInt<16>(Mask) && -+ (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) { -+ // For example, "testl %eax, $32776" to "testw %ax, $32776". -+ // NOTE: We only want to form TESTW instructions if optimizing for -+ // min size. Otherwise we only save one byte and possibly get a length -+ // changing prefix penalty in the decoders. -+ VT = MVT::i16; -+ SubRegOp = X86::sub_16bit; -+ Op = X86::TEST16ri; -+ } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 && -+ (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) { -+ // For example, "testq %rax, $268468232" to "testl %eax, $268468232". -+ // NOTE: We only want to run that transform if N0 is 32 or 64 bits. -+ // Otherwize, we find ourselves in a position where we have to do -+ // promotion. If previous passes did not promote the and, we assume -+ // they had a good reason not to and do not promote here. -+ VT = MVT::i32; -+ SubRegOp = X86::sub_32bit; -+ Op = X86::TEST32ri; -+ } else { -+ // No eligible transformation was found. -+ break; - } - -- // For example, "testl %eax, $2048" to "testb %ah, $8". -- if (isShiftedUInt<8, 8>(Mask) && -- (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) { -- // Shift the immediate right by 8 bits. -- SDValue ShiftedImm = CurDAG->getTargetConstant(Mask >> 8, dl, MVT::i8); -- SDValue Reg = N0.getOperand(0); -- -- // Extract the h-register. -- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, -- MVT::i8, Reg); -- -- // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only -- // target GR8_NOREX registers, so make sure the register class is -- // forced. -- SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, -- MVT::i32, Subreg, ShiftedImm); -- // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has -- // one, do not call ReplaceAllUsesWith. -- ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), -- SDValue(NewNode, 0)); -- CurDAG->RemoveDeadNode(Node); -- return; -- } -+ SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT); -+ SDValue Reg = N0.getOperand(0); - -- // For example, "testl %eax, $32776" to "testw %ax, $32776". -- // NOTE: We only want to form TESTW instructions if optimizing for -- // min size. Otherwise we only save one byte and possibly get a length -- // changing prefix penalty in the decoders. -- if (OptForMinSize && isUInt<16>(Mask) && N0.getValueType() != MVT::i16 && -- (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) { -- SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i16); -- SDValue Reg = N0.getOperand(0); -- -- // Extract the 16-bit subregister. -- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, -- MVT::i16, Reg); -- -- // Emit a testw. -- SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, -- Subreg, Imm); -- // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has -- // one, do not call ReplaceAllUsesWith. -- ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), -- SDValue(NewNode, 0)); -- CurDAG->RemoveDeadNode(Node); -- return; -- } -+ // Extract the subregister if necessary. -+ if (N0.getValueType() != VT) -+ Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg); - -- // For example, "testq %rax, $268468232" to "testl %eax, $268468232". -- if (isUInt<32>(Mask) && N0.getValueType() == MVT::i64 && -- (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) { -- SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i32); -- SDValue Reg = N0.getOperand(0); -- -- // Extract the 32-bit subregister. -- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl, -- MVT::i32, Reg); -- -- // Emit a testl. -- SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, -- Subreg, Imm); -- // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has -- // one, do not call ReplaceAllUsesWith. -- ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), -- SDValue(NewNode, 0)); -- CurDAG->RemoveDeadNode(Node); -- return; -- } -+ // Emit a testl or testw. -+ SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm); -+ // Replace CMP with TEST. -+ ReplaceNode(Node, NewNode); -+ return; - } - break; - } -diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp -index c1ddb771e2f..86e71cba87b 100644 ---- a/lib/Target/X86/X86ISelLowering.cpp -+++ b/lib/Target/X86/X86ISelLowering.cpp -@@ -8131,6 +8131,32 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { - return LD; - } - -+ // If this is a splat of pairs of 32-bit elements, we can use a narrower -+ // build_vector and broadcast it. -+ // TODO: We could probably generalize this more. -+ if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) { -+ SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), -+ DAG.getUNDEF(ExtVT), DAG.getUNDEF(ExtVT) }; -+ auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef Ops) { -+ // Make sure all the even/odd operands match. -+ for (unsigned i = 2; i != NumElems; ++i) -+ if (Ops[i % 2] != Op.getOperand(i)) -+ return false; -+ return true; -+ }; -+ if (CanSplat(Op, NumElems, Ops)) { -+ MVT WideEltVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64; -+ MVT NarrowVT = MVT::getVectorVT(ExtVT, 4); -+ // Create a new build vector and cast to v2i64/v2f64. -+ SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2), -+ DAG.getBuildVector(NarrowVT, dl, Ops)); -+ // Broadcast from v2i64/v2f64 and cast to final VT. -+ MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems/2); -+ return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, -+ NewBV)); -+ } -+ } -+ - // For AVX-length vectors, build the individual 128-bit pieces and use - // shuffles to put them in place. - if (VT.is256BitVector() || VT.is512BitVector()) { -diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td -index 98cc8fb7439..3d5de637da2 100644 ---- a/lib/Target/X86/X86InstrArithmetic.td -+++ b/lib/Target/X86/X86InstrArithmetic.td -@@ -1257,14 +1257,6 @@ let isCompare = 1 in { - def TEST32mi : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>; - let Predicates = [In64BitMode] in - def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>; -- -- // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the -- // register class is constrained to GR8_NOREX. This pseudo is explicitly -- // marked side-effect free, since it doesn't have an isel pattern like -- // other test instructions. -- let isPseudo = 1, hasSideEffects = 0 in -- def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), -- "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; - } // Defs = [EFLAGS] - - def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL, -diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp -index 11ada51a870..84a9200a0ef 100644 ---- a/lib/Target/X86/X86InstrInfo.cpp -+++ b/lib/Target/X86/X86InstrInfo.cpp -@@ -7854,9 +7854,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { - case X86::VMOVUPSZ256mr_NOVLX: - return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr), - get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm); -- case X86::TEST8ri_NOREX: -- MI.setDesc(get(X86::TEST8ri)); -- return true; - case X86::MOV32ri64: - MI.setDesc(get(X86::MOV32ri)); - return true; -diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp -index 67d95c2233d..4e11397dec4 100644 ---- a/lib/Target/X86/X86MacroFusion.cpp -+++ b/lib/Target/X86/X86MacroFusion.cpp -@@ -86,7 +86,6 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, - case X86::TEST16mr: - case X86::TEST32mr: - case X86::TEST64mr: -- case X86::TEST8ri_NOREX: - case X86::AND16i16: - case X86::AND16ri: - case X86::AND16ri8: -diff --git a/test/CodeGen/SystemZ/pr36164.ll b/test/CodeGen/SystemZ/pr36164.ll -new file mode 100644 -index 00000000000..0c850091d31 ---- /dev/null -+++ b/test/CodeGen/SystemZ/pr36164.ll -@@ -0,0 +1,113 @@ -+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -+; RUN: llc %s -o - -mtriple=s390x-linux-gnu -mcpu=z13 -disable-basicaa | FileCheck %s -+ -+; This test checks that we do not a reference to a deleted node. -+ -+%0 = type { i32 } -+ -+@g_11 = external dso_local unnamed_addr global i1, align 4 -+@g_69 = external dso_local global i32, align 4 -+@g_73 = external dso_local unnamed_addr global i32, align 4 -+@g_832 = external dso_local constant %0, align 4 -+@g_938 = external dso_local unnamed_addr global i64, align 8 -+ -+; Function Attrs: nounwind -+define void @main() local_unnamed_addr #0 { -+; CHECK-LABEL: main: -+; CHECK: # %bb.0: -+; CHECK-NEXT: stmg %r12, %r15, 96(%r15) -+; CHECK-NEXT: .cfi_offset %r12, -64 -+; CHECK-NEXT: .cfi_offset %r13, -56 -+; CHECK-NEXT: .cfi_offset %r14, -48 -+; CHECK-NEXT: .cfi_offset %r15, -40 -+; CHECK-NEXT: lhi %r0, 1 -+; CHECK-NEXT: larl %r1, g_938 -+; CHECK-NEXT: lhi %r2, 2 -+; CHECK-NEXT: lhi %r3, 3 -+; CHECK-NEXT: lhi %r4, 0 -+; CHECK-NEXT: lhi %r5, 4 -+; CHECK-NEXT: larl %r14, g_11 -+; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 -+; CHECK-NEXT: strl %r0, g_73 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: strl %r0, g_69 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: lghi %r13, 24 -+; CHECK-NEXT: strl %r2, g_69 -+; CHECK-NEXT: ag %r13, 0(%r1) -+; CHECK-NEXT: lrl %r12, g_832 -+; CHECK-NEXT: strl %r3, g_69 -+; CHECK-NEXT: lrl %r12, g_832 -+; CHECK-NEXT: strl %r4, g_69 -+; CHECK-NEXT: lrl %r12, g_832 -+; CHECK-NEXT: strl %r0, g_69 -+; CHECK-NEXT: lrl %r12, g_832 -+; CHECK-NEXT: strl %r2, g_69 -+; CHECK-NEXT: lrl %r12, g_832 -+; CHECK-NEXT: strl %r3, g_69 -+; CHECK-NEXT: stgrl %r13, g_938 -+; CHECK-NEXT: lrl %r13, g_832 -+; CHECK-NEXT: strl %r5, g_69 -+; CHECK-NEXT: mvi 0(%r14), 1 -+; CHECK-NEXT: j .LBB0_1 -+ br label %1 -+ -+;