From a3ea521aec9f1be0694961a54213083dbdcbf8c0 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 30 Jan 2019 10:01:48 +0100 Subject: [PATCH 1/9] * New stable release. To be clear, this is the same as release 7.0.1 To be ABI compliant in Debian between 7 and 7.0.1, I took the pr39427-misscompile.diff. 7.1.0 is an official release for downstream users to bring back the ABI comptability. I am uploading this new version in the archive to: - avoid question like "why we don't have 7.1.0 in the archive?" - align with upstream - clearly show that we kept the ABI Upstream decided to rename the library from 7 to 7.0.1 As I kept the ABI, I reverted this patch (revert-change-soname.diff) More info on https://bugs.llvm.org/show_bug.cgi?id=39427 and https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=913271 --- debian/changelog | 18 +++++-- debian/orig-tar.sh | 2 +- debian/patches/revert-change-soname.diff | 65 ++++++++++++++++++++++++ debian/patches/series | 4 ++ 4 files changed, 85 insertions(+), 4 deletions(-) create mode 100644 debian/patches/revert-change-soname.diff diff --git a/debian/changelog b/debian/changelog index 4b46fe27..0ce89263 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,19 @@ -llvm-toolchain-7 (1:7.0.1~svn352582-1~exp1) UNRELEASED; urgency=medium +llvm-toolchain-7 (1:7.1.0~svn352582-1~exp1) UNRELEASED; urgency=medium - * experimental New snapshot release - * Remove pr39427-misscompile.diff as it has been applied upstream + * New stable release. + To be clear, this is the same as release 7.0.1 + To be ABI compliant in Debian between 7 and 7.0.1, + I took the pr39427-misscompile.diff. + 7.1.0 is an official release for downstream users to bring + back the ABI comptability. + I am uploading this new version in the archive to: + - avoid question like "why we don't have 7.1.0 in the archive?" 
+ - align with upstream + - clearly show that we kept the ABI + Upstream decided to rename the library from 7 to 7.0.1 + As I kept the ABI, I reverted this patch (revert-change-soname.diff) + More info on https://bugs.llvm.org/show_bug.cgi?id=39427 + and https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=913271 -- Sylvestre Ledru Wed, 30 Jan 2019 08:53:14 +0100 diff --git a/debian/orig-tar.sh b/debian/orig-tar.sh index fa726547..7b7fc805 100755 --- a/debian/orig-tar.sh +++ b/debian/orig-tar.sh @@ -20,7 +20,7 @@ set -e SVN_BASE_URL=http://llvm.org/svn/llvm-project/ MAJOR_VERSION=7 -CURRENT_VERSION=7.0.1 # Should be changed to 3.5.1 later +CURRENT_VERSION=7.1.0 # Should be changed to 3.5.1 later if test -n "$1"; then # http://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/branches/google/stable/ diff --git a/debian/patches/revert-change-soname.diff b/debian/patches/revert-change-soname.diff new file mode 100644 index 00000000..24cca1ed --- /dev/null +++ b/debian/patches/revert-change-soname.diff @@ -0,0 +1,65 @@ +Index: tools/llvm-shlib/simple_version_script.map.in +=================================================================== +--- tools/llvm-shlib/simple_version_script.map.in (revision 352580) ++++ tools/llvm-shlib/simple_version_script.map.in (revision 352579) +@@ -1 +1 @@ +-LLVM_@LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@ { global: *; }; ++LLVM_@LLVM_VERSION_MAJOR@ { global: *; }; +Index: tools/llvm-config/CMakeLists.txt +=================================================================== +--- tools/llvm-config/CMakeLists.txt (revision 352580) ++++ tools/llvm-config/CMakeLists.txt (revision 352579) +@@ -37,7 +37,7 @@ + set(LLVM_CXXFLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${COMPILE_FLAGS} ${LLVM_DEFINITIONS}") + set(LLVM_BUILD_SYSTEM cmake) + set(LLVM_HAS_RTTI ${LLVM_CONFIG_HAS_RTTI}) +-set(LLVM_DYLIB_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX}") ++set(LLVM_DYLIB_VERSION 
"${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}") + set(LLVM_HAS_GLOBAL_ISEL "ON") + + # Use the C++ link flags, since they should be a superset of C link flags. +Index: cmake/modules/AddLLVM.cmake +=================================================================== +--- cmake/modules/AddLLVM.cmake (revision 352580) ++++ cmake/modules/AddLLVM.cmake (revision 352579) +@@ -83,7 +83,7 @@ + # FIXME: Don't write the "local:" line on OpenBSD. + # in the export file, also add a linker script to version LLVM symbols (form: LLVM_N.M) + add_custom_command(OUTPUT ${native_export_file} +- COMMAND echo "LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} {" > ${native_export_file} ++ COMMAND echo "LLVM_${LLVM_VERSION_MAJOR} {" > ${native_export_file} + COMMAND grep -q "[[:alnum:]]" ${export_file} && echo " global:" >> ${native_export_file} || : + COMMAND sed -e "s/$/;/" -e "s/^/ /" < ${export_file} >> ${native_export_file} + COMMAND echo " local: *;" >> ${native_export_file} +@@ -500,7 +500,7 @@ + PROPERTIES + # Since 4.0.0, the ABI version is indicated by the major version + SOVERSION ${LLVM_VERSION_MAJOR} +- VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}) ++ VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}) + endif() + endif() + +@@ -522,7 +522,7 @@ + if(${output_name} STREQUAL "output_name-NOTFOUND") + set(output_name ${name}) + endif() +- set(library_name ${output_name}-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX}) ++ set(library_name ${output_name}-${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}) + set(api_name ${output_name}-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}) + set_target_properties(${name} PROPERTIES OUTPUT_NAME ${library_name}) + llvm_install_library_symlink(${api_name} ${library_name} SHARED +Index: docs/ReleaseNotes.rst +=================================================================== +--- docs/ReleaseNotes.rst (revision 352580) ++++ 
docs/ReleaseNotes.rst (revision 352579) +@@ -30,6 +30,9 @@ + is available on the Visual Studio Marketplace. The new integration + supports Visual Studio 2017. + ++* Libraries have been renamed from 7.0 to 7. This change also impacts ++ downstream libraries like lldb. ++ + * The LoopInstSimplify pass (``-loop-instsimplify``) has been removed. + + * Symbols starting with ``?`` are no longer mangled by LLVM when using the diff --git a/debian/patches/series b/debian/patches/series index be214d4e..b3ae4b64 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -134,3 +134,7 @@ D54409-powerpcspe-register-spilling.diff D54584-powerpcspe-double-parameter.diff D52340-rustc-debuginfo.diff +# Disable https://llvm.org/viewvc/llvm-project?view=revision&revision=352580 +# 7.0.1 was always abi compatible with 7.0 +# Therefor, the libraries are still compatible with the 7.1.0 version +revert-change-soname.diff From a5a2a2f386b857f6956b0769c18ff580fc8b8de2 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 7 Feb 2019 17:16:51 +0100 Subject: [PATCH 2/9] kfreebsd/kfreebsd-triple-clang.diff: update of the patch to fix the kfreebsd FTBFS (Closes: #921246) Many thanks to Svante Signell for the update --- debian/changelog | 9 +++++++ .../kfreebsd/kfreebsd-triple-clang.diff | 24 ++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/debian/changelog b/debian/changelog index 0ed55629..47ddbb34 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,12 @@ +llvm-toolchain-7 (1:7.0.1-7) unstable; urgency=medium + + * kfreebsd/kfreebsd-triple-clang.diff: update of the patch to fix + the kfreebsd FTBFS (Closes: #921246) + Many thanks to Svante Signell for the update + + + -- + llvm-toolchain-7 (1:7.0.1-6) unstable; urgency=medium * Add support for kfreebsd (Closes: #921246) diff --git a/debian/patches/kfreebsd/kfreebsd-triple-clang.diff b/debian/patches/kfreebsd/kfreebsd-triple-clang.diff index 1462a1f9..37b29507 100644 --- 
a/debian/patches/kfreebsd/kfreebsd-triple-clang.diff +++ b/debian/patches/kfreebsd/kfreebsd-triple-clang.diff @@ -49,7 +49,25 @@ Index: llvm-toolchain-7-7.0.1/clang/lib/Driver/ToolChains/Gnu.cpp =================================================================== --- llvm-toolchain-7-7.0.1.orig/clang/lib/Driver/ToolChains/Gnu.cpp +++ llvm-toolchain-7-7.0.1/clang/lib/Driver/ToolChains/Gnu.cpp -@@ -499,6 +499,18 @@ void tools::gnutools::Linker::ConstructJ +@@ -234,6 +234,8 @@ static const char *getLDMOption(const ll + case llvm::Triple::x86: + if (T.isOSIAMCU()) + return "elf_iamcu"; ++ if (T.isOSkFreeBSD()) ++ return "elf_i386_fbsd"; + return "elf_i386"; + case llvm::Triple::aarch64: + return "aarch64linux"; +@@ -277,6 +279,8 @@ static const char *getLDMOption(const ll + case llvm::Triple::x86_64: + if (T.getEnvironment() == llvm::Triple::GNUX32) + return "elf32_x86_64"; ++ if (T.isOSkFreeBSD()) ++ return "elf_x86_64_fbsd"; + return "elf_x86_64"; + default: + return nullptr; +@@ -499,6 +503,18 @@ void tools::gnutools::Linker::ConstructJ CmdArgs.push_back("--wrap=pthread_create"); CmdArgs.push_back("-lc"); @@ -68,7 +86,7 @@ Index: llvm-toolchain-7-7.0.1/clang/lib/Driver/ToolChains/Gnu.cpp // Add IAMCU specific libs, if needed. 
if (IsIAMCU) -@@ -1836,7 +1848,8 @@ void Generic_GCC::GCCInstallationDetecto +@@ -1836,7 +1852,8 @@ void Generic_GCC::GCCInstallationDetecto "x86_64-redhat-linux", "x86_64-suse-linux", "x86_64-manbo-linux-gnu", "x86_64-linux-gnu", "x86_64-slackware-linux", "x86_64-unknown-linux", @@ -78,7 +96,7 @@ Index: llvm-toolchain-7-7.0.1/clang/lib/Driver/ToolChains/Gnu.cpp static const char *const X32LibDirs[] = {"/libx32", "/lib"}; static const char *const X32Triples[] = { "x86_64-linux-gnux32", "x86_64-unknown-linux-gnux32", -@@ -1847,8 +1860,9 @@ void Generic_GCC::GCCInstallationDetecto +@@ -1847,8 +1864,9 @@ void Generic_GCC::GCCInstallationDetecto "i386-linux-gnu", "i386-redhat-linux6E", "i686-redhat-linux", "i586-redhat-linux", "i386-redhat-linux", "i586-suse-linux", "i486-slackware-linux", "i686-montavista-linux", "i586-linux-gnu", From 816b488b26d5c3a05bfaedb469cf7546ee871054 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 7 Feb 2019 17:17:47 +0100 Subject: [PATCH 3/9] Enable ld gold for kfreebsd-amd64 and kfreebsd-i386 Many thanks to Svante Signell for the two updates --- debian/changelog | 6 +++--- debian/rules | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/debian/changelog b/debian/changelog index 47ddbb34..89e2aa4b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,10 +2,10 @@ llvm-toolchain-7 (1:7.0.1-7) unstable; urgency=medium * kfreebsd/kfreebsd-triple-clang.diff: update of the patch to fix the kfreebsd FTBFS (Closes: #921246) - Many thanks to Svante Signell for the update + * Enable ld gold for kfreebsd-amd64 and kfreebsd-i386 + Many thanks to Svante Signell for the two updates - - -- + -- Sylvestre Ledru Thu, 07 Feb 2019 17:17:40 +0100 llvm-toolchain-7 (1:7.0.1-6) unstable; urgency=medium diff --git a/debian/rules b/debian/rules index 5ecf357a..53e22967 100755 --- a/debian/rules +++ b/debian/rules @@ -113,7 +113,7 @@ endif # CMAKE_EXTRA += -DLLVM_ENABLE_LLD=ON # endif -BINUTILS_GOLD_ARCHS := amd64 arm64 armhf i386 
ppc64 ppc64el x32 s390x hurd-i386 +BINUTILS_GOLD_ARCHS := amd64 arm64 armhf i386 ppc64 ppc64el x32 s390x hurd-i386 kfreebsd-amd64 kfreebsd-i386 ifeq ($(shell dpkg --compare-versions $(shell dpkg-query -W -f '$${Version}' binutils) ge 2.23.1-1~exp3 ; echo $$?),0) ifneq (,$(filter $(DEB_HOST_ARCH),$(BINUTILS_GOLD_ARCHS))) # -fused-ld=gold enables the gold linker (but is not supported by all archs / distro) From f39874ac626f45c8e299a6a4c7e9dec6eb8dadf3 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 9 Feb 2019 16:03:50 +0100 Subject: [PATCH 4/9] update the url to https --- debian/orig-tar.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/debian/orig-tar.sh b/debian/orig-tar.sh index f4717e13..5dba8f54 100755 --- a/debian/orig-tar.sh +++ b/debian/orig-tar.sh @@ -14,18 +14,18 @@ set -e # To create an rc1 release: # sh 4.0/debian/orig-tar.sh RELEASE_40 rc1 -SVN_BASE_URL=http://llvm.org/svn/llvm-project/ +SVN_BASE_URL=https://llvm.org/svn/llvm-project/ MAJOR_VERSION=6.0 CURRENT_VERSION=6.0.1 # Should be changed to 3.5.1 later if test -n "$1"; then -# http://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/branches/google/stable/ +# https://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/branches/google/stable/ # For example: sh 4.0/debian/orig-tar.sh release_400 BRANCH=$1 fi if test -n "$1" -a -n "$2"; then -# http://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/tags/RELEASE_34/rc1/ +# https://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/tags/RELEASE_34/rc1/ # For example: sh 4.0/debian/orig-tar.sh RELEASE_401 rc3 4.0.1 BRANCH=$1 TAG=$2 From e73cc836c4cb0217f08d656a48e55bb97dabc139 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 9 Feb 2019 16:55:18 +0100 Subject: [PATCH 5/9] rebase of the patch --- debian/patches/D52340-rustc-debuginfo.diff | 14 ++++---- debian/patches/hurd-pathmax.diff | 42 +++++++++++----------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git 
a/debian/patches/D52340-rustc-debuginfo.diff b/debian/patches/D52340-rustc-debuginfo.diff index a9e9a33b..9a24acac 100644 --- a/debian/patches/D52340-rustc-debuginfo.diff +++ b/debian/patches/D52340-rustc-debuginfo.diff @@ -1,8 +1,8 @@ -Index: llvm/lib/Bitcode/Reader/MetadataLoader.cpp +Index: llvm-toolchain-7_7.0.1~svn352582/lib/Bitcode/Reader/MetadataLoader.cpp =================================================================== ---- llvm.orig/lib/Bitcode/Reader/MetadataLoader.cpp -+++ llvm/lib/Bitcode/Reader/MetadataLoader.cpp -@@ -1313,7 +1313,7 @@ +--- llvm-toolchain-7_7.0.1~svn352582.orig/lib/Bitcode/Reader/MetadataLoader.cpp ++++ llvm-toolchain-7_7.0.1~svn352582/lib/Bitcode/Reader/MetadataLoader.cpp +@@ -1308,7 +1308,7 @@ Error MetadataLoader::MetadataLoaderImpl (Context, Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams, @@ -11,10 +11,10 @@ Index: llvm/lib/Bitcode/Reader/MetadataLoader.cpp if (!IsNotUsedInTypeRef && Identifier) MetadataList.addTypeRef(*Identifier, *cast(CT)); -Index: llvm/test/Assembler/debug-variant-discriminator.ll +Index: llvm-toolchain-7_7.0.1~svn352582/test/Assembler/debug-variant-discriminator.ll =================================================================== ---- llvm.orig/test/Assembler/debug-variant-discriminator.ll -+++ llvm/test/Assembler/debug-variant-discriminator.ll +--- /dev/null ++++ llvm-toolchain-7_7.0.1~svn352582/test/Assembler/debug-variant-discriminator.ll @@ -0,0 +1,14 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s +; RUN: verify-uselistorder %s diff --git a/debian/patches/hurd-pathmax.diff b/debian/patches/hurd-pathmax.diff index d68cd4c1..fcf72b75 100644 --- a/debian/patches/hurd-pathmax.diff +++ b/debian/patches/hurd-pathmax.diff @@ -1,7 +1,7 @@ -Index: llvm-toolchain-snapshot_6.0~svn317126/clang/lib/Basic/FileManager.cpp +Index: llvm-toolchain-7_7.1.0~svn353565/clang/lib/Basic/FileManager.cpp 
=================================================================== ---- llvm-toolchain-snapshot_6.0~svn317126.orig/clang/lib/Basic/FileManager.cpp -+++ llvm-toolchain-snapshot_6.0~svn317126/clang/lib/Basic/FileManager.cpp +--- llvm-toolchain-7_7.1.0~svn353565.orig/clang/lib/Basic/FileManager.cpp ++++ llvm-toolchain-7_7.1.0~svn353565/clang/lib/Basic/FileManager.cpp @@ -501,6 +501,12 @@ void FileManager::invalidateCache(const UniqueRealFiles.erase(Entry->getUniqueID()); } @@ -15,10 +15,10 @@ Index: llvm-toolchain-snapshot_6.0~svn317126/clang/lib/Basic/FileManager.cpp void FileManager::GetUniqueIDMapping( SmallVectorImpl &UIDToFiles) const { UIDToFiles.clear(); -Index: llvm-toolchain-snapshot_6.0~svn317126/lldb/include/lldb/lldb-defines.h +Index: llvm-toolchain-7_7.1.0~svn353565/lldb/include/lldb/lldb-defines.h =================================================================== ---- llvm-toolchain-snapshot_6.0~svn317126.orig/lldb/include/lldb/lldb-defines.h -+++ llvm-toolchain-snapshot_6.0~svn317126/lldb/include/lldb/lldb-defines.h +--- llvm-toolchain-7_7.1.0~svn353565.orig/lldb/include/lldb/lldb-defines.h ++++ llvm-toolchain-7_7.1.0~svn353565/lldb/include/lldb/lldb-defines.h @@ -28,6 +28,11 @@ #define INT32_MAX 2147483647 #endif @@ -31,11 +31,11 @@ Index: llvm-toolchain-snapshot_6.0~svn317126/lldb/include/lldb/lldb-defines.h #if !defined(UINT32_MAX) #define UINT32_MAX 4294967295U #endif -Index: llvm-toolchain-snapshot_6.0~svn317126/lib/Support/Unix/Path.inc +Index: llvm-toolchain-7_7.1.0~svn353565/lib/Support/Unix/Path.inc =================================================================== ---- llvm-toolchain-snapshot_6.0~svn317126.orig/lib/Support/Unix/Path.inc -+++ llvm-toolchain-snapshot_6.0~svn317126/lib/Support/Unix/Path.inc -@@ -64,6 +64,7 @@ +--- llvm-toolchain-7_7.1.0~svn353565.orig/lib/Support/Unix/Path.inc ++++ llvm-toolchain-7_7.1.0~svn353565/lib/Support/Unix/Path.inc +@@ -49,6 +49,7 @@ // For GNU Hurd #if defined(__GNU__) && !defined(PATH_MAX) # define 
PATH_MAX 4096 @@ -43,11 +43,11 @@ Index: llvm-toolchain-snapshot_6.0~svn317126/lib/Support/Unix/Path.inc #endif #include -Index: llvm-toolchain-snapshot_6.0~svn317126/tools/dsymutil/DwarfLinker.cpp +Index: llvm-toolchain-7_7.1.0~svn353565/tools/dsymutil/DwarfLinker.cpp =================================================================== ---- llvm-toolchain-snapshot_6.0~svn317126.orig/tools/dsymutil/DwarfLinker.cpp -+++ llvm-toolchain-snapshot_6.0~svn317126/tools/dsymutil/DwarfLinker.cpp -@@ -93,6 +93,11 @@ +--- llvm-toolchain-7_7.1.0~svn353565.orig/tools/dsymutil/DwarfLinker.cpp ++++ llvm-toolchain-7_7.1.0~svn353565/tools/dsymutil/DwarfLinker.cpp +@@ -101,6 +101,11 @@ #include #include @@ -59,10 +59,10 @@ Index: llvm-toolchain-snapshot_6.0~svn317126/tools/dsymutil/DwarfLinker.cpp namespace llvm { namespace dsymutil { -Index: llvm-toolchain-snapshot_6.0~svn317126/polly/lib/External/ppcg/cuda_common.c +Index: llvm-toolchain-7_7.1.0~svn353565/polly/lib/External/ppcg/cuda_common.c =================================================================== ---- llvm-toolchain-snapshot_6.0~svn317126.orig/polly/lib/External/ppcg/cuda_common.c -+++ llvm-toolchain-snapshot_6.0~svn317126/polly/lib/External/ppcg/cuda_common.c +--- llvm-toolchain-7_7.1.0~svn353565.orig/polly/lib/External/ppcg/cuda_common.c ++++ llvm-toolchain-7_7.1.0~svn353565/polly/lib/External/ppcg/cuda_common.c @@ -15,6 +15,11 @@ #include "cuda_common.h" #include "ppcg.h" @@ -75,11 +75,11 @@ Index: llvm-toolchain-snapshot_6.0~svn317126/polly/lib/External/ppcg/cuda_common /* Open the host .cu file and the kernel .hu and .cu files for writing. * Add the necessary includes. 
*/ -Index: llvm-toolchain-6.0-6.0.1/clang/lib/Frontend/ModuleDependencyCollector.cpp +Index: llvm-toolchain-7_7.1.0~svn353565/clang/lib/Frontend/ModuleDependencyCollector.cpp =================================================================== ---- llvm-toolchain-6.0-6.0.1.orig/clang/lib/Frontend/ModuleDependencyCollector.cpp -+++ llvm-toolchain-6.0-6.0.1/clang/lib/Frontend/ModuleDependencyCollector.cpp -@@ -97,6 +97,11 @@ struct ModuleDependencyMMCallbacks : pub +--- llvm-toolchain-7_7.1.0~svn353565.orig/clang/lib/Frontend/ModuleDependencyCollector.cpp ++++ llvm-toolchain-7_7.1.0~svn353565/clang/lib/Frontend/ModuleDependencyCollector.cpp +@@ -99,6 +99,11 @@ struct ModuleDependencyMMCallbacks : pub } From 296939f8f7b90c0bf43169ff6613d8ae6f03810b Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 9 Feb 2019 16:55:32 +0100 Subject: [PATCH 6/9] Upstream decided to rename the library from 7 to 7.1.0 As I kept the ABI, I reverted the patches (debian/patches/7.1.0/*) --- debian/changelog | 8 +-- .../patches/7.1.0/revert-change-soname-2.diff | 19 +++++++ .../patches/7.1.0/revert-change-soname.diff | 56 +++++++++++++++++++ debian/patches/series | 5 +- 4 files changed, 82 insertions(+), 6 deletions(-) create mode 100644 debian/patches/7.1.0/revert-change-soname-2.diff create mode 100644 debian/patches/7.1.0/revert-change-soname.diff diff --git a/debian/changelog b/debian/changelog index 6d4bcfc4..8e5a272d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -llvm-toolchain-7 (1:7.1.0~svn352582-1~exp1) UNRELEASED; urgency=medium +llvm-toolchain-7 (1:7.1.0~svn353565-1~exp1) UNRELEASED; urgency=medium * New stable release. To be clear, this is the same as release 7.0.1 @@ -10,12 +10,12 @@ llvm-toolchain-7 (1:7.1.0~svn352582-1~exp1) UNRELEASED; urgency=medium - avoid question like "why we don't have 7.1.0 in the archive?" 
- align with upstream - clearly show that we kept the ABI - Upstream decided to rename the library from 7 to 7.0.1 - As I kept the ABI, I reverted this patch (revert-change-soname.diff) + Upstream decided to rename the library from 7 to 7.1.0 + As I kept the ABI, I reverted the patches (debian/patches/7.1.0/*) More info on https://bugs.llvm.org/show_bug.cgi?id=39427 and https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=913271 - -- Sylvestre Ledru Wed, 30 Jan 2019 08:53:14 +0100 + -- Sylvestre Ledru Sat, 09 Feb 2019 16:34:12 +0100 llvm-toolchain-7 (1:7.0.1-7) unstable; urgency=medium diff --git a/debian/patches/7.1.0/revert-change-soname-2.diff b/debian/patches/7.1.0/revert-change-soname-2.diff new file mode 100644 index 00000000..527e57d8 --- /dev/null +++ b/debian/patches/7.1.0/revert-change-soname-2.diff @@ -0,0 +1,19 @@ +Index: llvm-toolchain-7_7.1.0~svn353565/cmake/modules/AddLLVM.cmake +=================================================================== +--- llvm-toolchain-7_7.1.0~svn353565.orig/cmake/modules/AddLLVM.cmake ++++ llvm-toolchain-7_7.1.0~svn353565/cmake/modules/AddLLVM.cmake +@@ -498,11 +498,9 @@ function(llvm_add_library name) + if(UNIX AND NOT APPLE AND NOT ARG_SONAME) + set_target_properties(${name} + PROPERTIES +- # Concatenate the version numbers since ldconfig expects exactly +- # one component indicating the ABI version, while LLVM uses +- # major+minor for that. 
+- SOVERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} +- VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}) ++ # Since 4.0.0, the ABI version is indicated by the major version ++ SOVERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX} ++ VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}) + endif() + endif() + diff --git a/debian/patches/7.1.0/revert-change-soname.diff b/debian/patches/7.1.0/revert-change-soname.diff new file mode 100644 index 00000000..d9912798 --- /dev/null +++ b/debian/patches/7.1.0/revert-change-soname.diff @@ -0,0 +1,56 @@ +Index: llvm-toolchain-7_7.1.0~svn353565/docs/ReleaseNotes.rst +=================================================================== +--- llvm-toolchain-7_7.1.0~svn353565.orig/docs/ReleaseNotes.rst ++++ llvm-toolchain-7_7.1.0~svn353565/docs/ReleaseNotes.rst +@@ -30,6 +30,9 @@ Non-comprehensive list of changes in thi + is available on the Visual Studio Marketplace. The new integration + supports Visual Studio 2017. + ++* Libraries have been renamed from 7.0 to 7. This change also impacts ++ downstream libraries like lldb. ++ + * The LoopInstSimplify pass (``-loop-instsimplify``) has been removed. 
+ + * Symbols starting with ``?`` are no longer mangled by LLVM when using the +Index: llvm-toolchain-7_7.1.0~svn353565/tools/llvm-config/CMakeLists.txt +=================================================================== +--- llvm-toolchain-7_7.1.0~svn353565.orig/tools/llvm-config/CMakeLists.txt ++++ llvm-toolchain-7_7.1.0~svn353565/tools/llvm-config/CMakeLists.txt +@@ -37,7 +37,7 @@ set(LLVM_CFLAGS "${LLVM_DEFINITIONS}") + set(LLVM_CXXFLAGS "${COMPILE_FLAGS} ${LLVM_DEFINITIONS}") + set(LLVM_BUILD_SYSTEM cmake) + set(LLVM_HAS_RTTI ${LLVM_CONFIG_HAS_RTTI}) +-set(LLVM_DYLIB_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX}") ++set(LLVM_DYLIB_VERSION "${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}") + set(LLVM_HAS_GLOBAL_ISEL "ON") + + # Use the C++ link flags, since they should be a superset of C link flags. +Index: llvm-toolchain-7_7.1.0~svn353565/tools/llvm-shlib/simple_version_script.map.in +=================================================================== +--- llvm-toolchain-7_7.1.0~svn353565.orig/tools/llvm-shlib/simple_version_script.map.in ++++ llvm-toolchain-7_7.1.0~svn353565/tools/llvm-shlib/simple_version_script.map.in +@@ -1 +1 @@ +-LLVM_@LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@ { global: *; }; ++LLVM_@LLVM_VERSION_MAJOR@ { global: *; }; +Index: llvm-toolchain-7_7.1.0~svn353565/cmake/modules/AddLLVM.cmake +=================================================================== +--- llvm-toolchain-7_7.1.0~svn353565.orig/cmake/modules/AddLLVM.cmake ++++ llvm-toolchain-7_7.1.0~svn353565/cmake/modules/AddLLVM.cmake +@@ -83,7 +83,7 @@ function(add_llvm_symbol_exports target_ + # FIXME: Don't write the "local:" line on OpenBSD. 
+ # in the export file, also add a linker script to version LLVM symbols (form: LLVM_N.M) + add_custom_command(OUTPUT ${native_export_file} +- COMMAND echo "LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} {" > ${native_export_file} ++ COMMAND echo "LLVM_${LLVM_VERSION_MAJOR} {" > ${native_export_file} + COMMAND grep -q "[[:alnum:]]" ${export_file} && echo " global:" >> ${native_export_file} || : + COMMAND sed -e "s/$/;/" -e "s/^/ /" < ${export_file} >> ${native_export_file} + COMMAND echo " local: *;" >> ${native_export_file} +@@ -522,7 +522,7 @@ function(llvm_add_library name) + if(${output_name} STREQUAL "output_name-NOTFOUND") + set(output_name ${name}) + endif() +- set(library_name ${output_name}-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX}) ++ set(library_name ${output_name}-${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}) + set(api_name ${output_name}-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}) + set_target_properties(${name} PROPERTIES OUTPUT_NAME ${library_name}) + llvm_install_library_symlink(${api_name} ${library_name} SHARED diff --git a/debian/patches/series b/debian/patches/series index 180e77a6..36bc0b61 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -99,7 +99,6 @@ clang-arm-default-vfp3-on-armv7a.patch # For the bootstrap bootstrap-fix-include-next.diff clangd-atomic-cmake.patch -pr39427-misscompile.diff # Rustc rustc-aarch64-test-failure.diff @@ -138,7 +137,9 @@ D52340-rustc-debuginfo.diff # Disable https://llvm.org/viewvc/llvm-project?view=revision&revision=352580 # 7.0.1 was always abi compatible with 7.0 # Therefor, the libraries are still compatible with the 7.1.0 version -revert-change-soname.diff +7.1.0/revert-change-soname-2.diff +7.1.0/revert-change-soname.diff + # kfreebsd kfreebsd/clang_lib_Basic_Targets.diff From 4d5381ce585b0ec2d7b00830feaf5ab9055aa4c1 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 9 Feb 2019 16:56:00 +0100 Subject: [PATCH 7/9] 
improve the repack script --- debian/unpack.sh | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/debian/unpack.sh b/debian/unpack.sh index 0922c29a..953389fd 100644 --- a/debian/unpack.sh +++ b/debian/unpack.sh @@ -1,14 +1,17 @@ set -e -VERSION=7 -MAJOR_VERSION=7.0.1 +ORIG_VERSION=7 +MAJOR_VERSION=7.1.0 SVN_REV=`ls -1 *$MAJOR_VERSION*svn*bz2 | tail -1|perl -ne 'print "$1\n" if /svn(\d+)/;' | sort -ru` -SVN_REV=347285 +#SVN_REV=353565 VERSION=svn$SVN_REV -VERSION=+rc3 -tar jxvf llvm-toolchain-7_$MAJOR_VERSION~$VERSION.orig.tar.bz2 +#VERSION=+rc3 +LLVM_ARCHIVE=llvm-toolchain-7_$MAJOR_VERSION~$VERSION.orig.tar.bz2 +echo "unpack of $LLVM_ARCHIVE" +tar jxf $LLVM_ARCHIVE cd llvm-toolchain-7_$MAJOR_VERSION~$VERSION/ || ( echo "Bad SVN_REV:\"$SVN_REV\"" && exit 1 ) for f in ../llvm-toolchain-7_$MAJOR_VERSION~$VERSION.orig-clang.tar.bz2 ../llvm-toolchain-7_$MAJOR_VERSION~$VERSION.orig-clang-tools-extra.tar.bz2 ../llvm-toolchain-7_$MAJOR_VERSION~$VERSION.orig-compiler-rt.tar.bz2 ../llvm-toolchain-7_$MAJOR_VERSION~$VERSION.orig-lldb.tar.bz2 ../llvm-toolchain-7_$MAJOR_VERSION~$VERSION.orig-polly.tar.bz2 ../llvm-toolchain-7_$MAJOR_VERSION~$VERSION.orig-libcxxabi.tar.bz2 ../llvm-toolchain-7_$MAJOR_VERSION~$VERSION.orig-libcxx.tar.bz2 ../llvm-toolchain-7_$MAJOR_VERSION~$VERSION.orig-openmp.tar.bz2; do - tar jxvf $f + echo "Unpack of $f" + tar jxf $f done ln -s clang_$MAJOR_VERSION~$VERSION clang @@ -20,5 +23,5 @@ ln -s openmp_$MAJOR_VERSION~$VERSION openmp ln -s libcxx_$MAJOR_VERSION~$VERSION libcxx ln -s libcxxabi_$MAJOR_VERSION~$VERSION libcxxabi -cp -R ../$VERSION/debian . +cp -R ../$ORIG_VERSION/debian . 
QUILT_PATCHES=debian/patches/ quilt push -a --fuzz=0 From 408f329cd84ad41cef7fc41ee4ac2b4b4573945f Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 9 Feb 2019 17:21:34 +0100 Subject: [PATCH 8/9] Cherry-pick various fixes for julia --- debian/changelog | 4 +- .../llvm-D27629-AArch64-large_model_4.0.patch | 72 + ...lvm-D27629-AArch64-large_model_6.0.1.patch | 24 + .../julia/llvm-D34078-vectorize-fdiv.patch | 53 + .../llvm-D42262-jumpthreading-not-i1.patch | 82 + .../julia/llvm-D44892-Perf-integration.patch | 677 ++ .../julia/llvm-D50010-VNCoercion-ni.patch | 89 + .../patches/julia/llvm-D50167-scev-umin.patch | 1143 +++ .../patches/julia/llvm-PPC-addrspaces.patch | 26 + .../julia/llvm-rL326967-aligned-load.patch | 301 + debian/patches/julia/llvm-rL327898.patch | 6131 +++++++++++++++++ debian/patches/series | 26 + 12 files changed, 8627 insertions(+), 1 deletion(-) create mode 100644 debian/patches/julia/llvm-D27629-AArch64-large_model_4.0.patch create mode 100644 debian/patches/julia/llvm-D27629-AArch64-large_model_6.0.1.patch create mode 100644 debian/patches/julia/llvm-D34078-vectorize-fdiv.patch create mode 100644 debian/patches/julia/llvm-D42262-jumpthreading-not-i1.patch create mode 100644 debian/patches/julia/llvm-D44892-Perf-integration.patch create mode 100644 debian/patches/julia/llvm-D50010-VNCoercion-ni.patch create mode 100644 debian/patches/julia/llvm-D50167-scev-umin.patch create mode 100644 debian/patches/julia/llvm-PPC-addrspaces.patch create mode 100644 debian/patches/julia/llvm-rL326967-aligned-load.patch create mode 100644 debian/patches/julia/llvm-rL327898.patch diff --git a/debian/changelog b/debian/changelog index d8950755..c2170419 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,8 +2,10 @@ llvm-toolchain-6.0 (1:6.0.1-11) unstable; urgency=medium * Remove 'Multi-Arch: same' in libclang (Closes: #874248) + * Cherry-pick various llvm fixes for Julia + (Closes: #919628) - -- Sylvestre Ledru Thu, 24 Jan 2019 08:44:24 +0100 + -- 
Sylvestre Ledru Sat, 09 Feb 2019 17:22:59 +0100 llvm-toolchain-6.0 (1:6.0.1-10) unstable; urgency=medium diff --git a/debian/patches/julia/llvm-D27629-AArch64-large_model_4.0.patch b/debian/patches/julia/llvm-D27629-AArch64-large_model_4.0.patch new file mode 100644 index 00000000..17b2d416 --- /dev/null +++ b/debian/patches/julia/llvm-D27629-AArch64-large_model_4.0.patch @@ -0,0 +1,72 @@ +From 6e7b660ee185445640110c80d80aafd436682fca Mon Sep 17 00:00:00 2001 +From: Yichao Yu +Date: Fri, 9 Dec 2016 15:59:46 -0500 +Subject: [PATCH] Fix unwind info relocation with large code model on AArch64 + +--- + lib/MC/MCObjectFileInfo.cpp | 2 ++ + .../AArch64/ELF_ARM64_BE-large-relocations.s | 18 ++++++++++++++++++ + .../RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s | 18 ++++++++++++++++++ + 3 files changed, 38 insertions(+) + create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s + create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s + +Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp ++++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp +@@ -328,6 +328,8 @@ void MCObjectFileInfo::initELFMCObjectFi + dwarf::DW_EH_PE_sdata4 + : dwarf::DW_EH_PE_absptr; + break; ++ case Triple::aarch64: ++ case Triple::aarch64_be: + case Triple::x86_64: + if (PositionIndependent) { + PersonalityEncoding = +Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s +=================================================================== +--- /dev/null ++++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s +@@ -0,0 +1,18 @@ ++# RUN: llvm-mc -triple=aarch64_be-none-linux-gnu -code-model=large -filetype=obj -o %T/be-large-reloc.o %s ++# RUN: llvm-rtdyld -triple=aarch64_be-none-linux-gnu 
-verify -map-section be-large-reloc.o,.eh_frame=0x10000 -map-section be-large-reloc.o,.text=0xffff000000000000 -check=%s %T/be-large-reloc.o ++ ++ .text ++ .globl g ++ .p2align 2 ++ .type g,@function ++g: ++ .cfi_startproc ++ mov x0, xzr ++ ret ++ .Lfunc_end0: ++ .size g, .Lfunc_end0-g ++ .cfi_endproc ++ ++# Skip the CIE and load the 8 bytes PC begin pointer. ++# Assuming the CIE and the FDE length are both 4 bytes. ++# rtdyld-check: *{8}(section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc) +Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s +=================================================================== +--- /dev/null ++++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s +@@ -0,0 +1,18 @@ ++# RUN: llvm-mc -triple=arm64-none-linux-gnu -code-model=large -filetype=obj -o %T/large-reloc.o %s ++# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o ++ ++ .text ++ .globl g ++ .p2align 2 ++ .type g,@function ++g: ++ .cfi_startproc ++ mov x0, xzr ++ ret ++ .Lfunc_end0: ++ .size g, .Lfunc_end0-g ++ .cfi_endproc ++ ++# Skip the CIE and load the 8 bytes PC begin pointer. ++# Assuming the CIE and the FDE length are both 4 bytes. 
++# rtdyld-check: *{8}(section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) diff --git a/debian/patches/julia/llvm-D27629-AArch64-large_model_6.0.1.patch b/debian/patches/julia/llvm-D27629-AArch64-large_model_6.0.1.patch new file mode 100644 index 00000000..1b815a2b --- /dev/null +++ b/debian/patches/julia/llvm-D27629-AArch64-large_model_6.0.1.patch @@ -0,0 +1,24 @@ +From f76abe65e6d07fea5e838c4f8c9a9421c16debb0 Mon Sep 17 00:00:00 2001 +From: Valentin Churavy +Date: Thu, 5 Jul 2018 12:37:50 -0400 +Subject: [PATCH] Fix unwind info relocation with large code model on AArch64 + +--- + lib/MC/MCObjectFileInfo.cpp | 2 ++ + .../AArch64/ELF_ARM64_large-relocations.s | 20 +++++++++++++++++++ + 2 files changed, 22 insertions(+) + create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s + +Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp ++++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp +@@ -291,6 +291,8 @@ void MCObjectFileInfo::initELFMCObjectFi + break; + case Triple::ppc64: + case Triple::ppc64le: ++ case Triple::aarch64: ++ case Triple::aarch64_be: + case Triple::x86_64: + FDECFIEncoding = dwarf::DW_EH_PE_pcrel | + (Large ? 
dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4); diff --git a/debian/patches/julia/llvm-D34078-vectorize-fdiv.patch b/debian/patches/julia/llvm-D34078-vectorize-fdiv.patch new file mode 100644 index 00000000..cd33b4a8 --- /dev/null +++ b/debian/patches/julia/llvm-D34078-vectorize-fdiv.patch @@ -0,0 +1,53 @@ +From f94d12b6108b944199b715f31f25a022f75d2feb Mon Sep 17 00:00:00 2001 +From: Yichao Yu +Date: Sat, 10 Jun 2017 08:45:13 -0400 +Subject: [PATCH 4/4] Enable support for floating-point division reductions + +Similar to fsub, fdiv can also be vectorized using fmul. +--- + lib/Transforms/Utils/LoopUtils.cpp | 1 + + test/Transforms/LoopVectorize/float-reduction.ll | 22 ++++++++++++++++++++++ + 2 files changed, 23 insertions(+) + +Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Utils/LoopUtils.cpp ++++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp +@@ -513,6 +513,7 @@ RecurrenceDescriptor::isRecurrenceInstr( + return InstDesc(Kind == RK_IntegerOr, I); + case Instruction::Xor: + return InstDesc(Kind == RK_IntegerXor, I); ++ case Instruction::FDiv: + case Instruction::FMul: + return InstDesc(Kind == RK_FloatMult, I, UAI); + case Instruction::FSub: +Index: llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/LoopVectorize/float-reduction.ll ++++ llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll +@@ -44,3 +44,25 @@ for.body: + for.end: ; preds = %for.body + ret float %sub + } ++ ++;CHECK-LABEL: @foodiv( ++;CHECK: fdiv fast <4 x float> ++;CHECK: ret ++define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp { ++entry: ++ br label %for.body ++ ++for.body: ; preds = %for.body, %entry ++ %indvars.iv = phi i64 [ 0, %entry ], [ 
%indvars.iv.next, %for.body ] ++ %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ] ++ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv ++ %0 = load float, float* %arrayidx, align 4 ++ %sub = fdiv fast float %sum.04, %0 ++ %indvars.iv.next = add i64 %indvars.iv, 1 ++ %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ++ %exitcond = icmp eq i32 %lftr.wideiv, 200 ++ br i1 %exitcond, label %for.end, label %for.body ++ ++for.end: ; preds = %for.body ++ ret float %sub ++} diff --git a/debian/patches/julia/llvm-D42262-jumpthreading-not-i1.patch b/debian/patches/julia/llvm-D42262-jumpthreading-not-i1.patch new file mode 100644 index 00000000..93643ff8 --- /dev/null +++ b/debian/patches/julia/llvm-D42262-jumpthreading-not-i1.patch @@ -0,0 +1,82 @@ +commit 6a311a7a804831fea43cfb2f61322adcb407a1af +Author: Keno Fischer +Date: Thu Jan 18 15:57:05 2018 -0500 + + [JumpThreading] Don't restrict cast-traversal to i1 + + Summary: + In D17663, JumpThreading learned to look through simple cast instructions, + but only if the source of those cast instructions was a phi/cmp i1 + (in an effort to limit compile time effects). I think this condition + is too restrictive. For switches with limited value range, InstCombine + will readily introduce an extra `trunc` instruction to a smaller + integer type (e.g. from i8 to i2), leaving us in the somewhat perverse + situation that jump-threading would work before running instcombine, + but not after. Since instcombine produces this pattern, I think we + need to consider it canonical and support it in JumpThreading. + In general, for limiting recursion, I think the existing restriction + to phi and cmp nodes should be sufficient to avoid looking through + unprofitable chains of instructions.
+ + Reviewers: haicheng, gberry, bmakam, mcrosier + + Subscribers: llvm-commits + + Differential Revision: https://reviews.llvm.org/D42262 + +Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Scalar/JumpThreading.cpp ++++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp +@@ -656,11 +656,9 @@ bool JumpThreadingPass::ComputeValueKnow + } + + // Handle Cast instructions. Only see through Cast when the source operand is +- // PHI or Cmp and the source type is i1 to save the compilation time. ++ // PHI or Cmp to save the compilation time. + if (CastInst *CI = dyn_cast(I)) { + Value *Source = CI->getOperand(0); +- if (!Source->getType()->isIntegerTy(1)) +- return false; + if (!isa(Source) && !isa(Source)) + return false; + ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI); +Index: llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/JumpThreading/basic.ll ++++ llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll +@@ -547,6 +547,34 @@ l5: + ; CHECK: } + } + ++define i1 @trunc_switch(i1 %arg) { ++; CHECK-LABEL: @trunc_switch ++top: ++; CHECK: br i1 %arg, label %exitA, label %exitB ++ br i1 %arg, label %common, label %B ++ ++B: ++ br label %common ++ ++common: ++ %phi = phi i8 [ 2, %B ], [ 1, %top ] ++ %trunc = trunc i8 %phi to i2 ++; CHECK-NOT: switch ++ switch i2 %trunc, label %unreach [ ++ i2 1, label %exitA ++ i2 -2, label %exitB ++ ] ++ ++unreach: ++ unreachable ++ ++exitA: ++ ret i1 true ++ ++exitB: ++ ret i1 false ++} ++ + ; CHECK-LABEL: define void @h_con(i32 %p) { + define void @h_con(i32 %p) { + %x = icmp ult i32 %p, 5 diff --git a/debian/patches/julia/llvm-D44892-Perf-integration.patch b/debian/patches/julia/llvm-D44892-Perf-integration.patch new 
file mode 100644 index 00000000..e849bcd3 --- /dev/null +++ b/debian/patches/julia/llvm-D44892-Perf-integration.patch @@ -0,0 +1,677 @@ +From 45bc0f0badbdbabaed7d204757c2aad7ab49a3fe Mon Sep 17 00:00:00 2001 +From: DokFaust +Date: Mon, 11 Jun 2018 12:59:42 +0200 +Subject: [PATCH] PerfJITEventListener integration, requires compile flag + LLVM_USE_PERF + +--- + CMakeLists.txt | 13 + + include/llvm/Config/config.h.cmake | 3 + + include/llvm/Config/llvm-config.h.cmake | 3 + + .../llvm/ExecutionEngine/JITEventListener.h | 9 + + lib/ExecutionEngine/CMakeLists.txt | 4 + + lib/ExecutionEngine/LLVMBuild.txt | 2 +- + lib/ExecutionEngine/Orc/LLVMBuild.txt | 2 +- + .../PerfJITEvents/CMakeLists.txt | 5 + + .../PerfJITEvents/LLVMBuild.txt | 23 + + .../PerfJITEvents/PerfJITEventListener.cpp | 492 ++++++++++++++++++ + 10 files changed, 554 insertions(+), 2 deletions(-) + create mode 100644 lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt + create mode 100644 lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt + create mode 100644 lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index f8da6cf9211..fb92c825a46 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -426,6 +426,16 @@ if( LLVM_USE_OPROFILE ) + endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) + endif( LLVM_USE_OPROFILE ) + ++option(LLVM_USE_PERF ++ "Use perf JIT interface to inform perf about JIT code" OFF) ++ ++# If enabled, verify we are on a platform that supports perf. 
++if( LLVM_USE_PERF ) ++ if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) ++ message(FATAL_ERROR "perf support is available on Linux only.") ++ endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) ++endif( LLVM_USE_PERF ) ++ + set(LLVM_USE_SANITIZER "" CACHE STRING + "Define the sanitizer used to build binaries and tests.") + set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH +@@ -634,6 +644,9 @@ endif (LLVM_USE_INTEL_JITEVENTS) + if (LLVM_USE_OPROFILE) + set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT) + endif (LLVM_USE_OPROFILE) ++if (LLVM_USE_PERF) ++ set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents) ++endif (LLVM_USE_PERF) + + message(STATUS "Constructing LLVMBuild project information") + execute_process( +diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake +index 940f8420304..17787ed779b 100644 +--- a/include/llvm/Config/config.h.cmake ++++ b/include/llvm/Config/config.h.cmake +@@ -377,6 +377,9 @@ + /* Define if we have the oprofile JIT-support library */ + #cmakedefine01 LLVM_USE_OPROFILE + ++/* Define if we have the perf JIT-support library */ ++#cmakedefine01 LLVM_USE_PERF ++ + /* LLVM version information */ + #cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}" + +diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake +index 4daa00f3bc4..8d9c3b24d52 100644 +--- a/include/llvm/Config/llvm-config.h.cmake ++++ b/include/llvm/Config/llvm-config.h.cmake +@@ -65,6 +65,9 @@ + /* Define if we have the oprofile JIT-support library */ + #cmakedefine01 LLVM_USE_OPROFILE + ++/* Define if we have the perf JIT-support library */ ++#cmakedefine01 LLVM_USE_PERF ++ + /* Major version of the LLVM API */ + #define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR} + +diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h +index ff7840f00a4..1cc2c423a8b 100644 +--- a/include/llvm/ExecutionEngine/JITEventListener.h ++++ 
b/include/llvm/ExecutionEngine/JITEventListener.h +@@ -115,6 +115,15 @@ public: + } + #endif // USE_OPROFILE + ++#ifdef LLVM_USE_PERF ++ static JITEventListener *createPerfJITEventListener(); ++#else ++ static JITEventListener *createPerfJITEventListener() ++ { ++ return nullptr; ++ } ++#endif //USE_PERF ++ + private: + virtual void anchor(); + }; +diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt +index 84b34919e44..893d113a685 100644 +--- a/lib/ExecutionEngine/CMakeLists.txt ++++ b/lib/ExecutionEngine/CMakeLists.txt +@@ -30,3 +30,7 @@ endif( LLVM_USE_OPROFILE ) + if( LLVM_USE_INTEL_JITEVENTS ) + add_subdirectory(IntelJITEvents) + endif( LLVM_USE_INTEL_JITEVENTS ) ++ ++if( LLVM_USE_PERF ) ++ add_subdirectory(PerfJITEvents) ++endif( LLVM_USE_PERF ) +diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt +index 9d29a41f504..b6e1bda6a51 100644 +--- a/lib/ExecutionEngine/LLVMBuild.txt ++++ b/lib/ExecutionEngine/LLVMBuild.txt +@@ -16,7 +16,7 @@ + ;===------------------------------------------------------------------------===; + + [common] +-subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc ++subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents + + [component_0] + type = Library +diff --git a/lib/ExecutionEngine/Orc/LLVMBuild.txt b/lib/ExecutionEngine/Orc/LLVMBuild.txt +index 8f05172e77a..ef4ae64e823 100644 +--- a/lib/ExecutionEngine/Orc/LLVMBuild.txt ++++ b/lib/ExecutionEngine/Orc/LLVMBuild.txt +@@ -19,4 +19,4 @@ + type = Library + name = OrcJIT + parent = ExecutionEngine +-required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils ++required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils +diff --git a/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt +new file mode 100644 +index 00000000000..136cc429d02 +--- /dev/null ++++ 
b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt +@@ -0,0 +1,5 @@ ++add_llvm_library(LLVMPerfJITEvents ++ PerfJITEventListener.cpp ++ ) ++ ++add_dependencies(LLVMPerfJITEvents LLVMCodeGen) +diff --git a/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt +new file mode 100644 +index 00000000000..b1958a69260 +--- /dev/null ++++ b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt +@@ -0,0 +1,23 @@ ++;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===; ++; ++; The LLVM Compiler Infrastructure ++; ++; This file is distributed under the University of Illinois Open Source ++; License. See LICENSE.TXT for details. ++; ++;===------------------------------------------------------------------------===; ++; ++; This is an LLVMBuild description file for the components in this subdirectory. ++; ++; For more information on the LLVMBuild system, please see: ++; ++; http://llvm.org/docs/LLVMBuild.html ++; ++;===------------------------------------------------------------------------===; ++ ++[component_0] ++type = OptionalLibrary ++name = PerfJITEvents ++parent = ExecutionEngine ++required_libraries = CodeGen Core DebugInfoDWARF ExecutionEngine Object Support TransformUtils ++ +diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp +new file mode 100644 +index 00000000000..c2b97dd59f3 +--- /dev/null ++++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp +@@ -0,0 +1,492 @@ ++//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines a JITEventListener object that tells perf about JITted ++// functions, including source line information. ++// ++// Documentation for perf jit integration is available at: ++// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt ++// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt ++// ++//===----------------------------------------------------------------------===// ++ ++#include "llvm/ADT/Twine.h" ++#include "llvm/Config/config.h" ++#include "llvm/DebugInfo/DWARF/DWARFContext.h" ++#include "llvm/ExecutionEngine/JITEventListener.h" ++#include "llvm/Object/ObjectFile.h" ++#include "llvm/Object/SymbolSize.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/Errno.h" ++#include "llvm/Support/FileSystem.h" ++#include "llvm/Support/MemoryBuffer.h" ++#include "llvm/Support/Mutex.h" ++#include "llvm/Support/MutexGuard.h" ++#include "llvm/Support/Path.h" ++#include "llvm/Support/Process.h" ++#include "llvm/Support/Threading.h" ++#include "llvm/Support/raw_ostream.h" ++ ++#include // mmap() ++#include // getpid() ++#include // clock_gettime(), time(), localtime_r() */ ++#include // for getpid(), read(), close() ++ ++using namespace llvm; ++using namespace llvm::object; ++typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; ++ ++namespace { ++ ++// language identifier (XXX: should we generate something better from debug ++// info?) 
++#define JIT_LANG "llvm-IR" ++#define LLVM_PERF_JIT_MAGIC \ ++ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ ++ (uint32_t)'D') ++#define LLVM_PERF_JIT_VERSION 1 ++ ++// bit 0: set if the jitdump file is using an architecture-specific timestamp ++// clock source ++#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0) ++ ++struct LLVMPerfJitHeader; ++ ++class PerfJITEventListener : public JITEventListener { ++public: ++ PerfJITEventListener(); ++ ~PerfJITEventListener() { ++ if (MarkerAddr) ++ CloseMarker(); ++ } ++ ++ void NotifyObjectEmitted(const ObjectFile &Obj, ++ const RuntimeDyld::LoadedObjectInfo &L) override; ++ void NotifyFreeingObject(const ObjectFile &Obj) override; ++ ++private: ++ bool InitDebuggingDir(); ++ bool OpenMarker(); ++ void CloseMarker(); ++ static bool FillMachine(LLVMPerfJitHeader &hdr); ++ ++ void NotifyCode(Expected &Symbol, uint64_t CodeAddr, ++ uint64_t CodeSize); ++ void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines); ++ ++ // cache lookups ++ pid_t Pid; ++ ++ // base directory for output data ++ std::string JitPath; ++ ++ // output data stream, closed via Dumpstream ++ int DumpFd = -1; ++ ++ // output data stream ++ std::unique_ptr Dumpstream; ++ ++ // prevent concurrent dumps from messing up the output file ++ sys::Mutex Mutex; ++ ++ // perf mmap marker ++ void *MarkerAddr = NULL; ++ ++ // perf support ready ++ bool SuccessfullyInitialized = false; ++ ++ // identifier for functions, primarily to identify when moving them around ++ uint64_t CodeGeneration = 1; ++}; ++ ++// The following are POD struct definitions from the perf jit specification ++ ++enum LLVMPerfJitRecordType { ++ JIT_CODE_LOAD = 0, ++ JIT_CODE_MOVE = 1, // not emitted, code isn't moved ++ JIT_CODE_DEBUG_INFO = 2, ++ JIT_CODE_CLOSE = 3, // not emitted, unnecessary ++ JIT_CODE_UNWINDING_INFO = 4, // not emitted ++ ++ JIT_CODE_MAX ++}; ++ ++struct LLVMPerfJitHeader { ++ uint32_t Magic; // characters "JiTD" ++ uint32_t Version; // header version 
++ uint32_t TotalSize; // total size of header ++ uint32_t ElfMach; // elf mach target ++ uint32_t Pad1; // reserved ++ uint32_t Pid; ++ uint64_t Timestamp; // timestamp ++ uint64_t Flags; // flags ++}; ++ ++// record prefix (mandatory in each record) ++struct LLVMPerfJitRecordPrefix { ++ uint32_t Id; // record type identifier ++ uint32_t TotalSize; ++ uint64_t Timestamp; ++}; ++ ++struct LLVMPerfJitRecordCodeLoad { ++ LLVMPerfJitRecordPrefix Prefix; ++ ++ uint32_t Pid; ++ uint32_t Tid; ++ uint64_t Vma; ++ uint64_t CodeAddr; ++ uint64_t CodeSize; ++ uint64_t CodeIndex; ++}; ++ ++struct LLVMPerfJitDebugEntry { ++ uint64_t Addr; ++ int Lineno; // source line number starting at 1 ++ int Discrim; // column discriminator, 0 is default ++ // followed by null terminated filename, \xff\0 if same as previous entry ++}; ++ ++struct LLVMPerfJitRecordDebugInfo { ++ LLVMPerfJitRecordPrefix Prefix; ++ ++ uint64_t CodeAddr; ++ uint64_t NrEntry; ++ // followed by NrEntry LLVMPerfJitDebugEntry records ++}; ++ ++static inline uint64_t timespec_to_ns(const struct timespec *ts) { ++ const uint64_t NanoSecPerSec = 1000000000; ++ return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec; ++} ++ ++static inline uint64_t perf_get_timestamp(void) { ++ struct timespec ts; ++ int ret; ++ ++ ret = clock_gettime(CLOCK_MONOTONIC, &ts); ++ if (ret) ++ return 0; ++ ++ return timespec_to_ns(&ts); ++} ++ ++PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) { ++ // check if clock-source is supported ++ if (!perf_get_timestamp()) { ++ errs() << "kernel does not support CLOCK_MONOTONIC\n"; ++ return; ++ } ++ ++ if (!InitDebuggingDir()) { ++ errs() << "could not initialize debugging directory\n"; ++ return; ++ } ++ ++ std::string Filename; ++ raw_string_ostream FilenameBuf(Filename); ++ FilenameBuf << JitPath << "/jit-" << Pid << ".dump"; ++ ++ // Need to open ourselves, because we need to hand the FD to OpenMarker() and ++ // raw_fd_ostream doesn't expose the FD. 
++ using sys::fs::openFileForWrite; ++ if (auto EC = ++ openFileForWrite(FilenameBuf.str(), DumpFd, sys::fs::F_RW, 0666)) { ++ errs() << "could not open JIT dump file " << FilenameBuf.str() << ": " ++ << EC.message() << "\n"; ++ return; ++ } ++ ++ Dumpstream = make_unique(DumpFd, true); ++ ++ LLVMPerfJitHeader Header = {0}; ++ if (!FillMachine(Header)) ++ return; ++ ++ // signal this process emits JIT information ++ if (!OpenMarker()) ++ return; ++ ++ // emit dumpstream header ++ Header.Magic = LLVM_PERF_JIT_MAGIC; ++ Header.Version = LLVM_PERF_JIT_VERSION; ++ Header.TotalSize = sizeof(Header); ++ Header.Pid = Pid; ++ Header.Timestamp = perf_get_timestamp(); ++ Dumpstream->write(reinterpret_cast(&Header), sizeof(Header)); ++ ++ // Everything initialized, can do profiling now. ++ if (!Dumpstream->has_error()) ++ SuccessfullyInitialized = true; ++} ++ ++void PerfJITEventListener::NotifyObjectEmitted( ++ const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) { ++ ++ if (!SuccessfullyInitialized) ++ return; ++ ++ OwningBinary DebugObjOwner = L.getObjectForDebug(Obj); ++ const ObjectFile &DebugObj = *DebugObjOwner.getBinary(); ++ ++ // Get the address of the object image for use as a unique identifier ++ std::unique_ptr Context = DWARFContext::create(DebugObj); ++ ++ // Use symbol info to iterate over functions in the object. 
++ for (const std::pair &P : computeSymbolSizes(DebugObj)) { ++ SymbolRef Sym = P.first; ++ std::string SourceFileName; ++ ++ Expected SymTypeOrErr = Sym.getType(); ++ if (!SymTypeOrErr) { ++ // There's not much we can with errors here ++ consumeError(SymTypeOrErr.takeError()); ++ continue; ++ } ++ SymbolRef::Type SymType = *SymTypeOrErr; ++ if (SymType != SymbolRef::ST_Function) ++ continue; ++ ++ Expected Name = Sym.getName(); ++ if (!Name) { ++ consumeError(Name.takeError()); ++ continue; ++ } ++ ++ Expected AddrOrErr = Sym.getAddress(); ++ if (!AddrOrErr) { ++ consumeError(AddrOrErr.takeError()); ++ continue; ++ } ++ uint64_t Addr = *AddrOrErr; ++ uint64_t Size = P.second; ++ ++ // According to spec debugging info has to come before loading the ++ // corresonding code load. ++ DILineInfoTable Lines = Context->getLineInfoForAddressRange( ++ Addr, Size, FileLineInfoKind::AbsoluteFilePath); ++ ++ NotifyDebug(Addr, Lines); ++ NotifyCode(Name, Addr, Size); ++ } ++ ++ Dumpstream->flush(); ++} ++ ++void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) { ++ // perf currently doesn't have an interface for unloading. But munmap()ing the ++ // code section does, so that's ok. 
++} ++ ++bool PerfJITEventListener::InitDebuggingDir() { ++ time_t Time; ++ struct tm LocalTime; ++ char TimeBuffer[sizeof("YYYYMMDD")]; ++ SmallString<64> Path; ++ ++ // search for location to dump data to ++ if (const char *BaseDir = getenv("JITDUMPDIR")) ++ Path.append(BaseDir); ++ else if (!sys::path::home_directory(Path)) ++ Path = "."; ++ ++ // create debug directory ++ Path += "/.debug/jit/"; ++ if (auto EC = sys::fs::create_directories(Path)) { ++ errs() << "could not create jit cache directory " << Path << ": " ++ << EC.message() << "\n"; ++ return false; ++ } ++ ++ // create unique directory for dump data related to this process ++ time(&Time); ++ localtime_r(&Time, &LocalTime); ++ strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); ++ Path += JIT_LANG "-jit-"; ++ Path += TimeBuffer; ++ ++ SmallString<128> UniqueDebugDir; ++ ++ using sys::fs::createUniqueDirectory; ++ if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { ++ errs() << "could not create unique jit cache directory " << UniqueDebugDir ++ << ": " << EC.message() << "\n"; ++ return false; ++ } ++ ++ JitPath = UniqueDebugDir.str(); ++ ++ return true; ++} ++ ++bool PerfJITEventListener::OpenMarker() { ++ // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap ++ // is captured either live (perf record running when we mmap) or in deferred ++ // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump ++ // file for more meta data info about the jitted code. Perf report/annotate ++ // detect this special filename and process the jitdump file. ++ // ++ // Mapping must be PROT_EXEC to ensure it is captured by perf record ++ // even when not using -d option. 
++ MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC, ++ MAP_PRIVATE, DumpFd, 0); ++ ++ if (MarkerAddr == MAP_FAILED) { ++ errs() << "could not mmap JIT marker\n"; ++ return false; ++ } ++ return true; ++} ++ ++void PerfJITEventListener::CloseMarker() { ++ if (!MarkerAddr) ++ return; ++ ++ munmap(MarkerAddr, sys::Process::getPageSize()); ++ MarkerAddr = nullptr; ++} ++ ++bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) { ++ char id[16]; ++ struct { ++ uint16_t e_type; ++ uint16_t e_machine; ++ } info; ++ ++ size_t RequiredMemory = sizeof(id) + sizeof(info); ++ ++ ErrorOr> MB = ++ MemoryBuffer::getFileSlice("/proc/self/exe", ++ RequiredMemory, ++ 0); ++ ++ // This'll not guarantee that enough data was actually read from the ++ // underlying file. Instead the trailing part of the buffer would be ++ // zeroed. Given the ELF signature check below that seems ok though, ++ // it's unlikely that the file ends just after that, and the ++ // consequence would just be that perf wouldn't recognize the ++ // signature. ++ if (auto EC = MB.getError()) { ++ errs() << "could not open /proc/self/exe: " << EC.message() << "\n"; ++ return false; ++ } ++ ++ memcpy(&id, (*MB)->getBufferStart(), sizeof(id)); ++ memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info)); ++ ++ // check ELF signature ++ if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') { ++ errs() << "invalid elf signature\n"; ++ return false; ++ } ++ ++ hdr.ElfMach = info.e_machine; ++ ++ return true; ++} ++ ++void PerfJITEventListener::NotifyCode(Expected &Symbol, ++ uint64_t CodeAddr, uint64_t CodeSize) { ++ assert(SuccessfullyInitialized); ++ ++ // 0 length functions can't have samples. 
++ if (CodeSize == 0) ++ return; ++ ++ LLVMPerfJitRecordCodeLoad rec; ++ rec.Prefix.Id = JIT_CODE_LOAD; ++ rec.Prefix.TotalSize = sizeof(rec) + // debug record itself ++ Symbol->size() + 1 + // symbol name ++ CodeSize; // and code ++ rec.Prefix.Timestamp = perf_get_timestamp(); ++ ++ rec.CodeSize = CodeSize; ++ rec.Vma = 0; ++ rec.CodeAddr = CodeAddr; ++ rec.Pid = Pid; ++ rec.Tid = get_threadid(); ++ ++ // avoid interspersing output ++ MutexGuard Guard(Mutex); ++ ++ rec.CodeIndex = CodeGeneration++; // under lock! ++ ++ Dumpstream->write(reinterpret_cast(&rec), sizeof(rec)); ++ Dumpstream->write(Symbol->data(), Symbol->size() + 1); ++ Dumpstream->write(reinterpret_cast(CodeAddr), CodeSize); ++} ++ ++void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, ++ DILineInfoTable Lines) { ++ assert(SuccessfullyInitialized); ++ ++ // Didn't get useful debug info. ++ if (Lines.empty()) ++ return; ++ ++ LLVMPerfJitRecordDebugInfo rec; ++ rec.Prefix.Id = JIT_CODE_DEBUG_INFO; ++ rec.Prefix.TotalSize = sizeof(rec); // will be increased further ++ rec.Prefix.Timestamp = perf_get_timestamp(); ++ rec.CodeAddr = CodeAddr; ++ rec.NrEntry = Lines.size(); ++ ++ // compute total size size of record (variable due to filenames) ++ DILineInfoTable::iterator Begin = Lines.begin(); ++ DILineInfoTable::iterator End = Lines.end(); ++ for (DILineInfoTable::iterator It = Begin; It != End; ++It) { ++ DILineInfo &line = It->second; ++ rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry); ++ rec.Prefix.TotalSize += line.FileName.size() + 1; ++ } ++ ++ // The debug_entry describes the source line information. 
It is defined as ++ // follows in order: ++ // * uint64_t code_addr: address of function for which the debug information ++ // is generated ++ // * uint32_t line : source file line number (starting at 1) ++ // * uint32_t discrim : column discriminator, 0 is default ++ // * char name[n] : source file name in ASCII, including null termination ++ ++ // avoid interspersing output ++ MutexGuard Guard(Mutex); ++ ++ Dumpstream->write(reinterpret_cast(&rec), sizeof(rec)); ++ ++ for (DILineInfoTable::iterator It = Begin; It != End; ++It) { ++ LLVMPerfJitDebugEntry LineInfo; ++ DILineInfo &Line = It->second; ++ ++ LineInfo.Addr = It->first; ++ // The function re-created by perf is preceded by a elf ++ // header. Need to adjust for that, otherwise the results are ++ // wrong. ++ LineInfo.Addr += 0x40; ++ LineInfo.Lineno = Line.Line; ++ LineInfo.Discrim = Line.Discriminator; ++ ++ Dumpstream->write(reinterpret_cast(&LineInfo), ++ sizeof(LineInfo)); ++ Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1); ++ } ++} ++ ++// There should be only a single event listener per process, otherwise perf gets ++// confused. 
++llvm::ManagedStatic PerfListener; ++ ++} // end anonymous namespace ++ ++namespace llvm { ++JITEventListener *JITEventListener::createPerfJITEventListener() { ++ return &*PerfListener; ++} ++ ++} // namespace llvm ++ +-- +2.17.1 + diff --git a/debian/patches/julia/llvm-D50010-VNCoercion-ni.patch b/debian/patches/julia/llvm-D50010-VNCoercion-ni.patch new file mode 100644 index 00000000..cb658d1b --- /dev/null +++ b/debian/patches/julia/llvm-D50010-VNCoercion-ni.patch @@ -0,0 +1,89 @@ +commit 8eb2b102a203d83fb713f3bf79acf235dabdd8cd +Author: Keno Fischer +Date: Mon Jul 30 16:59:08 2018 -0400 + + [VNCoercion] Disallow coercion between different ni addrspaces + + Summary: + I'm not sure if it would be legal by the IR reference to introduce + an addrspacecast here, since the IR reference is a bit vague on + the exact semantics, but at least for our usage of it (and I + suspect for many others' usage) it is not. For us, addrspacecasts + between non-integral address spaces carry frontend information that the + optimizer cannot deduce afterwards in a generic way (though we + have frontend specific passes in our pipeline that do propagate + these). In any case, I'm sure nobody is using it this way at + the moment, since it would have introduced inttoptrs, which + are definitely illegal. + + Fixes PR38375 + + Reviewers: sanjoy, reames, dberlin + + Subscribers: llvm-commits + + Differential Revision: https://reviews.llvm.org/D50010 + +diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp +index c3feea6a0a4..735d1e7b792 100644 +--- a/lib/Transforms/Utils/VNCoercion.cpp ++++ b/lib/Transforms/Utils/VNCoercion.cpp +@@ -20,14 +20,21 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, + StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy()) + return false; + ++ Type *StoredValTy = StoredVal->getType(); ++ + // The store has to be at least as big as the load.
+ if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy)) + return false; + +- // Don't coerce non-integral pointers to integers or vice versa. +- if (DL.isNonIntegralPointerType(StoredVal->getType()) != +- DL.isNonIntegralPointerType(LoadTy)) ++ bool StoredNI = DL.isNonIntegralPointerType(StoredValTy); ++ bool LoadNI = DL.isNonIntegralPointerType(LoadTy); ++ if (StoredNI != LoadNI) { + return false; ++ } else if (StoredNI && LoadNI && ++ cast(StoredValTy)->getAddressSpace() != ++ cast(LoadTy)->getAddressSpace()) { ++ return false; ++ } + + return true; + } +diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll +index 9ae4132231d..5217fc1a06a 100644 +--- a/test/Transforms/GVN/non-integral-pointers.ll ++++ b/test/Transforms/GVN/non-integral-pointers.ll +@@ -1,6 +1,6 @@ + ; RUN: opt -gvn -S < %s | FileCheck %s + +-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4" ++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5" + target triple = "x86_64-unknown-linux-gnu" + + define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) { +@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) { + alwaysTaken: + ret i64 42 + } ++ ++ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) { ++ ; CHECK-LABEL: @multini( ++ ; CHECK-NOT: inttoptr ++ ; CHECK-NOT: ptrtoint ++ ; CHECK-NOT: addrspacecast ++ entry: ++ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc ++ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken ++ ++ neverTaken: ++ %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)** ++ %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc ++ ret i8 addrspace(5)* %differentas ++ ++ alwaysTaken: ++ ret i8 addrspace(5)* null ++ } diff --git a/debian/patches/julia/llvm-D50167-scev-umin.patch b/debian/patches/julia/llvm-D50167-scev-umin.patch new file mode 100644 index 
00000000..fd3d4db2 --- /dev/null +++ b/debian/patches/julia/llvm-D50167-scev-umin.patch @@ -0,0 +1,1143 @@ +commit 556c30af1c797be294edde0ce621884f5acf11f0 +Author: Keno Fischer +Date: Wed Aug 1 20:45:11 2018 -0400 + + RFC: [SCEV] Add explicit representations of umin/smin + + Summary: + Currently we express umin as `~umax(~x, ~y)`. However, this becomes + a problem for operands in non-integral pointer spaces, because `~x` + is not something we can compute for `x` non-integral. However, since + comparisons are generally still allowed, we are actually able to + express `umin(x, y)` directly as long as we don't try to express is + as a umax. Support this by adding an explicit umin/smin representation + to SCEV. We do this by factoring the existing getUMax/getSMax functions + into a new function that does all four. The previous two functions + were largely identical, except that the SMax variant used `isKnownPredicate` + while the UMax variant used `isKnownViaNonRecursiveReasoning`. + + Trying to make the UMax variant also use `isKnownPredicate` yields to + an infinite recursion, while trying to make the `SMax` variant use + `isKnownViaNonRecursiveReasoning` causes + `Transforms/IndVarSimplify/backedge-on-min-max.ll` to fail. + + I would appreciate any insight into which predicate is correct here. + + Reviewers: reames, sanjoy + + Subscribers: javed.absar, llvm-commits + + Differential Revision: https://reviews.llvm.org/D50167 + +Index: llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolution.h +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/include/llvm/Analysis/ScalarEvolution.h ++++ llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolution.h +@@ -582,12 +582,15 @@ public: + /// \p IndexExprs The expressions for the indices. 
+ const SCEV *getGEPExpr(GEPOperator *GEP, + const SmallVectorImpl &IndexExprs); ++ const SCEV *getUSMinMaxExpr(unsigned Kind, SmallVectorImpl &Operands); + const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getSMaxExpr(SmallVectorImpl &Operands); + const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getUMaxExpr(SmallVectorImpl &Operands); + const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS); ++ const SCEV *getSMinExpr(SmallVectorImpl &Operands); + const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS); ++ const SCEV *getUMinExpr(SmallVectorImpl &Operands); + const SCEV *getUnknown(Value *V); + const SCEV *getCouldNotCompute(); + +Index: llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolutionExpander.h +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/include/llvm/Analysis/ScalarEvolutionExpander.h ++++ llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolutionExpander.h +@@ -367,6 +367,10 @@ namespace llvm { + + Value *visitUMaxExpr(const SCEVUMaxExpr *S); + ++ Value *visitSMinExpr(const SCEVSMinExpr *S); ++ ++ Value *visitUMinExpr(const SCEVUMinExpr *S); ++ + Value *visitUnknown(const SCEVUnknown *S) { + return S->getValue(); + } +Index: llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolutionExpressions.h +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/include/llvm/Analysis/ScalarEvolutionExpressions.h ++++ llvm-toolchain-6.0-6.0.1/include/llvm/Analysis/ScalarEvolutionExpressions.h +@@ -40,7 +40,7 @@ class Type; + // These should be ordered in terms of increasing complexity to make the + // folders simpler. 
+ scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr, +- scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, ++ scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUMinExpr, scSMinExpr, + scUnknown, scCouldNotCompute + }; + +@@ -187,6 +187,8 @@ class Type; + S->getSCEVType() == scMulExpr || + S->getSCEVType() == scSMaxExpr || + S->getSCEVType() == scUMaxExpr || ++ S->getSCEVType() == scSMinExpr || ++ S->getSCEVType() == scUMinExpr || + S->getSCEVType() == scAddRecExpr; + } + }; +@@ -204,7 +206,9 @@ class Type; + return S->getSCEVType() == scAddExpr || + S->getSCEVType() == scMulExpr || + S->getSCEVType() == scSMaxExpr || +- S->getSCEVType() == scUMaxExpr; ++ S->getSCEVType() == scUMaxExpr || ++ S->getSCEVType() == scSMinExpr || ++ S->getSCEVType() == scUMinExpr; + } + + /// Set flags for a non-recurrence without clearing previously set flags. +@@ -396,6 +400,42 @@ class Type; + } + }; + ++ /// This class represents a signed minimum selection. ++ class SCEVSMinExpr : public SCEVCommutativeExpr { ++ friend class ScalarEvolution; ++ ++ SCEVSMinExpr(const FoldingSetNodeIDRef ID, ++ const SCEV *const *O, size_t N) ++ : SCEVCommutativeExpr(ID, scSMinExpr, O, N) { ++ // Min never overflows. ++ setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); ++ } ++ ++ public: ++ /// Methods for support type inquiry through isa, cast, and dyn_cast: ++ static bool classof(const SCEV *S) { ++ return S->getSCEVType() == scSMinExpr; ++ } ++ }; ++ ++ /// This class represents an unsigned minimum selection. ++ class SCEVUMinExpr : public SCEVCommutativeExpr { ++ friend class ScalarEvolution; ++ ++ SCEVUMinExpr(const FoldingSetNodeIDRef ID, ++ const SCEV *const *O, size_t N) ++ : SCEVCommutativeExpr(ID, scUMinExpr, O, N) { ++ // Min never overflows. 
++ setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); ++ } ++ ++ public: ++ /// Methods for support type inquiry through isa, cast, and dyn_cast: ++ static bool classof(const SCEV *S) { ++ return S->getSCEVType() == scUMinExpr; ++ } ++ }; ++ + /// This means that we are dealing with an entirely unknown SCEV + /// value, and only represent it as its LLVM Value. This is the + /// "bottom" value for the analysis. +@@ -468,6 +508,10 @@ class Type; + return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S); + case scUMaxExpr: + return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S); ++ case scSMinExpr: ++ return ((SC*)this)->visitSMinExpr((const SCEVSMinExpr*)S); ++ case scUMinExpr: ++ return ((SC*)this)->visitUMinExpr((const SCEVUMinExpr*)S); + case scUnknown: + return ((SC*)this)->visitUnknown((const SCEVUnknown*)S); + case scCouldNotCompute: +@@ -521,6 +565,8 @@ class Type; + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: ++ case scSMinExpr: ++ case scUMinExpr: + case scAddRecExpr: + for (const auto *Op : cast(S)->operands()) + push(Op); +@@ -683,6 +729,26 @@ class Type; + return !Changed ? Expr : SE.getUMaxExpr(Operands); + } + ++ const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) { ++ SmallVector Operands; ++ bool Changed = false; ++ for (auto *Op : Expr->operands()) { ++ Operands.push_back(((SC *)this)->visit(Op)); ++ Changed |= Op != Operands.back(); ++ } ++ return !Changed ? Expr : SE.getSMinExpr(Operands); ++ } ++ ++ const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) { ++ SmallVector Operands; ++ bool Changed = false; ++ for (auto *Op : Expr->operands()) { ++ Operands.push_back(((SC*)this)->visit(Op)); ++ Changed |= Op != Operands.back(); ++ } ++ return !Changed ? 
Expr : SE.getUMinExpr(Operands); ++ } ++ + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + return Expr; + } +Index: llvm-toolchain-6.0-6.0.1/lib/Analysis/ScalarEvolution.cpp +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/lib/Analysis/ScalarEvolution.cpp ++++ llvm-toolchain-6.0-6.0.1/lib/Analysis/ScalarEvolution.cpp +@@ -271,7 +271,9 @@ void SCEV::print(raw_ostream &OS) const + case scAddExpr: + case scMulExpr: + case scUMaxExpr: +- case scSMaxExpr: { ++ case scSMaxExpr: ++ case scUMinExpr: ++ case scSMinExpr: { + const SCEVNAryExpr *NAry = cast(this); + const char *OpStr = nullptr; + switch (NAry->getSCEVType()) { +@@ -279,6 +281,8 @@ void SCEV::print(raw_ostream &OS) const + case scMulExpr: OpStr = " * "; break; + case scUMaxExpr: OpStr = " umax "; break; + case scSMaxExpr: OpStr = " smax "; break; ++ case scUMinExpr: OpStr = " umin "; break; ++ case scSMinExpr: OpStr = " smin "; break; + } + OS << "("; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); +@@ -347,6 +351,8 @@ Type *SCEV::getType() const { + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: ++ case scUMinExpr: ++ case scSMinExpr: + return cast(this)->getType(); + case scAddExpr: + return cast(this)->getType(); +@@ -718,7 +724,9 @@ static int CompareSCEVComplexity( + case scAddExpr: + case scMulExpr: + case scSMaxExpr: +- case scUMaxExpr: { ++ case scUMaxExpr: ++ case scSMinExpr: ++ case scUMinExpr: { + const SCEVNAryExpr *LC = cast(LHS); + const SCEVNAryExpr *RC = cast(RHS); + +@@ -922,6 +930,8 @@ public: + void visitUDivExpr(const SCEVUDivExpr *Numerator) {} + void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} + void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} ++ void visitSMinExpr(const SCEVSMinExpr *Numerator) {} ++ void visitUMinExpr(const SCEVUMinExpr *Numerator) {} + void visitUnknown(const SCEVUnknown *Numerator) {} + void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} + +@@ -2276,6 
+2286,8 @@ bool ScalarEvolution::isAvailableAtLoopE + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: ++ case scUMinExpr: ++ case scSMinExpr: + case scUDivExpr: + return true; + case scUnknown: +@@ -3405,23 +3417,20 @@ ScalarEvolution::getGEPExpr(GEPOperator + return getAddExpr(BaseExpr, TotalOffset, Wrap); + } + +-const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, +- const SCEV *RHS) { +- SmallVector Ops = {LHS, RHS}; +- return getSMaxExpr(Ops); +-} +- + const SCEV * +-ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { +- assert(!Ops.empty() && "Cannot get empty smax!"); ++ScalarEvolution::getUSMinMaxExpr(unsigned Kind, SmallVectorImpl &Ops) { ++ assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); + if (Ops.size() == 1) return Ops[0]; + #ifndef NDEBUG + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && +- "SCEVSMaxExpr operand types don't match!"); ++ "Operand types don't match!"); + #endif + ++ bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr; ++ bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr; ++ + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, &LI, DT); + +@@ -3430,61 +3439,85 @@ ScalarEvolution::getSMaxExpr(SmallVector + if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); ++ auto &FoldOp = ++ Kind == scSMaxExpr ? APIntOps::smax : ++ Kind == scSMinExpr ? APIntOps::smin : ++ Kind == scUMaxExpr ? APIntOps::umax : ++ APIntOps::umin; + while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { + // We found two constants, fold them together! 
+ ConstantInt *Fold = ConstantInt::get( +- getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt())); ++ getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt())); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast(Ops[0]); + } + +- // If we are left with a constant minimum-int, strip it off. +- if (cast(Ops[0])->getValue()->isMinValue(true)) { +- Ops.erase(Ops.begin()); +- --Idx; +- } else if (cast(Ops[0])->getValue()->isMaxValue(true)) { +- // If we have an smax with a constant maximum-int, it will always be +- // maximum-int. +- return Ops[0]; ++ if (IsMax) { ++ // If we are left with a constant minimum-int, strip it off. ++ if (cast(Ops[0])->getValue()->isMinValue(IsSigned)) { ++ Ops.erase(Ops.begin()); ++ --Idx; ++ } else if (cast(Ops[0])->getValue()->isMaxValue(IsSigned)) { ++ // If we have a max with a constant maximum-int, it will always be ++ // maximum-int. ++ return Ops[0]; ++ } ++ } else { ++ // If we are left with a constant maximum-int, strip it off. ++ if (cast(Ops[0])->getValue()->isMaxValue(IsSigned)) { ++ Ops.erase(Ops.begin()); ++ --Idx; ++ } else if (cast(Ops[0])->getValue()->isMinValue(IsSigned)) { ++ // If we have a min with a constant minimum-int, it will always be ++ // minimum-int. ++ return Ops[0]; ++ } + } + + if (Ops.size() == 1) return Ops[0]; + } + +- // Find the first SMax +- while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) ++ // Find the first operation of the same kind ++ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() != Kind) + ++Idx; + + // Check to see if one of the operands is an SMax. If so, expand its operands + // onto our operand list, and recurse to simplify.
+ if (Idx < Ops.size()) { +- bool DeletedSMax = false; +- while (const SCEVSMaxExpr *SMax = dyn_cast(Ops[Idx])) { ++ bool DeletedAny = false; ++ while (Ops[Idx]->getSCEVType() == Kind) { ++ const SCEVCommutativeExpr *SCE = cast(Ops[Idx]); + Ops.erase(Ops.begin()+Idx); +- Ops.append(SMax->op_begin(), SMax->op_end()); +- DeletedSMax = true; ++ Ops.append(SCE->op_begin(), SCE->op_end()); ++ DeletedAny = true; + } + +- if (DeletedSMax) +- return getSMaxExpr(Ops); ++ if (DeletedAny) ++ return getUSMinMaxExpr(Kind, Ops); + } + + // Okay, check to see if the same value occurs in the operand list twice. If + // so, delete one. Since we sorted the list, these values are required to + // be adjacent. +- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) +- // X smax Y smax Y --> X smax Y +- // X smax Y --> X, if X is always greater than Y +- if (Ops[i] == Ops[i+1] || +- isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { +- Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); +- --i; --e; +- } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { +- Ops.erase(Ops.begin()+i, Ops.begin()+i+1); +- --i; --e; +- } ++ llvm::CmpInst::Predicate GEPred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; ++ llvm::CmpInst::Predicate LEPred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; ++ llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred; ++ llvm::CmpInst::Predicate SecondPred = IsMax ? 
LEPred : GEPred; ++ for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) { ++ if (Ops[i] == Ops[i+1] || ++ isKnownPredicate(FirstPred, Ops[i], Ops[i+1])) { ++ // X op Y op Y --> X op Y ++ // X op Y --> X, if we know X, Y are ordered appropriately ++ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); ++ --i; --e; ++ } else if (isKnownPredicate(SecondPred, Ops[i], Ops[i+1])) { ++ // X op Y --> Y, if we know X, Y are ordered appropriately ++ Ops.erase(Ops.begin()+i, Ops.begin()+i+1); ++ --i; --e; ++ } ++ } + + if (Ops.size() == 1) return Ops[0]; + +@@ -3493,132 +3526,73 @@ ScalarEvolution::getSMaxExpr(SmallVector + // Okay, it looks like we really DO need an smax expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; +- ID.AddInteger(scSMaxExpr); ++ ID.AddInteger(Kind); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = nullptr; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + const SCEV **O = SCEVAllocator.Allocate(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); +- SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), +- O, Ops.size()); ++ SCEV *S = nullptr; ++ ++ if (Kind == scSMaxExpr) { ++ S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), ++ O, Ops.size()); ++ } else if (Kind == scUMaxExpr) { ++ S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), ++ O, Ops.size()); ++ } else if (Kind == scSMinExpr) { ++ S = new (SCEVAllocator) SCEVSMinExpr(ID.Intern(SCEVAllocator), ++ O, Ops.size()); ++ } else { ++ assert(Kind == scUMinExpr); ++ S = new (SCEVAllocator) SCEVUMinExpr(ID.Intern(SCEVAllocator), ++ O, Ops.size()); ++ } ++ + UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); + return S; + } + +-const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, ++const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, + const SCEV *RHS) { + SmallVector Ops = {LHS, RHS}; +- return getUMaxExpr(Ops); ++ return 
getSMaxExpr(Ops); + } + +-const SCEV * +-ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { +- assert(!Ops.empty() && "Cannot get empty umax!"); +- if (Ops.size() == 1) return Ops[0]; +-#ifndef NDEBUG +- Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); +- for (unsigned i = 1, e = Ops.size(); i != e; ++i) +- assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && +- "SCEVUMaxExpr operand types don't match!"); +-#endif +- +- // Sort by complexity, this groups all similar expression types together. +- GroupByComplexity(Ops, &LI, DT); +- +- // If there are any constants, fold them together. +- unsigned Idx = 0; +- if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { +- ++Idx; +- assert(Idx < Ops.size()); +- while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { +- // We found two constants, fold them together! +- ConstantInt *Fold = ConstantInt::get( +- getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt())); +- Ops[0] = getConstant(Fold); +- Ops.erase(Ops.begin()+1); // Erase the folded element +- if (Ops.size() == 1) return Ops[0]; +- LHSC = cast(Ops[0]); +- } +- +- // If we are left with a constant minimum-int, strip it off. +- if (cast(Ops[0])->getValue()->isMinValue(false)) { +- Ops.erase(Ops.begin()); +- --Idx; +- } else if (cast(Ops[0])->getValue()->isMaxValue(false)) { +- // If we have an umax with a constant maximum-int, it will always be +- // maximum-int. +- return Ops[0]; +- } +- +- if (Ops.size() == 1) return Ops[0]; +- } +- +- // Find the first UMax +- while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) +- ++Idx; +- +- // Check to see if one of the operands is a UMax. If so, expand its operands +- // onto our operand list, and recurse to simplify. 
+- if (Idx < Ops.size()) { +- bool DeletedUMax = false; +- while (const SCEVUMaxExpr *UMax = dyn_cast(Ops[Idx])) { +- Ops.erase(Ops.begin()+Idx); +- Ops.append(UMax->op_begin(), UMax->op_end()); +- DeletedUMax = true; +- } +- +- if (DeletedUMax) +- return getUMaxExpr(Ops); +- } +- +- // Okay, check to see if the same value occurs in the operand list twice. If +- // so, delete one. Since we sorted the list, these values are required to +- // be adjacent. +- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) +- // X umax Y umax Y --> X umax Y +- // X umax Y --> X, if X is always greater than Y +- if (Ops[i] == Ops[i+1] || +- isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { +- Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); +- --i; --e; +- } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { +- Ops.erase(Ops.begin()+i, Ops.begin()+i+1); +- --i; --e; +- } +- +- if (Ops.size() == 1) return Ops[0]; ++const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { ++ return getUSMinMaxExpr(scSMaxExpr, Ops); ++} + +- assert(!Ops.empty() && "Reduced umax down to nothing!"); ++const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, ++ const SCEV *RHS) { ++ SmallVector Ops = {LHS, RHS}; ++ return getUMaxExpr(Ops); ++} + +- // Okay, it looks like we really DO need a umax expr. Check to see if we +- // already have one, otherwise create a new one. 
+- FoldingSetNodeID ID; +- ID.AddInteger(scUMaxExpr); +- for (unsigned i = 0, e = Ops.size(); i != e; ++i) +- ID.AddPointer(Ops[i]); +- void *IP = nullptr; +- if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; +- const SCEV **O = SCEVAllocator.Allocate(Ops.size()); +- std::uninitialized_copy(Ops.begin(), Ops.end(), O); +- SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), +- O, Ops.size()); +- UniqueSCEVs.InsertNode(S, IP); +- addToLoopUseLists(S); +- return S; ++const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { ++ return getUSMinMaxExpr(scUMaxExpr, Ops); + } + + const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, + const SCEV *RHS) { +- // ~smax(~x, ~y) == smin(x, y). +- return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); ++ SmallVector Ops = { LHS, RHS }; ++ return getSMinExpr(Ops); ++} ++ ++const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl &Ops) { ++ return getUSMinMaxExpr(scSMinExpr, Ops); + } + + const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, + const SCEV *RHS) { +- // ~umax(~x, ~y) == umin(x, y) +- return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); ++ SmallVector Ops = { LHS, RHS }; ++ return getUMinExpr(Ops); ++} ++ ++const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl &Ops) { ++ return getUSMinMaxExpr(scUMinExpr, Ops); + } + + const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { +@@ -5002,6 +4976,7 @@ static bool IsAvailableOnEntry(const Loo + switch (S->getSCEVType()) { + case scConstant: case scTruncate: case scZeroExtend: case scSignExtend: + case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: ++ case scUMinExpr: case scSMinExpr: + // These expressions are available if their operand(s) is/are. + return true; + +@@ -7885,7 +7860,9 @@ static Constant *BuildConstantFromSCEV(c + } + case scSMaxExpr: + case scUMaxExpr: +- break; // TODO: smax, umax. 
++ case scSMinExpr: ++ case scUMinExpr: ++ break; // TODO: smax, umax, smin, umin. + } + return nullptr; + } +@@ -8015,6 +7992,10 @@ const SCEV *ScalarEvolution::computeSCEV + return getSMaxExpr(NewOps); + if (isa(Comm)) + return getUMaxExpr(NewOps); ++ if (isa(Comm)) ++ return getSMinExpr(NewOps); ++ if (isa(Comm)) ++ return getUMinExpr(NewOps); + llvm_unreachable("Unknown commutative SCEV type!"); + } + } +@@ -10998,7 +10979,9 @@ ScalarEvolution::computeLoopDisposition( + case scAddExpr: + case scMulExpr: + case scUMaxExpr: +- case scSMaxExpr: { ++ case scSMaxExpr: ++ case scUMinExpr: ++ case scSMinExpr: { + bool HasVarying = false; + for (auto *Op : cast(S)->operands()) { + LoopDisposition D = getLoopDisposition(Op, L); +@@ -11085,7 +11068,9 @@ ScalarEvolution::computeBlockDisposition + case scAddExpr: + case scMulExpr: + case scUMaxExpr: +- case scSMaxExpr: { ++ case scSMaxExpr: ++ case scUMinExpr: ++ case scSMinExpr: { + const SCEVNAryExpr *NAry = cast(S); + bool Proper = true; + for (const SCEV *NAryOp : NAry->operands()) { +Index: llvm-toolchain-6.0-6.0.1/lib/Analysis/ScalarEvolutionExpander.cpp +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/lib/Analysis/ScalarEvolutionExpander.cpp ++++ llvm-toolchain-6.0-6.0.1/lib/Analysis/ScalarEvolutionExpander.cpp +@@ -1629,14 +1629,15 @@ Value *SCEVExpander::visitSMaxExpr(const + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer.
+- if (S->getOperand(i)->getType() != Ty) { ++ Type *OpTy = S->getOperand(i)->getType(); ++ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); + rememberInstruction(ICmp); +- Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); ++ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); + rememberInstruction(Sel); + LHS = Sel; + } +@@ -1653,13 +1654,64 @@ Value *SCEVExpander::visitUMaxExpr(const + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. +- if (S->getOperand(i)->getType() != Ty) { ++ Type *OpTy = S->getOperand(i)->getType(); ++ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); + rememberInstruction(ICmp); ++ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); ++ rememberInstruction(Sel); ++ LHS = Sel; ++ } ++ // In the case of mixed integer and pointer types, cast the ++ // final result back to the pointer type. ++ if (LHS->getType() != S->getType()) ++ LHS = InsertNoopCastOfTo(LHS, S->getType()); ++ return LHS; ++} ++ ++Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { ++ Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); ++ Type *Ty = LHS->getType(); ++ for (int i = S->getNumOperands()-2; i >= 0; --i) { ++ // In the case of mixed integer and pointer types, do the ++ // rest of the comparisons as integer. 
++ Type *OpTy = S->getOperand(i)->getType(); ++ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { ++ Ty = SE.getEffectiveSCEVType(Ty); ++ LHS = InsertNoopCastOfTo(LHS, Ty); ++ } ++ Value *RHS = expandCodeFor(S->getOperand(i), Ty); ++ Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); ++ rememberInstruction(ICmp); ++ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); ++ rememberInstruction(Sel); ++ LHS = Sel; ++ } ++ // In the case of mixed integer and pointer types, cast the ++ // final result back to the pointer type. ++ if (LHS->getType() != S->getType()) ++ LHS = InsertNoopCastOfTo(LHS, S->getType()); ++ return LHS; ++} ++ ++Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { ++ Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); ++ Type *Ty = LHS->getType(); ++ for (int i = S->getNumOperands()-2; i >= 0; --i) { ++ // In the case of mixed integer and pointer types, do the ++ // rest of the comparisons as integer. ++ Type *OpTy = S->getOperand(i)->getType(); ++ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { ++ Ty = SE.getEffectiveSCEVType(Ty); ++ LHS = InsertNoopCastOfTo(LHS, Ty); ++ } ++ Value *RHS = expandCodeFor(S->getOperand(i), Ty); ++ Value *ICmp = Builder.CreateICmpULT(LHS, RHS); ++ rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + rememberInstruction(Sel); + LHS = Sel; +Index: llvm-toolchain-6.0-6.0.1/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll +=================================================================== +--- /dev/null ++++ llvm-toolchain-6.0-6.0.1/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll +@@ -0,0 +1,50 @@ ++; RUN: opt -loop-versioning -S < %s | FileCheck %s ++ ++; NB: addrspaces 10-13 are non-integral ++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" ++ ++%jl_value_t = type opaque ++%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 } ++ ++define void @"japi1_permutedims!_33509"(%jl_value_t addrspace(10)**) { ++; CHECK: [[CMP:%[^ ]*]] = icmp 
ult double addrspace(13)* [[A:%[^ ]*]], [[B:%[^ ]*]] ++; CHECK: [[SELECT:%[^ ]*]] = select i1 %18, double addrspace(13)* [[A]], double addrspace(13)* [[B]] ++top: ++ %1 = alloca [3 x i64], align 8 ++ %2 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8 ++ %3 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, i64 1 ++ %4 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %3, align 8 ++ %5 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 0 ++ store i64 1, i64* %5, align 8 ++ %6 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 1 ++ %7 = load i64, i64* inttoptr (i64 24 to i64*), align 8 ++ %8 = addrspacecast %jl_value_t addrspace(10)* %4 to %jl_value_t addrspace(11)* ++ %9 = bitcast %jl_value_t addrspace(11)* %8 to double addrspace(13)* addrspace(11)* ++ %10 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %9, align 8 ++ %11 = addrspacecast %jl_value_t addrspace(10)* %2 to %jl_value_t addrspace(11)* ++ %12 = bitcast %jl_value_t addrspace(11)* %11 to double addrspace(13)* addrspace(11)* ++ %13 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %12, align 8 ++ %14 = load i64, i64* %6, align 8 ++ br label %L74 ++ ++L74: ++ %value_phi20 = phi i64 [ 1, %top ], [ %22, %L74 ] ++ %value_phi21 = phi i64 [ 1, %top ], [ %23, %L74 ] ++ %value_phi22 = phi i64 [ 1, %top ], [ %25, %L74 ] ++ %15 = add i64 %value_phi21, -1 ++ %16 = getelementptr inbounds double, double addrspace(13)* %10, i64 %15 ++ %17 = bitcast double addrspace(13)* %16 to i64 addrspace(13)* ++ %18 = load i64, i64 addrspace(13)* %17, align 8 ++ %19 = add i64 %value_phi20, -1 ++ %20 = getelementptr inbounds double, double addrspace(13)* %13, i64 %19 ++ %21 = bitcast double addrspace(13)* %20 to i64 addrspace(13)* ++ store i64 %18, i64 addrspace(13)* %21, align 8 ++ %22 = add i64 %value_phi20, 1 ++ %23 = add i64 %14, %value_phi21 ++ %24 = icmp eq i64 %value_phi22, %7 ++ %25 = add i64 
%value_phi22, 1 ++ br i1 %24, label %L94, label %L74 ++ ++L94: ++ ret void ++} +Index: llvm-toolchain-6.0-6.0.1/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll ++++ llvm-toolchain-6.0-6.0.1/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll +@@ -58,7 +58,7 @@ for.end: + + ; Here it is not obvious what the limits are, since 'step' could be negative. + +-; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a))))) ++; CHECK: Low: ((60000 + %a) umin (60000 + (-40000 * %step) + %a)) + ; CHECK: High: (4 + ((60000 + %a) umax (60000 + (-40000 * %step) + %a))) + + define void @g(i64 %step) { +Index: llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll ++++ llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll +@@ -22,5 +22,5 @@ afterfor: ; preds = %forinc, %entry + ret i32 %j.0.lcssa + } + +-; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %{{[xy]}})) smax (-1 + (-1 * %{{[xy]}})))) ++; CHECK: backedge-taken count is (-2147483633 + (-1 * (%x smin %y))) + +Index: llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/min-max-exprs.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/ScalarEvolution/min-max-exprs.ll ++++ llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/min-max-exprs.ll +@@ -33,7 +33,7 @@ bb2: + %tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6 + ; min(N, i+3) + ; CHECK: select i1 %tmp4, i64 %tmp5, i64 %tmp6 +-; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64)))))) ++; CHECK-NEXT: --> ((sext i32 
{3,+,1}<%bb1> to i64) smin (sext i32 %N to i64)) + %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9 + %tmp12 = load i32, i32* %tmp11, align 4 + %tmp13 = shl nsw i32 %tmp12, 1 +Index: llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/pr28705.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/ScalarEvolution/pr28705.ll ++++ llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/pr28705.ll +@@ -5,7 +5,7 @@ + ; with "%.sroa.speculated + 1". + ; + ; CHECK-LABEL: @foo( +-; CHECK: %[[EXIT:.+]] = sub i32 %.sroa.speculated, -1 ++; CHECK: %[[EXIT:.+]] = add i32 %.sroa.speculated, 1 + ; CHECK: %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %[[EXIT]], %loopexit ] + ; + define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr { +Index: llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/predicated-trip-count.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/ScalarEvolution/predicated-trip-count.ll ++++ llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/predicated-trip-count.ll +@@ -80,7 +80,7 @@ return: ; preds = %bb5 + ; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32) + ; CHECK: Loop %bb3: Unpredictable backedge-taken count. + ; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. 
+-; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M)))) ++; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (sext i16 %Start to i32) + (-1 * ((1 + (sext i16 %Start to i32)) smin %M))) + ; CHECK-NEXT: Predicates: + ; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: + +Index: llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/trip-count3.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Analysis/ScalarEvolution/trip-count3.ll ++++ llvm-toolchain-6.0-6.0.1/test/Analysis/ScalarEvolution/trip-count3.ll +@@ -4,7 +4,7 @@ + ; dividing by the stride will have a remainder. This could theoretically + ; be teaching it how to use a more elaborate trip count computation. + +-; CHECK: Loop %bb3.i: backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64) ++; CHECK: Loop %bb3.i: backedge-taken count is ((63 + (-1 * (63 smin %0)) + %0) /u 64) + ; CHECK: Loop %bb3.i: max backedge-taken count is 33554431 + + %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/conjunctive-checks.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/conjunctive-checks.ll ++++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/conjunctive-checks.ll +@@ -4,16 +4,6 @@ define void @f_0(i32 *%arr, i32 *%a_len_ + ; CHECK-LABEL: @f_0( + + ; CHECK: loop.preheader: +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len +-; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] +-; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 
[[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] +-; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] +-; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 +-; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 +-; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] +-; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit +- + ; CHECK: loop.preheader2: + ; CHECK: br label %loop + +@@ -57,14 +47,10 @@ define void @f_1( + ; CHECK-LABEL: @f_1( + + ; CHECK: loop.preheader: +-; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b +-; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a +-; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] +-; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] +-; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] +-; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] ++; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a ++; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a ++; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n ++; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n + ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 + ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 + +Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/decrementing-loop.ll 
+=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/decrementing-loop.ll ++++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/decrementing-loop.ll +@@ -28,11 +28,8 @@ define void @decrementing_loop(i32 *%arr + ret void + + ; CHECK: loop.preheader: +-; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]] +-; CHECK: [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]] +-; CHECK: [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]] ++; CHECK: [[len_hiclamp_cmp:[^ ]+]] = icmp slt i32 %len, %n ++; CHECK: [[len_hiclamp:[^ ]+]] = select i1 [[len_hiclamp_cmp]], i32 %len, i32 %n + ; CHECK: [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0 + ; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0 + ; CHECK: %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1 +Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/multiple-access-no-preloop.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/multiple-access-no-preloop.ll ++++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/multiple-access-no-preloop.ll +@@ -37,14 +37,10 @@ define void @multiple_access_no_preloop( + ; CHECK-LABEL: @multiple_access_no_preloop( + + ; CHECK: loop.preheader: +-; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b +-; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a +-; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] +-; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] +-; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = 
select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] +-; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] ++; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a ++; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a ++; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n ++; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n + ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 + ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 + +Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/ranges_of_different_types.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/ranges_of_different_types.ll ++++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/ranges_of_different_types.ll +@@ -22,12 +22,11 @@ define void @test_01(i32* %arr, i32* %a_ + ; CHECK-NOT: preloop + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] +-; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +-; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13 ++; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 ++; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 ++; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 + ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at + ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, 
label %main.pseudo.exit + ; CHECK: loop +@@ -82,13 +81,11 @@ define void @test_02(i32* %arr, i32* %a_ + ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 + ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 + ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 +-; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len +-; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] +-; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +-; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] ++; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 ++; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101 ++; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0 + ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader + ; CHECK: loop.preloop: + ; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ] +@@ -150,14 +147,11 @@ define void @test_03(i32* %arr, i32* %a_ + ; CHECK-NOT: preloop + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 +-; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 +-; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] +-; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 +-; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 +-; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] ++; CHECK-NEXT: [[CMP1:%[^ ]+]] 
= icmp slt i32 %len, 13 ++; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 ++; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] ++; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101 + ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at + ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit + ; CHECK: postloop: +@@ -207,10 +201,9 @@ define void @test_04(i32* %arr, i32* %a_ + ; CHECK-LABEL: test_04( + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13 ++; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 + ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader + ; CHECK: in.bounds.preloop: + ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +@@ -251,12 +244,11 @@ define void @test_05(i32* %arr, i32* %a_ + ; CHECK-NOT: preloop + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] +-; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +-; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13 ++; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 ++; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 ++; CHECK-NEXT: [[CMP2:%[^ 
]+]] = icmp sgt i32 [[SMAX]], 0 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 + ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at + ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit + ; CHECK: loop +@@ -296,13 +288,11 @@ define void @test_06(i32* %arr, i32* %a_ + ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 + ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 + ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 +-; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len +-; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] +-; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +-; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] ++; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 ++; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101 ++; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0 + ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader + ; CHECK: in.bounds.preloop: + ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +@@ -343,14 +333,11 @@ define void @test_07(i32* %arr, i32* %a_ + ; CHECK-NOT: preloop + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 +-; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 +-; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] +-; CHECK-NEXT: 
[[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 +-; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 +-; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] ++; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13 ++; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 ++; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] ++; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101 + ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at + ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit + ; CHECK: loop +@@ -387,10 +374,9 @@ define void @test_08(i32* %arr, i32* %a_ + ; CHECK-LABEL: test_08( + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13 ++; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 + ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader + ; CHECK: in.bounds.preloop: + ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/single-access-no-preloop.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/single-access-no-preloop.ll ++++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/single-access-no-preloop.ll +@@ -85,11 +85,9 @@ define void @single_access_no_preloop_wi + ; CHECK-LABEL: @single_access_no_preloop_with_offset( + + ; CHECK: loop.preheader: +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 
3, %len +-; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] +-; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] +-; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] ++; CHECK: [[safe_range_end:[^ ]+]] = add i32 %len, -4 ++; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]] ++; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[safe_range_end]] + ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 + ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 + ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] +Index: llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/single-access-with-preloop.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/IRCE/single-access-with-preloop.ll ++++ llvm-toolchain-6.0-6.0.1/test/Transforms/IRCE/single-access-with-preloop.ll +@@ -33,11 +33,9 @@ define void @single_access_with_preloop( + ; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647 + ; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647 + ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. 
+-; CHECK: [[not_safe_start:[^ ]+]] = add i32 [[safe_offset_preloop]], -1 +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]] +-; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]] +-; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]] ++; CHECK: [[safe_start:[^ ]+]] = sub i32 0, [[safe_offset_preloop]] ++; CHECK: [[exit_preloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_start]] ++; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = select i1 [[exit_preloop_at_cond_loclamp]], i32 %n, i32 [[safe_start]] + ; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0 + ; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0 + +@@ -45,17 +43,15 @@ define void @single_access_with_preloop( + ; CHECK: [[len_minus_sint_max:[^ ]+]] = add i32 %len, -2147483647 + ; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]] + ; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]] +-; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 [[safe_offset_mainloop]], -1 + ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. 
+-; CHECK: [[not_safe_upper_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len +-; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_n]] +-; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_n]] ++; CHECK: [[safe_upper_end:[^ ]+]] = sub i32 %len, [[safe_offset_mainloop]] ++; CHECK: [[exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_upper_end]] ++; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_loclamp]], i32 %n, i32 [[safe_upper_end]] + ; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0 + ; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0 +-; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 [[safe_offset_mainloop_2]], -2147483648 +-; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_safe_lower_end]] +-; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_safe_lower_end]] +-; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_hiclamp]] ++; CHECK: [[safe_lower_end:[^ ]+]] = sub i32 2147483647, [[safe_offset_mainloop_2]] ++; CHECK: [[exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp slt i32 [[exit_mainloop_at_loclamp]], [[safe_lower_end]] ++; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_hiclamp]], i32 [[exit_mainloop_at_loclamp]], i32 [[safe_lower_end]] + ; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_hiclamp]], 0 + ; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_hiclamp]], i32 0 + +Index: llvm-toolchain-6.0-6.0.1/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll +=================================================================== +--- 
llvm-toolchain-6.0-6.0.1.orig/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll ++++ llvm-toolchain-6.0-6.0.1/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll +@@ -14,8 +14,6 @@ target datalayout = "e-p:64:64:64-i1:8:8 + ; current LSR cost model. + ; CHECK-NOT: = ptrtoint i8* undef to i64 + ; CHECK: .lr.ph +-; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1 +-; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}} + ; CHECK: ret void + define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 { + bb: diff --git a/debian/patches/julia/llvm-PPC-addrspaces.patch b/debian/patches/julia/llvm-PPC-addrspaces.patch new file mode 100644 index 00000000..74836576 --- /dev/null +++ b/debian/patches/julia/llvm-PPC-addrspaces.patch @@ -0,0 +1,26 @@ +From 15899eaab58e96bb7bbe7a14099674e255656a50 Mon Sep 17 00:00:00 2001 +From: Valentin Churavy +Date: Fri, 23 Feb 2018 14:41:20 -0500 +Subject: [PATCH] Make AddrSpaceCast noops on PPC + +PPC as AArch64 doesn't have address-spaces so we can drop them in the backend +--- + lib/Target/PowerPC/PPCISelLowering.h | 5 +++++ + 1 file changed, 5 insertions(+) + +Index: llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/lib/Target/PowerPC/PPCISelLowering.h ++++ llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h +@@ -889,6 +889,11 @@ namespace llvm { + return true; + } + ++ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { ++ // Addrspacecasts are always noops. 
++ return true; ++ } ++ + bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, + SelectionDAG &DAG, + ISD::LoadExtType ET = ISD::NON_EXTLOAD) const; diff --git a/debian/patches/julia/llvm-rL326967-aligned-load.patch b/debian/patches/julia/llvm-rL326967-aligned-load.patch new file mode 100644 index 00000000..62c11230 --- /dev/null +++ b/debian/patches/julia/llvm-rL326967-aligned-load.patch @@ -0,0 +1,301 @@ +commit b398d8e1fa5a5a914957fa22d0a64db97f6c265e +Author: Craig Topper +Date: Thu Mar 8 00:21:17 2018 +0000 + + [X86] Fix some isel patterns that used aligned vector load instructions with unaligned predicates. + + These patterns weren't checking the alignment of the load, but were using the aligned instructions. This will cause a GP fault if the data isn't aligned. + + I believe these were introduced in r312450. + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326967 91177308-0d34-0410-b5e6-96231b3b80d8 + +diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td +index db3dfe56531..50c7763a2c3 100644 +--- a/lib/Target/X86/X86InstrVecCompiler.td ++++ b/lib/Target/X86/X86InstrVecCompiler.td +@@ -261,10 +261,10 @@ let Predicates = [HasVLX] in { + // will zero the upper bits. + // TODO: Is there a safe way to detect whether the producing instruction + // already zeroed the upper bits? 
+-multiclass subvector_zero_lowering { ++multiclass subvector_zero_lowering { + def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)), + (SrcTy RC:$src), (iPTR 0))), + (SUBREG_TO_REG (i64 0), +@@ -274,91 +274,91 @@ multiclass subvector_zero_lowering("VMOV"#MoveStr#"rm") addr:$src), SubIdx)>; ++ (!cast("VMOV"#LoadStr#"rm") addr:$src), SubIdx)>; + } + + let Predicates = [HasAVX, NoVLX] in { +- defm : subvector_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, loadv2f64, +- sub_xmm>; +- defm : subvector_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, loadv4f32, +- sub_xmm>; +- defm : subvector_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, loadv2i64, +- sub_xmm>; +- defm : subvector_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, loadv2i64, +- sub_xmm>; +- defm : subvector_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, loadv2i64, +- sub_xmm>; +- defm : subvector_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, loadv2i64, +- sub_xmm>; +-} +- +-let Predicates = [HasVLX] in { +- defm : subvector_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, ++ defm : subvector_zero_lowering<"APD", "UPD", VR128, v4f64, v2f64, v8i32, + loadv2f64, sub_xmm>; +- defm : subvector_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, ++ defm : subvector_zero_lowering<"APS", "UPS", VR128, v8f32, v4f32, v8i32, + loadv4f32, sub_xmm>; +- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, ++ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v4i64, v2i64, v8i32, + loadv2i64, sub_xmm>; +- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, ++ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i32, v4i32, v8i32, + loadv2i64, sub_xmm>; +- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, ++ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i16, v8i16, v8i32, + loadv2i64, sub_xmm>; +- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, +- loadv2i64, sub_xmm>; +- +- defm : 
subvector_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, +- loadv2f64, sub_xmm>; +- defm : subvector_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, +- loadv4f32, sub_xmm>; +- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, +- loadv2i64, sub_xmm>; +- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, +- loadv2i64, sub_xmm>; +- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, +- loadv2i64, sub_xmm>; +- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, ++ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i8, v16i8, v8i32, + loadv2i64, sub_xmm>; ++} + +- defm : subvector_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, +- loadv4f64, sub_ymm>; +- defm : subvector_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, +- loadv8f32, sub_ymm>; +- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, +- loadv4i64, sub_ymm>; +- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, +- loadv4i64, sub_ymm>; +- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, +- loadv4i64, sub_ymm>; +- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, +- loadv4i64, sub_ymm>; ++let Predicates = [HasVLX] in { ++ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v4f64, ++ v2f64, v8i32, loadv2f64, sub_xmm>; ++ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v8f32, ++ v4f32, v8i32, loadv4f32, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v4i64, ++ v2i64, v8i32, loadv2i64, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i32, ++ v4i32, v8i32, loadv2i64, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i16, ++ v8i16, v8i32, loadv2i64, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i8, ++ v16i8, v8i32, loadv2i64, sub_xmm>; ++ ++ defm : 
subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v8f64, ++ v2f64, v16i32, loadv2f64, sub_xmm>; ++ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v16f32, ++ v4f32, v16i32, loadv4f32, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i64, ++ v2i64, v16i32, loadv2i64, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i32, ++ v4i32, v16i32, loadv2i64, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i16, ++ v8i16, v16i32, loadv2i64, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v64i8, ++ v16i8, v16i32, loadv2i64, sub_xmm>; ++ ++ defm : subvector_zero_lowering<"APDZ256", "UPDZ256", VR256X, v8f64, ++ v4f64, v16i32, loadv4f64, sub_ymm>; ++ defm : subvector_zero_lowering<"APSZ256", "UPDZ256", VR256X, v16f32, ++ v8f32, v16i32, loadv8f32, sub_ymm>; ++ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v8i64, ++ v4i64, v16i32, loadv4i64, sub_ymm>; ++ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v16i32, ++ v8i32, v16i32, loadv4i64, sub_ymm>; ++ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v32i16, ++ v16i16, v16i32, loadv4i64, sub_ymm>; ++ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v64i8, ++ v32i8, v16i32, loadv4i64, sub_ymm>; + } + + let Predicates = [HasAVX512, NoVLX] in { +- defm : subvector_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, loadv2f64, +- sub_xmm>; +- defm : subvector_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, loadv4f32, +- sub_xmm>; +- defm : subvector_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, loadv2i64, +- sub_xmm>; +- defm : subvector_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, loadv2i64, +- sub_xmm>; +- defm : subvector_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, loadv2i64, +- sub_xmm>; +- defm : subvector_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, loadv2i64, +- sub_xmm>; +- +- defm : 
subvector_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, +- loadv4f64, sub_ymm>; +- defm : subvector_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, +- loadv8f32, sub_ymm>; +- defm : subvector_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, +- loadv4i64, sub_ymm>; +- defm : subvector_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, +- loadv4i64, sub_ymm>; +- defm : subvector_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, +- loadv4i64, sub_ymm>; +- defm : subvector_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, +- loadv4i64, sub_ymm>; ++ defm : subvector_zero_lowering<"APD", "UPD", VR128, v8f64, v2f64, ++ v16i32,loadv2f64, sub_xmm>; ++ defm : subvector_zero_lowering<"APS", "UPS", VR128, v16f32, v4f32, ++ v16i32, loadv4f32, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i64, v2i64, ++ v16i32, loadv2i64, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i32, v4i32, ++ v16i32, loadv2i64, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i16, v8i16, ++ v16i32, loadv2i64, sub_xmm>; ++ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v64i8, v16i8, ++ v16i32, loadv2i64, sub_xmm>; ++ ++ defm : subvector_zero_lowering<"APDY", "UPDY", VR256, v8f64, v4f64, ++ v16i32, loadv4f64, sub_ymm>; ++ defm : subvector_zero_lowering<"APSY", "UPSY", VR256, v16f32, v8f32, ++ v16i32, loadv8f32, sub_ymm>; ++ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v8i64, v4i64, ++ v16i32, loadv4i64, sub_ymm>; ++ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v16i32, v8i32, ++ v16i32, loadv4i64, sub_ymm>; ++ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v32i16, v16i16, ++ v16i32, loadv4i64, sub_ymm>; ++ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v64i8, v32i8, ++ v16i32, loadv4i64, sub_ymm>; + } + + // List of opcodes that guaranteed to zero the upper elements of vector regs. 
+diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll +index 6ecd8116443..0f2cf594b1c 100644 +--- a/test/CodeGen/X86/merge-consecutive-loads-256.ll ++++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll +@@ -28,13 +28,13 @@ define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noi + define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp { + ; AVX-LABEL: merge_4f64_2f64_2z: + ; AVX: # %bb.0: +-; AVX-NEXT: vmovaps 32(%rdi), %xmm0 ++; AVX-NEXT: vmovups 32(%rdi), %xmm0 + ; AVX-NEXT: retq + ; + ; X32-AVX-LABEL: merge_4f64_2f64_2z: + ; X32-AVX: # %bb.0: + ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0 ++; X32-AVX-NEXT: vmovups 32(%eax), %xmm0 + ; X32-AVX-NEXT: retl + %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2 + %val0 = load <2 x double>, <2 x double>* %ptr0 +@@ -109,13 +109,13 @@ define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline + define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp { + ; AVX-LABEL: merge_4f64_f64_45zz: + ; AVX: # %bb.0: +-; AVX-NEXT: vmovaps 32(%rdi), %xmm0 ++; AVX-NEXT: vmovups 32(%rdi), %xmm0 + ; AVX-NEXT: retq + ; + ; X32-AVX-LABEL: merge_4f64_f64_45zz: + ; X32-AVX: # %bb.0: + ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0 ++; X32-AVX-NEXT: vmovups 32(%eax), %xmm0 + ; X32-AVX-NEXT: retl + %ptr0 = getelementptr inbounds double, double* %ptr, i64 4 + %ptr1 = getelementptr inbounds double, double* %ptr, i64 5 +@@ -155,13 +155,13 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline + define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp { + ; AVX-LABEL: merge_4i64_2i64_3z: + ; AVX: # %bb.0: +-; AVX-NEXT: vmovaps 48(%rdi), %xmm0 ++; AVX-NEXT: vmovups 48(%rdi), %xmm0 + ; AVX-NEXT: retq + ; + ; X32-AVX-LABEL: 
merge_4i64_2i64_3z: + ; X32-AVX: # %bb.0: + ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +-; X32-AVX-NEXT: vmovaps 48(%eax), %xmm0 ++; X32-AVX-NEXT: vmovups 48(%eax), %xmm0 + ; X32-AVX-NEXT: retl + %ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3 + %val0 = load <2 x i64>, <2 x i64>* %ptr0 +@@ -217,13 +217,13 @@ define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp { + define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp { + ; AVX-LABEL: merge_4i64_i64_23zz: + ; AVX: # %bb.0: +-; AVX-NEXT: vmovaps 16(%rdi), %xmm0 ++; AVX-NEXT: vmovups 16(%rdi), %xmm0 + ; AVX-NEXT: retq + ; + ; X32-AVX-LABEL: merge_4i64_i64_23zz: + ; X32-AVX: # %bb.0: + ; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +-; X32-AVX-NEXT: vmovaps 16(%eax), %xmm0 ++; X32-AVX-NEXT: vmovups 16(%eax), %xmm0 + ; X32-AVX-NEXT: retl + %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2 + %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3 +diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll +index 62102eb382c..3c6eaf65292 100644 +--- a/test/CodeGen/X86/merge-consecutive-loads-512.ll ++++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll +@@ -106,13 +106,13 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin + define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp { + ; ALL-LABEL: merge_8f64_f64_12zzuuzz: + ; ALL: # %bb.0: +-; ALL-NEXT: vmovaps 8(%rdi), %xmm0 ++; ALL-NEXT: vmovups 8(%rdi), %xmm0 + ; ALL-NEXT: retq + ; + ; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz: + ; X32-AVX512F: # %bb.0: + ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax +-; X32-AVX512F-NEXT: vmovaps 8(%eax), %xmm0 ++; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0 + ; X32-AVX512F-NEXT: retl + %ptr0 = getelementptr inbounds double, double* %ptr, i64 1 + %ptr1 = getelementptr inbounds double, double* %ptr, i64 2 +@@ -190,7 +190,7 @@ define <8 x i64> 
@merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline + define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp { + ; ALL-LABEL: merge_8i64_i64_56zz9uzz: + ; ALL: # %bb.0: +-; ALL-NEXT: vmovaps 40(%rdi), %xmm0 ++; ALL-NEXT: vmovups 40(%rdi), %xmm0 + ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero + ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 + ; ALL-NEXT: retq +@@ -198,7 +198,7 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s + ; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz: + ; X32-AVX512F: # %bb.0: + ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax +-; X32-AVX512F-NEXT: vmovaps 40(%eax), %xmm0 ++; X32-AVX512F-NEXT: vmovups 40(%eax), %xmm0 + ; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero + ; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 + ; X32-AVX512F-NEXT: retl diff --git a/debian/patches/julia/llvm-rL327898.patch b/debian/patches/julia/llvm-rL327898.patch new file mode 100644 index 00000000..f4d9a430 --- /dev/null +++ b/debian/patches/julia/llvm-rL327898.patch @@ -0,0 +1,6131 @@ +commit 64c3384f94a1eb3e3510d6f66c3bccdfc9d9050b +Author: Nirav Dave +Date: Thu Feb 1 16:11:59 2018 +0000 + + r327898/dependencies roll up + + This is a squash of 13 commits required in the lead up to r327898, + which fixes https://github.com/JuliaLang/julia/issues/27603. 
The squashed + commits are: + + 332d15e981e86b9e058087174bb288ba18a15807 + b659d3fca5d24c25ee73f979edb382f7f24e05e2 + c01d1363ea080170fc5143d72f26eecd9270f03b + eab8a177a4caef9e42ef1d2aeb4ba15dc788d3f2 + bedb1391781b009ace95f5586e7fae5f03fe0689 + 11d041a905f82ac78e7ccf2394773e80b93d147c + e1ec36c55a0127988f42a3329ca835617b30de09 + b8d2903300c13d8fd151c8e5dc71017269617539 + 00884fea345f47ab05174a8f314ecd60d1676d02 + 28ab04cec0d9888af9d29946b3a048b8340abe0f + 3dd52e62ea3087efcca63c3772183d9471abc742 + bd3649ff6d6b4d18b3c6de253179d987a120518a + aea03035b9c633e6d745b6d3fc5b6378699f576c + + Their commit messages follow below: + + [SelectionDAG] Fix UpdateChains handling of TokenFactors + + Summary: + In Instruction Selection UpdateChains replaces all matched Nodes' + chain references including interior token factors and deletes them. + This may allow nodes which depend on these interior nodes but are not + part of the set of matched nodes to be left with a dangling dependence. + Avoid this by doing the replacement for matched non-TokenFactor nodes. + + Fixes PR36164. + + Reviewers: jonpa, RKSimon, bogner + + Subscribers: llvm-commits, hiraditya + + Differential Revision: https://reviews.llvm.org/D42754 + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323977 91177308-0d34-0410-b5e6-96231b3b80d8 + + Regenerate test result for vastart-defs-eflags.ll. NFC. + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323596 91177308-0d34-0410-b5e6-96231b3b80d8 + + Regenerate test result for testb-je-fusion.ll. NFC. + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323595 91177308-0d34-0410-b5e6-96231b3b80d8 + + [X86] Avoid using high register trick for test instruction + + Summary: + It seems its main effect is to create additional copies when values are in registers that do not support this trick, which increases register pressure and makes the code bigger. 
+ + Reviewers: craig.topper, niravd, spatel, hfinkel + + Subscribers: llvm-commits + + Differential Revision: https://reviews.llvm.org/D42646 + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323888 91177308-0d34-0410-b5e6-96231b3b80d8 + + Add a regression test for problems caused by D42646. NFC + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323868 91177308-0d34-0410-b5e6-96231b3b80d8 + + Add test case for truncated and promotion to test. NFC + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323663 91177308-0d34-0410-b5e6-96231b3b80d8 + + [X86] Add test case to ensure testw is generated when optimizing for size. NFC + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323687 91177308-0d34-0410-b5e6-96231b3b80d8 + + [X86] Generate testl instruction through truncates. + + Summary: + This was introduced in D42646 but ended up being reverted because the original implementation was buggy. + + Depends on D42646 + + Reviewers: craig.topper, niravd, spatel, hfinkel + + Subscribers: llvm-commits + + Differential Revision: https://reviews.llvm.org/D42741 + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323899 91177308-0d34-0410-b5e6-96231b3b80d8 + + [X86] Don't look for TEST instruction shrinking opportunities when the root node is an X86ISD::SUB. + + I don't believe we ever create an X86ISD::SUB with a 0 constant which is what the TEST handling needs. The ternary operator at the end of this code shows up as only going one way in the llvm-cov report from the bots. + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@324865 91177308-0d34-0410-b5e6-96231b3b80d8 + + [X86] Teach LowerBUILD_VECTOR to recognize pair-wise splats of 32-bit elements and use a 64-bit broadcast + + If we are splatting pairs of 32-bit elements, we can use a 64-bit broadcast to get the job done. + + We could probably do this with other sizes too, for example four 16-bit elements. 
Or we could broadcast pairs of 16-bit elements using a 32-bit element broadcast. But I've left that as a future improvement. + + I've also restricted this to AVX2 only because we can only broadcast loads under AVX. + + Differential Revision: https://reviews.llvm.org/D42086 + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@322730 91177308-0d34-0410-b5e6-96231b3b80d8 + + [DAG, X86] Revert r327197 "Revert r327170, r327171, r327172" + + Reland ISel cycle checking improvements after simplifying node id + invariant traversal and correcting a typo. + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@327898 91177308-0d34-0410-b5e6-96231b3b80d8 + + [ Modified for cherry-pick: Dropped Hexagon and SystemZ changes ] + + [DAG, X86] Fix ISel-time node insertion ids + + As in SystemZ backend, correctly propagate node ids when inserting new + unselected nodes into the DAG during instruction selection for X86 + target. + + Fixes PR36865. + + Reviewers: jyknight, craig.topper + + Subscribers: hiraditya, llvm-commits + + Differential Revision: https://reviews.llvm.org/D44797 + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@328233 91177308-0d34-0410-b5e6-96231b3b80d8 + + [DAG] Fix node id invalidation in Instruction Selection. + + Invalidation should be bit negation. Add missing negation. + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@328287 91177308-0d34-0410-b5e6-96231b3b80d8 + + Remove failing tests + + This removes tests that are failing due to codegen differences, + after the latest set of backports. Fixing these for the backport + branch does not seem worth it. 
+ +diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h +index de6849a1eae..e56eafc437c 100644 +--- a/include/llvm/CodeGen/SelectionDAGISel.h ++++ b/include/llvm/CodeGen/SelectionDAGISel.h +@@ -110,6 +110,11 @@ public: + CodeGenOpt::Level OptLevel, + bool IgnoreChains = false); + ++ static void InvalidateNodeId(SDNode *N); ++ static int getUninvalidatedNodeId(SDNode *N); ++ ++ static void EnforceNodeIdInvariant(SDNode *N); ++ + // Opcodes used by the DAG state machine: + enum BuiltinOpcodes { + OPC_Scope, +@@ -199,23 +204,28 @@ protected: + /// of the new node T. + void ReplaceUses(SDValue F, SDValue T) { + CurDAG->ReplaceAllUsesOfValueWith(F, T); ++ EnforceNodeIdInvariant(T.getNode()); + } + + /// ReplaceUses - replace all uses of the old nodes F with the use + /// of the new nodes T. + void ReplaceUses(const SDValue *F, const SDValue *T, unsigned Num) { + CurDAG->ReplaceAllUsesOfValuesWith(F, T, Num); ++ for (unsigned i = 0; i < Num; ++i) ++ EnforceNodeIdInvariant(T[i].getNode()); + } + + /// ReplaceUses - replace all uses of the old node F with the use + /// of the new node T. + void ReplaceUses(SDNode *F, SDNode *T) { + CurDAG->ReplaceAllUsesWith(F, T); ++ EnforceNodeIdInvariant(T); + } + + /// Replace all uses of \c F with \c T, then remove \c F from the DAG. + void ReplaceNode(SDNode *F, SDNode *T) { + CurDAG->ReplaceAllUsesWith(F, T); ++ EnforceNodeIdInvariant(T); + CurDAG->RemoveDeadNode(F); + } + +diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h +index 522c2f1b2cb..2d974234abf 100644 +--- a/include/llvm/CodeGen/SelectionDAGNodes.h ++++ b/include/llvm/CodeGen/SelectionDAGNodes.h +@@ -796,16 +796,44 @@ public: + /// searches to be performed in parallel, caching of results across + /// queries and incremental addition to Worklist. Stops early if N is + /// found but will resume. Remember to clear Visited and Worklists +- /// if DAG changes. ++ /// if DAG changes. 
MaxSteps gives a maximum number of nodes to visit before ++ /// giving up. The TopologicalPrune flag signals that positive NodeIds are ++ /// topologically ordered (Operands have strictly smaller node id) and search ++ /// can be pruned leveraging this. + static bool hasPredecessorHelper(const SDNode *N, + SmallPtrSetImpl &Visited, + SmallVectorImpl &Worklist, +- unsigned int MaxSteps = 0) { ++ unsigned int MaxSteps = 0, ++ bool TopologicalPrune = false) { ++ SmallVector DeferredNodes; + if (Visited.count(N)) + return true; ++ ++ // Node Id's are assigned in three places: As a topological ++ // ordering (> 0), during legalization (results in values set to ++ // 0), new nodes (set to -1). If N has a topolgical id then we ++ // know that all nodes with ids smaller than it cannot be ++ // successors and we need not check them. Filter out all node ++ // that can't be matches. We add them to the worklist before exit ++ // in case of multiple calls. Note that during selection the topological id ++ // may be violated if a node's predecessor is selected before it. We mark ++ // this at selection negating the id of unselected successors and ++ // restricting topological pruning to positive ids. ++ ++ int NId = N->getNodeId(); ++ // If we Invalidated the Id, reconstruct original NId. ++ if (NId < -1) ++ NId = -(NId + 1); ++ ++ bool Found = false; + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); +- bool Found = false; ++ int MId = M->getNodeId(); ++ if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) && ++ (MId > 0) && (MId < NId)) { ++ DeferredNodes.push_back(M); ++ continue; ++ } + for (const SDValue &OpV : M->op_values()) { + SDNode *Op = OpV.getNode(); + if (Visited.insert(Op).second) +@@ -814,11 +842,16 @@ public: + Found = true; + } + if (Found) +- return true; ++ break; + if (MaxSteps != 0 && Visited.size() >= MaxSteps) +- return false; ++ break; + } +- return false; ++ // Push deferred nodes back on worklist. 
++ Worklist.append(DeferredNodes.begin(), DeferredNodes.end()); ++ // If we bailed early, conservatively return found. ++ if (MaxSteps != 0 && Visited.size() >= MaxSteps) ++ return true; ++ return Found; + } + + /// Return true if all the users of N are contained in Nodes. +diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +index bd9fcfb5c1e..17e42240133 100644 +--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp ++++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +@@ -937,6 +937,58 @@ public: + + } // end anonymous namespace + ++// This function is used to enforce the topological node id property ++// property leveraged during Instruction selection. Before selection all ++// nodes are given a non-negative id such that all nodes have a larger id than ++// their operands. As this holds transitively we can prune checks that a node N ++// is a predecessor of M another by not recursively checking through M's ++// operands if N's ID is larger than M's ID. This is significantly improves ++// performance of for various legality checks (e.g. IsLegalToFold / ++// UpdateChains). ++ ++// However, when we fuse multiple nodes into a single node ++// during selection we may induce a predecessor relationship between inputs and ++// outputs of distinct nodes being merged violating the topological property. ++// Should a fused node have a successor which has yet to be selected, our ++// legality checks would be incorrect. To avoid this we mark all unselected ++// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x => ++// (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M. ++// We use bit-negation to more clearly enforce that node id -1 can only be ++// achieved by selected nodes). As the conversion is reversable the original Id, ++// topological pruning can still be leveraged when looking for unselected nodes. ++// This method is call internally in all ISel replacement calls. 
++void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) { ++ SmallVector Nodes; ++ Nodes.push_back(Node); ++ ++ while (!Nodes.empty()) { ++ SDNode *N = Nodes.pop_back_val(); ++ for (auto *U : N->uses()) { ++ auto UId = U->getNodeId(); ++ if (UId > 0) { ++ InvalidateNodeId(U); ++ Nodes.push_back(U); ++ } ++ } ++ } ++} ++ ++// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a ++// NodeId with the equivalent node id which is invalid for topological ++// pruning. ++void SelectionDAGISel::InvalidateNodeId(SDNode *N) { ++ int InvalidId = -(N->getNodeId() + 1); ++ N->setNodeId(InvalidId); ++} ++ ++// getUninvalidatedNodeId - get original uninvalidated node id. ++int SelectionDAGISel::getUninvalidatedNodeId(SDNode *N) { ++ int Id = N->getNodeId(); ++ if (Id < -1) ++ return -(Id + 1); ++ return Id; ++} ++ + void SelectionDAGISel::DoInstructionSelection() { + DEBUG(dbgs() << "===== Instruction selection begins: " + << printMBBReference(*FuncInfo->MBB) << " '" +@@ -972,6 +1024,33 @@ void SelectionDAGISel::DoInstructionSelection() { + if (Node->use_empty()) + continue; + ++#ifndef NDEBUG ++ SmallVector Nodes; ++ Nodes.push_back(Node); ++ ++ while (!Nodes.empty()) { ++ auto N = Nodes.pop_back_val(); ++ if (N->getOpcode() == ISD::TokenFactor || N->getNodeId() < 0) ++ continue; ++ for (const SDValue &Op : N->op_values()) { ++ if (Op->getOpcode() == ISD::TokenFactor) ++ Nodes.push_back(Op.getNode()); ++ else { ++ // We rely on topological ordering of node ids for checking for ++ // cycles when fusing nodes during selection. All unselected nodes ++ // successors of an already selected node should have a negative id. ++ // This assertion will catch such cases. If this assertion triggers ++ // it is likely you using DAG-level Value/Node replacement functions ++ // (versus equivalent ISEL replacement) in backend-specific ++ // selections. See comment in EnforceNodeIdInvariant for more ++ // details. 
++ assert(Op->getNodeId() != -1 && ++ "Node has already selected predecessor node"); ++ } ++ } ++ } ++#endif ++ + // When we are using non-default rounding modes or FP exception behavior + // FP operations are represented by StrictFP pseudo-operations. They + // need to be simplified here so that the target-specific instruction +@@ -2134,52 +2213,44 @@ static SDNode *findGlueUse(SDNode *N) { + return nullptr; + } + +-/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". +-/// This function iteratively traverses up the operand chain, ignoring +-/// certain nodes. +-static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, +- SDNode *Root, SmallPtrSetImpl &Visited, ++/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path ++/// beyond "ImmedUse". We may ignore chains as they are checked separately. ++static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse, + bool IgnoreChains) { +- // The NodeID's are given uniques ID's where a node ID is guaranteed to be +- // greater than all of its (recursive) operands. If we scan to a point where +- // 'use' is smaller than the node we're scanning for, then we know we will +- // never find it. +- // +- // The Use may be -1 (unassigned) if it is a newly allocated node. This can +- // happen because we scan down to newly selected nodes in the case of glue +- // uses. +- std::vector WorkList; +- WorkList.push_back(Use); +- +- while (!WorkList.empty()) { +- Use = WorkList.back(); +- WorkList.pop_back(); +- if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1) +- continue; ++ SmallPtrSet Visited; ++ SmallVector WorkList; ++ // Only check if we have non-immediate uses of Def. ++ if (ImmedUse->isOnlyUserOf(Def)) ++ return false; + +- // Don't revisit nodes if we already scanned it and didn't fail, we know we +- // won't fail if we scan it again. 
+- if (!Visited.insert(Use).second) ++ // We don't care about paths to Def that go through ImmedUse so mark it ++ // visited and mark non-def operands as used. ++ Visited.insert(ImmedUse); ++ for (const SDValue &Op : ImmedUse->op_values()) { ++ SDNode *N = Op.getNode(); ++ // Ignore chain deps (they are validated by ++ // HandleMergeInputChains) and immediate uses ++ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def) + continue; ++ if (!Visited.insert(N).second) ++ continue; ++ WorkList.push_back(N); ++ } + +- for (const SDValue &Op : Use->op_values()) { +- // Ignore chain uses, they are validated by HandleMergeInputChains. +- if (Op.getValueType() == MVT::Other && IgnoreChains) +- continue; +- ++ // Initialize worklist to operands of Root. ++ if (Root != ImmedUse) { ++ for (const SDValue &Op : Root->op_values()) { + SDNode *N = Op.getNode(); +- if (N == Def) { +- if (Use == ImmedUse || Use == Root) +- continue; // We are not looking for immediate use. +- assert(N != Root); +- return true; +- } +- +- // Traverse up the operand chain. ++ // Ignore chains (they are validated by HandleMergeInputChains) ++ if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def) ++ continue; ++ if (!Visited.insert(N).second) ++ continue; + WorkList.push_back(N); + } + } +- return false; ++ ++ return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true); + } + + /// IsProfitableToFold - Returns true if it's profitable to fold the specific +@@ -2251,13 +2322,12 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, + + // If our query node has a glue result with a use, we've walked up it. If + // the user (which has already been selected) has a chain or indirectly uses +- // the chain, our WalkChainUsers predicate will not consider it. Because of ++ // the chain, HandleMergeInputChains will not consider it. Because of + // this, we cannot ignore chains in this predicate. 
+ IgnoreChains = false; + } + +- SmallPtrSet Visited; +- return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); ++ return !findNonImmUse(Root, N.getNode(), U, IgnoreChains); + } + + void SelectionDAGISel::Select_INLINEASM(SDNode *N) { +@@ -2360,7 +2430,8 @@ void SelectionDAGISel::UpdateChains( + std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N, + static_cast(nullptr)); + }); +- CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); ++ if (ChainNode->getOpcode() != ISD::TokenFactor) ++ ReplaceUses(ChainVal, InputChain); + + // If the node became dead and we haven't already seen it, delete it. + if (ChainNode != NodeToMatch && ChainNode->use_empty() && +@@ -2375,143 +2446,6 @@ void SelectionDAGISel::UpdateChains( + DEBUG(dbgs() << "ISEL: Match complete!\n"); + } + +-enum ChainResult { +- CR_Simple, +- CR_InducesCycle, +- CR_LeadsToInteriorNode +-}; +- +-/// WalkChainUsers - Walk down the users of the specified chained node that is +-/// part of the pattern we're matching, looking at all of the users we find. +-/// This determines whether something is an interior node, whether we have a +-/// non-pattern node in between two pattern nodes (which prevent folding because +-/// it would induce a cycle) and whether we have a TokenFactor node sandwiched +-/// between pattern nodes (in which case the TF becomes part of the pattern). +-/// +-/// The walk we do here is guaranteed to be small because we quickly get down to +-/// already selected nodes "below" us. +-static ChainResult +-WalkChainUsers(const SDNode *ChainedNode, +- SmallVectorImpl &ChainedNodesInPattern, +- DenseMap &TokenFactorResult, +- SmallVectorImpl &InteriorChainedNodes) { +- ChainResult Result = CR_Simple; +- +- for (SDNode::use_iterator UI = ChainedNode->use_begin(), +- E = ChainedNode->use_end(); UI != E; ++UI) { +- // Make sure the use is of the chain, not some other value we produce. 
+- if (UI.getUse().getValueType() != MVT::Other) continue; +- +- SDNode *User = *UI; +- +- if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph. +- continue; +- +- // If we see an already-selected machine node, then we've gone beyond the +- // pattern that we're selecting down into the already selected chunk of the +- // DAG. +- unsigned UserOpcode = User->getOpcode(); +- if (User->isMachineOpcode() || +- UserOpcode == ISD::CopyToReg || +- UserOpcode == ISD::CopyFromReg || +- UserOpcode == ISD::INLINEASM || +- UserOpcode == ISD::EH_LABEL || +- UserOpcode == ISD::LIFETIME_START || +- UserOpcode == ISD::LIFETIME_END) { +- // If their node ID got reset to -1 then they've already been selected. +- // Treat them like a MachineOpcode. +- if (User->getNodeId() == -1) +- continue; +- } +- +- // If we have a TokenFactor, we handle it specially. +- if (User->getOpcode() != ISD::TokenFactor) { +- // If the node isn't a token factor and isn't part of our pattern, then it +- // must be a random chained node in between two nodes we're selecting. +- // This happens when we have something like: +- // x = load ptr +- // call +- // y = x+4 +- // store y -> ptr +- // Because we structurally match the load/store as a read/modify/write, +- // but the call is chained between them. We cannot fold in this case +- // because it would induce a cycle in the graph. +- if (!std::count(ChainedNodesInPattern.begin(), +- ChainedNodesInPattern.end(), User)) +- return CR_InducesCycle; +- +- // Otherwise we found a node that is part of our pattern. For example in: +- // x = load ptr +- // y = x+4 +- // store y -> ptr +- // This would happen when we're scanning down from the load and see the +- // store as a user. Record that there is a use of ChainedNode that is +- // part of the pattern and keep scanning uses. 
+- Result = CR_LeadsToInteriorNode; +- InteriorChainedNodes.push_back(User); +- continue; +- } +- +- // If we found a TokenFactor, there are two cases to consider: first if the +- // TokenFactor is just hanging "below" the pattern we're matching (i.e. no +- // uses of the TF are in our pattern) we just want to ignore it. Second, +- // the TokenFactor can be sandwiched in between two chained nodes, like so: +- // [Load chain] +- // ^ +- // | +- // [Load] +- // ^ ^ +- // | \ DAG's like cheese +- // / \ do you? +- // / | +- // [TokenFactor] [Op] +- // ^ ^ +- // | | +- // \ / +- // \ / +- // [Store] +- // +- // In this case, the TokenFactor becomes part of our match and we rewrite it +- // as a new TokenFactor. +- // +- // To distinguish these two cases, do a recursive walk down the uses. +- auto MemoizeResult = TokenFactorResult.find(User); +- bool Visited = MemoizeResult != TokenFactorResult.end(); +- // Recursively walk chain users only if the result is not memoized. +- if (!Visited) { +- auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult, +- InteriorChainedNodes); +- MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first; +- } +- switch (MemoizeResult->second) { +- case CR_Simple: +- // If the uses of the TokenFactor are just already-selected nodes, ignore +- // it, it is "below" our pattern. +- continue; +- case CR_InducesCycle: +- // If the uses of the TokenFactor lead to nodes that are not part of our +- // pattern that are not selected, folding would turn this into a cycle, +- // bail out now. +- return CR_InducesCycle; +- case CR_LeadsToInteriorNode: +- break; // Otherwise, keep processing. +- } +- +- // Okay, we know we're in the interesting interior case. The TokenFactor +- // is now going to be considered part of the pattern so that we rewrite its +- // uses (it may have uses that are not part of the pattern) with the +- // ultimate chain result of the generated code. 
We will also add its chain +- // inputs as inputs to the ultimate TokenFactor we create. +- Result = CR_LeadsToInteriorNode; +- if (!Visited) { +- ChainedNodesInPattern.push_back(User); +- InteriorChainedNodes.push_back(User); +- } +- } +- +- return Result; +-} +- + /// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains + /// operation for when the pattern matched at least one node with a chains. The + /// input vector contains a list of all of the chained nodes that we match. We +@@ -2521,47 +2455,56 @@ WalkChainUsers(const SDNode *ChainedNode, + static SDValue + HandleMergeInputChains(SmallVectorImpl &ChainNodesMatched, + SelectionDAG *CurDAG) { +- // Used for memoization. Without it WalkChainUsers could take exponential +- // time to run. +- DenseMap TokenFactorResult; +- // Walk all of the chained nodes we've matched, recursively scanning down the +- // users of the chain result. This adds any TokenFactor nodes that are caught +- // in between chained nodes to the chained and interior nodes list. +- SmallVector InteriorChainedNodes; +- for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { +- if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, +- TokenFactorResult, +- InteriorChainedNodes) == CR_InducesCycle) +- return SDValue(); // Would induce a cycle. +- } + +- // Okay, we have walked all the matched nodes and collected TokenFactor nodes +- // that we are interested in. Form our input TokenFactor node. ++ SmallPtrSet Visited; ++ SmallVector Worklist; + SmallVector InputChains; +- for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { +- // Add the input chain of this node to the InputChains list (which will be +- // the operands of the generated TokenFactor) if it's not an interior node. 
+- SDNode *N = ChainNodesMatched[i]; +- if (N->getOpcode() != ISD::TokenFactor) { +- if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) +- continue; ++ unsigned int Max = 8192; + +- // Otherwise, add the input chain. +- SDValue InChain = ChainNodesMatched[i]->getOperand(0); +- assert(InChain.getValueType() == MVT::Other && "Not a chain"); +- InputChains.push_back(InChain); +- continue; +- } ++ // Quick exit on trivial merge. ++ if (ChainNodesMatched.size() == 1) ++ return ChainNodesMatched[0]->getOperand(0); + +- // If we have a token factor, we want to add all inputs of the token factor +- // that are not part of the pattern we're matching. +- for (const SDValue &Op : N->op_values()) { +- if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(), +- Op.getNode())) +- InputChains.push_back(Op); +- } ++ // Add chains that aren't already added (internal). Peek through ++ // token factors. ++ std::function AddChains = [&](const SDValue V) { ++ if (V.getValueType() != MVT::Other) ++ return; ++ if (V->getOpcode() == ISD::EntryToken) ++ return; ++ if (!Visited.insert(V.getNode()).second) ++ return; ++ if (V->getOpcode() == ISD::TokenFactor) { ++ for (const SDValue &Op : V->op_values()) ++ AddChains(Op); ++ } else ++ InputChains.push_back(V); ++ }; ++ ++ for (auto *N : ChainNodesMatched) { ++ Worklist.push_back(N); ++ Visited.insert(N); + } + ++ while (!Worklist.empty()) ++ AddChains(Worklist.pop_back_val()->getOperand(0)); ++ ++ // Skip the search if there are no chain dependencies. ++ if (InputChains.size() == 0) ++ return CurDAG->getEntryNode(); ++ ++ // If one of these chains is a successor of input, we must have a ++ // node that is both the predecessor and successor of the ++ // to-be-merged nodes. Fail. 
++ Visited.clear(); ++ for (SDValue V : InputChains) ++ Worklist.push_back(V.getNode()); ++ ++ for (auto *N : ChainNodesMatched) ++ if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true)) ++ return SDValue(); ++ ++ // Return merged chain. + if (InputChains.size() == 1) + return InputChains[0]; + return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), +@@ -2606,8 +2549,8 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, + // Move the glue if needed. + if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && + (unsigned)OldGlueResultNo != ResNumResults-1) +- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo), +- SDValue(Res, ResNumResults-1)); ++ ReplaceUses(SDValue(Node, OldGlueResultNo), ++ SDValue(Res, ResNumResults - 1)); + + if ((EmitNodeInfo & OPFL_GlueOutput) != 0) + --ResNumResults; +@@ -2615,14 +2558,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, + // Move the chain reference if needed. + if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && + (unsigned)OldChainResultNo != ResNumResults-1) +- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), +- SDValue(Res, ResNumResults-1)); ++ ReplaceUses(SDValue(Node, OldChainResultNo), ++ SDValue(Res, ResNumResults - 1)); + + // Otherwise, no replacement happened because the node already exists. Replace + // Uses of the old node with the new one. 
+ if (Res != Node) { +- CurDAG->ReplaceAllUsesWith(Node, Res); +- CurDAG->RemoveDeadNode(Node); ++ ReplaceNode(Node, Res); ++ } else { ++ EnforceNodeIdInvariant(Res); + } + + return Res; +@@ -2939,8 +2883,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, + return; + case ISD::AssertSext: + case ISD::AssertZext: +- CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), +- NodeToMatch->getOperand(0)); ++ ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); + CurDAG->RemoveDeadNode(NodeToMatch); + return; + case ISD::INLINEASM: +@@ -3702,7 +3645,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, + NodeToMatch->getValueType(i).getSizeInBits() == + Res.getValueSizeInBits()) && + "invalid replacement"); +- CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); ++ ReplaceUses(SDValue(NodeToMatch, i), Res); + } + + // Update chain uses. +@@ -3715,8 +3658,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, + if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) == + MVT::Glue && + InputGlue.getNode()) +- CurDAG->ReplaceAllUsesOfValueWith( +- SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue); ++ ReplaceUses(SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), ++ InputGlue); + + assert(NodeToMatch->use_empty() && + "Didn't replace all uses of the node?"); +diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +index f4776adb069..be5345e422d 100644 +--- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp ++++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +@@ -759,12 +759,11 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { + + if (ProduceCarry) { + // Replace the carry-use +- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1)); ++ ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1)); + } + + // Replace the remaining uses. 
+- CurDAG->ReplaceAllUsesWith(N, RegSequence); +- CurDAG->RemoveDeadNode(N); ++ ReplaceNode(N, RegSequence); + } + + void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { +diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp +index 8d32510e200..0f504718f28 100644 +--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp ++++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp +@@ -498,7 +498,7 @@ bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, + + void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { + CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); +- CurDAG->ReplaceAllUsesWith(N, M); ++ ReplaceUses(N, M); + } + + bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, +diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +index a6ac4e3df74..3721856ff45 100644 +--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp ++++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +@@ -777,7 +777,7 @@ void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) { + return; + } + +- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N,0), N->getOperand(0)); ++ ReplaceUses(SDValue(N, 0), N->getOperand(0)); + CurDAG->RemoveDeadNode(N); + } + +@@ -2182,4 +2182,3 @@ void HexagonDAGToDAGISel::rebalanceAddressTrees() { + RootHeights.clear(); + RootWeights.clear(); + } +- +diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +index f08c5054065..0608f06ef7e 100644 +--- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp ++++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +@@ -1914,7 +1914,6 @@ void HvxSelector::selectShuffle(SDNode *N) { + // If the mask is all -1's, generate "undef". 
+ if (!UseLeft && !UseRight) { + ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode()); +- DAG.RemoveDeadNode(N); + return; + } + +@@ -1970,7 +1969,6 @@ void HvxSelector::selectRor(SDNode *N) { + NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV}); + + ISel.ReplaceNode(N, NewN); +- DAG.RemoveDeadNode(N); + } + + void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) { +@@ -2017,8 +2015,7 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) { + MemOp[0] = cast(N)->getMemOperand(); + cast(Result)->setMemRefs(MemOp, MemOp + 1); + +- ReplaceUses(N, Result); +- CurDAG->RemoveDeadNode(N); ++ ReplaceNode(N, Result); + } + + void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { +@@ -2056,8 +2053,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { + MemOp[0] = cast(N)->getMemOperand(); + cast(Result)->setMemRefs(MemOp, MemOp + 1); + +- ReplaceUses(N, Result); +- CurDAG->RemoveDeadNode(N); ++ ReplaceNode(N, Result); + } + + void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) { +@@ -2100,5 +2096,3 @@ void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) { + ReplaceUses(SDValue(N, 1), SDValue(Result, 1)); + CurDAG->RemoveDeadNode(N); + } +- +- +diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +index ce6f3d37f5c..fe59d820c88 100644 +--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp ++++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +@@ -589,10 +589,16 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr, + // The selection DAG must no longer depend on their uniqueness when this + // function is used. 
+ static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) { +- if (N.getNode()->getNodeId() == -1 || +- N.getNode()->getNodeId() > Pos->getNodeId()) { ++ if (N->getNodeId() == -1 || ++ (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) > ++ SelectionDAGISel::getUninvalidatedNodeId(Pos))) { + DAG->RepositionNode(Pos->getIterator(), N.getNode()); +- N.getNode()->setNodeId(Pos->getNodeId()); ++ // Mark Node as invalid for pruning as after this it may be a successor to a ++ // selected node but otherwise be in the same position of Pos. ++ // Conservatively mark it with the same -abs(Id) to assure node id ++ // invariant is preserved. ++ N->setNodeId(Pos->getNodeId()); ++ SelectionDAGISel::InvalidateNodeId(N.getNode()); + } + } + +@@ -1022,8 +1028,7 @@ bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { + }; + SDValue New = convertTo( + DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, OpcodeVT, Ops), 0)); +- ReplaceUses(N, New.getNode()); +- CurDAG->RemoveDeadNode(N); ++ ReplaceNode(N, New.getNode()); + return true; + } + +@@ -1114,8 +1119,7 @@ void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, + SDValue Lower = CurDAG->getConstant(LowerVal, DL, VT); + SDValue Or = CurDAG->getNode(Opcode, DL, VT, Upper, Lower); + +- ReplaceUses(Node, Or.getNode()); +- CurDAG->RemoveDeadNode(Node); ++ ReplaceNode(Node, Or.getNode()); + + SelectCode(Or.getNode()); + } +diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp +index d79fd0ca4da..ee2d221e31c 100644 +--- a/lib/Target/X86/X86ISelDAGToDAG.cpp ++++ b/lib/Target/X86/X86ISelDAGToDAG.cpp +@@ -988,10 +988,16 @@ bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM, + // IDs! The selection DAG must no longer depend on their uniqueness when this + // is used. 
+ static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { +- if (N.getNode()->getNodeId() == -1 || +- N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) { +- DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode()); +- N.getNode()->setNodeId(Pos.getNode()->getNodeId()); ++ if (N->getNodeId() == -1 || ++ (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) > ++ SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) { ++ DAG.RepositionNode(Pos->getIterator(), N.getNode()); ++ // Mark Node as invalid for pruning as after this it may be a successor to a ++ // selected node but otherwise be in the same position of Pos. ++ // Conservatively mark it with the same -abs(Id) to assure node id ++ // invariant is preserved. ++ N->setNodeId(Pos->getNodeId()); ++ SelectionDAGISel::InvalidateNodeId(N.getNode()); + } + } + +@@ -2092,50 +2098,84 @@ static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, + LoadNode->getOffset() != StoreNode->getOffset()) + return false; + +- // Check if the chain is produced by the load or is a TokenFactor with +- // the load output chain as an operand. Return InputChain by reference. ++ bool FoundLoad = false; ++ SmallVector ChainOps; ++ SmallVector LoopWorklist; ++ SmallPtrSet Visited; ++ const unsigned int Max = 1024; ++ ++ // Visualization of Load-Op-Store fusion: ++ // ------------------------- ++ // Legend: ++ // *-lines = Chain operand dependencies. ++ // |-lines = Normal operand dependencies. ++ // Dependencies flow down and right. n-suffix references multiple nodes. ++ // ++ // C Xn C ++ // * * * ++ // * * * ++ // Xn A-LD Yn TF Yn ++ // * * \ | * | ++ // * * \ | * | ++ // * * \ | => A--LD_OP_ST ++ // * * \| \ ++ // TF OP \ ++ // * | \ Zn ++ // * | \ ++ // A-ST Zn ++ // ++ ++ // This merge induced dependences from: #1: Xn -> LD, OP, Zn ++ // #2: Yn -> LD ++ // #3: ST -> Zn ++ ++ // Ensure the transform is safe by checking for the dual ++ // dependencies to make sure we do not induce a loop. 
++ ++ // As LD is a predecessor to both OP and ST we can do this by checking: ++ // a). if LD is a predecessor to a member of Xn or Yn. ++ // b). if a Zn is a predecessor to ST. ++ ++ // However, (b) can only occur through being a chain predecessor to ++ // ST, which is the same as Zn being a member or predecessor of Xn, ++ // which is a subset of LD being a predecessor of Xn. So it's ++ // subsumed by check (a). ++ + SDValue Chain = StoreNode->getChain(); + +- bool ChainCheck = false; ++ // Gather X elements in ChainOps. + if (Chain == Load.getValue(1)) { +- ChainCheck = true; +- InputChain = LoadNode->getChain(); ++ FoundLoad = true; ++ ChainOps.push_back(Load.getOperand(0)); + } else if (Chain.getOpcode() == ISD::TokenFactor) { +- SmallVector ChainOps; + for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { + SDValue Op = Chain.getOperand(i); + if (Op == Load.getValue(1)) { +- ChainCheck = true; ++ FoundLoad = true; + // Drop Load, but keep its chain. No cycle check necessary. + ChainOps.push_back(Load.getOperand(0)); + continue; + } +- +- // Make sure using Op as part of the chain would not cause a cycle here. +- // In theory, we could check whether the chain node is a predecessor of +- // the load. But that can be very expensive. Instead visit the uses and +- // make sure they all have smaller node id than the load. +- int LoadId = LoadNode->getNodeId(); +- for (SDNode::use_iterator UI = Op.getNode()->use_begin(), +- UE = UI->use_end(); UI != UE; ++UI) { +- if (UI.getUse().getResNo() != 0) +- continue; +- if (UI->getNodeId() > LoadId) +- return false; +- } +- ++ LoopWorklist.push_back(Op.getNode()); + ChainOps.push_back(Op); + } +- +- if (ChainCheck) +- // Make a new TokenFactor with all the other input chains except +- // for the load. +- InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), +- MVT::Other, ChainOps); + } +- if (!ChainCheck) ++ ++ if (!FoundLoad) ++ return false; ++ ++ // Worklist is currently Xn. Add Yn to worklist. 
++ for (SDValue Op : StoredVal->ops()) ++ if (Op.getNode() != LoadNode) ++ LoopWorklist.push_back(Op.getNode()); ++ ++ // Check (a) if Load is a predecessor to Xn + Yn ++ if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max, ++ true)) + return false; + ++ InputChain = ++ CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps); + return true; + } + +@@ -2335,6 +2375,8 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { + MemOp[1] = LoadNode->getMemOperand(); + Result->setMemRefs(MemOp, MemOp + 2); + ++ // Update Load Chain uses as well. ++ ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1)); + ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); + ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); + CurDAG->RemoveDeadNode(Node); +@@ -2946,12 +2988,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { + return; + } + +- case X86ISD::CMP: +- case X86ISD::SUB: { +- // Sometimes a SUB is used to perform comparison. +- if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0)) +- // This node is not a CMP. +- break; ++ case X86ISD::CMP: { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + +@@ -2971,95 +3008,52 @@ void X86DAGToDAGISel::Select(SDNode *Node) { + if (!C) break; + uint64_t Mask = C->getZExtValue(); + +- // For example, convert "testl %eax, $8" to "testb %al, $8" ++ MVT VT; ++ int SubRegOp; ++ unsigned Op; ++ + if (isUInt<8>(Mask) && + (!(Mask & 0x80) || hasNoSignedComparisonUses(Node))) { +- SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i8); +- SDValue Reg = N0.getOperand(0); +- +- // Extract the l-register. +- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, +- MVT::i8, Reg); +- +- // Emit a testb. +- SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, +- Subreg, Imm); +- // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has +- // one, do not call ReplaceAllUsesWith. 
+- ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), +- SDValue(NewNode, 0)); +- CurDAG->RemoveDeadNode(Node); +- return; ++ // For example, convert "testl %eax, $8" to "testb %al, $8" ++ VT = MVT::i8; ++ SubRegOp = X86::sub_8bit; ++ Op = X86::TEST8ri; ++ } else if (OptForMinSize && isUInt<16>(Mask) && ++ (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) { ++ // For example, "testl %eax, $32776" to "testw %ax, $32776". ++ // NOTE: We only want to form TESTW instructions if optimizing for ++ // min size. Otherwise we only save one byte and possibly get a length ++ // changing prefix penalty in the decoders. ++ VT = MVT::i16; ++ SubRegOp = X86::sub_16bit; ++ Op = X86::TEST16ri; ++ } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 && ++ (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) { ++ // For example, "testq %rax, $268468232" to "testl %eax, $268468232". ++ // NOTE: We only want to run that transform if N0 is 32 or 64 bits. ++ // Otherwize, we find ourselves in a position where we have to do ++ // promotion. If previous passes did not promote the and, we assume ++ // they had a good reason not to and do not promote here. ++ VT = MVT::i32; ++ SubRegOp = X86::sub_32bit; ++ Op = X86::TEST32ri; ++ } else { ++ // No eligible transformation was found. ++ break; + } + +- // For example, "testl %eax, $2048" to "testb %ah, $8". +- if (isShiftedUInt<8, 8>(Mask) && +- (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) { +- // Shift the immediate right by 8 bits. +- SDValue ShiftedImm = CurDAG->getTargetConstant(Mask >> 8, dl, MVT::i8); +- SDValue Reg = N0.getOperand(0); +- +- // Extract the h-register. +- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, +- MVT::i8, Reg); +- +- // Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only +- // target GR8_NOREX registers, so make sure the register class is +- // forced. 
+- SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl, +- MVT::i32, Subreg, ShiftedImm); +- // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has +- // one, do not call ReplaceAllUsesWith. +- ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), +- SDValue(NewNode, 0)); +- CurDAG->RemoveDeadNode(Node); +- return; +- } ++ SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT); ++ SDValue Reg = N0.getOperand(0); + +- // For example, "testl %eax, $32776" to "testw %ax, $32776". +- // NOTE: We only want to form TESTW instructions if optimizing for +- // min size. Otherwise we only save one byte and possibly get a length +- // changing prefix penalty in the decoders. +- if (OptForMinSize && isUInt<16>(Mask) && N0.getValueType() != MVT::i16 && +- (!(Mask & 0x8000) || hasNoSignedComparisonUses(Node))) { +- SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i16); +- SDValue Reg = N0.getOperand(0); +- +- // Extract the 16-bit subregister. +- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, +- MVT::i16, Reg); +- +- // Emit a testw. +- SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, +- Subreg, Imm); +- // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has +- // one, do not call ReplaceAllUsesWith. +- ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), +- SDValue(NewNode, 0)); +- CurDAG->RemoveDeadNode(Node); +- return; +- } ++ // Extract the subregister if necessary. ++ if (N0.getValueType() != VT) ++ Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg); + +- // For example, "testq %rax, $268468232" to "testl %eax, $268468232". +- if (isUInt<32>(Mask) && N0.getValueType() == MVT::i64 && +- (!(Mask & 0x80000000) || hasNoSignedComparisonUses(Node))) { +- SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i32); +- SDValue Reg = N0.getOperand(0); +- +- // Extract the 32-bit subregister. 
+- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl, +- MVT::i32, Reg); +- +- // Emit a testl. +- SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, +- Subreg, Imm); +- // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has +- // one, do not call ReplaceAllUsesWith. +- ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), +- SDValue(NewNode, 0)); +- CurDAG->RemoveDeadNode(Node); +- return; +- } ++ // Emit a testl or testw. ++ SDNode *NewNode = CurDAG->getMachineNode(Op, dl, MVT::i32, Reg, Imm); ++ // Replace CMP with TEST. ++ ReplaceNode(Node, NewNode); ++ return; + } + break; + } +diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp +index c1ddb771e2f..86e71cba87b 100644 +--- a/lib/Target/X86/X86ISelLowering.cpp ++++ b/lib/Target/X86/X86ISelLowering.cpp +@@ -8131,6 +8131,32 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { + return LD; + } + ++ // If this is a splat of pairs of 32-bit elements, we can use a narrower ++ // build_vector and broadcast it. ++ // TODO: We could probably generalize this more. ++ if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) { ++ SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), ++ DAG.getUNDEF(ExtVT), DAG.getUNDEF(ExtVT) }; ++ auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef Ops) { ++ // Make sure all the even/odd operands match. ++ for (unsigned i = 2; i != NumElems; ++i) ++ if (Ops[i % 2] != Op.getOperand(i)) ++ return false; ++ return true; ++ }; ++ if (CanSplat(Op, NumElems, Ops)) { ++ MVT WideEltVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64; ++ MVT NarrowVT = MVT::getVectorVT(ExtVT, 4); ++ // Create a new build vector and cast to v2i64/v2f64. ++ SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2), ++ DAG.getBuildVector(NarrowVT, dl, Ops)); ++ // Broadcast from v2i64/v2f64 and cast to final VT. 
++ MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems/2); ++ return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, ++ NewBV)); ++ } ++ } ++ + // For AVX-length vectors, build the individual 128-bit pieces and use + // shuffles to put them in place. + if (VT.is256BitVector() || VT.is512BitVector()) { +diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td +index 98cc8fb7439..3d5de637da2 100644 +--- a/lib/Target/X86/X86InstrArithmetic.td ++++ b/lib/Target/X86/X86InstrArithmetic.td +@@ -1257,14 +1257,6 @@ let isCompare = 1 in { + def TEST32mi : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>; + let Predicates = [In64BitMode] in + def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>; +- +- // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the +- // register class is constrained to GR8_NOREX. This pseudo is explicitly +- // marked side-effect free, since it doesn't have an isel pattern like +- // other test instructions. 
+- let isPseudo = 1, hasSideEffects = 0 in +- def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), +- "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; + } // Defs = [EFLAGS] + + def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL, +diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp +index 11ada51a870..84a9200a0ef 100644 +--- a/lib/Target/X86/X86InstrInfo.cpp ++++ b/lib/Target/X86/X86InstrInfo.cpp +@@ -7854,9 +7854,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + case X86::VMOVUPSZ256mr_NOVLX: + return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr), + get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm); +- case X86::TEST8ri_NOREX: +- MI.setDesc(get(X86::TEST8ri)); +- return true; + case X86::MOV32ri64: + MI.setDesc(get(X86::MOV32ri)); + return true; +diff --git a/lib/Target/X86/X86MacroFusion.cpp b/lib/Target/X86/X86MacroFusion.cpp +index 67d95c2233d..4e11397dec4 100644 +--- a/lib/Target/X86/X86MacroFusion.cpp ++++ b/lib/Target/X86/X86MacroFusion.cpp +@@ -86,7 +86,6 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + case X86::TEST16mr: + case X86::TEST32mr: + case X86::TEST64mr: +- case X86::TEST8ri_NOREX: + case X86::AND16i16: + case X86::AND16ri: + case X86::AND16ri8: +diff --git a/test/CodeGen/SystemZ/pr36164.ll b/test/CodeGen/SystemZ/pr36164.ll +new file mode 100644 +index 00000000000..0c850091d31 +--- /dev/null ++++ b/test/CodeGen/SystemZ/pr36164.ll +@@ -0,0 +1,113 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc %s -o - -mtriple=s390x-linux-gnu -mcpu=z13 -disable-basicaa | FileCheck %s ++ ++; This test checks that we do not a reference to a deleted node. 
++ ++%0 = type { i32 } ++ ++@g_11 = external dso_local unnamed_addr global i1, align 4 ++@g_69 = external dso_local global i32, align 4 ++@g_73 = external dso_local unnamed_addr global i32, align 4 ++@g_832 = external dso_local constant %0, align 4 ++@g_938 = external dso_local unnamed_addr global i64, align 8 ++ ++; Function Attrs: nounwind ++define void @main() local_unnamed_addr #0 { ++; CHECK-LABEL: main: ++; CHECK: # %bb.0: ++; CHECK-NEXT: stmg %r12, %r15, 96(%r15) ++; CHECK-NEXT: .cfi_offset %r12, -64 ++; CHECK-NEXT: .cfi_offset %r13, -56 ++; CHECK-NEXT: .cfi_offset %r14, -48 ++; CHECK-NEXT: .cfi_offset %r15, -40 ++; CHECK-NEXT: lhi %r0, 1 ++; CHECK-NEXT: larl %r1, g_938 ++; CHECK-NEXT: lhi %r2, 2 ++; CHECK-NEXT: lhi %r3, 3 ++; CHECK-NEXT: lhi %r4, 0 ++; CHECK-NEXT: lhi %r5, 4 ++; CHECK-NEXT: larl %r14, g_11 ++; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: strl %r0, g_73 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: strl %r0, g_69 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: lghi %r13, 24 ++; CHECK-NEXT: strl %r2, g_69 ++; CHECK-NEXT: ag %r13, 0(%r1) ++; CHECK-NEXT: lrl %r12, g_832 ++; CHECK-NEXT: strl %r3, g_69 ++; CHECK-NEXT: lrl %r12, g_832 ++; CHECK-NEXT: strl %r4, g_69 ++; CHECK-NEXT: lrl %r12, g_832 ++; CHECK-NEXT: strl %r0, g_69 ++; CHECK-NEXT: lrl %r12, g_832 ++; CHECK-NEXT: strl %r2, g_69 ++; CHECK-NEXT: lrl %r12, g_832 ++; CHECK-NEXT: strl %r3, g_69 ++; CHECK-NEXT: stgrl %r13, g_938 ++; CHECK-NEXT: lrl %r13, g_832 ++; CHECK-NEXT: strl %r5, g_69 ++; CHECK-NEXT: mvi 0(%r14), 1 
++; CHECK-NEXT: j .LBB0_1 ++ br label %1 ++ ++;