Merge remote-tracking branch 'origin/6.0' into 7

This commit is contained in:
Sylvestre Ledru 2019-02-09 17:27:05 +01:00
commit 3a82f68a05
12 changed files with 8610 additions and 3 deletions

9
debian/changelog vendored
View File

@ -671,6 +671,15 @@ llvm-toolchain-snapshot (1:7~svn322880-1) unstable; urgency=medium
-- Sylvestre Ledru <sylvestre@debian.org> Thu, 18 Jan 2018 20:50:03 +0100
llvm-toolchain-6.0 (1:6.0.1-11) unstable; urgency=medium
* Remove 'Multi-Arch: same' in libclang
(Closes: #874248)
* Cherry-pick various llvm fixes for Julia
(Closes: #919628)
-- Sylvestre Ledru <sylvestre@debian.org> Sat, 09 Feb 2019 17:22:59 +0100
llvm-toolchain-6.0 (1:6.0.1-10) unstable; urgency=medium
* Fix a baseline violation on armhf (Closes: #914268)

6
debian/orig-tar.sh vendored
View File

@ -18,18 +18,18 @@ set -e
# To create an rc1 release:
# sh 4.0/debian/orig-tar.sh RELEASE_40 rc1
SVN_BASE_URL=http://llvm.org/svn/llvm-project/
SVN_BASE_URL=https://llvm.org/svn/llvm-project/
MAJOR_VERSION=7
CURRENT_VERSION=7.1.0 # Should be changed to 3.5.1 later
if test -n "$1"; then
# http://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/branches/google/stable/
# https://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/branches/google/stable/
# For example: sh 4.0/debian/orig-tar.sh release_400
BRANCH=$1
fi
if test -n "$1" -a -n "$2"; then
# http://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/tags/RELEASE_34/rc1/
# https://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/tags/RELEASE_34/rc1/
# For example: sh 4.0/debian/orig-tar.sh RELEASE_401 rc3 4.0.1
BRANCH=$1
TAG=$2

View File

@ -0,0 +1,72 @@
From 6e7b660ee185445640110c80d80aafd436682fca Mon Sep 17 00:00:00 2001
From: Yichao Yu <yyc1992@gmail.com>
Date: Fri, 9 Dec 2016 15:59:46 -0500
Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
---
lib/MC/MCObjectFileInfo.cpp | 2 ++
.../AArch64/ELF_ARM64_BE-large-relocations.s | 18 ++++++++++++++++++
.../RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s | 18 ++++++++++++++++++
3 files changed, 38 insertions(+)
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
===================================================================
--- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp
+++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
@@ -328,6 +328,8 @@ void MCObjectFileInfo::initELFMCObjectFi
dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
break;
+ case Triple::aarch64:
+ case Triple::aarch64_be:
case Triple::x86_64:
if (PositionIndependent) {
PersonalityEncoding =
Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
===================================================================
--- /dev/null
+++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
@@ -0,0 +1,18 @@
+# RUN: llvm-mc -triple=aarch64_be-none-linux-gnu -code-model=large -filetype=obj -o %T/be-large-reloc.o %s
+# RUN: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section be-large-reloc.o,.eh_frame=0x10000 -map-section be-large-reloc.o,.text=0xffff000000000000 -check=%s %T/be-large-reloc.o
+
+ .text
+ .globl g
+ .p2align 2
+ .type g,@function
+g:
+ .cfi_startproc
+ mov x0, xzr
+ ret
+ .Lfunc_end0:
+ .size g, .Lfunc_end0-g
+ .cfi_endproc
+
+# Skip the CIE and load the 8 bytes PC begin pointer.
+# Assuming the CIE and the FDE length are both 4 bytes.
+# rtdyld-check: *{8}(section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc)
Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
===================================================================
--- /dev/null
+++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
@@ -0,0 +1,18 @@
+# RUN: llvm-mc -triple=arm64-none-linux-gnu -code-model=large -filetype=obj -o %T/large-reloc.o %s
+# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o
+
+ .text
+ .globl g
+ .p2align 2
+ .type g,@function
+g:
+ .cfi_startproc
+ mov x0, xzr
+ ret
+ .Lfunc_end0:
+ .size g, .Lfunc_end0-g
+ .cfi_endproc
+
+# Skip the CIE and load the 8 bytes PC begin pointer.
+# Assuming the CIE and the FDE length are both 4 bytes.
+# rtdyld-check: *{8}(section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc)

View File

@ -0,0 +1,24 @@
From f76abe65e6d07fea5e838c4f8c9a9421c16debb0 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Thu, 5 Jul 2018 12:37:50 -0400
Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
---
lib/MC/MCObjectFileInfo.cpp | 2 ++
.../AArch64/ELF_ARM64_large-relocations.s | 20 +++++++++++++++++++
2 files changed, 22 insertions(+)
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
===================================================================
--- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp
+++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
@@ -291,6 +291,8 @@ void MCObjectFileInfo::initELFMCObjectFi
break;
case Triple::ppc64:
case Triple::ppc64le:
+ case Triple::aarch64:
+ case Triple::aarch64_be:
case Triple::x86_64:
FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
(Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);

View File

@ -0,0 +1,53 @@
From f94d12b6108b944199b715f31f25a022f75d2feb Mon Sep 17 00:00:00 2001
From: Yichao Yu <yyc1992@gmail.com>
Date: Sat, 10 Jun 2017 08:45:13 -0400
Subject: [PATCH 4/4] Enable support for floating-point division reductions
Similar to fsub, fdiv can also be vectorized using fmul.
---
lib/Transforms/Utils/LoopUtils.cpp | 1 +
test/Transforms/LoopVectorize/float-reduction.ll | 22 ++++++++++++++++++++++
2 files changed, 23 insertions(+)
Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Utils/LoopUtils.cpp
+++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp
@@ -513,6 +513,7 @@ RecurrenceDescriptor::isRecurrenceInstr(
return InstDesc(Kind == RK_IntegerOr, I);
case Instruction::Xor:
return InstDesc(Kind == RK_IntegerXor, I);
+ case Instruction::FDiv:
case Instruction::FMul:
return InstDesc(Kind == RK_FloatMult, I, UAI);
case Instruction::FSub:
Index: llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll
===================================================================
--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/LoopVectorize/float-reduction.ll
+++ llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll
@@ -44,3 +44,25 @@ for.body:
for.end: ; preds = %for.body
ret float %sub
}
+
+;CHECK-LABEL: @foodiv(
+;CHECK: fdiv fast <4 x float>
+;CHECK: ret
+define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %sub = fdiv fast float %sum.04, %0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 200
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret float %sub
+}

View File

@ -0,0 +1,82 @@
commit 6a311a7a804831fea43cfb2f61322adcb407a1af
Author: Keno Fischer <keno@juliacomputing.com>
Date: Thu Jan 18 15:57:05 2018 -0500
[JumpThreading] Don't restrict cast-traversal to i1
Summary:
In D17663, JumpThreading learned to look trough simple cast instructions,
but only if the source of those cast instructions was a phi/cmp i1
(in an effort to limit compile time effects). I think this condition
is too restrictive. For switches with limited value range, InstCombine
will readily introduce an extra `trunc` instruction to a smaller
integer type (e.g. from i8 to i2), leaving us in the somewhat perverse
situation that jump-threading would work before running instcombine,
but not after. Since instcombine produces this pattern, I think we
need to consider it canonical and support it in JumpThreading.
In general, for limiting recursion, I think the existing restriction
to phi and cmp nodes should be sufficient to avoid looking through
unprofitable chains of instructions.
Reviewers: haicheng, gberry, bmakam, mcrosier
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D42262
Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp
===================================================================
--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Scalar/JumpThreading.cpp
+++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp
@@ -656,11 +656,9 @@ bool JumpThreadingPass::ComputeValueKnow
}
// Handle Cast instructions. Only see through Cast when the source operand is
- // PHI or Cmp and the source type is i1 to save the compilation time.
+ // PHI or Cmp to save the compilation time.
if (CastInst *CI = dyn_cast<CastInst>(I)) {
Value *Source = CI->getOperand(0);
- if (!Source->getType()->isIntegerTy(1))
- return false;
if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
return false;
ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
Index: llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll
===================================================================
--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/JumpThreading/basic.ll
+++ llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll
@@ -547,6 +547,34 @@ l5:
; CHECK: }
}
+define i1 @trunc_switch(i1 %arg) {
+; CHECK-LABEL: @trunc_switch
+top:
+; CHECK: br i1 %arg, label %exitA, label %exitB
+ br i1 %arg, label %common, label %B
+
+B:
+ br label %common
+
+common:
+ %phi = phi i8 [ 2, %B ], [ 1, %top ]
+ %trunc = trunc i8 %phi to i2
+; CHECK-NOT: switch
+ switch i2 %trunc, label %unreach [
+ i2 1, label %exitA
+ i2 -2, label %exitB
+ ]
+
+unreach:
+ unreachable
+
+exitA:
+ ret i1 true
+
+exitB:
+ ret i1 false
+}
+
; CHECK-LABEL: define void @h_con(i32 %p) {
define void @h_con(i32 %p) {
%x = icmp ult i32 %p, 5

View File

@ -0,0 +1,677 @@
From 45bc0f0badbdbabaed7d204757c2aad7ab49a3fe Mon Sep 17 00:00:00 2001
From: DokFaust <rodia@autistici.org>
Date: Mon, 11 Jun 2018 12:59:42 +0200
Subject: [PATCH] PerfJITEventListener integration, requires compile flag
LLVM_USE_PERF
---
CMakeLists.txt | 13 +
include/llvm/Config/config.h.cmake | 3 +
include/llvm/Config/llvm-config.h.cmake | 3 +
.../llvm/ExecutionEngine/JITEventListener.h | 9 +
lib/ExecutionEngine/CMakeLists.txt | 4 +
lib/ExecutionEngine/LLVMBuild.txt | 2 +-
lib/ExecutionEngine/Orc/LLVMBuild.txt | 2 +-
.../PerfJITEvents/CMakeLists.txt | 5 +
.../PerfJITEvents/LLVMBuild.txt | 23 +
.../PerfJITEvents/PerfJITEventListener.cpp | 492 ++++++++++++++++++
10 files changed, 554 insertions(+), 2 deletions(-)
create mode 100644 lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
create mode 100644 lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
create mode 100644 lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f8da6cf9211..fb92c825a46 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -426,6 +426,16 @@ if( LLVM_USE_OPROFILE )
endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
endif( LLVM_USE_OPROFILE )
+option(LLVM_USE_PERF
+ "Use perf JIT interface to inform perf about JIT code" OFF)
+
+# If enabled, verify we are on a platform that supports perf.
+if( LLVM_USE_PERF )
+ if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+ message(FATAL_ERROR "perf support is available on Linux only.")
+ endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+endif( LLVM_USE_PERF )
+
set(LLVM_USE_SANITIZER "" CACHE STRING
"Define the sanitizer used to build binaries and tests.")
set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
@@ -634,6 +644,9 @@ endif (LLVM_USE_INTEL_JITEVENTS)
if (LLVM_USE_OPROFILE)
set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT)
endif (LLVM_USE_OPROFILE)
+if (LLVM_USE_PERF)
+ set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents)
+endif (LLVM_USE_PERF)
message(STATUS "Constructing LLVMBuild project information")
execute_process(
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 940f8420304..17787ed779b 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -377,6 +377,9 @@
/* Define if we have the oprofile JIT-support library */
#cmakedefine01 LLVM_USE_OPROFILE
+/* Define if we have the perf JIT-support library */
+#cmakedefine01 LLVM_USE_PERF
+
/* LLVM version information */
#cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}"
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
index 4daa00f3bc4..8d9c3b24d52 100644
--- a/include/llvm/Config/llvm-config.h.cmake
+++ b/include/llvm/Config/llvm-config.h.cmake
@@ -65,6 +65,9 @@
/* Define if we have the oprofile JIT-support library */
#cmakedefine01 LLVM_USE_OPROFILE
+/* Define if we have the perf JIT-support library */
+#cmakedefine01 LLVM_USE_PERF
+
/* Major version of the LLVM API */
#define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index ff7840f00a4..1cc2c423a8b 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -115,6 +115,15 @@ public:
}
#endif // USE_OPROFILE
+#ifdef LLVM_USE_PERF
+ static JITEventListener *createPerfJITEventListener();
+#else
+ static JITEventListener *createPerfJITEventListener()
+ {
+ return nullptr;
+ }
+#endif //USE_PERF
+
private:
virtual void anchor();
};
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index 84b34919e44..893d113a685 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -30,3 +30,7 @@ endif( LLVM_USE_OPROFILE )
if( LLVM_USE_INTEL_JITEVENTS )
add_subdirectory(IntelJITEvents)
endif( LLVM_USE_INTEL_JITEVENTS )
+
+if( LLVM_USE_PERF )
+ add_subdirectory(PerfJITEvents)
+endif( LLVM_USE_PERF )
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
index 9d29a41f504..b6e1bda6a51 100644
--- a/lib/ExecutionEngine/LLVMBuild.txt
+++ b/lib/ExecutionEngine/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc
+subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents
[component_0]
type = Library
diff --git a/lib/ExecutionEngine/Orc/LLVMBuild.txt b/lib/ExecutionEngine/Orc/LLVMBuild.txt
index 8f05172e77a..ef4ae64e823 100644
--- a/lib/ExecutionEngine/Orc/LLVMBuild.txt
+++ b/lib/ExecutionEngine/Orc/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = OrcJIT
parent = ExecutionEngine
-required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
+required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
diff --git a/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
new file mode 100644
index 00000000000..136cc429d02
--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMPerfJITEvents
+ PerfJITEventListener.cpp
+ )
+
+add_dependencies(LLVMPerfJITEvents LLVMCodeGen)
diff --git a/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
new file mode 100644
index 00000000000..b1958a69260
--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = OptionalLibrary
+name = PerfJITEvents
+parent = ExecutionEngine
+required_libraries = CodeGen Core DebugInfoDWARF ExecutionEngine Object Support TransformUtils
+
diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
new file mode 100644
index 00000000000..c2b97dd59f3
--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -0,0 +1,492 @@
+//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object that tells perf about JITted
+// functions, including source line information.
+//
+// Documentation for perf jit integration is available at:
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <sys/mman.h> // mmap()
+#include <sys/types.h> // getpid()
+#include <time.h> // clock_gettime(), time(), localtime_r() */
+#include <unistd.h> // for getpid(), read(), close()
+
+using namespace llvm;
+using namespace llvm::object;
+typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
+
+namespace {
+
+// language identifier (XXX: should we generate something better from debug
+// info?)
+#define JIT_LANG "llvm-IR"
+#define LLVM_PERF_JIT_MAGIC \
+ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
+ (uint32_t)'D')
+#define LLVM_PERF_JIT_VERSION 1
+
+// bit 0: set if the jitdump file is using an architecture-specific timestamp
+// clock source
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
+
+struct LLVMPerfJitHeader;
+
+class PerfJITEventListener : public JITEventListener {
+public:
+ PerfJITEventListener();
+ ~PerfJITEventListener() {
+ if (MarkerAddr)
+ CloseMarker();
+ }
+
+ void NotifyObjectEmitted(const ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L) override;
+ void NotifyFreeingObject(const ObjectFile &Obj) override;
+
+private:
+ bool InitDebuggingDir();
+ bool OpenMarker();
+ void CloseMarker();
+ static bool FillMachine(LLVMPerfJitHeader &hdr);
+
+ void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
+ uint64_t CodeSize);
+ void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
+
+ // cache lookups
+ pid_t Pid;
+
+ // base directory for output data
+ std::string JitPath;
+
+ // output data stream, closed via Dumpstream
+ int DumpFd = -1;
+
+ // output data stream
+ std::unique_ptr<raw_fd_ostream> Dumpstream;
+
+ // prevent concurrent dumps from messing up the output file
+ sys::Mutex Mutex;
+
+ // perf mmap marker
+ void *MarkerAddr = NULL;
+
+ // perf support ready
+ bool SuccessfullyInitialized = false;
+
+ // identifier for functions, primarily to identify when moving them around
+ uint64_t CodeGeneration = 1;
+};
+
+// The following are POD struct definitions from the perf jit specification
+
+enum LLVMPerfJitRecordType {
+ JIT_CODE_LOAD = 0,
+ JIT_CODE_MOVE = 1, // not emitted, code isn't moved
+ JIT_CODE_DEBUG_INFO = 2,
+ JIT_CODE_CLOSE = 3, // not emitted, unnecessary
+ JIT_CODE_UNWINDING_INFO = 4, // not emitted
+
+ JIT_CODE_MAX
+};
+
+struct LLVMPerfJitHeader {
+ uint32_t Magic; // characters "JiTD"
+ uint32_t Version; // header version
+ uint32_t TotalSize; // total size of header
+ uint32_t ElfMach; // elf mach target
+ uint32_t Pad1; // reserved
+ uint32_t Pid;
+ uint64_t Timestamp; // timestamp
+ uint64_t Flags; // flags
+};
+
+// record prefix (mandatory in each record)
+struct LLVMPerfJitRecordPrefix {
+ uint32_t Id; // record type identifier
+ uint32_t TotalSize;
+ uint64_t Timestamp;
+};
+
+struct LLVMPerfJitRecordCodeLoad {
+ LLVMPerfJitRecordPrefix Prefix;
+
+ uint32_t Pid;
+ uint32_t Tid;
+ uint64_t Vma;
+ uint64_t CodeAddr;
+ uint64_t CodeSize;
+ uint64_t CodeIndex;
+};
+
+struct LLVMPerfJitDebugEntry {
+ uint64_t Addr;
+ int Lineno; // source line number starting at 1
+ int Discrim; // column discriminator, 0 is default
+ // followed by null terminated filename, \xff\0 if same as previous entry
+};
+
+struct LLVMPerfJitRecordDebugInfo {
+ LLVMPerfJitRecordPrefix Prefix;
+
+ uint64_t CodeAddr;
+ uint64_t NrEntry;
+ // followed by NrEntry LLVMPerfJitDebugEntry records
+};
+
+static inline uint64_t timespec_to_ns(const struct timespec *ts) {
+ const uint64_t NanoSecPerSec = 1000000000;
+ return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
+}
+
+static inline uint64_t perf_get_timestamp(void) {
+ struct timespec ts;
+ int ret;
+
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (ret)
+ return 0;
+
+ return timespec_to_ns(&ts);
+}
+
+PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) {
+ // check if clock-source is supported
+ if (!perf_get_timestamp()) {
+ errs() << "kernel does not support CLOCK_MONOTONIC\n";
+ return;
+ }
+
+ if (!InitDebuggingDir()) {
+ errs() << "could not initialize debugging directory\n";
+ return;
+ }
+
+ std::string Filename;
+ raw_string_ostream FilenameBuf(Filename);
+ FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
+
+ // Need to open ourselves, because we need to hand the FD to OpenMarker() and
+ // raw_fd_ostream doesn't expose the FD.
+ using sys::fs::openFileForWrite;
+ if (auto EC =
+ openFileForWrite(FilenameBuf.str(), DumpFd, sys::fs::F_RW, 0666)) {
+ errs() << "could not open JIT dump file " << FilenameBuf.str() << ": "
+ << EC.message() << "\n";
+ return;
+ }
+
+ Dumpstream = make_unique<raw_fd_ostream>(DumpFd, true);
+
+ LLVMPerfJitHeader Header = {0};
+ if (!FillMachine(Header))
+ return;
+
+ // signal this process emits JIT information
+ if (!OpenMarker())
+ return;
+
+ // emit dumpstream header
+ Header.Magic = LLVM_PERF_JIT_MAGIC;
+ Header.Version = LLVM_PERF_JIT_VERSION;
+ Header.TotalSize = sizeof(Header);
+ Header.Pid = Pid;
+ Header.Timestamp = perf_get_timestamp();
+ Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
+
+ // Everything initialized, can do profiling now.
+ if (!Dumpstream->has_error())
+ SuccessfullyInitialized = true;
+}
+
+void PerfJITEventListener::NotifyObjectEmitted(
+ const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) {
+
+ if (!SuccessfullyInitialized)
+ return;
+
+ OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
+ const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
+
+ // Get the address of the object image for use as a unique identifier
+ std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
+
+ // Use symbol info to iterate over functions in the object.
+ for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
+ SymbolRef Sym = P.first;
+ std::string SourceFileName;
+
+ Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
+ if (!SymTypeOrErr) {
+ // There's not much we can with errors here
+ consumeError(SymTypeOrErr.takeError());
+ continue;
+ }
+ SymbolRef::Type SymType = *SymTypeOrErr;
+ if (SymType != SymbolRef::ST_Function)
+ continue;
+
+ Expected<StringRef> Name = Sym.getName();
+ if (!Name) {
+ consumeError(Name.takeError());
+ continue;
+ }
+
+ Expected<uint64_t> AddrOrErr = Sym.getAddress();
+ if (!AddrOrErr) {
+ consumeError(AddrOrErr.takeError());
+ continue;
+ }
+ uint64_t Addr = *AddrOrErr;
+ uint64_t Size = P.second;
+
+ // According to spec debugging info has to come before loading the
+ // corresonding code load.
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(
+ Addr, Size, FileLineInfoKind::AbsoluteFilePath);
+
+ NotifyDebug(Addr, Lines);
+ NotifyCode(Name, Addr, Size);
+ }
+
+ Dumpstream->flush();
+}
+
+void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
+ // perf currently doesn't have an interface for unloading. But munmap()ing the
+ // code section does, so that's ok.
+}
+
+bool PerfJITEventListener::InitDebuggingDir() {
+ time_t Time;
+ struct tm LocalTime;
+ char TimeBuffer[sizeof("YYYYMMDD")];
+ SmallString<64> Path;
+
+ // search for location to dump data to
+ if (const char *BaseDir = getenv("JITDUMPDIR"))
+ Path.append(BaseDir);
+ else if (!sys::path::home_directory(Path))
+ Path = ".";
+
+ // create debug directory
+ Path += "/.debug/jit/";
+ if (auto EC = sys::fs::create_directories(Path)) {
+ errs() << "could not create jit cache directory " << Path << ": "
+ << EC.message() << "\n";
+ return false;
+ }
+
+ // create unique directory for dump data related to this process
+ time(&Time);
+ localtime_r(&Time, &LocalTime);
+ strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
+ Path += JIT_LANG "-jit-";
+ Path += TimeBuffer;
+
+ SmallString<128> UniqueDebugDir;
+
+ using sys::fs::createUniqueDirectory;
+ if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
+ errs() << "could not create unique jit cache directory " << UniqueDebugDir
+ << ": " << EC.message() << "\n";
+ return false;
+ }
+
+ JitPath = UniqueDebugDir.str();
+
+ return true;
+}
+
+bool PerfJITEventListener::OpenMarker() {
+ // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
+ // is captured either live (perf record running when we mmap) or in deferred
+ // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
+ // file for more meta data info about the jitted code. Perf report/annotate
+ // detect this special filename and process the jitdump file.
+ //
+ // Mapping must be PROT_EXEC to ensure it is captured by perf record
+ // even when not using -d option.
+ MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC,
+ MAP_PRIVATE, DumpFd, 0);
+
+ if (MarkerAddr == MAP_FAILED) {
+ errs() << "could not mmap JIT marker\n";
+ return false;
+ }
+ return true;
+}
+
+void PerfJITEventListener::CloseMarker() {
+ if (!MarkerAddr)
+ return;
+
+ munmap(MarkerAddr, sys::Process::getPageSize());
+ MarkerAddr = nullptr;
+}
+
+bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
+ char id[16];
+ struct {
+ uint16_t e_type;
+ uint16_t e_machine;
+ } info;
+
+ size_t RequiredMemory = sizeof(id) + sizeof(info);
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+ MemoryBuffer::getFileSlice("/proc/self/exe",
+ RequiredMemory,
+ 0);
+
+ // This'll not guarantee that enough data was actually read from the
+ // underlying file. Instead the trailing part of the buffer would be
+ // zeroed. Given the ELF signature check below that seems ok though,
+ // it's unlikely that the file ends just after that, and the
+ // consequence would just be that perf wouldn't recognize the
+ // signature.
+ if (auto EC = MB.getError()) {
+ errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
+ return false;
+ }
+
+ memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
+ memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
+
+ // check ELF signature
+ if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
+ errs() << "invalid elf signature\n";
+ return false;
+ }
+
+ hdr.ElfMach = info.e_machine;
+
+ return true;
+}
+
+void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
+ uint64_t CodeAddr, uint64_t CodeSize) {
+ assert(SuccessfullyInitialized);
+
+ // 0 length functions can't have samples.
+ if (CodeSize == 0)
+ return;
+
+ LLVMPerfJitRecordCodeLoad rec;
+ rec.Prefix.Id = JIT_CODE_LOAD;
+ rec.Prefix.TotalSize = sizeof(rec) + // debug record itself
+ Symbol->size() + 1 + // symbol name
+ CodeSize; // and code
+ rec.Prefix.Timestamp = perf_get_timestamp();
+
+ rec.CodeSize = CodeSize;
+ rec.Vma = 0;
+ rec.CodeAddr = CodeAddr;
+ rec.Pid = Pid;
+ rec.Tid = get_threadid();
+
+ // avoid interspersing output
+ MutexGuard Guard(Mutex);
+
+ rec.CodeIndex = CodeGeneration++; // under lock!
+
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
+ Dumpstream->write(Symbol->data(), Symbol->size() + 1);
+ Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
+}
+
+void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
+ DILineInfoTable Lines) {
+ assert(SuccessfullyInitialized);
+
+ // Didn't get useful debug info.
+ if (Lines.empty())
+ return;
+
+ LLVMPerfJitRecordDebugInfo rec;
+ rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
+ rec.Prefix.TotalSize = sizeof(rec); // will be increased further
+ rec.Prefix.Timestamp = perf_get_timestamp();
+ rec.CodeAddr = CodeAddr;
+ rec.NrEntry = Lines.size();
+
+ // compute total size size of record (variable due to filenames)
+ DILineInfoTable::iterator Begin = Lines.begin();
+ DILineInfoTable::iterator End = Lines.end();
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+ DILineInfo &line = It->second;
+ rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
+ rec.Prefix.TotalSize += line.FileName.size() + 1;
+ }
+
+ // The debug_entry describes the source line information. It is defined as
+ // follows in order:
+ // * uint64_t code_addr: address of function for which the debug information
+ // is generated
+ // * uint32_t line : source file line number (starting at 1)
+ // * uint32_t discrim : column discriminator, 0 is default
+ // * char name[n] : source file name in ASCII, including null termination
+
+ // avoid interspersing output
+ MutexGuard Guard(Mutex);
+
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
+
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+ LLVMPerfJitDebugEntry LineInfo;
+ DILineInfo &Line = It->second;
+
+ LineInfo.Addr = It->first;
+ // The function re-created by perf is preceded by a elf
+ // header. Need to adjust for that, otherwise the results are
+ // wrong.
+ LineInfo.Addr += 0x40;
+ LineInfo.Lineno = Line.Line;
+ LineInfo.Discrim = Line.Discriminator;
+
+ Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
+ sizeof(LineInfo));
+ Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
+ }
+}
+
+// There should be only a single event listener per process, otherwise perf gets
+// confused.
+llvm::ManagedStatic<PerfJITEventListener> PerfListener;
+
+} // end anonymous namespace
+
+namespace llvm {
+JITEventListener *JITEventListener::createPerfJITEventListener() {
+ return &*PerfListener;
+}
+
+} // namespace llvm
+
--
2.17.1

View File

@ -0,0 +1,89 @@
commit 8eb2b102a203d83fb713f3bf79acf235dabdd8cd
Author: Keno Fischer <keno@juliacomputing.com>
Date: Mon Jul 30 16:59:08 2018 -0400
[VNCoercion] Disallow coercion between different ni addrspaces
Summary:
I'm not sure if it would be legal by the IR reference to introduce
an addrspacecast here, since the IR reference is a bit vague on
the exact semantics, but at least for our usage of it (and I
suspect for many other's usage) it is not. For us, addrspacecasts
between non-integral address spaces carry frontend information that the
optimizer cannot deduce afterwards in a generic way (though we
have frontend specific passes in our pipline that do propagate
these). In any case, I'm sure nobody is using it this way at
the moment, since it would have introduced inttoptrs, which
are definitely illegal.
Fixes PR38375
Reviewers: sanjoy, reames, dberlin
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D50010
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
index c3feea6a0a4..735d1e7b792 100644
--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
@@ -20,14 +20,21 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
return false;
+ Type *StoredValTy = StoredVal->getType();
+
// The store has to be at least as big as the load.
if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
return false;
- // Don't coerce non-integral pointers to integers or vice versa.
- if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
- DL.isNonIntegralPointerType(LoadTy))
+ bool StoredNI = DL.isNonIntegralPointerType(StoredValTy);
+ bool LoadNI = DL.isNonIntegralPointerType(LoadTy);
+ if (StoredNI != LoadNI) {
return false;
+ } else if (StoredNI && LoadNI &&
+ cast<PointerType>(StoredValTy)->getAddressSpace() !=
+ cast<PointerType>(LoadTy)->getAddressSpace()) {
+ return false;
+ }
return true;
}
diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
index 9ae4132231d..5217fc1a06a 100644
--- a/test/Transforms/GVN/non-integral-pointers.ll
+++ b/test/Transforms/GVN/non-integral-pointers.ll
@@ -1,6 +1,6 @@
; RUN: opt -gvn -S < %s | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
target triple = "x86_64-unknown-linux-gnu"
define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
alwaysTaken:
ret i64 42
}
+
+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
+ ; CHECK-LABEL: @multini(
+ ; CHECK-NOT: inttoptr
+ ; CHECK-NOT: ptrtoint
+ ; CHECK-NOT: addrspacecast
+ entry:
+ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+ neverTaken:
+ %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
+ %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
+ ret i8 addrspace(5)* %differentas
+
+ alwaysTaken:
+ ret i8 addrspace(5)* null
+ }

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,26 @@
From 15899eaab58e96bb7bbe7a14099674e255656a50 Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Fri, 23 Feb 2018 14:41:20 -0500
Subject: [PATCH] Make AddrSpaceCast noops on PPC
PPC as AArch64 doesn't have address-spaces so we can drop them in the backend
---
lib/Target/PowerPC/PPCISelLowering.h | 5 +++++
1 file changed, 5 insertions(+)
Index: llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm-toolchain-6.0-6.0.1.orig/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h
@@ -889,6 +889,11 @@ namespace llvm {
return true;
}
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
+
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
SelectionDAG &DAG,
ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;

View File

@ -0,0 +1,301 @@
commit b398d8e1fa5a5a914957fa22d0a64db97f6c265e
Author: Craig Topper <craig.topper@intel.com>
Date: Thu Mar 8 00:21:17 2018 +0000
[X86] Fix some isel patterns that used aligned vector load instructions with unaligned predicates.
These patterns weren't checking the alignment of the load, but were using the aligned instructions. This will cause a GP fault if the data isn't aligned.
I believe these were introduced in r312450.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326967 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td
index db3dfe56531..50c7763a2c3 100644
--- a/lib/Target/X86/X86InstrVecCompiler.td
+++ b/lib/Target/X86/X86InstrVecCompiler.td
@@ -261,10 +261,10 @@ let Predicates = [HasVLX] in {
// will zero the upper bits.
// TODO: Is there a safe way to detect whether the producing instruction
// already zeroed the upper bits?
-multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
- ValueType DstTy, ValueType SrcTy,
- ValueType ZeroTy, PatFrag memop,
- SubRegIndex SubIdx> {
+multiclass subvector_zero_lowering<string MoveStr, string LoadStr,
+ RegisterClass RC, ValueType DstTy,
+ ValueType SrcTy, ValueType ZeroTy,
+ PatFrag memop, SubRegIndex SubIdx> {
def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
(SrcTy RC:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0),
@@ -274,91 +274,91 @@ multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
(SrcTy (bitconvert (memop addr:$src))),
(iPTR 0))),
(SUBREG_TO_REG (i64 0),
- (!cast<Instruction>("VMOV"#MoveStr#"rm") addr:$src), SubIdx)>;
+ (!cast<Instruction>("VMOV"#LoadStr#"rm") addr:$src), SubIdx)>;
}
let Predicates = [HasAVX, NoVLX] in {
- defm : subvector_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, loadv2f64,
- sub_xmm>;
- defm : subvector_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, loadv4f32,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, loadv2i64,
- sub_xmm>;
-}
-
-let Predicates = [HasVLX] in {
- defm : subvector_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32,
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v4f64, v2f64, v8i32,
loadv2f64, sub_xmm>;
- defm : subvector_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32,
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v8f32, v4f32, v8i32,
loadv4f32, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v4i64, v2i64, v8i32,
loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i32, v4i32, v8i32,
loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i16, v8i16, v8i32,
loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32,
- loadv2i64, sub_xmm>;
-
- defm : subvector_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32,
- loadv2f64, sub_xmm>;
- defm : subvector_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32,
- loadv4f32, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32,
- loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32,
- loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32,
- loadv2i64, sub_xmm>;
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32,
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i8, v16i8, v8i32,
loadv2i64, sub_xmm>;
+}
- defm : subvector_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32,
- loadv4f64, sub_ymm>;
- defm : subvector_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32,
- loadv8f32, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32,
- loadv4i64, sub_ymm>;
+let Predicates = [HasVLX] in {
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v4f64,
+ v2f64, v8i32, loadv2f64, sub_xmm>;
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v8f32,
+ v4f32, v8i32, loadv4f32, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v4i64,
+ v2i64, v8i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i32,
+ v4i32, v8i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i16,
+ v8i16, v8i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i8,
+ v16i8, v8i32, loadv2i64, sub_xmm>;
+
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v8f64,
+ v2f64, v16i32, loadv2f64, sub_xmm>;
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v16f32,
+ v4f32, v16i32, loadv4f32, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i64,
+ v2i64, v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i32,
+ v4i32, v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i16,
+ v8i16, v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v64i8,
+ v16i8, v16i32, loadv2i64, sub_xmm>;
+
+ defm : subvector_zero_lowering<"APDZ256", "UPDZ256", VR256X, v8f64,
+ v4f64, v16i32, loadv4f64, sub_ymm>;
+ defm : subvector_zero_lowering<"APSZ256", "UPDZ256", VR256X, v16f32,
+ v8f32, v16i32, loadv8f32, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v8i64,
+ v4i64, v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v16i32,
+ v8i32, v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v32i16,
+ v16i16, v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v64i8,
+ v32i8, v16i32, loadv4i64, sub_ymm>;
}
let Predicates = [HasAVX512, NoVLX] in {
- defm : subvector_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, loadv2f64,
- sub_xmm>;
- defm : subvector_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, loadv4f32,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, loadv2i64,
- sub_xmm>;
- defm : subvector_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, loadv2i64,
- sub_xmm>;
-
- defm : subvector_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32,
- loadv4f64, sub_ymm>;
- defm : subvector_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32,
- loadv8f32, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32,
- loadv4i64, sub_ymm>;
- defm : subvector_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32,
- loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v8f64, v2f64,
+ v16i32,loadv2f64, sub_xmm>;
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v16f32, v4f32,
+ v16i32, loadv4f32, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i64, v2i64,
+ v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i32, v4i32,
+ v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i16, v8i16,
+ v16i32, loadv2i64, sub_xmm>;
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v64i8, v16i8,
+ v16i32, loadv2i64, sub_xmm>;
+
+ defm : subvector_zero_lowering<"APDY", "UPDY", VR256, v8f64, v4f64,
+ v16i32, loadv4f64, sub_ymm>;
+ defm : subvector_zero_lowering<"APSY", "UPSY", VR256, v16f32, v8f32,
+ v16i32, loadv8f32, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v8i64, v4i64,
+ v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v16i32, v8i32,
+ v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v32i16, v16i16,
+ v16i32, loadv4i64, sub_ymm>;
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v64i8, v32i8,
+ v16i32, loadv4i64, sub_ymm>;
}
// List of opcodes that guaranteed to zero the upper elements of vector regs.
diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll
index 6ecd8116443..0f2cf594b1c 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-256.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll
@@ -28,13 +28,13 @@ define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noi
define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_2f64_2z:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4f64_2f64_2z:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
%val0 = load <2 x double>, <2 x double>* %ptr0
@@ -109,13 +109,13 @@ define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline
define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4f64_f64_45zz:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4f64_f64_45zz:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 4
%ptr1 = getelementptr inbounds double, double* %ptr, i64 5
@@ -155,13 +155,13 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline
define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4i64_2i64_3z:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 48(%rdi), %xmm0
+; AVX-NEXT: vmovups 48(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4i64_2i64_3z:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 48(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 48(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3
%val0 = load <2 x i64>, <2 x i64>* %ptr0
@@ -217,13 +217,13 @@ define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp {
define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp {
; AVX-LABEL: merge_4i64_i64_23zz:
; AVX: # %bb.0:
-; AVX-NEXT: vmovaps 16(%rdi), %xmm0
+; AVX-NEXT: vmovups 16(%rdi), %xmm0
; AVX-NEXT: retq
;
; X32-AVX-LABEL: merge_4i64_i64_23zz:
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: vmovaps 16(%eax), %xmm0
+; X32-AVX-NEXT: vmovups 16(%eax), %xmm0
; X32-AVX-NEXT: retl
%ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2
%ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3
diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll
index 62102eb382c..3c6eaf65292 100644
--- a/test/CodeGen/X86/merge-consecutive-loads-512.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll
@@ -106,13 +106,13 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
; ALL: # %bb.0:
-; ALL-NEXT: vmovaps 8(%rdi), %xmm0
+; ALL-NEXT: vmovups 8(%rdi), %xmm0
; ALL-NEXT: retq
;
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovaps 8(%eax), %xmm0
+; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0
; X32-AVX512F-NEXT: retl
%ptr0 = getelementptr inbounds double, double* %ptr, i64 1
%ptr1 = getelementptr inbounds double, double* %ptr, i64 2
@@ -190,7 +190,7 @@ define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
; ALL: # %bb.0:
-; ALL-NEXT: vmovaps 40(%rdi), %xmm0
+; ALL-NEXT: vmovups 40(%rdi), %xmm0
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; ALL-NEXT: retq
@@ -198,7 +198,7 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovaps 40(%eax), %xmm0
+; X32-AVX512F-NEXT: vmovups 40(%eax), %xmm0
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X32-AVX512F-NEXT: retl

6131
debian/patches/julia/llvm-rL327898.patch vendored Normal file

File diff suppressed because it is too large Load Diff