mirror of
https://git.proxmox.com/git/llvm-toolchain
synced 2025-06-14 21:10:25 +00:00
Merge remote-tracking branch 'origin/6.0' into 7
This commit is contained in:
commit
3a82f68a05
9
debian/changelog
vendored
9
debian/changelog
vendored
@ -671,6 +671,15 @@ llvm-toolchain-snapshot (1:7~svn322880-1) unstable; urgency=medium
|
||||
|
||||
-- Sylvestre Ledru <sylvestre@debian.org> Thu, 18 Jan 2018 20:50:03 +0100
|
||||
|
||||
llvm-toolchain-6.0 (1:6.0.1-11) unstable; urgency=medium
|
||||
|
||||
* Remove 'Multi-Arch: same' in libclang
|
||||
(Closes: #874248)
|
||||
* Cherry-pick various llvm fixes for Julia
|
||||
(Closes: #919628)
|
||||
|
||||
-- Sylvestre Ledru <sylvestre@debian.org> Sat, 09 Feb 2019 17:22:59 +0100
|
||||
|
||||
llvm-toolchain-6.0 (1:6.0.1-10) unstable; urgency=medium
|
||||
|
||||
* Fix a baseline violation on armhf (Closes: #914268)
|
||||
|
6
debian/orig-tar.sh
vendored
6
debian/orig-tar.sh
vendored
@ -18,18 +18,18 @@ set -e
|
||||
# To create an rc1 release:
|
||||
# sh 4.0/debian/orig-tar.sh RELEASE_40 rc1
|
||||
|
||||
SVN_BASE_URL=http://llvm.org/svn/llvm-project/
|
||||
SVN_BASE_URL=https://llvm.org/svn/llvm-project/
|
||||
MAJOR_VERSION=7
|
||||
CURRENT_VERSION=7.1.0 # Should be changed to 3.5.1 later
|
||||
|
||||
if test -n "$1"; then
|
||||
# http://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/branches/google/stable/
|
||||
# https://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/branches/google/stable/
|
||||
# For example: sh 4.0/debian/orig-tar.sh release_400
|
||||
BRANCH=$1
|
||||
fi
|
||||
|
||||
if test -n "$1" -a -n "$2"; then
|
||||
# http://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/tags/RELEASE_34/rc1/
|
||||
# https://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/tags/RELEASE_34/rc1/
|
||||
# For example: sh 4.0/debian/orig-tar.sh RELEASE_401 rc3 4.0.1
|
||||
BRANCH=$1
|
||||
TAG=$2
|
||||
|
72
debian/patches/julia/llvm-D27629-AArch64-large_model_4.0.patch
vendored
Normal file
72
debian/patches/julia/llvm-D27629-AArch64-large_model_4.0.patch
vendored
Normal file
@ -0,0 +1,72 @@
|
||||
From 6e7b660ee185445640110c80d80aafd436682fca Mon Sep 17 00:00:00 2001
|
||||
From: Yichao Yu <yyc1992@gmail.com>
|
||||
Date: Fri, 9 Dec 2016 15:59:46 -0500
|
||||
Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
|
||||
|
||||
---
|
||||
lib/MC/MCObjectFileInfo.cpp | 2 ++
|
||||
.../AArch64/ELF_ARM64_BE-large-relocations.s | 18 ++++++++++++++++++
|
||||
.../RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s | 18 ++++++++++++++++++
|
||||
3 files changed, 38 insertions(+)
|
||||
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
|
||||
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
|
||||
|
||||
Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
|
||||
===================================================================
|
||||
--- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp
|
||||
+++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
|
||||
@@ -328,6 +328,8 @@ void MCObjectFileInfo::initELFMCObjectFi
|
||||
dwarf::DW_EH_PE_sdata4
|
||||
: dwarf::DW_EH_PE_absptr;
|
||||
break;
|
||||
+ case Triple::aarch64:
|
||||
+ case Triple::aarch64_be:
|
||||
case Triple::x86_64:
|
||||
if (PositionIndependent) {
|
||||
PersonalityEncoding =
|
||||
Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
|
||||
@@ -0,0 +1,18 @@
|
||||
+# RUN: llvm-mc -triple=aarch64_be-none-linux-gnu -code-model=large -filetype=obj -o %T/be-large-reloc.o %s
|
||||
+# RUN: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section be-large-reloc.o,.eh_frame=0x10000 -map-section be-large-reloc.o,.text=0xffff000000000000 -check=%s %T/be-large-reloc.o
|
||||
+
|
||||
+ .text
|
||||
+ .globl g
|
||||
+ .p2align 2
|
||||
+ .type g,@function
|
||||
+g:
|
||||
+ .cfi_startproc
|
||||
+ mov x0, xzr
|
||||
+ ret
|
||||
+ .Lfunc_end0:
|
||||
+ .size g, .Lfunc_end0-g
|
||||
+ .cfi_endproc
|
||||
+
|
||||
+# Skip the CIE and load the 8 bytes PC begin pointer.
|
||||
+# Assuming the CIE and the FDE length are both 4 bytes.
|
||||
+# rtdyld-check: *{8}(section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc)
|
||||
Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
|
||||
===================================================================
|
||||
--- /dev/null
|
||||
+++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
|
||||
@@ -0,0 +1,18 @@
|
||||
+# RUN: llvm-mc -triple=arm64-none-linux-gnu -code-model=large -filetype=obj -o %T/large-reloc.o %s
|
||||
+# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o
|
||||
+
|
||||
+ .text
|
||||
+ .globl g
|
||||
+ .p2align 2
|
||||
+ .type g,@function
|
||||
+g:
|
||||
+ .cfi_startproc
|
||||
+ mov x0, xzr
|
||||
+ ret
|
||||
+ .Lfunc_end0:
|
||||
+ .size g, .Lfunc_end0-g
|
||||
+ .cfi_endproc
|
||||
+
|
||||
+# Skip the CIE and load the 8 bytes PC begin pointer.
|
||||
+# Assuming the CIE and the FDE length are both 4 bytes.
|
||||
+# rtdyld-check: *{8}(section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc)
|
24
debian/patches/julia/llvm-D27629-AArch64-large_model_6.0.1.patch
vendored
Normal file
24
debian/patches/julia/llvm-D27629-AArch64-large_model_6.0.1.patch
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
From f76abe65e6d07fea5e838c4f8c9a9421c16debb0 Mon Sep 17 00:00:00 2001
|
||||
From: Valentin Churavy <v.churavy@gmail.com>
|
||||
Date: Thu, 5 Jul 2018 12:37:50 -0400
|
||||
Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
|
||||
|
||||
---
|
||||
lib/MC/MCObjectFileInfo.cpp | 2 ++
|
||||
.../AArch64/ELF_ARM64_large-relocations.s | 20 +++++++++++++++++++
|
||||
2 files changed, 22 insertions(+)
|
||||
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
|
||||
|
||||
Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
|
||||
===================================================================
|
||||
--- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp
|
||||
+++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
|
||||
@@ -291,6 +291,8 @@ void MCObjectFileInfo::initELFMCObjectFi
|
||||
break;
|
||||
case Triple::ppc64:
|
||||
case Triple::ppc64le:
|
||||
+ case Triple::aarch64:
|
||||
+ case Triple::aarch64_be:
|
||||
case Triple::x86_64:
|
||||
FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
|
||||
(Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
|
53
debian/patches/julia/llvm-D34078-vectorize-fdiv.patch
vendored
Normal file
53
debian/patches/julia/llvm-D34078-vectorize-fdiv.patch
vendored
Normal file
@ -0,0 +1,53 @@
|
||||
From f94d12b6108b944199b715f31f25a022f75d2feb Mon Sep 17 00:00:00 2001
|
||||
From: Yichao Yu <yyc1992@gmail.com>
|
||||
Date: Sat, 10 Jun 2017 08:45:13 -0400
|
||||
Subject: [PATCH 4/4] Enable support for floating-point division reductions
|
||||
|
||||
Similar to fsub, fdiv can also be vectorized using fmul.
|
||||
---
|
||||
lib/Transforms/Utils/LoopUtils.cpp | 1 +
|
||||
test/Transforms/LoopVectorize/float-reduction.ll | 22 ++++++++++++++++++++++
|
||||
2 files changed, 23 insertions(+)
|
||||
|
||||
Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp
|
||||
===================================================================
|
||||
--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Utils/LoopUtils.cpp
|
||||
+++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp
|
||||
@@ -513,6 +513,7 @@ RecurrenceDescriptor::isRecurrenceInstr(
|
||||
return InstDesc(Kind == RK_IntegerOr, I);
|
||||
case Instruction::Xor:
|
||||
return InstDesc(Kind == RK_IntegerXor, I);
|
||||
+ case Instruction::FDiv:
|
||||
case Instruction::FMul:
|
||||
return InstDesc(Kind == RK_FloatMult, I, UAI);
|
||||
case Instruction::FSub:
|
||||
Index: llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll
|
||||
===================================================================
|
||||
--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/LoopVectorize/float-reduction.ll
|
||||
+++ llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll
|
||||
@@ -44,3 +44,25 @@ for.body:
|
||||
for.end: ; preds = %for.body
|
||||
ret float %sub
|
||||
}
|
||||
+
|
||||
+;CHECK-LABEL: @foodiv(
|
||||
+;CHECK: fdiv fast <4 x float>
|
||||
+;CHECK: ret
|
||||
+define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
|
||||
+entry:
|
||||
+ br label %for.body
|
||||
+
|
||||
+for.body: ; preds = %for.body, %entry
|
||||
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
+ %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ]
|
||||
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
|
||||
+ %0 = load float, float* %arrayidx, align 4
|
||||
+ %sub = fdiv fast float %sum.04, %0
|
||||
+ %indvars.iv.next = add i64 %indvars.iv, 1
|
||||
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
+ %exitcond = icmp eq i32 %lftr.wideiv, 200
|
||||
+ br i1 %exitcond, label %for.end, label %for.body
|
||||
+
|
||||
+for.end: ; preds = %for.body
|
||||
+ ret float %sub
|
||||
+}
|
82
debian/patches/julia/llvm-D42262-jumpthreading-not-i1.patch
vendored
Normal file
82
debian/patches/julia/llvm-D42262-jumpthreading-not-i1.patch
vendored
Normal file
@ -0,0 +1,82 @@
|
||||
commit 6a311a7a804831fea43cfb2f61322adcb407a1af
|
||||
Author: Keno Fischer <keno@juliacomputing.com>
|
||||
Date: Thu Jan 18 15:57:05 2018 -0500
|
||||
|
||||
[JumpThreading] Don't restrict cast-traversal to i1
|
||||
|
||||
Summary:
|
||||
In D17663, JumpThreading learned to look through simple cast instructions,
|
||||
but only if the source of those cast instructions was a phi/cmp i1
|
||||
(in an effort to limit compile time effects). I think this condition
|
||||
is too restrictive. For switches with limited value range, InstCombine
|
||||
will readily introduce an extra `trunc` instruction to a smaller
|
||||
integer type (e.g. from i8 to i2), leaving us in the somewhat perverse
|
||||
situation that jump-threading would work before running instcombine,
|
||||
but not after. Since instcombine produces this pattern, I think we
|
||||
need to consider it canonical and support it in JumpThreading.
|
||||
In general, for limiting recursion, I think the existing restriction
|
||||
to phi and cmp nodes should be sufficient to avoid looking through
|
||||
unprofitable chains of instructions.
|
||||
|
||||
Reviewers: haicheng, gberry, bmakam, mcrosier
|
||||
|
||||
Subscribers: llvm-commits
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D42262
|
||||
|
||||
Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp
|
||||
===================================================================
|
||||
--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Scalar/JumpThreading.cpp
|
||||
+++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp
|
||||
@@ -656,11 +656,9 @@ bool JumpThreadingPass::ComputeValueKnow
|
||||
}
|
||||
|
||||
// Handle Cast instructions. Only see through Cast when the source operand is
|
||||
- // PHI or Cmp and the source type is i1 to save the compilation time.
|
||||
+ // PHI or Cmp to save the compilation time.
|
||||
if (CastInst *CI = dyn_cast<CastInst>(I)) {
|
||||
Value *Source = CI->getOperand(0);
|
||||
- if (!Source->getType()->isIntegerTy(1))
|
||||
- return false;
|
||||
if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
|
||||
return false;
|
||||
ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
|
||||
Index: llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll
|
||||
===================================================================
|
||||
--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/JumpThreading/basic.ll
|
||||
+++ llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll
|
||||
@@ -547,6 +547,34 @@ l5:
|
||||
; CHECK: }
|
||||
}
|
||||
|
||||
+define i1 @trunc_switch(i1 %arg) {
|
||||
+; CHECK-LABEL: @trunc_switch
|
||||
+top:
|
||||
+; CHECK: br i1 %arg, label %exitA, label %exitB
|
||||
+ br i1 %arg, label %common, label %B
|
||||
+
|
||||
+B:
|
||||
+ br label %common
|
||||
+
|
||||
+common:
|
||||
+ %phi = phi i8 [ 2, %B ], [ 1, %top ]
|
||||
+ %trunc = trunc i8 %phi to i2
|
||||
+; CHECK-NOT: switch
|
||||
+ switch i2 %trunc, label %unreach [
|
||||
+ i2 1, label %exitA
|
||||
+ i2 -2, label %exitB
|
||||
+ ]
|
||||
+
|
||||
+unreach:
|
||||
+ unreachable
|
||||
+
|
||||
+exitA:
|
||||
+ ret i1 true
|
||||
+
|
||||
+exitB:
|
||||
+ ret i1 false
|
||||
+}
|
||||
+
|
||||
; CHECK-LABEL: define void @h_con(i32 %p) {
|
||||
define void @h_con(i32 %p) {
|
||||
%x = icmp ult i32 %p, 5
|
677
debian/patches/julia/llvm-D44892-Perf-integration.patch
vendored
Normal file
677
debian/patches/julia/llvm-D44892-Perf-integration.patch
vendored
Normal file
@ -0,0 +1,677 @@
|
||||
From 45bc0f0badbdbabaed7d204757c2aad7ab49a3fe Mon Sep 17 00:00:00 2001
|
||||
From: DokFaust <rodia@autistici.org>
|
||||
Date: Mon, 11 Jun 2018 12:59:42 +0200
|
||||
Subject: [PATCH] PerfJITEventListener integration, requires compile flag
|
||||
LLVM_USE_PERF
|
||||
|
||||
---
|
||||
CMakeLists.txt | 13 +
|
||||
include/llvm/Config/config.h.cmake | 3 +
|
||||
include/llvm/Config/llvm-config.h.cmake | 3 +
|
||||
.../llvm/ExecutionEngine/JITEventListener.h | 9 +
|
||||
lib/ExecutionEngine/CMakeLists.txt | 4 +
|
||||
lib/ExecutionEngine/LLVMBuild.txt | 2 +-
|
||||
lib/ExecutionEngine/Orc/LLVMBuild.txt | 2 +-
|
||||
.../PerfJITEvents/CMakeLists.txt | 5 +
|
||||
.../PerfJITEvents/LLVMBuild.txt | 23 +
|
||||
.../PerfJITEvents/PerfJITEventListener.cpp | 492 ++++++++++++++++++
|
||||
10 files changed, 554 insertions(+), 2 deletions(-)
|
||||
create mode 100644 lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
|
||||
create mode 100644 lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
|
||||
create mode 100644 lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
|
||||
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index f8da6cf9211..fb92c825a46 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -426,6 +426,16 @@ if( LLVM_USE_OPROFILE )
|
||||
endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
||||
endif( LLVM_USE_OPROFILE )
|
||||
|
||||
+option(LLVM_USE_PERF
|
||||
+ "Use perf JIT interface to inform perf about JIT code" OFF)
|
||||
+
|
||||
+# If enabled, verify we are on a platform that supports perf.
|
||||
+if( LLVM_USE_PERF )
|
||||
+ if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
||||
+ message(FATAL_ERROR "perf support is available on Linux only.")
|
||||
+ endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
||||
+endif( LLVM_USE_PERF )
|
||||
+
|
||||
set(LLVM_USE_SANITIZER "" CACHE STRING
|
||||
"Define the sanitizer used to build binaries and tests.")
|
||||
set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
|
||||
@@ -634,6 +644,9 @@ endif (LLVM_USE_INTEL_JITEVENTS)
|
||||
if (LLVM_USE_OPROFILE)
|
||||
set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT)
|
||||
endif (LLVM_USE_OPROFILE)
|
||||
+if (LLVM_USE_PERF)
|
||||
+ set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents)
|
||||
+endif (LLVM_USE_PERF)
|
||||
|
||||
message(STATUS "Constructing LLVMBuild project information")
|
||||
execute_process(
|
||||
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
|
||||
index 940f8420304..17787ed779b 100644
|
||||
--- a/include/llvm/Config/config.h.cmake
|
||||
+++ b/include/llvm/Config/config.h.cmake
|
||||
@@ -377,6 +377,9 @@
|
||||
/* Define if we have the oprofile JIT-support library */
|
||||
#cmakedefine01 LLVM_USE_OPROFILE
|
||||
|
||||
+/* Define if we have the perf JIT-support library */
|
||||
+#cmakedefine01 LLVM_USE_PERF
|
||||
+
|
||||
/* LLVM version information */
|
||||
#cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}"
|
||||
|
||||
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
|
||||
index 4daa00f3bc4..8d9c3b24d52 100644
|
||||
--- a/include/llvm/Config/llvm-config.h.cmake
|
||||
+++ b/include/llvm/Config/llvm-config.h.cmake
|
||||
@@ -65,6 +65,9 @@
|
||||
/* Define if we have the oprofile JIT-support library */
|
||||
#cmakedefine01 LLVM_USE_OPROFILE
|
||||
|
||||
+/* Define if we have the perf JIT-support library */
|
||||
+#cmakedefine01 LLVM_USE_PERF
|
||||
+
|
||||
/* Major version of the LLVM API */
|
||||
#define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
|
||||
|
||||
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
|
||||
index ff7840f00a4..1cc2c423a8b 100644
|
||||
--- a/include/llvm/ExecutionEngine/JITEventListener.h
|
||||
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
|
||||
@@ -115,6 +115,15 @@ public:
|
||||
}
|
||||
#endif // USE_OPROFILE
|
||||
|
||||
+#ifdef LLVM_USE_PERF
|
||||
+ static JITEventListener *createPerfJITEventListener();
|
||||
+#else
|
||||
+ static JITEventListener *createPerfJITEventListener()
|
||||
+ {
|
||||
+ return nullptr;
|
||||
+ }
|
||||
+#endif // LLVM_USE_PERF
|
||||
+
|
||||
private:
|
||||
virtual void anchor();
|
||||
};
|
||||
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
|
||||
index 84b34919e44..893d113a685 100644
|
||||
--- a/lib/ExecutionEngine/CMakeLists.txt
|
||||
+++ b/lib/ExecutionEngine/CMakeLists.txt
|
||||
@@ -30,3 +30,7 @@ endif( LLVM_USE_OPROFILE )
|
||||
if( LLVM_USE_INTEL_JITEVENTS )
|
||||
add_subdirectory(IntelJITEvents)
|
||||
endif( LLVM_USE_INTEL_JITEVENTS )
|
||||
+
|
||||
+if( LLVM_USE_PERF )
|
||||
+ add_subdirectory(PerfJITEvents)
|
||||
+endif( LLVM_USE_PERF )
|
||||
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
|
||||
index 9d29a41f504..b6e1bda6a51 100644
|
||||
--- a/lib/ExecutionEngine/LLVMBuild.txt
|
||||
+++ b/lib/ExecutionEngine/LLVMBuild.txt
|
||||
@@ -16,7 +16,7 @@
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[common]
|
||||
-subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc
|
||||
+subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents
|
||||
|
||||
[component_0]
|
||||
type = Library
|
||||
diff --git a/lib/ExecutionEngine/Orc/LLVMBuild.txt b/lib/ExecutionEngine/Orc/LLVMBuild.txt
|
||||
index 8f05172e77a..ef4ae64e823 100644
|
||||
--- a/lib/ExecutionEngine/Orc/LLVMBuild.txt
|
||||
+++ b/lib/ExecutionEngine/Orc/LLVMBuild.txt
|
||||
@@ -19,4 +19,4 @@
|
||||
type = Library
|
||||
name = OrcJIT
|
||||
parent = ExecutionEngine
|
||||
-required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
|
||||
+required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
|
||||
diff --git a/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
|
||||
new file mode 100644
|
||||
index 00000000000..136cc429d02
|
||||
--- /dev/null
|
||||
+++ b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
|
||||
@@ -0,0 +1,5 @@
|
||||
+add_llvm_library(LLVMPerfJITEvents
|
||||
+ PerfJITEventListener.cpp
|
||||
+ )
|
||||
+
|
||||
+add_dependencies(LLVMPerfJITEvents LLVMCodeGen)
|
||||
diff --git a/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
|
||||
new file mode 100644
|
||||
index 00000000000..b1958a69260
|
||||
--- /dev/null
|
||||
+++ b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
|
||||
@@ -0,0 +1,23 @@
|
||||
+;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===;
|
||||
+;
|
||||
+; The LLVM Compiler Infrastructure
|
||||
+;
|
||||
+; This file is distributed under the University of Illinois Open Source
|
||||
+; License. See LICENSE.TXT for details.
|
||||
+;
|
||||
+;===------------------------------------------------------------------------===;
|
||||
+;
|
||||
+; This is an LLVMBuild description file for the components in this subdirectory.
|
||||
+;
|
||||
+; For more information on the LLVMBuild system, please see:
|
||||
+;
|
||||
+; http://llvm.org/docs/LLVMBuild.html
|
||||
+;
|
||||
+;===------------------------------------------------------------------------===;
|
||||
+
|
||||
+[component_0]
|
||||
+type = OptionalLibrary
|
||||
+name = PerfJITEvents
|
||||
+parent = ExecutionEngine
|
||||
+required_libraries = CodeGen Core DebugInfoDWARF ExecutionEngine Object Support TransformUtils
|
||||
+
|
||||
diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
|
||||
new file mode 100644
|
||||
index 00000000000..c2b97dd59f3
|
||||
--- /dev/null
|
||||
+++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
|
||||
@@ -0,0 +1,492 @@
|
||||
+//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
|
||||
+//
|
||||
+// The LLVM Compiler Infrastructure
|
||||
+//
|
||||
+// This file is distributed under the University of Illinois Open Source
|
||||
+// License. See LICENSE.TXT for details.
|
||||
+//
|
||||
+//===----------------------------------------------------------------------===//
|
||||
+//
|
||||
+// This file defines a JITEventListener object that tells perf about JITted
|
||||
+// functions, including source line information.
|
||||
+//
|
||||
+// Documentation for perf jit integration is available at:
|
||||
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
|
||||
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
|
||||
+//
|
||||
+//===----------------------------------------------------------------------===//
|
||||
+
|
||||
+#include "llvm/ADT/Twine.h"
|
||||
+#include "llvm/Config/config.h"
|
||||
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
||||
+#include "llvm/ExecutionEngine/JITEventListener.h"
|
||||
+#include "llvm/Object/ObjectFile.h"
|
||||
+#include "llvm/Object/SymbolSize.h"
|
||||
+#include "llvm/Support/Debug.h"
|
||||
+#include "llvm/Support/Errno.h"
|
||||
+#include "llvm/Support/FileSystem.h"
|
||||
+#include "llvm/Support/MemoryBuffer.h"
|
||||
+#include "llvm/Support/Mutex.h"
|
||||
+#include "llvm/Support/MutexGuard.h"
|
||||
+#include "llvm/Support/Path.h"
|
||||
+#include "llvm/Support/Process.h"
|
||||
+#include "llvm/Support/Threading.h"
|
||||
+#include "llvm/Support/raw_ostream.h"
|
||||
+
|
||||
+#include <sys/mman.h> // mmap()
|
||||
+#include <sys/types.h> // getpid()
|
||||
+#include <time.h> // clock_gettime(), time(), localtime_r()
|
||||
+#include <unistd.h> // for getpid(), read(), close()
|
||||
+
|
||||
+using namespace llvm;
|
||||
+using namespace llvm::object;
|
||||
+typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
|
||||
+
|
||||
+namespace {
|
||||
+
|
||||
+// language identifier (XXX: should we generate something better from debug
|
||||
+// info?)
|
||||
+#define JIT_LANG "llvm-IR"
|
||||
+#define LLVM_PERF_JIT_MAGIC \
|
||||
+ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
|
||||
+ (uint32_t)'D')
|
||||
+#define LLVM_PERF_JIT_VERSION 1
|
||||
+
|
||||
+// bit 0: set if the jitdump file is using an architecture-specific timestamp
|
||||
+// clock source
|
||||
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
|
||||
+
|
||||
+struct LLVMPerfJitHeader;
|
||||
+
|
||||
+class PerfJITEventListener : public JITEventListener {
|
||||
+public:
|
||||
+ PerfJITEventListener();
|
||||
+ ~PerfJITEventListener() {
|
||||
+ if (MarkerAddr)
|
||||
+ CloseMarker();
|
||||
+ }
|
||||
+
|
||||
+ void NotifyObjectEmitted(const ObjectFile &Obj,
|
||||
+ const RuntimeDyld::LoadedObjectInfo &L) override;
|
||||
+ void NotifyFreeingObject(const ObjectFile &Obj) override;
|
||||
+
|
||||
+private:
|
||||
+ bool InitDebuggingDir();
|
||||
+ bool OpenMarker();
|
||||
+ void CloseMarker();
|
||||
+ static bool FillMachine(LLVMPerfJitHeader &hdr);
|
||||
+
|
||||
+ void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
|
||||
+ uint64_t CodeSize);
|
||||
+ void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
|
||||
+
|
||||
+ // cache lookups
|
||||
+ pid_t Pid;
|
||||
+
|
||||
+ // base directory for output data
|
||||
+ std::string JitPath;
|
||||
+
|
||||
+ // file descriptor of the output data stream, closed via Dumpstream
|
||||
+ int DumpFd = -1;
|
||||
+
|
||||
+ // output data stream
|
||||
+ std::unique_ptr<raw_fd_ostream> Dumpstream;
|
||||
+
|
||||
+ // prevent concurrent dumps from messing up the output file
|
||||
+ sys::Mutex Mutex;
|
||||
+
|
||||
+ // perf mmap marker
|
||||
+ void *MarkerAddr = NULL;
|
||||
+
|
||||
+ // perf support ready
|
||||
+ bool SuccessfullyInitialized = false;
|
||||
+
|
||||
+ // identifier for functions, primarily to identify when moving them around
|
||||
+ uint64_t CodeGeneration = 1;
|
||||
+};
|
||||
+
|
||||
+// The following are POD struct definitions from the perf jit specification
|
||||
+
|
||||
+enum LLVMPerfJitRecordType {
|
||||
+ JIT_CODE_LOAD = 0,
|
||||
+ JIT_CODE_MOVE = 1, // not emitted, code isn't moved
|
||||
+ JIT_CODE_DEBUG_INFO = 2,
|
||||
+ JIT_CODE_CLOSE = 3, // not emitted, unnecessary
|
||||
+ JIT_CODE_UNWINDING_INFO = 4, // not emitted
|
||||
+
|
||||
+ JIT_CODE_MAX
|
||||
+};
|
||||
+
|
||||
+struct LLVMPerfJitHeader {
|
||||
+ uint32_t Magic; // characters "JiTD"
|
||||
+ uint32_t Version; // header version
|
||||
+ uint32_t TotalSize; // total size of header
|
||||
+ uint32_t ElfMach; // elf mach target
|
||||
+ uint32_t Pad1; // reserved
|
||||
+ uint32_t Pid;
|
||||
+ uint64_t Timestamp; // timestamp
|
||||
+ uint64_t Flags; // flags
|
||||
+};
|
||||
+
|
||||
+// record prefix (mandatory in each record)
|
||||
+struct LLVMPerfJitRecordPrefix {
|
||||
+ uint32_t Id; // record type identifier
|
||||
+ uint32_t TotalSize;
|
||||
+ uint64_t Timestamp;
|
||||
+};
|
||||
+
|
||||
+struct LLVMPerfJitRecordCodeLoad {
|
||||
+ LLVMPerfJitRecordPrefix Prefix;
|
||||
+
|
||||
+ uint32_t Pid;
|
||||
+ uint32_t Tid;
|
||||
+ uint64_t Vma;
|
||||
+ uint64_t CodeAddr;
|
||||
+ uint64_t CodeSize;
|
||||
+ uint64_t CodeIndex;
|
||||
+};
|
||||
+
|
||||
+struct LLVMPerfJitDebugEntry {
|
||||
+ uint64_t Addr;
|
||||
+ int Lineno; // source line number starting at 1
|
||||
+ int Discrim; // column discriminator, 0 is default
|
||||
+ // followed by null terminated filename, \xff\0 if same as previous entry
|
||||
+};
|
||||
+
|
||||
+struct LLVMPerfJitRecordDebugInfo {
|
||||
+ LLVMPerfJitRecordPrefix Prefix;
|
||||
+
|
||||
+ uint64_t CodeAddr;
|
||||
+ uint64_t NrEntry;
|
||||
+ // followed by NrEntry LLVMPerfJitDebugEntry records
|
||||
+};
|
||||
+
|
||||
+static inline uint64_t timespec_to_ns(const struct timespec *ts) {
|
||||
+ const uint64_t NanoSecPerSec = 1000000000;
|
||||
+ return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
|
||||
+}
|
||||
+
|
||||
+static inline uint64_t perf_get_timestamp(void) {
|
||||
+ struct timespec ts;
|
||||
+ int ret;
|
||||
+
|
||||
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
+ if (ret)
|
||||
+ return 0;
|
||||
+
|
||||
+ return timespec_to_ns(&ts);
|
||||
+}
|
||||
+
|
||||
+PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) {
|
||||
+ // check if clock-source is supported
|
||||
+ if (!perf_get_timestamp()) {
|
||||
+ errs() << "kernel does not support CLOCK_MONOTONIC\n";
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (!InitDebuggingDir()) {
|
||||
+ errs() << "could not initialize debugging directory\n";
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ std::string Filename;
|
||||
+ raw_string_ostream FilenameBuf(Filename);
|
||||
+ FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
|
||||
+
|
||||
+ // Need to open ourselves, because we need to hand the FD to OpenMarker() and
|
||||
+ // raw_fd_ostream doesn't expose the FD.
|
||||
+ using sys::fs::openFileForWrite;
|
||||
+ if (auto EC =
|
||||
+ openFileForWrite(FilenameBuf.str(), DumpFd, sys::fs::F_RW, 0666)) {
|
||||
+ errs() << "could not open JIT dump file " << FilenameBuf.str() << ": "
|
||||
+ << EC.message() << "\n";
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ Dumpstream = make_unique<raw_fd_ostream>(DumpFd, true);
|
||||
+
|
||||
+ LLVMPerfJitHeader Header = {0};
|
||||
+ if (!FillMachine(Header))
|
||||
+ return;
|
||||
+
|
||||
+ // signal this process emits JIT information
|
||||
+ if (!OpenMarker())
|
||||
+ return;
|
||||
+
|
||||
+ // emit dumpstream header
|
||||
+ Header.Magic = LLVM_PERF_JIT_MAGIC;
|
||||
+ Header.Version = LLVM_PERF_JIT_VERSION;
|
||||
+ Header.TotalSize = sizeof(Header);
|
||||
+ Header.Pid = Pid;
|
||||
+ Header.Timestamp = perf_get_timestamp();
|
||||
+ Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
|
||||
+
|
||||
+ // Everything initialized, can do profiling now.
|
||||
+ if (!Dumpstream->has_error())
|
||||
+ SuccessfullyInitialized = true;
|
||||
+}
|
||||
+
|
||||
+void PerfJITEventListener::NotifyObjectEmitted(
|
||||
+ const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) {
|
||||
+
|
||||
+ if (!SuccessfullyInitialized)
|
||||
+ return;
|
||||
+
|
||||
+ OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
|
||||
+ const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
|
||||
+
|
||||
+ // Create a DWARF context for the debug object; used below for line info
|
||||
+ std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
|
||||
+
|
||||
+ // Use symbol info to iterate over functions in the object.
|
||||
+ for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
|
||||
+ SymbolRef Sym = P.first;
|
||||
+ std::string SourceFileName;
|
||||
+
|
||||
+ Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
|
||||
+ if (!SymTypeOrErr) {
|
||||
+ // There's not much we can do with errors here
|
||||
+ consumeError(SymTypeOrErr.takeError());
|
||||
+ continue;
|
||||
+ }
|
||||
+ SymbolRef::Type SymType = *SymTypeOrErr;
|
||||
+ if (SymType != SymbolRef::ST_Function)
|
||||
+ continue;
|
||||
+
|
||||
+ Expected<StringRef> Name = Sym.getName();
|
||||
+ if (!Name) {
|
||||
+ consumeError(Name.takeError());
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ Expected<uint64_t> AddrOrErr = Sym.getAddress();
|
||||
+ if (!AddrOrErr) {
|
||||
+ consumeError(AddrOrErr.takeError());
|
||||
+ continue;
|
||||
+ }
|
||||
+ uint64_t Addr = *AddrOrErr;
|
||||
+ uint64_t Size = P.second;
|
||||
+
|
||||
+ // According to spec debugging info has to come before loading the
|
||||
+ // corresponding code load.
|
||||
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(
|
||||
+ Addr, Size, FileLineInfoKind::AbsoluteFilePath);
|
||||
+
|
||||
+ NotifyDebug(Addr, Lines);
|
||||
+ NotifyCode(Name, Addr, Size);
|
||||
+ }
|
||||
+
|
||||
+ Dumpstream->flush();
|
||||
+}
|
||||
+
|
||||
+void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
|
||||
+ // perf currently doesn't have an interface for unloading. But munmap()ing the
|
||||
+ // code section does, so that's ok.
|
||||
+}
|
||||
+
|
||||
+bool PerfJITEventListener::InitDebuggingDir() {
|
||||
+ time_t Time;
|
||||
+ struct tm LocalTime;
|
||||
+ char TimeBuffer[sizeof("YYYYMMDD")];
|
||||
+ SmallString<64> Path;
|
||||
+
|
||||
+ // search for location to dump data to
|
||||
+ if (const char *BaseDir = getenv("JITDUMPDIR"))
|
||||
+ Path.append(BaseDir);
|
||||
+ else if (!sys::path::home_directory(Path))
|
||||
+ Path = ".";
|
||||
+
|
||||
+ // create debug directory
|
||||
+ Path += "/.debug/jit/";
|
||||
+ if (auto EC = sys::fs::create_directories(Path)) {
|
||||
+ errs() << "could not create jit cache directory " << Path << ": "
|
||||
+ << EC.message() << "\n";
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ // create unique directory for dump data related to this process
|
||||
+ time(&Time);
|
||||
+ localtime_r(&Time, &LocalTime);
|
||||
+ strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
|
||||
+ Path += JIT_LANG "-jit-";
|
||||
+ Path += TimeBuffer;
|
||||
+
|
||||
+ SmallString<128> UniqueDebugDir;
|
||||
+
|
||||
+ using sys::fs::createUniqueDirectory;
|
||||
+ if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
|
||||
+ errs() << "could not create unique jit cache directory " << UniqueDebugDir
|
||||
+ << ": " << EC.message() << "\n";
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ JitPath = UniqueDebugDir.str();
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+bool PerfJITEventListener::OpenMarker() {
|
||||
+ // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
|
||||
+ // is captured either live (perf record running when we mmap) or in deferred
|
||||
+ // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
|
||||
+ // file for more meta data info about the jitted code. Perf report/annotate
|
||||
+ // detect this special filename and process the jitdump file.
|
||||
+ //
|
||||
+ // Mapping must be PROT_EXEC to ensure it is captured by perf record
|
||||
+ // even when not using -d option.
|
||||
+ MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC,
|
||||
+ MAP_PRIVATE, DumpFd, 0);
|
||||
+
|
||||
+ if (MarkerAddr == MAP_FAILED) {
|
||||
+ errs() << "could not mmap JIT marker\n";
|
||||
+ return false;
|
||||
+ }
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+void PerfJITEventListener::CloseMarker() {
|
||||
+ if (!MarkerAddr)
|
||||
+ return;
|
||||
+
|
||||
+ munmap(MarkerAddr, sys::Process::getPageSize());
|
||||
+ MarkerAddr = nullptr;
|
||||
+}
|
||||
+
|
||||
+bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
|
||||
+ char id[16];
|
||||
+ struct {
|
||||
+ uint16_t e_type;
|
||||
+ uint16_t e_machine;
|
||||
+ } info;
|
||||
+
|
||||
+ size_t RequiredMemory = sizeof(id) + sizeof(info);
|
||||
+
|
||||
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
||||
+ MemoryBuffer::getFileSlice("/proc/self/exe",
|
||||
+ RequiredMemory,
|
||||
+ 0);
|
||||
+
|
||||
+ // This'll not guarantee that enough data was actually read from the
|
||||
+ // underlying file. Instead the trailing part of the buffer would be
|
||||
+ // zeroed. Given the ELF signature check below that seems ok though,
|
||||
+ // it's unlikely that the file ends just after that, and the
|
||||
+ // consequence would just be that perf wouldn't recognize the
|
||||
+ // signature.
|
||||
+ if (auto EC = MB.getError()) {
|
||||
+ errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
|
||||
+ memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
|
||||
+
|
||||
+ // check ELF signature
|
||||
+ if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
|
||||
+ errs() << "invalid elf signature\n";
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ hdr.ElfMach = info.e_machine;
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
|
||||
+ uint64_t CodeAddr, uint64_t CodeSize) {
|
||||
+ assert(SuccessfullyInitialized);
|
||||
+
|
||||
+ // 0 length functions can't have samples.
|
||||
+ if (CodeSize == 0)
|
||||
+ return;
|
||||
+
|
||||
+ LLVMPerfJitRecordCodeLoad rec;
|
||||
+ rec.Prefix.Id = JIT_CODE_LOAD;
|
||||
+ rec.Prefix.TotalSize = sizeof(rec) + // debug record itself
|
||||
+ Symbol->size() + 1 + // symbol name
|
||||
+ CodeSize; // and code
|
||||
+ rec.Prefix.Timestamp = perf_get_timestamp();
|
||||
+
|
||||
+ rec.CodeSize = CodeSize;
|
||||
+ rec.Vma = 0;
|
||||
+ rec.CodeAddr = CodeAddr;
|
||||
+ rec.Pid = Pid;
|
||||
+ rec.Tid = get_threadid();
|
||||
+
|
||||
+ // avoid interspersing output
|
||||
+ MutexGuard Guard(Mutex);
|
||||
+
|
||||
+ rec.CodeIndex = CodeGeneration++; // under lock!
|
||||
+
|
||||
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
|
||||
+ Dumpstream->write(Symbol->data(), Symbol->size() + 1);
|
||||
+ Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
|
||||
+}
|
||||
+
|
||||
+void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
|
||||
+ DILineInfoTable Lines) {
|
||||
+ assert(SuccessfullyInitialized);
|
||||
+
|
||||
+ // Didn't get useful debug info.
|
||||
+ if (Lines.empty())
|
||||
+ return;
|
||||
+
|
||||
+ LLVMPerfJitRecordDebugInfo rec;
|
||||
+ rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
|
||||
+ rec.Prefix.TotalSize = sizeof(rec); // will be increased further
|
||||
+ rec.Prefix.Timestamp = perf_get_timestamp();
|
||||
+ rec.CodeAddr = CodeAddr;
|
||||
+ rec.NrEntry = Lines.size();
|
||||
+
|
||||
+ // compute total size of record (variable due to filenames)
|
||||
+ DILineInfoTable::iterator Begin = Lines.begin();
|
||||
+ DILineInfoTable::iterator End = Lines.end();
|
||||
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
|
||||
+ DILineInfo &line = It->second;
|
||||
+ rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
|
||||
+ rec.Prefix.TotalSize += line.FileName.size() + 1;
|
||||
+ }
|
||||
+
|
||||
+ // The debug_entry describes the source line information. It is defined as
|
||||
+ // follows in order:
|
||||
+ // * uint64_t code_addr: address of function for which the debug information
|
||||
+ // is generated
|
||||
+ // * uint32_t line : source file line number (starting at 1)
|
||||
+ // * uint32_t discrim : column discriminator, 0 is default
|
||||
+ // * char name[n] : source file name in ASCII, including null termination
|
||||
+
|
||||
+ // avoid interspersing output
|
||||
+ MutexGuard Guard(Mutex);
|
||||
+
|
||||
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
|
||||
+
|
||||
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
|
||||
+ LLVMPerfJitDebugEntry LineInfo;
|
||||
+ DILineInfo &Line = It->second;
|
||||
+
|
||||
+ LineInfo.Addr = It->first;
|
||||
+ // The function re-created by perf is preceded by an ELF
|
||||
+ // header. Need to adjust for that, otherwise the results are
|
||||
+ // wrong.
|
||||
+ LineInfo.Addr += 0x40;
|
||||
+ LineInfo.Lineno = Line.Line;
|
||||
+ LineInfo.Discrim = Line.Discriminator;
|
||||
+
|
||||
+ Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
|
||||
+ sizeof(LineInfo));
|
||||
+ Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+// There should be only a single event listener per process, otherwise perf gets
|
||||
+// confused.
|
||||
+llvm::ManagedStatic<PerfJITEventListener> PerfListener;
|
||||
+
|
||||
+} // end anonymous namespace
|
||||
+
|
||||
+namespace llvm {
|
||||
+JITEventListener *JITEventListener::createPerfJITEventListener() {
|
||||
+ return &*PerfListener;
|
||||
+}
|
||||
+
|
||||
+} // namespace llvm
|
||||
+
|
||||
--
|
||||
2.17.1
|
||||
|
89
debian/patches/julia/llvm-D50010-VNCoercion-ni.patch
vendored
Normal file
89
debian/patches/julia/llvm-D50010-VNCoercion-ni.patch
vendored
Normal file
@ -0,0 +1,89 @@
|
||||
commit 8eb2b102a203d83fb713f3bf79acf235dabdd8cd
|
||||
Author: Keno Fischer <keno@juliacomputing.com>
|
||||
Date: Mon Jul 30 16:59:08 2018 -0400
|
||||
|
||||
[VNCoercion] Disallow coercion between different ni addrspaces
|
||||
|
||||
Summary:
|
||||
I'm not sure if it would be legal by the IR reference to introduce
|
||||
an addrspacecast here, since the IR reference is a bit vague on
|
||||
the exact semantics, but at least for our usage of it (and I
|
||||
suspect for many others' usage) it is not. For us, addrspacecasts
|
||||
between non-integral address spaces carry frontend information that the
|
||||
optimizer cannot deduce afterwards in a generic way (though we
|
||||
have frontend-specific passes in our pipeline that do propagate
|
||||
these). In any case, I'm sure nobody is using it this way at
|
||||
the moment, since it would have introduced inttoptrs, which
|
||||
are definitely illegal.
|
||||
|
||||
Fixes PR38375
|
||||
|
||||
Reviewers: sanjoy, reames, dberlin
|
||||
|
||||
Subscribers: llvm-commits
|
||||
|
||||
Differential Revision: https://reviews.llvm.org/D50010
|
||||
|
||||
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
|
||||
index c3feea6a0a4..735d1e7b792 100644
|
||||
--- a/lib/Transforms/Utils/VNCoercion.cpp
|
||||
+++ b/lib/Transforms/Utils/VNCoercion.cpp
|
||||
@@ -20,14 +20,21 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
|
||||
StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
|
||||
return false;
|
||||
|
||||
+ Type *StoredValTy = StoredVal->getType();
|
||||
+
|
||||
// The store has to be at least as big as the load.
|
||||
if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
|
||||
return false;
|
||||
|
||||
- // Don't coerce non-integral pointers to integers or vice versa.
|
||||
- if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
|
||||
- DL.isNonIntegralPointerType(LoadTy))
|
||||
+ bool StoredNI = DL.isNonIntegralPointerType(StoredValTy);
|
||||
+ bool LoadNI = DL.isNonIntegralPointerType(LoadTy);
|
||||
+ if (StoredNI != LoadNI) {
|
||||
return false;
|
||||
+ } else if (StoredNI && LoadNI &&
|
||||
+ cast<PointerType>(StoredValTy)->getAddressSpace() !=
|
||||
+ cast<PointerType>(LoadTy)->getAddressSpace()) {
|
||||
+ return false;
|
||||
+ }
|
||||
|
||||
return true;
|
||||
}
|
||||
diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
|
||||
index 9ae4132231d..5217fc1a06a 100644
|
||||
--- a/test/Transforms/GVN/non-integral-pointers.ll
|
||||
+++ b/test/Transforms/GVN/non-integral-pointers.ll
|
||||
@@ -1,6 +1,6 @@
|
||||
; RUN: opt -gvn -S < %s | FileCheck %s
|
||||
|
||||
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
|
||||
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
|
||||
@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
|
||||
alwaysTaken:
|
||||
ret i64 42
|
||||
}
|
||||
+
|
||||
+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
|
||||
+ ; CHECK-LABEL: @multini(
|
||||
+ ; CHECK-NOT: inttoptr
|
||||
+ ; CHECK-NOT: ptrtoint
|
||||
+ ; CHECK-NOT: addrspacecast
|
||||
+ entry:
|
||||
+ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
|
||||
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
|
||||
+
|
||||
+ neverTaken:
|
||||
+ %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
|
||||
+ %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
|
||||
+ ret i8 addrspace(5)* %differentas
|
||||
+
|
||||
+ alwaysTaken:
|
||||
+ ret i8 addrspace(5)* null
|
||||
+ }
|
1143
debian/patches/julia/llvm-D50167-scev-umin.patch
vendored
Normal file
1143
debian/patches/julia/llvm-D50167-scev-umin.patch
vendored
Normal file
File diff suppressed because it is too large
Load Diff
26
debian/patches/julia/llvm-PPC-addrspaces.patch
vendored
Normal file
26
debian/patches/julia/llvm-PPC-addrspaces.patch
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
From 15899eaab58e96bb7bbe7a14099674e255656a50 Mon Sep 17 00:00:00 2001
|
||||
From: Valentin Churavy <v.churavy@gmail.com>
|
||||
Date: Fri, 23 Feb 2018 14:41:20 -0500
|
||||
Subject: [PATCH] Make AddrSpaceCast noops on PPC
|
||||
|
||||
PPC, like AArch64, doesn't have address spaces, so we can drop them in the backend
|
||||
---
|
||||
lib/Target/PowerPC/PPCISelLowering.h | 5 +++++
|
||||
1 file changed, 5 insertions(+)
|
||||
|
||||
Index: llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h
|
||||
===================================================================
|
||||
--- llvm-toolchain-6.0-6.0.1.orig/lib/Target/PowerPC/PPCISelLowering.h
|
||||
+++ llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h
|
||||
@@ -889,6 +889,11 @@ namespace llvm {
|
||||
return true;
|
||||
}
|
||||
|
||||
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
|
||||
+ // Addrspacecasts are always noops.
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
|
||||
SelectionDAG &DAG,
|
||||
ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
|
301
debian/patches/julia/llvm-rL326967-aligned-load.patch
vendored
Normal file
301
debian/patches/julia/llvm-rL326967-aligned-load.patch
vendored
Normal file
@ -0,0 +1,301 @@
|
||||
commit b398d8e1fa5a5a914957fa22d0a64db97f6c265e
|
||||
Author: Craig Topper <craig.topper@intel.com>
|
||||
Date: Thu Mar 8 00:21:17 2018 +0000
|
||||
|
||||
[X86] Fix some isel patterns that used aligned vector load instructions with unaligned predicates.
|
||||
|
||||
These patterns weren't checking the alignment of the load, but were using the aligned instructions. This will cause a GP fault if the data isn't aligned.
|
||||
|
||||
I believe these were introduced in r312450.
|
||||
|
||||
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326967 91177308-0d34-0410-b5e6-96231b3b80d8
|
||||
|
||||
diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td
|
||||
index db3dfe56531..50c7763a2c3 100644
|
||||
--- a/lib/Target/X86/X86InstrVecCompiler.td
|
||||
+++ b/lib/Target/X86/X86InstrVecCompiler.td
|
||||
@@ -261,10 +261,10 @@ let Predicates = [HasVLX] in {
|
||||
// will zero the upper bits.
|
||||
// TODO: Is there a safe way to detect whether the producing instruction
|
||||
// already zeroed the upper bits?
|
||||
-multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
|
||||
- ValueType DstTy, ValueType SrcTy,
|
||||
- ValueType ZeroTy, PatFrag memop,
|
||||
- SubRegIndex SubIdx> {
|
||||
+multiclass subvector_zero_lowering<string MoveStr, string LoadStr,
|
||||
+ RegisterClass RC, ValueType DstTy,
|
||||
+ ValueType SrcTy, ValueType ZeroTy,
|
||||
+ PatFrag memop, SubRegIndex SubIdx> {
|
||||
def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
|
||||
(SrcTy RC:$src), (iPTR 0))),
|
||||
(SUBREG_TO_REG (i64 0),
|
||||
@@ -274,91 +274,91 @@ multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
|
||||
(SrcTy (bitconvert (memop addr:$src))),
|
||||
(iPTR 0))),
|
||||
(SUBREG_TO_REG (i64 0),
|
||||
- (!cast<Instruction>("VMOV"#MoveStr#"rm") addr:$src), SubIdx)>;
|
||||
+ (!cast<Instruction>("VMOV"#LoadStr#"rm") addr:$src), SubIdx)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX, NoVLX] in {
|
||||
- defm : subvector_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, loadv2f64,
|
||||
- sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, loadv4f32,
|
||||
- sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, loadv2i64,
|
||||
- sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, loadv2i64,
|
||||
- sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, loadv2i64,
|
||||
- sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, loadv2i64,
|
||||
- sub_xmm>;
|
||||
-}
|
||||
-
|
||||
-let Predicates = [HasVLX] in {
|
||||
- defm : subvector_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32,
|
||||
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v4f64, v2f64, v8i32,
|
||||
loadv2f64, sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32,
|
||||
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v8f32, v4f32, v8i32,
|
||||
loadv4f32, sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32,
|
||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v4i64, v2i64, v8i32,
|
||||
loadv2i64, sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32,
|
||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i32, v4i32, v8i32,
|
||||
loadv2i64, sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32,
|
||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i16, v8i16, v8i32,
|
||||
loadv2i64, sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32,
|
||||
- loadv2i64, sub_xmm>;
|
||||
-
|
||||
- defm : subvector_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32,
|
||||
- loadv2f64, sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32,
|
||||
- loadv4f32, sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32,
|
||||
- loadv2i64, sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32,
|
||||
- loadv2i64, sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32,
|
||||
- loadv2i64, sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32,
|
||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i8, v16i8, v8i32,
|
||||
loadv2i64, sub_xmm>;
|
||||
+}
|
||||
|
||||
- defm : subvector_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32,
|
||||
- loadv4f64, sub_ymm>;
|
||||
- defm : subvector_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32,
|
||||
- loadv8f32, sub_ymm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32,
|
||||
- loadv4i64, sub_ymm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32,
|
||||
- loadv4i64, sub_ymm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32,
|
||||
- loadv4i64, sub_ymm>;
|
||||
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32,
|
||||
- loadv4i64, sub_ymm>;
|
||||
+let Predicates = [HasVLX] in {
|
||||
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v4f64,
|
||||
+ v2f64, v8i32, loadv2f64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v8f32,
|
||||
+ v4f32, v8i32, loadv4f32, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v4i64,
|
||||
+ v2i64, v8i32, loadv2i64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i32,
|
||||
+ v4i32, v8i32, loadv2i64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i16,
|
||||
+ v8i16, v8i32, loadv2i64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i8,
|
||||
+ v16i8, v8i32, loadv2i64, sub_xmm>;
|
||||
+
|
||||
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v8f64,
|
||||
+ v2f64, v16i32, loadv2f64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v16f32,
|
||||
+ v4f32, v16i32, loadv4f32, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i64,
|
||||
+ v2i64, v16i32, loadv2i64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i32,
|
||||
+ v4i32, v16i32, loadv2i64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i16,
|
||||
+ v8i16, v16i32, loadv2i64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v64i8,
|
||||
+ v16i8, v16i32, loadv2i64, sub_xmm>;
|
||||
+
|
||||
+ defm : subvector_zero_lowering<"APDZ256", "UPDZ256", VR256X, v8f64,
|
||||
+ v4f64, v16i32, loadv4f64, sub_ymm>;
|
||||
+ defm : subvector_zero_lowering<"APSZ256", "UPDZ256", VR256X, v16f32,
|
||||
+ v8f32, v16i32, loadv8f32, sub_ymm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v8i64,
|
||||
+ v4i64, v16i32, loadv4i64, sub_ymm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v16i32,
|
||||
+ v8i32, v16i32, loadv4i64, sub_ymm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v32i16,
|
||||
+ v16i16, v16i32, loadv4i64, sub_ymm>;
|
||||
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v64i8,
|
||||
+ v32i8, v16i32, loadv4i64, sub_ymm>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX512, NoVLX] in {
|
||||
- defm : subvector_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, loadv2f64,
|
||||
- sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, loadv4f32,
|
||||
- sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, loadv2i64,
|
||||
- sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, loadv2i64,
|
||||
- sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, loadv2i64,
|
||||
- sub_xmm>;
|
||||
- defm : subvector_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, loadv2i64,
|
||||
- sub_xmm>;
|
||||
-
|
||||
- defm : subvector_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32,
|
||||
- loadv4f64, sub_ymm>;
|
||||
- defm : subvector_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32,
|
||||
- loadv8f32, sub_ymm>;
|
||||
- defm : subvector_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32,
|
||||
- loadv4i64, sub_ymm>;
|
||||
- defm : subvector_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32,
|
||||
- loadv4i64, sub_ymm>;
|
||||
- defm : subvector_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32,
|
||||
- loadv4i64, sub_ymm>;
|
||||
- defm : subvector_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32,
|
||||
- loadv4i64, sub_ymm>;
|
||||
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v8f64, v2f64,
|
||||
+ v16i32,loadv2f64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v16f32, v4f32,
|
||||
+ v16i32, loadv4f32, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i64, v2i64,
|
||||
+ v16i32, loadv2i64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i32, v4i32,
|
||||
+ v16i32, loadv2i64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i16, v8i16,
|
||||
+ v16i32, loadv2i64, sub_xmm>;
|
||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v64i8, v16i8,
|
||||
+ v16i32, loadv2i64, sub_xmm>;
|
||||
+
|
||||
+ defm : subvector_zero_lowering<"APDY", "UPDY", VR256, v8f64, v4f64,
|
||||
+ v16i32, loadv4f64, sub_ymm>;
|
||||
+ defm : subvector_zero_lowering<"APSY", "UPSY", VR256, v16f32, v8f32,
|
||||
+ v16i32, loadv8f32, sub_ymm>;
|
||||
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v8i64, v4i64,
|
||||
+ v16i32, loadv4i64, sub_ymm>;
|
||||
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v16i32, v8i32,
|
||||
+ v16i32, loadv4i64, sub_ymm>;
|
||||
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v32i16, v16i16,
|
||||
+ v16i32, loadv4i64, sub_ymm>;
|
||||
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v64i8, v32i8,
|
||||
+ v16i32, loadv4i64, sub_ymm>;
|
||||
}
|
||||
|
||||
// List of opcodes that guaranteed to zero the upper elements of vector regs.
|
||||
diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll
|
||||
index 6ecd8116443..0f2cf594b1c 100644
|
||||
--- a/test/CodeGen/X86/merge-consecutive-loads-256.ll
|
||||
+++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll
|
||||
@@ -28,13 +28,13 @@ define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noi
|
||||
define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX-LABEL: merge_4f64_2f64_2z:
|
||||
; AVX: # %bb.0:
|
||||
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
|
||||
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-AVX-LABEL: merge_4f64_2f64_2z:
|
||||
; X32-AVX: # %bb.0:
|
||||
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
|
||||
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
|
||||
; X32-AVX-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
|
||||
%val0 = load <2 x double>, <2 x double>* %ptr0
|
||||
@@ -109,13 +109,13 @@ define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline
|
||||
define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX-LABEL: merge_4f64_f64_45zz:
|
||||
; AVX: # %bb.0:
|
||||
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
|
||||
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-AVX-LABEL: merge_4f64_f64_45zz:
|
||||
; X32-AVX: # %bb.0:
|
||||
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
|
||||
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
|
||||
; X32-AVX-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds double, double* %ptr, i64 4
|
||||
%ptr1 = getelementptr inbounds double, double* %ptr, i64 5
|
||||
@@ -155,13 +155,13 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline
|
||||
define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX-LABEL: merge_4i64_2i64_3z:
|
||||
; AVX: # %bb.0:
|
||||
-; AVX-NEXT: vmovaps 48(%rdi), %xmm0
|
||||
+; AVX-NEXT: vmovups 48(%rdi), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-AVX-LABEL: merge_4i64_2i64_3z:
|
||||
; X32-AVX: # %bb.0:
|
||||
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
-; X32-AVX-NEXT: vmovaps 48(%eax), %xmm0
|
||||
+; X32-AVX-NEXT: vmovups 48(%eax), %xmm0
|
||||
; X32-AVX-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3
|
||||
%val0 = load <2 x i64>, <2 x i64>* %ptr0
|
||||
@@ -217,13 +217,13 @@ define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp {
|
||||
define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp {
|
||||
; AVX-LABEL: merge_4i64_i64_23zz:
|
||||
; AVX: # %bb.0:
|
||||
-; AVX-NEXT: vmovaps 16(%rdi), %xmm0
|
||||
+; AVX-NEXT: vmovups 16(%rdi), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-AVX-LABEL: merge_4i64_i64_23zz:
|
||||
; X32-AVX: # %bb.0:
|
||||
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
-; X32-AVX-NEXT: vmovaps 16(%eax), %xmm0
|
||||
+; X32-AVX-NEXT: vmovups 16(%eax), %xmm0
|
||||
; X32-AVX-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2
|
||||
%ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3
|
||||
diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll
|
||||
index 62102eb382c..3c6eaf65292 100644
|
||||
--- a/test/CodeGen/X86/merge-consecutive-loads-512.ll
|
||||
+++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll
|
||||
@@ -106,13 +106,13 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin
|
||||
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
|
||||
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
|
||||
; ALL: # %bb.0:
|
||||
-; ALL-NEXT: vmovaps 8(%rdi), %xmm0
|
||||
+; ALL-NEXT: vmovups 8(%rdi), %xmm0
|
||||
; ALL-NEXT: retq
|
||||
;
|
||||
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
|
||||
; X32-AVX512F: # %bb.0:
|
||||
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
-; X32-AVX512F-NEXT: vmovaps 8(%eax), %xmm0
|
||||
+; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0
|
||||
; X32-AVX512F-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds double, double* %ptr, i64 1
|
||||
%ptr1 = getelementptr inbounds double, double* %ptr, i64 2
|
||||
@@ -190,7 +190,7 @@ define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline
|
||||
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
|
||||
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
|
||||
; ALL: # %bb.0:
|
||||
-; ALL-NEXT: vmovaps 40(%rdi), %xmm0
|
||||
+; ALL-NEXT: vmovups 40(%rdi), %xmm0
|
||||
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
@@ -198,7 +198,7 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s
|
||||
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
|
||||
; X32-AVX512F: # %bb.0:
|
||||
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
-; X32-AVX512F-NEXT: vmovaps 40(%eax), %xmm0
|
||||
+; X32-AVX512F-NEXT: vmovups 40(%eax), %xmm0
|
||||
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
||||
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; X32-AVX512F-NEXT: retl
|
6131
debian/patches/julia/llvm-rL327898.patch
vendored
Normal file
6131
debian/patches/julia/llvm-rL327898.patch
vendored
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user