mirror of
https://git.proxmox.com/git/llvm-toolchain
synced 2025-06-14 20:00:43 +00:00
remove julia patches
This commit is contained in:
parent
3a82f68a05
commit
001fee1fb8
@ -1,72 +0,0 @@
|
|||||||
From 6e7b660ee185445640110c80d80aafd436682fca Mon Sep 17 00:00:00 2001
|
|
||||||
From: Yichao Yu <yyc1992@gmail.com>
|
|
||||||
Date: Fri, 9 Dec 2016 15:59:46 -0500
|
|
||||||
Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
|
|
||||||
|
|
||||||
---
|
|
||||||
lib/MC/MCObjectFileInfo.cpp | 2 ++
|
|
||||||
.../AArch64/ELF_ARM64_BE-large-relocations.s | 18 ++++++++++++++++++
|
|
||||||
.../RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s | 18 ++++++++++++++++++
|
|
||||||
3 files changed, 38 insertions(+)
|
|
||||||
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
|
|
||||||
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
|
|
||||||
|
|
||||||
Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
|
|
||||||
===================================================================
|
|
||||||
--- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp
|
|
||||||
+++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
|
|
||||||
@@ -328,6 +328,8 @@ void MCObjectFileInfo::initELFMCObjectFi
|
|
||||||
dwarf::DW_EH_PE_sdata4
|
|
||||||
: dwarf::DW_EH_PE_absptr;
|
|
||||||
break;
|
|
||||||
+ case Triple::aarch64:
|
|
||||||
+ case Triple::aarch64_be:
|
|
||||||
case Triple::x86_64:
|
|
||||||
if (PositionIndependent) {
|
|
||||||
PersonalityEncoding =
|
|
||||||
Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
|
|
||||||
===================================================================
|
|
||||||
--- /dev/null
|
|
||||||
+++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
|
|
||||||
@@ -0,0 +1,18 @@
|
|
||||||
+# RUN: llvm-mc -triple=aarch64_be-none-linux-gnu -code-model=large -filetype=obj -o %T/be-large-reloc.o %s
|
|
||||||
+# RUN: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section be-large-reloc.o,.eh_frame=0x10000 -map-section be-large-reloc.o,.text=0xffff000000000000 -check=%s %T/be-large-reloc.o
|
|
||||||
+
|
|
||||||
+ .text
|
|
||||||
+ .globl g
|
|
||||||
+ .p2align 2
|
|
||||||
+ .type g,@function
|
|
||||||
+g:
|
|
||||||
+ .cfi_startproc
|
|
||||||
+ mov x0, xzr
|
|
||||||
+ ret
|
|
||||||
+ .Lfunc_end0:
|
|
||||||
+ .size g, .Lfunc_end0-g
|
|
||||||
+ .cfi_endproc
|
|
||||||
+
|
|
||||||
+# Skip the CIE and load the 8 bytes PC begin pointer.
|
|
||||||
+# Assuming the CIE and the FDE length are both 4 bytes.
|
|
||||||
+# rtdyld-check: *{8}(section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc)
|
|
||||||
Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
|
|
||||||
===================================================================
|
|
||||||
--- /dev/null
|
|
||||||
+++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
|
|
||||||
@@ -0,0 +1,18 @@
|
|
||||||
+# RUN: llvm-mc -triple=arm64-none-linux-gnu -code-model=large -filetype=obj -o %T/large-reloc.o %s
|
|
||||||
+# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o
|
|
||||||
+
|
|
||||||
+ .text
|
|
||||||
+ .globl g
|
|
||||||
+ .p2align 2
|
|
||||||
+ .type g,@function
|
|
||||||
+g:
|
|
||||||
+ .cfi_startproc
|
|
||||||
+ mov x0, xzr
|
|
||||||
+ ret
|
|
||||||
+ .Lfunc_end0:
|
|
||||||
+ .size g, .Lfunc_end0-g
|
|
||||||
+ .cfi_endproc
|
|
||||||
+
|
|
||||||
+# Skip the CIE and load the 8 bytes PC begin pointer.
|
|
||||||
+# Assuming the CIE and the FDE length are both 4 bytes.
|
|
||||||
+# rtdyld-check: *{8}(section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc)
|
|
@ -1,24 +0,0 @@
|
|||||||
From f76abe65e6d07fea5e838c4f8c9a9421c16debb0 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Valentin Churavy <v.churavy@gmail.com>
|
|
||||||
Date: Thu, 5 Jul 2018 12:37:50 -0400
|
|
||||||
Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
|
|
||||||
|
|
||||||
---
|
|
||||||
lib/MC/MCObjectFileInfo.cpp | 2 ++
|
|
||||||
.../AArch64/ELF_ARM64_large-relocations.s | 20 +++++++++++++++++++
|
|
||||||
2 files changed, 22 insertions(+)
|
|
||||||
create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
|
|
||||||
|
|
||||||
Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
|
|
||||||
===================================================================
|
|
||||||
--- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp
|
|
||||||
+++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
|
|
||||||
@@ -291,6 +291,8 @@ void MCObjectFileInfo::initELFMCObjectFi
|
|
||||||
break;
|
|
||||||
case Triple::ppc64:
|
|
||||||
case Triple::ppc64le:
|
|
||||||
+ case Triple::aarch64:
|
|
||||||
+ case Triple::aarch64_be:
|
|
||||||
case Triple::x86_64:
|
|
||||||
FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
|
|
||||||
(Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
|
|
@ -1,53 +0,0 @@
|
|||||||
From f94d12b6108b944199b715f31f25a022f75d2feb Mon Sep 17 00:00:00 2001
|
|
||||||
From: Yichao Yu <yyc1992@gmail.com>
|
|
||||||
Date: Sat, 10 Jun 2017 08:45:13 -0400
|
|
||||||
Subject: [PATCH 4/4] Enable support for floating-point division reductions
|
|
||||||
|
|
||||||
Similar to fsub, fdiv can also be vectorized using fmul.
|
|
||||||
---
|
|
||||||
lib/Transforms/Utils/LoopUtils.cpp | 1 +
|
|
||||||
test/Transforms/LoopVectorize/float-reduction.ll | 22 ++++++++++++++++++++++
|
|
||||||
2 files changed, 23 insertions(+)
|
|
||||||
|
|
||||||
Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp
|
|
||||||
===================================================================
|
|
||||||
--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Utils/LoopUtils.cpp
|
|
||||||
+++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp
|
|
||||||
@@ -513,6 +513,7 @@ RecurrenceDescriptor::isRecurrenceInstr(
|
|
||||||
return InstDesc(Kind == RK_IntegerOr, I);
|
|
||||||
case Instruction::Xor:
|
|
||||||
return InstDesc(Kind == RK_IntegerXor, I);
|
|
||||||
+ case Instruction::FDiv:
|
|
||||||
case Instruction::FMul:
|
|
||||||
return InstDesc(Kind == RK_FloatMult, I, UAI);
|
|
||||||
case Instruction::FSub:
|
|
||||||
Index: llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll
|
|
||||||
===================================================================
|
|
||||||
--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/LoopVectorize/float-reduction.ll
|
|
||||||
+++ llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll
|
|
||||||
@@ -44,3 +44,25 @@ for.body:
|
|
||||||
for.end: ; preds = %for.body
|
|
||||||
ret float %sub
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+;CHECK-LABEL: @foodiv(
|
|
||||||
+;CHECK: fdiv fast <4 x float>
|
|
||||||
+;CHECK: ret
|
|
||||||
+define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
|
|
||||||
+entry:
|
|
||||||
+ br label %for.body
|
|
||||||
+
|
|
||||||
+for.body: ; preds = %for.body, %entry
|
|
||||||
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
||||||
+ %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ]
|
|
||||||
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
|
|
||||||
+ %0 = load float, float* %arrayidx, align 4
|
|
||||||
+ %sub = fdiv fast float %sum.04, %0
|
|
||||||
+ %indvars.iv.next = add i64 %indvars.iv, 1
|
|
||||||
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
|
||||||
+ %exitcond = icmp eq i32 %lftr.wideiv, 200
|
|
||||||
+ br i1 %exitcond, label %for.end, label %for.body
|
|
||||||
+
|
|
||||||
+for.end: ; preds = %for.body
|
|
||||||
+ ret float %sub
|
|
||||||
+}
|
|
@ -1,82 +0,0 @@
|
|||||||
commit 6a311a7a804831fea43cfb2f61322adcb407a1af
|
|
||||||
Author: Keno Fischer <keno@juliacomputing.com>
|
|
||||||
Date: Thu Jan 18 15:57:05 2018 -0500
|
|
||||||
|
|
||||||
[JumpThreading] Don't restrict cast-traversal to i1
|
|
||||||
|
|
||||||
Summary:
|
|
||||||
In D17663, JumpThreading learned to look through simple cast instructions,
|
|
||||||
but only if the source of those cast instructions was a phi/cmp i1
|
|
||||||
(in an effort to limit compile time effects). I think this condition
|
|
||||||
is too restrictive. For switches with limited value range, InstCombine
|
|
||||||
will readily introduce an extra `trunc` instruction to a smaller
|
|
||||||
integer type (e.g. from i8 to i2), leaving us in the somewhat perverse
|
|
||||||
situation that jump-threading would work before running instcombine,
|
|
||||||
but not after. Since instcombine produces this pattern, I think we
|
|
||||||
need to consider it canonical and support it in JumpThreading.
|
|
||||||
In general, for limiting recursion, I think the existing restriction
|
|
||||||
to phi and cmp nodes should be sufficient to avoid looking through
|
|
||||||
unprofitable chains of instructions.
|
|
||||||
|
|
||||||
Reviewers: haicheng, gberry, bmakam, mcrosier
|
|
||||||
|
|
||||||
Subscribers: llvm-commits
|
|
||||||
|
|
||||||
Differential Revision: https://reviews.llvm.org/D42262
|
|
||||||
|
|
||||||
Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp
|
|
||||||
===================================================================
|
|
||||||
--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Scalar/JumpThreading.cpp
|
|
||||||
+++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp
|
|
||||||
@@ -656,11 +656,9 @@ bool JumpThreadingPass::ComputeValueKnow
|
|
||||||
}
|
|
||||||
|
|
||||||
// Handle Cast instructions. Only see through Cast when the source operand is
|
|
||||||
- // PHI or Cmp and the source type is i1 to save the compilation time.
|
|
||||||
+ // PHI or Cmp to save the compilation time.
|
|
||||||
if (CastInst *CI = dyn_cast<CastInst>(I)) {
|
|
||||||
Value *Source = CI->getOperand(0);
|
|
||||||
- if (!Source->getType()->isIntegerTy(1))
|
|
||||||
- return false;
|
|
||||||
if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
|
|
||||||
return false;
|
|
||||||
ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
|
|
||||||
Index: llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll
|
|
||||||
===================================================================
|
|
||||||
--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/JumpThreading/basic.ll
|
|
||||||
+++ llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll
|
|
||||||
@@ -547,6 +547,34 @@ l5:
|
|
||||||
; CHECK: }
|
|
||||||
}
|
|
||||||
|
|
||||||
+define i1 @trunc_switch(i1 %arg) {
|
|
||||||
+; CHECK-LABEL: @trunc_switch
|
|
||||||
+top:
|
|
||||||
+; CHECK: br i1 %arg, label %exitA, label %exitB
|
|
||||||
+ br i1 %arg, label %common, label %B
|
|
||||||
+
|
|
||||||
+B:
|
|
||||||
+ br label %common
|
|
||||||
+
|
|
||||||
+common:
|
|
||||||
+ %phi = phi i8 [ 2, %B ], [ 1, %top ]
|
|
||||||
+ %trunc = trunc i8 %phi to i2
|
|
||||||
+; CHECK-NOT: switch
|
|
||||||
+ switch i2 %trunc, label %unreach [
|
|
||||||
+ i2 1, label %exitA
|
|
||||||
+ i2 -2, label %exitB
|
|
||||||
+ ]
|
|
||||||
+
|
|
||||||
+unreach:
|
|
||||||
+ unreachable
|
|
||||||
+
|
|
||||||
+exitA:
|
|
||||||
+ ret i1 true
|
|
||||||
+
|
|
||||||
+exitB:
|
|
||||||
+ ret i1 false
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
; CHECK-LABEL: define void @h_con(i32 %p) {
|
|
||||||
define void @h_con(i32 %p) {
|
|
||||||
%x = icmp ult i32 %p, 5
|
|
@ -1,677 +0,0 @@
|
|||||||
From 45bc0f0badbdbabaed7d204757c2aad7ab49a3fe Mon Sep 17 00:00:00 2001
|
|
||||||
From: DokFaust <rodia@autistici.org>
|
|
||||||
Date: Mon, 11 Jun 2018 12:59:42 +0200
|
|
||||||
Subject: [PATCH] PerfJITEventListener integration, requires compile flag
|
|
||||||
LLVM_USE_PERF
|
|
||||||
|
|
||||||
---
|
|
||||||
CMakeLists.txt | 13 +
|
|
||||||
include/llvm/Config/config.h.cmake | 3 +
|
|
||||||
include/llvm/Config/llvm-config.h.cmake | 3 +
|
|
||||||
.../llvm/ExecutionEngine/JITEventListener.h | 9 +
|
|
||||||
lib/ExecutionEngine/CMakeLists.txt | 4 +
|
|
||||||
lib/ExecutionEngine/LLVMBuild.txt | 2 +-
|
|
||||||
lib/ExecutionEngine/Orc/LLVMBuild.txt | 2 +-
|
|
||||||
.../PerfJITEvents/CMakeLists.txt | 5 +
|
|
||||||
.../PerfJITEvents/LLVMBuild.txt | 23 +
|
|
||||||
.../PerfJITEvents/PerfJITEventListener.cpp | 492 ++++++++++++++++++
|
|
||||||
10 files changed, 554 insertions(+), 2 deletions(-)
|
|
||||||
create mode 100644 lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
|
|
||||||
create mode 100644 lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
|
|
||||||
create mode 100644 lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
|
|
||||||
|
|
||||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
|
||||||
index f8da6cf9211..fb92c825a46 100644
|
|
||||||
--- a/CMakeLists.txt
|
|
||||||
+++ b/CMakeLists.txt
|
|
||||||
@@ -426,6 +426,16 @@ if( LLVM_USE_OPROFILE )
|
|
||||||
endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
|
||||||
endif( LLVM_USE_OPROFILE )
|
|
||||||
|
|
||||||
+option(LLVM_USE_PERF
|
|
||||||
+ "Use perf JIT interface to inform perf about JIT code" OFF)
|
|
||||||
+
|
|
||||||
+# If enabled, verify we are on a platform that supports perf.
|
|
||||||
+if( LLVM_USE_PERF )
|
|
||||||
+ if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
|
||||||
+ message(FATAL_ERROR "perf support is available on Linux only.")
|
|
||||||
+ endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
|
||||||
+endif( LLVM_USE_PERF )
|
|
||||||
+
|
|
||||||
set(LLVM_USE_SANITIZER "" CACHE STRING
|
|
||||||
"Define the sanitizer used to build binaries and tests.")
|
|
||||||
set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
|
|
||||||
@@ -634,6 +644,9 @@ endif (LLVM_USE_INTEL_JITEVENTS)
|
|
||||||
if (LLVM_USE_OPROFILE)
|
|
||||||
set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT)
|
|
||||||
endif (LLVM_USE_OPROFILE)
|
|
||||||
+if (LLVM_USE_PERF)
|
|
||||||
+ set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents)
|
|
||||||
+endif (LLVM_USE_PERF)
|
|
||||||
|
|
||||||
message(STATUS "Constructing LLVMBuild project information")
|
|
||||||
execute_process(
|
|
||||||
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
|
|
||||||
index 940f8420304..17787ed779b 100644
|
|
||||||
--- a/include/llvm/Config/config.h.cmake
|
|
||||||
+++ b/include/llvm/Config/config.h.cmake
|
|
||||||
@@ -377,6 +377,9 @@
|
|
||||||
/* Define if we have the oprofile JIT-support library */
|
|
||||||
#cmakedefine01 LLVM_USE_OPROFILE
|
|
||||||
|
|
||||||
+/* Define if we have the perf JIT-support library */
|
|
||||||
+#cmakedefine01 LLVM_USE_PERF
|
|
||||||
+
|
|
||||||
/* LLVM version information */
|
|
||||||
#cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}"
|
|
||||||
|
|
||||||
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
|
|
||||||
index 4daa00f3bc4..8d9c3b24d52 100644
|
|
||||||
--- a/include/llvm/Config/llvm-config.h.cmake
|
|
||||||
+++ b/include/llvm/Config/llvm-config.h.cmake
|
|
||||||
@@ -65,6 +65,9 @@
|
|
||||||
/* Define if we have the oprofile JIT-support library */
|
|
||||||
#cmakedefine01 LLVM_USE_OPROFILE
|
|
||||||
|
|
||||||
+/* Define if we have the perf JIT-support library */
|
|
||||||
+#cmakedefine01 LLVM_USE_PERF
|
|
||||||
+
|
|
||||||
/* Major version of the LLVM API */
|
|
||||||
#define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
|
|
||||||
|
|
||||||
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
|
|
||||||
index ff7840f00a4..1cc2c423a8b 100644
|
|
||||||
--- a/include/llvm/ExecutionEngine/JITEventListener.h
|
|
||||||
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
|
|
||||||
@@ -115,6 +115,15 @@ public:
|
|
||||||
}
|
|
||||||
#endif // USE_OPROFILE
|
|
||||||
|
|
||||||
+#ifdef LLVM_USE_PERF
|
|
||||||
+ static JITEventListener *createPerfJITEventListener();
|
|
||||||
+#else
|
|
||||||
+ static JITEventListener *createPerfJITEventListener()
|
|
||||||
+ {
|
|
||||||
+ return nullptr;
|
|
||||||
+ }
|
|
||||||
+#endif // LLVM_USE_PERF
|
|
||||||
+
|
|
||||||
private:
|
|
||||||
virtual void anchor();
|
|
||||||
};
|
|
||||||
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
|
|
||||||
index 84b34919e44..893d113a685 100644
|
|
||||||
--- a/lib/ExecutionEngine/CMakeLists.txt
|
|
||||||
+++ b/lib/ExecutionEngine/CMakeLists.txt
|
|
||||||
@@ -30,3 +30,7 @@ endif( LLVM_USE_OPROFILE )
|
|
||||||
if( LLVM_USE_INTEL_JITEVENTS )
|
|
||||||
add_subdirectory(IntelJITEvents)
|
|
||||||
endif( LLVM_USE_INTEL_JITEVENTS )
|
|
||||||
+
|
|
||||||
+if( LLVM_USE_PERF )
|
|
||||||
+ add_subdirectory(PerfJITEvents)
|
|
||||||
+endif( LLVM_USE_PERF )
|
|
||||||
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
|
|
||||||
index 9d29a41f504..b6e1bda6a51 100644
|
|
||||||
--- a/lib/ExecutionEngine/LLVMBuild.txt
|
|
||||||
+++ b/lib/ExecutionEngine/LLVMBuild.txt
|
|
||||||
@@ -16,7 +16,7 @@
|
|
||||||
;===------------------------------------------------------------------------===;
|
|
||||||
|
|
||||||
[common]
|
|
||||||
-subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc
|
|
||||||
+subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents
|
|
||||||
|
|
||||||
[component_0]
|
|
||||||
type = Library
|
|
||||||
diff --git a/lib/ExecutionEngine/Orc/LLVMBuild.txt b/lib/ExecutionEngine/Orc/LLVMBuild.txt
|
|
||||||
index 8f05172e77a..ef4ae64e823 100644
|
|
||||||
--- a/lib/ExecutionEngine/Orc/LLVMBuild.txt
|
|
||||||
+++ b/lib/ExecutionEngine/Orc/LLVMBuild.txt
|
|
||||||
@@ -19,4 +19,4 @@
|
|
||||||
type = Library
|
|
||||||
name = OrcJIT
|
|
||||||
parent = ExecutionEngine
|
|
||||||
-required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
|
|
||||||
+required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
|
|
||||||
diff --git a/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..136cc429d02
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
|
|
||||||
@@ -0,0 +1,5 @@
|
|
||||||
+add_llvm_library(LLVMPerfJITEvents
|
|
||||||
+ PerfJITEventListener.cpp
|
|
||||||
+ )
|
|
||||||
+
|
|
||||||
+add_dependencies(LLVMPerfJITEvents LLVMCodeGen)
|
|
||||||
diff --git a/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..b1958a69260
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
|
|
||||||
@@ -0,0 +1,23 @@
|
|
||||||
+;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===;
|
|
||||||
+;
|
|
||||||
+; The LLVM Compiler Infrastructure
|
|
||||||
+;
|
|
||||||
+; This file is distributed under the University of Illinois Open Source
|
|
||||||
+; License. See LICENSE.TXT for details.
|
|
||||||
+;
|
|
||||||
+;===------------------------------------------------------------------------===;
|
|
||||||
+;
|
|
||||||
+; This is an LLVMBuild description file for the components in this subdirectory.
|
|
||||||
+;
|
|
||||||
+; For more information on the LLVMBuild system, please see:
|
|
||||||
+;
|
|
||||||
+; http://llvm.org/docs/LLVMBuild.html
|
|
||||||
+;
|
|
||||||
+;===------------------------------------------------------------------------===;
|
|
||||||
+
|
|
||||||
+[component_0]
|
|
||||||
+type = OptionalLibrary
|
|
||||||
+name = PerfJITEvents
|
|
||||||
+parent = ExecutionEngine
|
|
||||||
+required_libraries = CodeGen Core DebugInfoDWARF ExecutionEngine Object Support TransformUtils
|
|
||||||
+
|
|
||||||
diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
|
|
||||||
new file mode 100644
|
|
||||||
index 00000000000..c2b97dd59f3
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
|
|
||||||
@@ -0,0 +1,492 @@
|
|
||||||
+//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
|
|
||||||
+//
|
|
||||||
+// The LLVM Compiler Infrastructure
|
|
||||||
+//
|
|
||||||
+// This file is distributed under the University of Illinois Open Source
|
|
||||||
+// License. See LICENSE.TXT for details.
|
|
||||||
+//
|
|
||||||
+//===----------------------------------------------------------------------===//
|
|
||||||
+//
|
|
||||||
+// This file defines a JITEventListener object that tells perf about JITted
|
|
||||||
+// functions, including source line information.
|
|
||||||
+//
|
|
||||||
+// Documentation for perf jit integration is available at:
|
|
||||||
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
|
|
||||||
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
|
|
||||||
+//
|
|
||||||
+//===----------------------------------------------------------------------===//
|
|
||||||
+
|
|
||||||
+#include "llvm/ADT/Twine.h"
|
|
||||||
+#include "llvm/Config/config.h"
|
|
||||||
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
|
|
||||||
+#include "llvm/ExecutionEngine/JITEventListener.h"
|
|
||||||
+#include "llvm/Object/ObjectFile.h"
|
|
||||||
+#include "llvm/Object/SymbolSize.h"
|
|
||||||
+#include "llvm/Support/Debug.h"
|
|
||||||
+#include "llvm/Support/Errno.h"
|
|
||||||
+#include "llvm/Support/FileSystem.h"
|
|
||||||
+#include "llvm/Support/MemoryBuffer.h"
|
|
||||||
+#include "llvm/Support/Mutex.h"
|
|
||||||
+#include "llvm/Support/MutexGuard.h"
|
|
||||||
+#include "llvm/Support/Path.h"
|
|
||||||
+#include "llvm/Support/Process.h"
|
|
||||||
+#include "llvm/Support/Threading.h"
|
|
||||||
+#include "llvm/Support/raw_ostream.h"
|
|
||||||
+
|
|
||||||
+#include <sys/mman.h> // mmap()
|
|
||||||
+#include <sys/types.h> // getpid()
|
|
||||||
+#include <time.h> // clock_gettime(), time(), localtime_r()
|
|
||||||
+#include <unistd.h> // for getpid(), read(), close()
|
|
||||||
+
|
|
||||||
+using namespace llvm;
|
|
||||||
+using namespace llvm::object;
|
|
||||||
+typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
|
|
||||||
+
|
|
||||||
+namespace {
|
|
||||||
+
|
|
||||||
+// language identifier (XXX: should we generate something better from debug
|
|
||||||
+// info?)
|
|
||||||
+#define JIT_LANG "llvm-IR"
|
|
||||||
+#define LLVM_PERF_JIT_MAGIC \
|
|
||||||
+ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
|
|
||||||
+ (uint32_t)'D')
|
|
||||||
+#define LLVM_PERF_JIT_VERSION 1
|
|
||||||
+
|
|
||||||
+// bit 0: set if the jitdump file is using an architecture-specific timestamp
|
|
||||||
+// clock source
|
|
||||||
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
|
|
||||||
+
|
|
||||||
+struct LLVMPerfJitHeader;
|
|
||||||
+
|
|
||||||
+class PerfJITEventListener : public JITEventListener {
|
|
||||||
+public:
|
|
||||||
+ PerfJITEventListener();
|
|
||||||
+ ~PerfJITEventListener() {
|
|
||||||
+ if (MarkerAddr)
|
|
||||||
+ CloseMarker();
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ void NotifyObjectEmitted(const ObjectFile &Obj,
|
|
||||||
+ const RuntimeDyld::LoadedObjectInfo &L) override;
|
|
||||||
+ void NotifyFreeingObject(const ObjectFile &Obj) override;
|
|
||||||
+
|
|
||||||
+private:
|
|
||||||
+ bool InitDebuggingDir();
|
|
||||||
+ bool OpenMarker();
|
|
||||||
+ void CloseMarker();
|
|
||||||
+ static bool FillMachine(LLVMPerfJitHeader &hdr);
|
|
||||||
+
|
|
||||||
+ void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
|
|
||||||
+ uint64_t CodeSize);
|
|
||||||
+ void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
|
|
||||||
+
|
|
||||||
+ // cache lookups
|
|
||||||
+ pid_t Pid;
|
|
||||||
+
|
|
||||||
+ // base directory for output data
|
|
||||||
+ std::string JitPath;
|
|
||||||
+
|
|
||||||
+ // output data stream, closed via Dumpstream
|
|
||||||
+ int DumpFd = -1;
|
|
||||||
+
|
|
||||||
+ // output data stream
|
|
||||||
+ std::unique_ptr<raw_fd_ostream> Dumpstream;
|
|
||||||
+
|
|
||||||
+ // prevent concurrent dumps from messing up the output file
|
|
||||||
+ sys::Mutex Mutex;
|
|
||||||
+
|
|
||||||
+ // perf mmap marker
|
|
||||||
+ void *MarkerAddr = NULL;
|
|
||||||
+
|
|
||||||
+ // perf support ready
|
|
||||||
+ bool SuccessfullyInitialized = false;
|
|
||||||
+
|
|
||||||
+ // identifier for functions, primarily to identify when moving them around
|
|
||||||
+ uint64_t CodeGeneration = 1;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+// The following are POD struct definitions from the perf jit specification
|
|
||||||
+
|
|
||||||
+enum LLVMPerfJitRecordType {
|
|
||||||
+ JIT_CODE_LOAD = 0,
|
|
||||||
+ JIT_CODE_MOVE = 1, // not emitted, code isn't moved
|
|
||||||
+ JIT_CODE_DEBUG_INFO = 2,
|
|
||||||
+ JIT_CODE_CLOSE = 3, // not emitted, unnecessary
|
|
||||||
+ JIT_CODE_UNWINDING_INFO = 4, // not emitted
|
|
||||||
+
|
|
||||||
+ JIT_CODE_MAX
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct LLVMPerfJitHeader {
|
|
||||||
+ uint32_t Magic; // characters "JiTD"
|
|
||||||
+ uint32_t Version; // header version
|
|
||||||
+ uint32_t TotalSize; // total size of header
|
|
||||||
+ uint32_t ElfMach; // elf mach target
|
|
||||||
+ uint32_t Pad1; // reserved
|
|
||||||
+ uint32_t Pid;
|
|
||||||
+ uint64_t Timestamp; // timestamp
|
|
||||||
+ uint64_t Flags; // flags
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+// record prefix (mandatory in each record)
|
|
||||||
+struct LLVMPerfJitRecordPrefix {
|
|
||||||
+ uint32_t Id; // record type identifier
|
|
||||||
+ uint32_t TotalSize;
|
|
||||||
+ uint64_t Timestamp;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct LLVMPerfJitRecordCodeLoad {
|
|
||||||
+ LLVMPerfJitRecordPrefix Prefix;
|
|
||||||
+
|
|
||||||
+ uint32_t Pid;
|
|
||||||
+ uint32_t Tid;
|
|
||||||
+ uint64_t Vma;
|
|
||||||
+ uint64_t CodeAddr;
|
|
||||||
+ uint64_t CodeSize;
|
|
||||||
+ uint64_t CodeIndex;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct LLVMPerfJitDebugEntry {
|
|
||||||
+ uint64_t Addr;
|
|
||||||
+ int Lineno; // source line number starting at 1
|
|
||||||
+ int Discrim; // column discriminator, 0 is default
|
|
||||||
+ // followed by null terminated filename, \xff\0 if same as previous entry
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+struct LLVMPerfJitRecordDebugInfo {
|
|
||||||
+ LLVMPerfJitRecordPrefix Prefix;
|
|
||||||
+
|
|
||||||
+ uint64_t CodeAddr;
|
|
||||||
+ uint64_t NrEntry;
|
|
||||||
+ // followed by NrEntry LLVMPerfJitDebugEntry records
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static inline uint64_t timespec_to_ns(const struct timespec *ts) {
|
|
||||||
+ const uint64_t NanoSecPerSec = 1000000000;
|
|
||||||
+ return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static inline uint64_t perf_get_timestamp(void) {
|
|
||||||
+ struct timespec ts;
|
|
||||||
+ int ret;
|
|
||||||
+
|
|
||||||
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
||||||
+ if (ret)
|
|
||||||
+ return 0;
|
|
||||||
+
|
|
||||||
+ return timespec_to_ns(&ts);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) {
|
|
||||||
+ // check if clock-source is supported
|
|
||||||
+ if (!perf_get_timestamp()) {
|
|
||||||
+ errs() << "kernel does not support CLOCK_MONOTONIC\n";
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (!InitDebuggingDir()) {
|
|
||||||
+ errs() << "could not initialize debugging directory\n";
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ std::string Filename;
|
|
||||||
+ raw_string_ostream FilenameBuf(Filename);
|
|
||||||
+ FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
|
|
||||||
+
|
|
||||||
+ // Need to open ourselves, because we need to hand the FD to OpenMarker() and
|
|
||||||
+ // raw_fd_ostream doesn't expose the FD.
|
|
||||||
+ using sys::fs::openFileForWrite;
|
|
||||||
+ if (auto EC =
|
|
||||||
+ openFileForWrite(FilenameBuf.str(), DumpFd, sys::fs::F_RW, 0666)) {
|
|
||||||
+ errs() << "could not open JIT dump file " << FilenameBuf.str() << ": "
|
|
||||||
+ << EC.message() << "\n";
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ Dumpstream = make_unique<raw_fd_ostream>(DumpFd, true);
|
|
||||||
+
|
|
||||||
+ LLVMPerfJitHeader Header = {0};
|
|
||||||
+ if (!FillMachine(Header))
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ // signal this process emits JIT information
|
|
||||||
+ if (!OpenMarker())
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ // emit dumpstream header
|
|
||||||
+ Header.Magic = LLVM_PERF_JIT_MAGIC;
|
|
||||||
+ Header.Version = LLVM_PERF_JIT_VERSION;
|
|
||||||
+ Header.TotalSize = sizeof(Header);
|
|
||||||
+ Header.Pid = Pid;
|
|
||||||
+ Header.Timestamp = perf_get_timestamp();
|
|
||||||
+ Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
|
|
||||||
+
|
|
||||||
+ // Everything initialized, can do profiling now.
|
|
||||||
+ if (!Dumpstream->has_error())
|
|
||||||
+ SuccessfullyInitialized = true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void PerfJITEventListener::NotifyObjectEmitted(
|
|
||||||
+ const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) {
|
|
||||||
+
|
|
||||||
+ if (!SuccessfullyInitialized)
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
|
|
||||||
+ const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
|
|
||||||
+
|
|
||||||
+ // Get the address of the object image for use as a unique identifier
|
|
||||||
+ std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
|
|
||||||
+
|
|
||||||
+ // Use symbol info to iterate over functions in the object.
|
|
||||||
+ for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
|
|
||||||
+ SymbolRef Sym = P.first;
|
|
||||||
+ std::string SourceFileName;
|
|
||||||
+
|
|
||||||
+ Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
|
|
||||||
+ if (!SymTypeOrErr) {
|
|
||||||
+ // There's not much we can do with errors here
|
|
||||||
+ consumeError(SymTypeOrErr.takeError());
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ SymbolRef::Type SymType = *SymTypeOrErr;
|
|
||||||
+ if (SymType != SymbolRef::ST_Function)
|
|
||||||
+ continue;
|
|
||||||
+
|
|
||||||
+ Expected<StringRef> Name = Sym.getName();
|
|
||||||
+ if (!Name) {
|
|
||||||
+ consumeError(Name.takeError());
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ Expected<uint64_t> AddrOrErr = Sym.getAddress();
|
|
||||||
+ if (!AddrOrErr) {
|
|
||||||
+ consumeError(AddrOrErr.takeError());
|
|
||||||
+ continue;
|
|
||||||
+ }
|
|
||||||
+ uint64_t Addr = *AddrOrErr;
|
|
||||||
+ uint64_t Size = P.second;
|
|
||||||
+
|
|
||||||
+ // According to spec debugging info has to come before loading the
|
|
||||||
+ // corresponding code load.
|
|
||||||
+ DILineInfoTable Lines = Context->getLineInfoForAddressRange(
|
|
||||||
+ Addr, Size, FileLineInfoKind::AbsoluteFilePath);
|
|
||||||
+
|
|
||||||
+ NotifyDebug(Addr, Lines);
|
|
||||||
+ NotifyCode(Name, Addr, Size);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ Dumpstream->flush();
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
|
|
||||||
+ // perf currently doesn't have an interface for unloading. But munmap()ing the
|
|
||||||
+ // code section does, so that's ok.
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+bool PerfJITEventListener::InitDebuggingDir() {
|
|
||||||
+ time_t Time;
|
|
||||||
+ struct tm LocalTime;
|
|
||||||
+ char TimeBuffer[sizeof("YYYYMMDD")];
|
|
||||||
+ SmallString<64> Path;
|
|
||||||
+
|
|
||||||
+ // search for location to dump data to
|
|
||||||
+ if (const char *BaseDir = getenv("JITDUMPDIR"))
|
|
||||||
+ Path.append(BaseDir);
|
|
||||||
+ else if (!sys::path::home_directory(Path))
|
|
||||||
+ Path = ".";
|
|
||||||
+
|
|
||||||
+ // create debug directory
|
|
||||||
+ Path += "/.debug/jit/";
|
|
||||||
+ if (auto EC = sys::fs::create_directories(Path)) {
|
|
||||||
+ errs() << "could not create jit cache directory " << Path << ": "
|
|
||||||
+ << EC.message() << "\n";
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ // create unique directory for dump data related to this process
|
|
||||||
+ time(&Time);
|
|
||||||
+ localtime_r(&Time, &LocalTime);
|
|
||||||
+ strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
|
|
||||||
+ Path += JIT_LANG "-jit-";
|
|
||||||
+ Path += TimeBuffer;
|
|
||||||
+
|
|
||||||
+ SmallString<128> UniqueDebugDir;
|
|
||||||
+
|
|
||||||
+ using sys::fs::createUniqueDirectory;
|
|
||||||
+ if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
|
|
||||||
+ errs() << "could not create unique jit cache directory " << UniqueDebugDir
|
|
||||||
+ << ": " << EC.message() << "\n";
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ JitPath = UniqueDebugDir.str();
|
|
||||||
+
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+bool PerfJITEventListener::OpenMarker() {
|
|
||||||
+ // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap
|
|
||||||
+ // is captured either live (perf record running when we mmap) or in deferred
|
|
||||||
+ // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
|
|
||||||
+ // file for more meta data info about the jitted code. Perf report/annotate
|
|
||||||
+ // detect this special filename and process the jitdump file.
|
|
||||||
+ //
|
|
||||||
+ // Mapping must be PROT_EXEC to ensure it is captured by perf record
|
|
||||||
+ // even when not using -d option.
|
|
||||||
+ MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC,
|
|
||||||
+ MAP_PRIVATE, DumpFd, 0);
|
|
||||||
+
|
|
||||||
+ if (MarkerAddr == MAP_FAILED) {
|
|
||||||
+ errs() << "could not mmap JIT marker\n";
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void PerfJITEventListener::CloseMarker() {
|
|
||||||
+ if (!MarkerAddr)
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ munmap(MarkerAddr, sys::Process::getPageSize());
|
|
||||||
+ MarkerAddr = nullptr;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
|
|
||||||
+ char id[16];
|
|
||||||
+ struct {
|
|
||||||
+ uint16_t e_type;
|
|
||||||
+ uint16_t e_machine;
|
|
||||||
+ } info;
|
|
||||||
+
|
|
||||||
+ size_t RequiredMemory = sizeof(id) + sizeof(info);
|
|
||||||
+
|
|
||||||
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
|
|
||||||
+ MemoryBuffer::getFileSlice("/proc/self/exe",
|
|
||||||
+ RequiredMemory,
|
|
||||||
+ 0);
|
|
||||||
+
|
|
||||||
+ // This'll not guarantee that enough data was actually read from the
|
|
||||||
+ // underlying file. Instead the trailing part of the buffer would be
|
|
||||||
+ // zeroed. Given the ELF signature check below that seems ok though,
|
|
||||||
+ // it's unlikely that the file ends just after that, and the
|
|
||||||
+ // consequence would just be that perf wouldn't recognize the
|
|
||||||
+ // signature.
|
|
||||||
+ if (auto EC = MB.getError()) {
|
|
||||||
+ errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
|
|
||||||
+ memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
|
|
||||||
+
|
|
||||||
+ // check ELF signature
|
|
||||||
+ if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
|
|
||||||
+ errs() << "invalid elf signature\n";
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ hdr.ElfMach = info.e_machine;
|
|
||||||
+
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
|
|
||||||
+ uint64_t CodeAddr, uint64_t CodeSize) {
|
|
||||||
+ assert(SuccessfullyInitialized);
|
|
||||||
+
|
|
||||||
+ // 0 length functions can't have samples.
|
|
||||||
+ if (CodeSize == 0)
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ LLVMPerfJitRecordCodeLoad rec;
|
|
||||||
+ rec.Prefix.Id = JIT_CODE_LOAD;
|
|
||||||
+ rec.Prefix.TotalSize = sizeof(rec) + // debug record itself
|
|
||||||
+ Symbol->size() + 1 + // symbol name
|
|
||||||
+ CodeSize; // and code
|
|
||||||
+ rec.Prefix.Timestamp = perf_get_timestamp();
|
|
||||||
+
|
|
||||||
+ rec.CodeSize = CodeSize;
|
|
||||||
+ rec.Vma = 0;
|
|
||||||
+ rec.CodeAddr = CodeAddr;
|
|
||||||
+ rec.Pid = Pid;
|
|
||||||
+ rec.Tid = get_threadid();
|
|
||||||
+
|
|
||||||
+ // avoid interspersing output
|
|
||||||
+ MutexGuard Guard(Mutex);
|
|
||||||
+
|
|
||||||
+ rec.CodeIndex = CodeGeneration++; // under lock!
|
|
||||||
+
|
|
||||||
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
|
|
||||||
+ Dumpstream->write(Symbol->data(), Symbol->size() + 1);
|
|
||||||
+ Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
|
|
||||||
+ DILineInfoTable Lines) {
|
|
||||||
+ assert(SuccessfullyInitialized);
|
|
||||||
+
|
|
||||||
+ // Didn't get useful debug info.
|
|
||||||
+ if (Lines.empty())
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
+ LLVMPerfJitRecordDebugInfo rec;
|
|
||||||
+ rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
|
|
||||||
+ rec.Prefix.TotalSize = sizeof(rec); // will be increased further
|
|
||||||
+ rec.Prefix.Timestamp = perf_get_timestamp();
|
|
||||||
+ rec.CodeAddr = CodeAddr;
|
|
||||||
+ rec.NrEntry = Lines.size();
|
|
||||||
+
|
|
||||||
+ // compute total size of record (variable due to filenames)
|
|
||||||
+ DILineInfoTable::iterator Begin = Lines.begin();
|
|
||||||
+ DILineInfoTable::iterator End = Lines.end();
|
|
||||||
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
|
|
||||||
+ DILineInfo &line = It->second;
|
|
||||||
+ rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
|
|
||||||
+ rec.Prefix.TotalSize += line.FileName.size() + 1;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ // The debug_entry describes the source line information. It is defined as
|
|
||||||
+ // follows in order:
|
|
||||||
+ // * uint64_t code_addr: address of function for which the debug information
|
|
||||||
+ // is generated
|
|
||||||
+ // * uint32_t line : source file line number (starting at 1)
|
|
||||||
+ // * uint32_t discrim : column discriminator, 0 is default
|
|
||||||
+ // * char name[n] : source file name in ASCII, including null termination
|
|
||||||
+
|
|
||||||
+ // avoid interspersing output
|
|
||||||
+ MutexGuard Guard(Mutex);
|
|
||||||
+
|
|
||||||
+ Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
|
|
||||||
+
|
|
||||||
+ for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
|
|
||||||
+ LLVMPerfJitDebugEntry LineInfo;
|
|
||||||
+ DILineInfo &Line = It->second;
|
|
||||||
+
|
|
||||||
+ LineInfo.Addr = It->first;
|
|
||||||
+ // The function re-created by perf is preceded by an ELF
|
|
||||||
+ // header. Need to adjust for that, otherwise the results are
|
|
||||||
+ // wrong.
|
|
||||||
+ LineInfo.Addr += 0x40;
|
|
||||||
+ LineInfo.Lineno = Line.Line;
|
|
||||||
+ LineInfo.Discrim = Line.Discriminator;
|
|
||||||
+
|
|
||||||
+ Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
|
|
||||||
+ sizeof(LineInfo));
|
|
||||||
+ Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+// There should be only a single event listener per process, otherwise perf gets
|
|
||||||
+// confused.
|
|
||||||
+llvm::ManagedStatic<PerfJITEventListener> PerfListener;
|
|
||||||
+
|
|
||||||
+} // end anonymous namespace
|
|
||||||
+
|
|
||||||
+namespace llvm {
|
|
||||||
+JITEventListener *JITEventListener::createPerfJITEventListener() {
|
|
||||||
+ return &*PerfListener;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+} // namespace llvm
|
|
||||||
+
|
|
||||||
--
|
|
||||||
2.17.1
|
|
||||||
|
|
@ -1,89 +0,0 @@
|
|||||||
commit 8eb2b102a203d83fb713f3bf79acf235dabdd8cd
|
|
||||||
Author: Keno Fischer <keno@juliacomputing.com>
|
|
||||||
Date: Mon Jul 30 16:59:08 2018 -0400
|
|
||||||
|
|
||||||
[VNCoercion] Disallow coercion between different ni addrspaces
|
|
||||||
|
|
||||||
Summary:
|
|
||||||
I'm not sure if it would be legal by the IR reference to introduce
|
|
||||||
an addrspacecast here, since the IR reference is a bit vague on
|
|
||||||
the exact semantics, but at least for our usage of it (and I
|
|
||||||
suspect for many others' usage) it is not. For us, addrspacecasts
|
|
||||||
between non-integral address spaces carry frontend information that the
|
|
||||||
optimizer cannot deduce afterwards in a generic way (though we
|
|
||||||
have frontend specific passes in our pipeline that do propagate
|
|
||||||
these). In any case, I'm sure nobody is using it this way at
|
|
||||||
the moment, since it would have introduced inttoptrs, which
|
|
||||||
are definitely illegal.
|
|
||||||
|
|
||||||
Fixes PR38375
|
|
||||||
|
|
||||||
Reviewers: sanjoy, reames, dberlin
|
|
||||||
|
|
||||||
Subscribers: llvm-commits
|
|
||||||
|
|
||||||
Differential Revision: https://reviews.llvm.org/D50010
|
|
||||||
|
|
||||||
diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
|
|
||||||
index c3feea6a0a4..735d1e7b792 100644
|
|
||||||
--- a/lib/Transforms/Utils/VNCoercion.cpp
|
|
||||||
+++ b/lib/Transforms/Utils/VNCoercion.cpp
|
|
||||||
@@ -20,14 +20,21 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
|
|
||||||
StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
+ Type *StoredValTy = StoredVal->getType();
|
|
||||||
+
|
|
||||||
// The store has to be at least as big as the load.
|
|
||||||
if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
- // Don't coerce non-integral pointers to integers or vice versa.
|
|
||||||
- if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
|
|
||||||
- DL.isNonIntegralPointerType(LoadTy))
|
|
||||||
+ bool StoredNI = DL.isNonIntegralPointerType(StoredValTy);
|
|
||||||
+ bool LoadNI = DL.isNonIntegralPointerType(LoadTy);
|
|
||||||
+ if (StoredNI != LoadNI) {
|
|
||||||
return false;
|
|
||||||
+ } else if (StoredNI && LoadNI &&
|
|
||||||
+ cast<PointerType>(StoredValTy)->getAddressSpace() !=
|
|
||||||
+ cast<PointerType>(LoadTy)->getAddressSpace()) {
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
|
|
||||||
index 9ae4132231d..5217fc1a06a 100644
|
|
||||||
--- a/test/Transforms/GVN/non-integral-pointers.ll
|
|
||||||
+++ b/test/Transforms/GVN/non-integral-pointers.ll
|
|
||||||
@@ -1,6 +1,6 @@
|
|
||||||
; RUN: opt -gvn -S < %s | FileCheck %s
|
|
||||||
|
|
||||||
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
|
|
||||||
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
|
|
||||||
target triple = "x86_64-unknown-linux-gnu"
|
|
||||||
|
|
||||||
define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
|
|
||||||
@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
|
|
||||||
alwaysTaken:
|
|
||||||
ret i64 42
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
|
|
||||||
+ ; CHECK-LABEL: @multini(
|
|
||||||
+ ; CHECK-NOT: inttoptr
|
|
||||||
+ ; CHECK-NOT: ptrtoint
|
|
||||||
+ ; CHECK-NOT: addrspacecast
|
|
||||||
+ entry:
|
|
||||||
+ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
|
|
||||||
+ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
|
|
||||||
+
|
|
||||||
+ neverTaken:
|
|
||||||
+ %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
|
|
||||||
+ %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
|
|
||||||
+ ret i8 addrspace(5)* %differentas
|
|
||||||
+
|
|
||||||
+ alwaysTaken:
|
|
||||||
+ ret i8 addrspace(5)* null
|
|
||||||
+ }
|
|
1143
debian/patches/julia/llvm-D50167-scev-umin.patch
vendored
1143
debian/patches/julia/llvm-D50167-scev-umin.patch
vendored
File diff suppressed because it is too large
Load Diff
26
debian/patches/julia/llvm-PPC-addrspaces.patch
vendored
26
debian/patches/julia/llvm-PPC-addrspaces.patch
vendored
@ -1,26 +0,0 @@
|
|||||||
From 15899eaab58e96bb7bbe7a14099674e255656a50 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Valentin Churavy <v.churavy@gmail.com>
|
|
||||||
Date: Fri, 23 Feb 2018 14:41:20 -0500
|
|
||||||
Subject: [PATCH] Make AddrSpaceCast noops on PPC
|
|
||||||
|
|
||||||
PPC, like AArch64, doesn't have address spaces, so we can drop them in the backend
|
|
||||||
---
|
|
||||||
lib/Target/PowerPC/PPCISelLowering.h | 5 +++++
|
|
||||||
1 file changed, 5 insertions(+)
|
|
||||||
|
|
||||||
Index: llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h
|
|
||||||
===================================================================
|
|
||||||
--- llvm-toolchain-6.0-6.0.1.orig/lib/Target/PowerPC/PPCISelLowering.h
|
|
||||||
+++ llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h
|
|
||||||
@@ -889,6 +889,11 @@ namespace llvm {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
|
|
||||||
+ // Addrspacecasts are always noops.
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
|
|
||||||
SelectionDAG &DAG,
|
|
||||||
ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
|
|
@ -1,301 +0,0 @@
|
|||||||
commit b398d8e1fa5a5a914957fa22d0a64db97f6c265e
|
|
||||||
Author: Craig Topper <craig.topper@intel.com>
|
|
||||||
Date: Thu Mar 8 00:21:17 2018 +0000
|
|
||||||
|
|
||||||
[X86] Fix some isel patterns that used aligned vector load instructions with unaligned predicates.
|
|
||||||
|
|
||||||
These patterns weren't checking the alignment of the load, but were using the aligned instructions. This will cause a GP fault if the data isn't aligned.
|
|
||||||
|
|
||||||
I believe these were introduced in r312450.
|
|
||||||
|
|
||||||
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326967 91177308-0d34-0410-b5e6-96231b3b80d8
|
|
||||||
|
|
||||||
diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td
|
|
||||||
index db3dfe56531..50c7763a2c3 100644
|
|
||||||
--- a/lib/Target/X86/X86InstrVecCompiler.td
|
|
||||||
+++ b/lib/Target/X86/X86InstrVecCompiler.td
|
|
||||||
@@ -261,10 +261,10 @@ let Predicates = [HasVLX] in {
|
|
||||||
// will zero the upper bits.
|
|
||||||
// TODO: Is there a safe way to detect whether the producing instruction
|
|
||||||
// already zeroed the upper bits?
|
|
||||||
-multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
|
|
||||||
- ValueType DstTy, ValueType SrcTy,
|
|
||||||
- ValueType ZeroTy, PatFrag memop,
|
|
||||||
- SubRegIndex SubIdx> {
|
|
||||||
+multiclass subvector_zero_lowering<string MoveStr, string LoadStr,
|
|
||||||
+ RegisterClass RC, ValueType DstTy,
|
|
||||||
+ ValueType SrcTy, ValueType ZeroTy,
|
|
||||||
+ PatFrag memop, SubRegIndex SubIdx> {
|
|
||||||
def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
|
|
||||||
(SrcTy RC:$src), (iPTR 0))),
|
|
||||||
(SUBREG_TO_REG (i64 0),
|
|
||||||
@@ -274,91 +274,91 @@ multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
|
|
||||||
(SrcTy (bitconvert (memop addr:$src))),
|
|
||||||
(iPTR 0))),
|
|
||||||
(SUBREG_TO_REG (i64 0),
|
|
||||||
- (!cast<Instruction>("VMOV"#MoveStr#"rm") addr:$src), SubIdx)>;
|
|
||||||
+ (!cast<Instruction>("VMOV"#LoadStr#"rm") addr:$src), SubIdx)>;
|
|
||||||
}
|
|
||||||
|
|
||||||
let Predicates = [HasAVX, NoVLX] in {
|
|
||||||
- defm : subvector_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, loadv2f64,
|
|
||||||
- sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, loadv4f32,
|
|
||||||
- sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, loadv2i64,
|
|
||||||
- sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, loadv2i64,
|
|
||||||
- sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, loadv2i64,
|
|
||||||
- sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, loadv2i64,
|
|
||||||
- sub_xmm>;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-let Predicates = [HasVLX] in {
|
|
||||||
- defm : subvector_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32,
|
|
||||||
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v4f64, v2f64, v8i32,
|
|
||||||
loadv2f64, sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32,
|
|
||||||
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v8f32, v4f32, v8i32,
|
|
||||||
loadv4f32, sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32,
|
|
||||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v4i64, v2i64, v8i32,
|
|
||||||
loadv2i64, sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32,
|
|
||||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i32, v4i32, v8i32,
|
|
||||||
loadv2i64, sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32,
|
|
||||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i16, v8i16, v8i32,
|
|
||||||
loadv2i64, sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32,
|
|
||||||
- loadv2i64, sub_xmm>;
|
|
||||||
-
|
|
||||||
- defm : subvector_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32,
|
|
||||||
- loadv2f64, sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32,
|
|
||||||
- loadv4f32, sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32,
|
|
||||||
- loadv2i64, sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32,
|
|
||||||
- loadv2i64, sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32,
|
|
||||||
- loadv2i64, sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32,
|
|
||||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i8, v16i8, v8i32,
|
|
||||||
loadv2i64, sub_xmm>;
|
|
||||||
+}
|
|
||||||
|
|
||||||
- defm : subvector_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32,
|
|
||||||
- loadv4f64, sub_ymm>;
|
|
||||||
- defm : subvector_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32,
|
|
||||||
- loadv8f32, sub_ymm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32,
|
|
||||||
- loadv4i64, sub_ymm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32,
|
|
||||||
- loadv4i64, sub_ymm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32,
|
|
||||||
- loadv4i64, sub_ymm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32,
|
|
||||||
- loadv4i64, sub_ymm>;
|
|
||||||
+let Predicates = [HasVLX] in {
|
|
||||||
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v4f64,
|
|
||||||
+ v2f64, v8i32, loadv2f64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v8f32,
|
|
||||||
+ v4f32, v8i32, loadv4f32, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v4i64,
|
|
||||||
+ v2i64, v8i32, loadv2i64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i32,
|
|
||||||
+ v4i32, v8i32, loadv2i64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i16,
|
|
||||||
+ v8i16, v8i32, loadv2i64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i8,
|
|
||||||
+ v16i8, v8i32, loadv2i64, sub_xmm>;
|
|
||||||
+
|
|
||||||
+ defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v8f64,
|
|
||||||
+ v2f64, v16i32, loadv2f64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v16f32,
|
|
||||||
+ v4f32, v16i32, loadv4f32, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i64,
|
|
||||||
+ v2i64, v16i32, loadv2i64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i32,
|
|
||||||
+ v4i32, v16i32, loadv2i64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i16,
|
|
||||||
+ v8i16, v16i32, loadv2i64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v64i8,
|
|
||||||
+ v16i8, v16i32, loadv2i64, sub_xmm>;
|
|
||||||
+
|
|
||||||
+ defm : subvector_zero_lowering<"APDZ256", "UPDZ256", VR256X, v8f64,
|
|
||||||
+ v4f64, v16i32, loadv4f64, sub_ymm>;
|
|
||||||
+ defm : subvector_zero_lowering<"APSZ256", "UPDZ256", VR256X, v16f32,
|
|
||||||
+ v8f32, v16i32, loadv8f32, sub_ymm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v8i64,
|
|
||||||
+ v4i64, v16i32, loadv4i64, sub_ymm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v16i32,
|
|
||||||
+ v8i32, v16i32, loadv4i64, sub_ymm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v32i16,
|
|
||||||
+ v16i16, v16i32, loadv4i64, sub_ymm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v64i8,
|
|
||||||
+ v32i8, v16i32, loadv4i64, sub_ymm>;
|
|
||||||
}
|
|
||||||
|
|
||||||
let Predicates = [HasAVX512, NoVLX] in {
|
|
||||||
- defm : subvector_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, loadv2f64,
|
|
||||||
- sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, loadv4f32,
|
|
||||||
- sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, loadv2i64,
|
|
||||||
- sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, loadv2i64,
|
|
||||||
- sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, loadv2i64,
|
|
||||||
- sub_xmm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, loadv2i64,
|
|
||||||
- sub_xmm>;
|
|
||||||
-
|
|
||||||
- defm : subvector_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32,
|
|
||||||
- loadv4f64, sub_ymm>;
|
|
||||||
- defm : subvector_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32,
|
|
||||||
- loadv8f32, sub_ymm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32,
|
|
||||||
- loadv4i64, sub_ymm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32,
|
|
||||||
- loadv4i64, sub_ymm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32,
|
|
||||||
- loadv4i64, sub_ymm>;
|
|
||||||
- defm : subvector_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32,
|
|
||||||
- loadv4i64, sub_ymm>;
|
|
||||||
+ defm : subvector_zero_lowering<"APD", "UPD", VR128, v8f64, v2f64,
|
|
||||||
+ v16i32,loadv2f64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"APS", "UPS", VR128, v16f32, v4f32,
|
|
||||||
+ v16i32, loadv4f32, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i64, v2i64,
|
|
||||||
+ v16i32, loadv2i64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i32, v4i32,
|
|
||||||
+ v16i32, loadv2i64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i16, v8i16,
|
|
||||||
+ v16i32, loadv2i64, sub_xmm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQA", "DQU", VR128, v64i8, v16i8,
|
|
||||||
+ v16i32, loadv2i64, sub_xmm>;
|
|
||||||
+
|
|
||||||
+ defm : subvector_zero_lowering<"APDY", "UPDY", VR256, v8f64, v4f64,
|
|
||||||
+ v16i32, loadv4f64, sub_ymm>;
|
|
||||||
+ defm : subvector_zero_lowering<"APSY", "UPSY", VR256, v16f32, v8f32,
|
|
||||||
+ v16i32, loadv8f32, sub_ymm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v8i64, v4i64,
|
|
||||||
+ v16i32, loadv4i64, sub_ymm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v16i32, v8i32,
|
|
||||||
+ v16i32, loadv4i64, sub_ymm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v32i16, v16i16,
|
|
||||||
+ v16i32, loadv4i64, sub_ymm>;
|
|
||||||
+ defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v64i8, v32i8,
|
|
||||||
+ v16i32, loadv4i64, sub_ymm>;
|
|
||||||
}
|
|
||||||
|
|
||||||
// List of opcodes that guaranteed to zero the upper elements of vector regs.
|
|
||||||
diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll
|
|
||||||
index 6ecd8116443..0f2cf594b1c 100644
|
|
||||||
--- a/test/CodeGen/X86/merge-consecutive-loads-256.ll
|
|
||||||
+++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll
|
|
||||||
@@ -28,13 +28,13 @@ define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noi
|
|
||||||
define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
|
|
||||||
; AVX-LABEL: merge_4f64_2f64_2z:
|
|
||||||
; AVX: # %bb.0:
|
|
||||||
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
|
|
||||||
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
|
|
||||||
; AVX-NEXT: retq
|
|
||||||
;
|
|
||||||
; X32-AVX-LABEL: merge_4f64_2f64_2z:
|
|
||||||
; X32-AVX: # %bb.0:
|
|
||||||
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
|
|
||||||
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
|
|
||||||
; X32-AVX-NEXT: retl
|
|
||||||
%ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
|
|
||||||
%val0 = load <2 x double>, <2 x double>* %ptr0
|
|
||||||
@@ -109,13 +109,13 @@ define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline
|
|
||||||
define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
|
|
||||||
; AVX-LABEL: merge_4f64_f64_45zz:
|
|
||||||
; AVX: # %bb.0:
|
|
||||||
-; AVX-NEXT: vmovaps 32(%rdi), %xmm0
|
|
||||||
+; AVX-NEXT: vmovups 32(%rdi), %xmm0
|
|
||||||
; AVX-NEXT: retq
|
|
||||||
;
|
|
||||||
; X32-AVX-LABEL: merge_4f64_f64_45zz:
|
|
||||||
; X32-AVX: # %bb.0:
|
|
||||||
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
-; X32-AVX-NEXT: vmovaps 32(%eax), %xmm0
|
|
||||||
+; X32-AVX-NEXT: vmovups 32(%eax), %xmm0
|
|
||||||
; X32-AVX-NEXT: retl
|
|
||||||
%ptr0 = getelementptr inbounds double, double* %ptr, i64 4
|
|
||||||
%ptr1 = getelementptr inbounds double, double* %ptr, i64 5
|
|
||||||
@@ -155,13 +155,13 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline
|
|
||||||
define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp {
|
|
||||||
; AVX-LABEL: merge_4i64_2i64_3z:
|
|
||||||
; AVX: # %bb.0:
|
|
||||||
-; AVX-NEXT: vmovaps 48(%rdi), %xmm0
|
|
||||||
+; AVX-NEXT: vmovups 48(%rdi), %xmm0
|
|
||||||
; AVX-NEXT: retq
|
|
||||||
;
|
|
||||||
; X32-AVX-LABEL: merge_4i64_2i64_3z:
|
|
||||||
; X32-AVX: # %bb.0:
|
|
||||||
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
-; X32-AVX-NEXT: vmovaps 48(%eax), %xmm0
|
|
||||||
+; X32-AVX-NEXT: vmovups 48(%eax), %xmm0
|
|
||||||
; X32-AVX-NEXT: retl
|
|
||||||
%ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3
|
|
||||||
%val0 = load <2 x i64>, <2 x i64>* %ptr0
|
|
||||||
@@ -217,13 +217,13 @@ define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp {
|
|
||||||
define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp {
|
|
||||||
; AVX-LABEL: merge_4i64_i64_23zz:
|
|
||||||
; AVX: # %bb.0:
|
|
||||||
-; AVX-NEXT: vmovaps 16(%rdi), %xmm0
|
|
||||||
+; AVX-NEXT: vmovups 16(%rdi), %xmm0
|
|
||||||
; AVX-NEXT: retq
|
|
||||||
;
|
|
||||||
; X32-AVX-LABEL: merge_4i64_i64_23zz:
|
|
||||||
; X32-AVX: # %bb.0:
|
|
||||||
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
-; X32-AVX-NEXT: vmovaps 16(%eax), %xmm0
|
|
||||||
+; X32-AVX-NEXT: vmovups 16(%eax), %xmm0
|
|
||||||
; X32-AVX-NEXT: retl
|
|
||||||
%ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2
|
|
||||||
%ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3
|
|
||||||
diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll
|
|
||||||
index 62102eb382c..3c6eaf65292 100644
|
|
||||||
--- a/test/CodeGen/X86/merge-consecutive-loads-512.ll
|
|
||||||
+++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll
|
|
||||||
@@ -106,13 +106,13 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin
|
|
||||||
define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
|
|
||||||
; ALL-LABEL: merge_8f64_f64_12zzuuzz:
|
|
||||||
; ALL: # %bb.0:
|
|
||||||
-; ALL-NEXT: vmovaps 8(%rdi), %xmm0
|
|
||||||
+; ALL-NEXT: vmovups 8(%rdi), %xmm0
|
|
||||||
; ALL-NEXT: retq
|
|
||||||
;
|
|
||||||
; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
|
|
||||||
; X32-AVX512F: # %bb.0:
|
|
||||||
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
-; X32-AVX512F-NEXT: vmovaps 8(%eax), %xmm0
|
|
||||||
+; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0
|
|
||||||
; X32-AVX512F-NEXT: retl
|
|
||||||
%ptr0 = getelementptr inbounds double, double* %ptr, i64 1
|
|
||||||
%ptr1 = getelementptr inbounds double, double* %ptr, i64 2
|
|
||||||
@@ -190,7 +190,7 @@ define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline
|
|
||||||
define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
|
|
||||||
; ALL-LABEL: merge_8i64_i64_56zz9uzz:
|
|
||||||
; ALL: # %bb.0:
|
|
||||||
-; ALL-NEXT: vmovaps 40(%rdi), %xmm0
|
|
||||||
+; ALL-NEXT: vmovups 40(%rdi), %xmm0
|
|
||||||
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
|
||||||
; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
|
|
||||||
; ALL-NEXT: retq
|
|
||||||
@@ -198,7 +198,7 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s
|
|
||||||
; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
|
|
||||||
; X32-AVX512F: # %bb.0:
|
|
||||||
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
|
||||||
-; X32-AVX512F-NEXT: vmovaps 40(%eax), %xmm0
|
|
||||||
+; X32-AVX512F-NEXT: vmovups 40(%eax), %xmm0
|
|
||||||
; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
|
|
||||||
; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
|
|
||||||
; X32-AVX512F-NEXT: retl
|
|
6131
debian/patches/julia/llvm-rL327898.patch
vendored
6131
debian/patches/julia/llvm-rL327898.patch
vendored
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user