mirror of https://git.proxmox.com/git/llvm-toolchain
synced 2025-07-27 11:00:08 +00:00

* Non-maintainer upload.
* Backport some patches (originally from rust, and upstreamed) to fix two failing tests in rustc.

parent e784e58e94
commit be94a5a691
debian/changelog (8 changed lines, vendored)
@@ -1,3 +1,11 @@
+llvm-toolchain-4.0 (1:4.0.1-7.1) UNRELEASED; urgency=medium
+
+  * Non-maintainer upload.
+  * Backport some patches (originally from rust, and upstreamed) to fix two
+    failing tests in rustc.
+
+ -- Ximin Luo <infinity0@debian.org>  Wed, 18 Oct 2017 15:28:20 +0200
+
 llvm-toolchain-4.0 (1:4.0.1-7) unstable; urgency=medium
 
   * Force the deactivation of ocaml until the transition is done
debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch (deleted, 84 lines, vendored)
@@ -1,84 +0,0 @@
From eee68eafa7e8e4ce996b49f5551636639a6c331a Mon Sep 17 00:00:00 2001
From: David Majnemer <david.majnemer@gmail.com>
Date: Mon, 29 Aug 2016 17:14:08 +0000
Subject: [PATCH 11/17] [SimplifyCFG] Hoisting invalidates metadata

We forgot to remove optimization metadata when performing hoisting during
FoldTwoEntryPHINode.

This fixes PR29163.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279980 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/SimplifyCFG.cpp   | 10 ++++++++--
 test/Transforms/SimplifyCFG/PR29163.ll | 31 +++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 test/Transforms/SimplifyCFG/PR29163.ll

diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 0504646..c197317 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2024,14 +2024,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
 
   // Move all 'aggressive' instructions, which are defined in the
   // conditional parts of the if's up to the dominating block.
-  if (IfBlock1)
+  if (IfBlock1) {
+    for (auto &I : *IfBlock1)
+      I.dropUnknownNonDebugMetadata();
     DomBlock->getInstList().splice(InsertPt->getIterator(),
                                    IfBlock1->getInstList(), IfBlock1->begin(),
                                    IfBlock1->getTerminator()->getIterator());
-  if (IfBlock2)
+  }
+  if (IfBlock2) {
+    for (auto &I : *IfBlock2)
+      I.dropUnknownNonDebugMetadata();
     DomBlock->getInstList().splice(InsertPt->getIterator(),
                                    IfBlock2->getInstList(), IfBlock2->begin(),
                                    IfBlock2->getTerminator()->getIterator());
+  }
 
   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
     // Change the PHI node into a select instruction.
diff --git a/test/Transforms/SimplifyCFG/PR29163.ll b/test/Transforms/SimplifyCFG/PR29163.ll
new file mode 100644
index 0000000..65f9090
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PR29163.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@GV = external constant i64*
+
+define i64* @test1(i1 %cond, i8* %P) {
+entry:
+  br i1 %cond, label %if, label %then
+
+then:
+  %bc = bitcast i8* %P to i64*
+  br label %join
+
+if:
+  %load = load i64*, i64** @GV, align 8, !dereferenceable !0
+  br label %join
+
+join:
+  %phi = phi i64* [ %bc, %then ], [ %load, %if ]
+  ret i64* %phi
+}
+
+; CHECK-LABEL: define i64* @test1(
+; CHECK: %[[bc:.*]] = bitcast i8* %P to i64*
+; CHECK: %[[load:.*]] = load i64*, i64** @GV, align 8{{$}}
+; CHECK: %[[phi:.*]] = select i1 %cond, i64* %[[load]], i64* %[[bc]]
+; CHECK: ret i64* %[[phi]]
+
+
+!0 = !{i64 8}
--
2.10.1
debian/patches/rL298540.diff (new file, 292 lines, vendored)
@@ -0,0 +1,292 @@
commit 2b66aeed20b61d42cd2782b94d5419c6faca49a1
Author: Luqman Aden <me@luqman.ca>
Date: Wed Mar 22 19:16:39 2017 +0000

    Preserve nonnull metadata on Loads through SROA & mem2reg.

    Summary:
    https://llvm.org/bugs/show_bug.cgi?id=31142 :

    SROA was dropping the nonnull metadata on loads from allocas that got
    optimized out. This patch simply preserves nonnull metadata on loads
    through SROA and mem2reg.

    Reviewers: chandlerc, efriedma

    Reviewed By: efriedma

    Subscribers: hfinkel, spatel, efriedma, arielb1, davide, llvm-commits

    Differential Revision: https://reviews.llvm.org/D27114

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298540 91177308-0d34-0410-b5e6-96231b3b80d8
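For orientation, a minimal before/after sketch of what this change does. The IR below is illustrative only (modeled on the new test cases further down, with assumed value names such as %buf and %v); it is not part of the patch:

    ; before mem2reg: the nonnull fact lives on the load of the alloca slot
    %buf = alloca float*
    store float* %v, float** %buf
    %buf.load = load float*, float** %buf, !nonnull !0
    ret float* %buf.load

    ; after mem2reg: the load is erased, so the fact is re-encoded as an assume
    %cmp = icmp ne float* %v, null
    call void @llvm.assume(i1 %cmp)
    ret float* %v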
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2387,6 +2387,10 @@
                              LI.isVolatile(), LI.getName());
     if (LI.isVolatile())
       NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+    // Try to preserve nonnull metadata
+    if (TargetTy->isPointerTy())
+      NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
     V = NewLI;
 
     // If this is an integer load past the end of the slice (which means the
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -15,7 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
@@ -23,6 +22,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -38,6 +38,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -301,6 +302,18 @@
 
 } // end of anonymous namespace
 
+/// Given a LoadInst LI this adds assume(LI != null) after it.
+static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
+  Function *AssumeIntrinsic =
+      Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
+  ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
+                                       Constant::getNullValue(LI->getType()));
+  LoadNotNull->insertAfter(LI);
+  CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
+  CI->insertAfter(LoadNotNull);
+  AC->registerAssumption(CI);
+}
+
 static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
   // Knowing that this alloca is promotable, we know that it's safe to kill all
   // instructions except for load and store.
@@ -334,9 +347,9 @@
 /// and thus must be phi-ed with undef. We fall back to the standard alloca
 /// promotion algorithm in that case.
 static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
-                                     LargeBlockInfo &LBI,
-                                     DominatorTree &DT,
-                                     AliasSetTracker *AST) {
+                                     LargeBlockInfo &LBI, DominatorTree &DT,
+                                     AliasSetTracker *AST,
+                                     AssumptionCache *AC) {
   StoreInst *OnlyStore = Info.OnlyStore;
   bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
   BasicBlock *StoreBB = OnlyStore->getParent();
@@ -387,6 +400,14 @@
     // code.
     if (ReplVal == LI)
       ReplVal = UndefValue::get(LI->getType());
+
+    // If the load was marked as nonnull we don't want to lose
+    // that information when we erase this Load. So we preserve
+    // it with an assume.
+    if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+        !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+      addAssumeNonNull(AC, LI);
+
     LI->replaceAllUsesWith(ReplVal);
     if (AST && LI->getType()->isPointerTy())
       AST->deleteValue(LI);
@@ -435,7 +456,9 @@
 /// }
 static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                      LargeBlockInfo &LBI,
-                                     AliasSetTracker *AST) {
+                                     AliasSetTracker *AST,
+                                     DominatorTree &DT,
+                                     AssumptionCache *AC) {
   // The trickiest case to handle is when we have large blocks. Because of this,
   // this code is optimized assuming that large blocks happen. This does not
   // significantly pessimize the small block case. This uses LargeBlockInfo to
@@ -476,10 +499,17 @@
       // There is no store before this load, bail out (load may be affected
       // by the following stores - see main comment).
       return false;
-    }
-    else
+    } else {
       // Otherwise, there was a store before this load, the load takes its value.
-      LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
+      // Note, if the load was marked as nonnull we don't want to lose that
+      // information when we erase it. So we preserve it with an assume.
+      Value *ReplVal = std::prev(I)->second->getOperand(0);
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
+      LI->replaceAllUsesWith(ReplVal);
+    }
 
     if (AST && LI->getType()->isPointerTy())
       AST->deleteValue(LI);
@@ -553,7 +583,7 @@
   // If there is only a single store to this value, replace any loads of
   // it that are directly dominated by the definition with the value stored.
   if (Info.DefiningBlocks.size() == 1) {
-    if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
+    if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST, AC)) {
       // The alloca has been processed, move on.
       RemoveFromAllocasList(AllocaNum);
       ++NumSingleStore;
@@ -564,7 +594,7 @@
   // If the alloca is only read and written in one basic block, just perform a
   // linear sweep over the block to eliminate it.
   if (Info.OnlyUsedInOneBlock &&
-      promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
+      promoteSingleBlockAlloca(AI, Info, LBI, AST, DT, AC)) {
     // The alloca has been processed, move on.
     RemoveFromAllocasList(AllocaNum);
     continue;
@@ -940,6 +970,13 @@
 
       Value *V = IncomingVals[AI->second];
 
+      // If the load was marked as nonnull we don't want to lose
+      // that information when we erase this Load. So we preserve
+      // it with an assume.
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(V, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
       // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
       if (AST && LI->getType()->isPointerTy())
--- /dev/null
+++ b/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; This tests that mem2reg preserves the !nonnull metadata on loads
+; from allocas that get optimized out.
+
+; Check the case where the alloca in question has a single store.
+define float* @single_store(float** %arg) {
+; CHECK-LABEL: define float* @single_store
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float **%buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store but still within one basic block.
+define float* @single_block(float** %arg) {
+; CHECK-LABEL: define float* @single_block
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float **%buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store and also reads and writes in multiple blocks.
+define float* @multi_block(float** %arg) {
+; CHECK-LABEL: define float* @multi_block
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: br label %next
+; CHECK-LABEL: next:
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  br label %next
+next:
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check that we don't add an assume if it's not
+; necessary i.e. the value is already implied to be nonnull
+define float* @no_assume(float** %arg) {
+; CHECK-LABEL: define float* @no_assume
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: %cn = icmp ne float* %arg.load, null
+; CHECK: br i1 %cn, label %next, label %fin
+; CHECK-LABEL: next:
+; CHECK-NOT: call void @llvm.assume
+; CHECK: ret float* %arg.load
+; CHECK-LABEL: fin:
+; CHECK: ret float* null
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  %cn = icmp ne float* %arg.load, null
+  br i1 %cn, label %next, label %fin
+next:
+; At this point the above nonnull check ensures that
+; the value %arg.load is nonnull in this block and thus
+; we need not add the assume.
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+fin:
+  ret float* null
+}
+
+!0 = !{}
--- /dev/null
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure that SROA doesn't lose nonnull metadata
+; on loads from allocas that get optimized out.
+
+; CHECK-LABEL: define float* @yummy_nonnull
+; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* {{.*}}[[RETURN]]
+
+define float* @yummy_nonnull(float** %arg) {
+entry-block:
+  %buf = alloca float*
+
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+!0 = !{}
debian/patches/rL305193-backport.diff (new file, 442 lines, vendored)
@@ -0,0 +1,442 @@
commit 2b622a393ce80c6157d32a50bf67d6b830729469
Author: Than McIntosh <thanm@google.com>
Date: Mon Jun 12 14:56:02 2017 +0000

    StackColoring: smarter check for slot overlap

    Summary:
    The old check for slot overlap treated 2 slots `S` and `T` as
    overlapping if there existed a CFG node in which both of the slots could
    possibly be active. That is overly conservative and caused stack blowups
    in Rust programs. Instead, check whether there is a single CFG node in
    which both of the slots are possibly active *together*.

    Fixes PR32488.

    Patch by Ariel Ben-Yehuda <ariel.byd@gmail.com>

    Reviewers: thanm, nagisa, llvm-commits, efriedma, rnk

    Reviewed By: thanm

    Subscribers: dotdash

    Differential Revision: https://reviews.llvm.org/D31583

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305193 91177308-0d34-0410-b5e6-96231b3b80d8
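For orientation, a minimal IR sketch of the CFG shape this fixes (illustrative only, modeled on the pr32488 test added below; the function and value names are ours). Both slots may be live on entry to %join, so the old per-block check made them interfere; but neither slot ever *starts* being in-use while the other is in-use, so the new check still merges them into a single 4k slot:

    define void @sketch(i1 %var) {
    entry:
      %x = alloca [4096 x i8]
      %y = alloca [4096 x i8]
      %xp = bitcast [4096 x i8]* %x to i8*
      %yp = bitcast [4096 x i8]* %y to i8*
      br i1 %var, label %left, label %right
    left:                                   ; only %x becomes in-use here
      call void @llvm.lifetime.start.p0i8(i64 4096, i8* %xp)
      call void @use(i8* %xp)
      br label %join
    right:                                  ; only %y becomes in-use here
      call void @llvm.lifetime.start.p0i8(i64 4096, i8* %yp)
      call void @use(i8* %yp)
      br label %join
    join:                                   ; %x or %y may be live, never both
      call void @llvm.lifetime.end.p0i8(i64 4096, i8* %xp)
      call void @llvm.lifetime.end.p0i8(i64 4096, i8* %yp)
      ret void
    }

    declare void @use(i8*)
    declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
    declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)

(A lifetime.end on an already-dead slot is a no-op, per rule L5 in the comment block below, so the unconditional ends in %join are legal.)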
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -87,10 +87,134 @@
 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
 STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
 
+//===----------------------------------------------------------------------===//
+// StackColoring Pass
+//===----------------------------------------------------------------------===//
+//
+// Stack Coloring reduces stack usage by merging stack slots when they
+// can't be used together. For example, consider the following C program:
+//
+//     void bar(char *, int);
+//     void foo(bool var) {
+//         A: {
+//             char z[4096];
+//             bar(z, 0);
+//         }
+//
+//         char *p;
+//         char x[4096];
+//         char y[4096];
+//         if (var) {
+//             p = x;
+//         } else {
+//             bar(y, 1);
+//             p = y + 1024;
+//         }
+//     B:
+//         bar(p, 2);
+//     }
+//
+// Naively-compiled, this program would use 12k of stack space. However, the
+// stack slot corresponding to `z` is always destroyed before either of the
+// stack slots for `x` or `y` are used, and then `x` is only used if `var`
+// is true, while `y` is only used if `var` is false. So in no time are 2
+// of the stack slots used together, and therefore we can merge them,
+// compiling the function using only a single 4k alloca:
+//
+//     void foo(bool var) { // equivalent
+//         char x[4096];
+//         char *p;
+//         bar(x, 0);
+//         if (var) {
+//             p = x;
+//         } else {
+//             bar(x, 1);
+//             p = x + 1024;
+//         }
+//         bar(p, 2);
+//     }
+//
+// This is an important optimization if we want stack space to be under
+// control in large functions, both open-coded ones and ones created by
+// inlining.
 //
 // Implementation Notes:
 // ---------------------
 //
+// An important part of the above reasoning is that `z` can't be accessed
+// while the latter 2 calls to `bar` are running. This is justified because
+// `z`'s lifetime is over after we exit from block `A:`, so any further
+// accesses to it would be UB. The way we represent this information
+// in LLVM is by having frontends delimit blocks with `lifetime.start`
+// and `lifetime.end` intrinsics.
+//
+// The effect of these intrinsics seems to be as follows (maybe I should
+// specify this in the reference?):
+//
+//   L1) at start, each stack-slot is marked as *out-of-scope*, unless no
+//       lifetime intrinsic refers to that stack slot, in which case
+//       it is marked as *in-scope*.
+//   L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
+//       the stack slot is overwritten with `undef`.
+//   L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
+//   L4) on function exit, all stack slots are marked as *out-of-scope*.
+//   L5) `lifetime.end` is a no-op when called on a slot that is already
+//       *out-of-scope*.
+//   L6) memory accesses to *out-of-scope* stack slots are UB.
+//   L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
+//       are invalidated, unless the slot is "degenerate". This is used to
+//       justify not marking slots as in-use until the pointer to them is
+//       used, but feels a bit hacky in the presence of things like LICM. See
+//       the "Degenerate Slots" section for more details.
+//
+// Now, let's ground stack coloring on these rules. We'll define a slot
+// as *in-use* at a (dynamic) point in execution if it either can be
+// written to at that point, or if it has a live and non-undef content
+// at that point.
+//
+// Obviously, slots that are never *in-use* together can be merged, and
+// in our example `foo`, the slots for `x`, `y` and `z` are never
+// in-use together (of course, sometimes slots that *are* in-use together
+// might still be mergable, but we don't care about that here).
+//
+// In this implementation, we successively merge pairs of slots that are
+// not *in-use* together. We could be smarter - for example, we could merge
+// a single large slot with 2 small slots, or we could construct the
+// interference graph and run a "smart" graph coloring algorithm, but with
+// that aside, how do we find out whether a pair of slots might be *in-use*
+// together?
+//
+// From our rules, we see that *out-of-scope* slots are never *in-use*,
+// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
+// until their address is taken. Therefore, we can approximate slot activity
+// using dataflow.
+//
+// A subtle point: naively, we might try to figure out which pairs of
+// stack-slots interfere by propagating `S in-use` through the CFG for every
+// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
+// which they are both *in-use*.
+//
+// That is sound, but overly conservative in some cases: in our (artificial)
+// example `foo`, either `x` or `y` might be in use at the label `B:`, but
+// as `x` is only in use if we came in from the `var` edge and `y` only
+// if we came from the `!var` edge, they still can't be in use together.
+// See PR32488 for an important real-life case.
+//
+// If we wanted to find all points of interference precisely, we could
+// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
+// would be precise, but requires propagating `O(n^2)` dataflow facts.
+//
+// However, we aren't interested in the *set* of points of interference
+// between 2 stack slots, only *whether* there *is* such a point. So we
+// can rely on a little trick: for `S` and `T` to be in-use together,
+// one of them needs to become in-use while the other is in-use (or
+// they might both become in use simultaneously). We can check this
+// by also keeping track of the points at which a stack slot might *start*
+// being in-use.
+//
+// Exact first use:
+// ----------------
+//
 // Consider the following motivating example:
 //
 //     int foo() {
@@ -159,6 +283,9 @@
 // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
 // byte stack (better).
 //
+// Degenerate Slots:
+// -----------------
+//
 // Relying entirely on first-use of stack slots is problematic,
 // however, due to the fact that optimizations can sometimes migrate
 // uses of a variable outside of its lifetime start/end region. Here
@@ -238,10 +365,6 @@
 // for "b" then it will appear that 'b' has a degenerate lifetime.
 //
 
-//===----------------------------------------------------------------------===//
-// StackColoring Pass
-//===----------------------------------------------------------------------===//
-
 namespace {
 /// StackColoring - A machine pass for merging disjoint stack allocations,
 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
@@ -272,8 +395,11 @@
   /// Maps basic blocks to a serial number.
   SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
 
-  /// Maps liveness intervals for each slot.
+  /// Maps slots to their use interval. Outside of this interval, slots
+  /// values are either dead or `undef` and they will not be written to.
   SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
+  /// Maps slots to the points where they can become in-use.
+  SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
   /// VNInfo is used for the construction of LiveIntervals.
   VNInfo::Allocator VNInfoAllocator;
   /// SlotIndex analysis object.
@@ -676,15 +802,22 @@
 
 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
   SmallVector<SlotIndex, 16> Starts;
-  SmallVector<SlotIndex, 16> Finishes;
+  SmallVector<bool, 16> DefinitelyInUse;
 
   // For each block, find which slots are active within this block
   // and update the live intervals.
   for (const MachineBasicBlock &MBB : *MF) {
     Starts.clear();
     Starts.resize(NumSlots);
-    Finishes.clear();
-    Finishes.resize(NumSlots);
+    DefinitelyInUse.clear();
+    DefinitelyInUse.resize(NumSlots);
+
+    // Start the interval of the slots that we previously found to be 'in-use'.
+    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
+    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
+         pos = MBBLiveness.LiveIn.find_next(pos)) {
+      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
+    }
 
     // Create the interval for the basic blocks containing lifetime begin/end.
     for (const MachineInstr &MI : MBB) {
@@ -696,68 +829,35 @@
       SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
       for (auto Slot : slots) {
         if (IsStart) {
-          if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+          // If a slot is already definitely in use, we don't have to emit
+          // a new start marker because there is already a pre-existing
+          // one.
+          if (!DefinitelyInUse[Slot]) {
+            LiveStarts[Slot].push_back(ThisIndex);
+            DefinitelyInUse[Slot] = true;
+          }
+          if (!Starts[Slot].isValid())
             Starts[Slot] = ThisIndex;
         } else {
-          if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
-            Finishes[Slot] = ThisIndex;
+          if (Starts[Slot].isValid()) {
+            VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
+            Intervals[Slot]->addSegment(
+                LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
+            Starts[Slot] = SlotIndex(); // Invalidate the start index
+            DefinitelyInUse[Slot] = false;
+          }
         }
       }
     }
 
-    // Create the interval of the blocks that we previously found to be 'alive'.
-    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
-    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
-         pos = MBBLiveness.LiveIn.find_next(pos)) {
-      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
-    }
-    for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
-         pos = MBBLiveness.LiveOut.find_next(pos)) {
-      Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
-    }
-
+    // Finish up started segments
     for (unsigned i = 0; i < NumSlots; ++i) {
-      //
-      // When LifetimeStartOnFirstUse is turned on, data flow analysis
-      // is forward (from starts to ends), not bidirectional. A
-      // consequence of this is that we can wind up in situations
-      // where Starts[i] is invalid but Finishes[i] is valid and vice
-      // versa. Example:
-      //
-      //     LIFETIME_START x
-      //     if (...) {
-      //       <use of x>
-      //       throw ...;
-      //     }
-      //     LIFETIME_END x
-      //     return 2;
-      //
-      //
-      // Here the slot for "x" will not be live into the block
-      // containing the "return 2" (since lifetimes start with first
-      // use, not at the dominating LIFETIME_START marker).
-      //
-      if (Starts[i].isValid() && !Finishes[i].isValid()) {
-        Finishes[i] = Indexes->getMBBEndIdx(&MBB);
-      }
       if (!Starts[i].isValid())
         continue;
 
-      assert(Starts[i] && Finishes[i] && "Invalid interval");
-      VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
-      SlotIndex S = Starts[i];
-      SlotIndex F = Finishes[i];
-      if (S < F) {
-        // We have a single consecutive region.
-        Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
-      } else {
-        // We have two non-consecutive regions. This happens when
-        // LIFETIME_START appears after the LIFETIME_END marker.
-        SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
-        SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
-        Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
-        Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
-      }
+      SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
+      VNInfo *VNI = Intervals[i]->getValNumInfo(0);
+      Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
     }
   }
 }
@@ -987,6 +1087,7 @@
   BasicBlockNumbering.clear();
   Markers.clear();
   Intervals.clear();
+  LiveStarts.clear();
   VNInfoAllocator.Reset();
 
   unsigned NumSlots = MFI->getObjectIndexEnd();
@@ -998,6 +1099,7 @@
   SmallVector<int, 8> SortedSlots;
   SortedSlots.reserve(NumSlots);
   Intervals.reserve(NumSlots);
+  LiveStarts.resize(NumSlots);
 
   unsigned NumMarkers = collectMarkers(NumSlots);
 
@@ -1069,6 +1171,9 @@
     return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
   });
 
+  for (auto &s : LiveStarts)
+    std::sort(s.begin(), s.end());
+
   bool Changed = true;
   while (Changed) {
     Changed = false;
@@ -1084,12 +1189,22 @@
       int SecondSlot = SortedSlots[J];
       LiveInterval *First = &*Intervals[FirstSlot];
       LiveInterval *Second = &*Intervals[SecondSlot];
+      auto &FirstS = LiveStarts[FirstSlot];
+      auto &SecondS = LiveStarts[SecondSlot];
       assert (!First->empty() && !Second->empty() && "Found an empty range");
 
-      // Merge disjoint slots.
-      if (!First->overlaps(*Second)) {
+      // Merge disjoint slots. This is a little bit tricky - see the
+      // Implementation Notes section for an explanation.
+      if (!First->isLiveAtIndexes(SecondS) &&
+          !Second->isLiveAtIndexes(FirstS)) {
         Changed = true;
         First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
+
+        int OldSize = FirstS.size();
+        FirstS.append(SecondS.begin(), SecondS.end());
+        auto Mid = FirstS.begin() + OldSize;
+        std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
+
         SlotRemap[SecondSlot] = FirstSlot;
         SortedSlots[J] = -1;
         DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
--- a/test/CodeGen/X86/StackColoring.ll
+++ b/test/CodeGen/X86/StackColoring.ll
@@ -582,12 +582,76 @@
   ret i32 %x.addr.0
 }
 
+;CHECK-LABEL: multi_segment:
+;YESCOLOR: subq $256, %rsp
+;NOFIRSTUSE: subq $256, %rsp
+;NOCOLOR: subq $512, %rsp
+define i1 @multi_segment(i1, i1)
+{
+entry-block:
+  %foo = alloca [32 x i64]
+  %bar = alloca [32 x i64]
+  %foo_i8 = bitcast [32 x i64]* %foo to i8*
+  %bar_i8 = bitcast [32 x i64]* %bar to i8*
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
+  call void @baz([32 x i64]* %foo, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  ret i1 true
+}
+
+;CHECK-LABEL: pr32488:
+;YESCOLOR: subq $256, %rsp
+;NOFIRSTUSE: subq $256, %rsp
+;NOCOLOR: subq $512, %rsp
+define i1 @pr32488(i1, i1)
+{
+entry-block:
+  %foo = alloca [32 x i64]
+  %bar = alloca [32 x i64]
+  %foo_i8 = bitcast [32 x i64]* %foo to i8*
+  %bar_i8 = bitcast [32 x i64]* %bar to i8*
+  br i1 %0, label %if_false, label %if_true
+if_false:
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 0)
+  br i1 %1, label %if_false.1, label %onerr
+if_false.1:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  br label %merge
+if_true:
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
+  call void @baz([32 x i64]* %foo, i32 1)
+  br i1 %1, label %if_true.1, label %onerr
+if_true.1:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  br label %merge
+merge:
+  ret i1 false
+onerr:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  call void @destructor()
+  ret i1 true
+}
+
+%Data = type { [32 x i64] }
+
+declare void @destructor()
+
 declare void @inita(i32*)
 
 declare void @initb(i32*,i32*,i32*)
 
 declare void @bar([100 x i32]* , [100 x i32]*) nounwind
 
+declare void @baz([32 x i64]*, i32)
+
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
debian/patches/rL306267.diff (new file, 187 lines, vendored)
@@ -0,0 +1,187 @@
commit 5a057dc8edbb63887f8c611dd8ddf1b76997f07c
Author: Chandler Carruth <chandlerc@gmail.com>
Date: Mon Jun 26 03:31:31 2017 +0000

    [InstCombine] Factor the logic for propagating !nonnull and !range
    metadata out of InstCombine and into helpers.

    NFC, this just exposes the logic used by InstCombine when propagating
    metadata from one load instruction to another. The plan is to use this
    in SROA to address PR32902.

    If anyone has better ideas about how to factor this or name variables,
    I'm all ears, but this seemed like a pretty good start and lets us make
    progress on the PR.

    This is based on a patch by Ariel Ben-Yehuda (D34285).

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306267 91177308-0d34-0410-b5e6-96231b3b80d8
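For orientation, a small IR sketch (ours, not part of the patch; %slot and %slot.cast are assumed names) of the two mappings these helpers perform when a rewritten load changes between pointer and integer type:

    ; copyNonnullMetadata: a pointer load carrying !nonnull, rewritten as an
    ; integer load, keeps the fact as a !range that excludes zero
    %p = load i8*, i8** %slot, !nonnull !0
    %i = load i64, i64* %slot.cast, !range !1

    ; copyRangeMetadata: an integer load whose !range excludes zero,
    ; rewritten as a pointer load, gets !nonnull back
    !0 = !{}
    !1 = !{i64 1, i64 0} ; wrapping range [1, 0): every i64 value except 0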
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -366,6 +366,19 @@
 /// during lowering by the GC infrastructure.
 bool callsGCLeafFunction(ImmutableCallSite CS);
 
+/// Copy a nonnull metadata node to a new load instruction.
+///
+/// This handles mapping it to range metadata if the new load is an integer
+/// load instead of a pointer load.
+void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI);
+
+/// Copy a range metadata node to a new load instruction.
+///
+/// This handles mapping it to nonnull metadata if the new load is a pointer
+/// load instead of an integer load and the range doesn't cover null.
+void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N,
+                       LoadInst &NewLI);
+
 //===----------------------------------------------------------------------===//
 //  Intrinsic pattern matching
 //
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -471,21 +471,7 @@
       break;
 
     case LLVMContext::MD_nonnull:
-      // This only directly applies if the new type is also a pointer.
-      if (NewTy->isPointerTy()) {
-        NewLoad->setMetadata(ID, N);
-        break;
-      }
-      // If it's integral now, translate it to !range metadata.
-      if (NewTy->isIntegerTy()) {
-        auto *ITy = cast<IntegerType>(NewTy);
-        auto *NullInt = ConstantExpr::getPtrToInt(
-            ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
-        auto *NonNullInt =
-            ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
-        NewLoad->setMetadata(LLVMContext::MD_range,
-                             MDB.createRange(NonNullInt, NullInt));
-      }
+      copyNonnullMetadata(LI, N, *NewLoad);
       break;
     case LLVMContext::MD_align:
     case LLVMContext::MD_dereferenceable:
@@ -495,17 +481,7 @@
       NewLoad->setMetadata(ID, N);
       break;
     case LLVMContext::MD_range:
-      // FIXME: It would be nice to propagate this in some way, but the type
-      // conversions make it hard.
-
-      // If it's a pointer now and the range does not contain 0, make it !nonnull.
-      if (NewTy->isPointerTy()) {
-        unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy);
-        if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
-          MDNode *NN = MDNode::get(LI.getContext(), None);
-          NewLoad->setMetadata(LLVMContext::MD_nonnull, NN);
-        }
-      }
+      copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
       break;
     }
   }
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DataLayout.h"
@@ -1069,7 +1070,7 @@
 }
 
 /// See if there is a dbg.value intrinsic for DIVar for the PHI node.
-static bool PhiHasDebugValue(DILocalVariable *DIVar, 
+static bool PhiHasDebugValue(DILocalVariable *DIVar,
                              DIExpression *DIExpr,
                              PHINode *APN) {
   // Since we can't guarantee that the original dbg.declare intrinsic
@@ -1152,7 +1153,7 @@
   DbgValue->insertAfter(LI);
 }
 
-/// Inserts a llvm.dbg.value intrinsic after a phi 
+/// Inserts a llvm.dbg.value intrinsic after a phi
 /// that has an associated llvm.dbg.decl intrinsic.
 void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            PHINode *APN, DIBuilder &Builder) {
@@ -1723,12 +1724,12 @@
     // Preserve !invariant.group in K.
     break;
   case LLVMContext::MD_align:
-    K->setMetadata(Kind, 
+    K->setMetadata(Kind,
         MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
     break;
   case LLVMContext::MD_dereferenceable:
   case LLVMContext::MD_dereferenceable_or_null:
-    K->setMetadata(Kind, 
+    K->setMetadata(Kind,
         MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
     break;
   }
@@ -1812,6 +1813,49 @@
   return false;
 }
 
+void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
+                               LoadInst &NewLI) {
+  auto *NewTy = NewLI.getType();
+
+  // This only directly applies if the new type is also a pointer.
+  if (NewTy->isPointerTy()) {
+    NewLI.setMetadata(LLVMContext::MD_nonnull, N);
+    return;
+  }
+
+  // The only other translation we can do is to integral loads with !range
+  // metadata.
+  if (!NewTy->isIntegerTy())
+    return;
+
+  MDBuilder MDB(NewLI.getContext());
+  const Value *Ptr = OldLI.getPointerOperand();
+  auto *ITy = cast<IntegerType>(NewTy);
+  auto *NullInt = ConstantExpr::getPtrToInt(
+      ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+  auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+  NewLI.setMetadata(LLVMContext::MD_range,
+                    MDB.createRange(NonNullInt, NullInt));
+}
+
+void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
+                             MDNode *N, LoadInst &NewLI) {
+  auto *NewTy = NewLI.getType();
+
+  // Give up unless it is converted to a pointer where there is a single very
+  // valuable mapping we can do reliably.
+  // FIXME: It would be nice to propagate this in more ways, but the type
+  // conversions make it hard.
+  if (!NewTy->isPointerTy())
+    return;
+
+  unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+  if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+    MDNode *NN = MDNode::get(OldLI.getContext(), None);
+    NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+  }
+}
+
 namespace {
 /// A potential constituent of a bitreverse or bswap expression. See
 /// collectBitParts for a fuller explanation.
@@ -1933,7 +1977,7 @@
   unsigned NumMaskedBits = AndMask.countPopulation();
   if (!MatchBitReversals && NumMaskedBits % 8 != 0)
     return Result;
-  
+
   auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
                               MatchBitReversals, BPS);
   if (!Res)
debian/patches/rL306353.diff (new file, 47 lines, vendored)
@@ -0,0 +1,47 @@
commit 477bd758b48cb96477d3dd4cf3b36bf2706e8c1d
Author: Chandler Carruth <chandlerc@gmail.com>
Date: Tue Jun 27 02:23:15 2017 +0000

    [SROA] Clean up a test case a bit prior to adding more testing for
    nonnull as part of fixing PR32902.

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306353 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,22 +3,20 @@
 ; Make sure that SROA doesn't lose nonnull metadata
 ; on loads from allocas that get optimized out.
 
-; CHECK-LABEL: define float* @yummy_nonnull
-; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
-; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
-; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
-; CHECK: ret float* {{.*}}[[RETURN]]
-
 define float* @yummy_nonnull(float** %arg) {
-entry-block:
-  %buf = alloca float*
-
-  %_arg_i8 = bitcast float** %arg to i8*
-  %_buf_i8 = bitcast float** %buf to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
-
-  %ret = load float*, float** %buf, align 8, !nonnull !0
-  ret float* %ret
+; CHECK-LABEL: define float* @yummy_nonnull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
+; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
+; CHECK-NEXT:    call void @llvm.assume(i1 %[[ASSUME]])
+; CHECK-NEXT:    ret float* %[[RETURN]]
+entry:
+  %buf = alloca float*
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
debian/patches/rL306358.diff (new file, 52 lines, vendored)
@@ -0,0 +1,52 @@
commit 156cc49e505986a1659adaa3a0b5a070372377c8
Author: Chandler Carruth <chandlerc@gmail.com>
Date: Tue Jun 27 03:08:45 2017 +0000

    [SROA] Further test cleanup and add a test for the actual propagation of
    the nonnull attribute distinct from rewriting it into an assume.

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306358 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,8 +3,31 @@
 ; Make sure that SROA doesn't lose nonnull metadata
 ; on loads from allocas that get optimized out.
 
-define float* @yummy_nonnull(float** %arg) {
-; CHECK-LABEL: define float* @yummy_nonnull(
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+; Check that we do basic propagation of nonnull when rewriting.
+define i8* @propagate_nonnull(i32* %v) {
+; CHECK-LABEL: define i8* @propagate_nonnull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i8*
+; CHECK-NEXT:    %[[V_CAST:.*]] = bitcast i32* %v to i8*
+; CHECK-NEXT:    store i8* %[[V_CAST]], i8** %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0
+; CHECK-NEXT:    ret i8* %[[LOAD]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i32**
+  %a.gep1.cast = bitcast i8** %a.gep1 to i32**
+  store i32* %v, i32** %a.gep1.cast
+  store i32* null, i32** %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+define float* @turn_nonnull_into_assume(float** %arg) {
+; CHECK-LABEL: define float* @turn_nonnull_into_assume(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
 ; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
@@ -19,6 +42,4 @@
   ret float* %ret
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-
 !0 = !{}
debian/patches/rL306379.diff (new file, 147 lines, vendored)
@@ -0,0 +1,147 @@
commit 7df06519765b14e1b08d7034c82c45a0a653eb25
Author: Chandler Carruth <chandlerc@gmail.com>
Date: Tue Jun 27 08:32:03 2017 +0000

    [SROA] Fix PR32902 by more carefully propagating !nonnull metadata.

    This is based heavily on the work done in D34285. I mostly wanted to do
    test cleanup for the author to save them some time, but I had a really
    hard time understanding why it was so hard to write better test cases
    for these issues.

    The problem is that because SROA does a second rewrite of the loads and
    because we *don't* propagate !nonnull for non-pointer loads, we first
    introduced invalid !nonnull metadata and then stripped it back off just
    in time to avoid most ways of this PR manifesting. Moving to the more
    careful utility only fixes this by changing the predicate to look at the
    new load's type rather than the target type. However, that *does* fix
    the bug, and the utility is much nicer including adding range metadata
    to model the nonnull property after a conversion to an integer.

    However, we have bigger problems because we don't actually propagate
    *range* metadata, and the utility to do this extracted from instcombine
    isn't really in good shape to do this currently. It *only* handles the
    case of copying range metadata from an integer load to a pointer load.
    It doesn't even handle the trivial cases of propagating from one integer
    load to another when they are the same width! This utility will need to
    be beefed up prior to using in this location to get the metadata to
    fully survive.

    And even then, we need to go and teach things to turn the range metadata
    into an assume the way we do with nonnull so that when we *promote* an
    integer we don't lose the information.

    All of this will require a new test case that looks kind-of like
    `preserve-nonnull.ll` does here but focuses on range metadata. It will
    also likely require more testing because it needs to correctly handle
    changes to the integer width, especially as SROA actively tries to
    change the integer width!

    Last but not least, I'm a little worried about hooking the range
    metadata up here because the instcombine logic for converting from
    a range metadata *to* a nonnull metadata node seems broken in the face
    of non-zero address spaces where null is not mapped to the integer `0`.
    So that probably needs to get fixed with test cases both in SROA and in
    instcombine to cover it.

    But this *does* extract the core PR fix from D34285 of preventing the
    !nonnull metadata from being propagated in a broken state just long
    enough to feed into promotion and crash value tracking.

    On D34285 there is some discussion of zero-extend handling because it
    isn't necessary. First, the new load size covers all of the non-undef
    (ie, possibly initialized) bits. This may even extend past the original
    alloca if loading those bits could produce valid data. The only way it's
    valid for us to zero-extend an integer load in SROA is if the original
    code had a zero extend or those bits were undef. And we get to assume
    things like undef *never* satisfies nonnull, so non undef bits can
    participate here. No need to special case the zero-extend handling, it
    just falls out correctly.

    The original credit goes to Ariel Ben-Yehuda! I'm mostly landing this to
    save a few rounds of trivial edits fixing style issues and test case
    formulation.

    Differential Revision: D34285

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306379 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2388,9 +2388,20 @@
     if (LI.isVolatile())
       NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
 
+    // Any !nonnull metadata or !range metadata on the old load is also valid
+    // on the new load. This is even true in some cases even when the loads
+    // are different types, for example by mapping !nonnull metadata to
+    // !range metadata by modeling the null pointer constant converted to the
+    // integer type.
+    // FIXME: Add support for range metadata here. Currently the utilities
+    // for this don't propagate range metadata in trivial cases from one
+    // integer load to another, don't handle non-addrspace-0 null pointers
+    // correctly, and don't have any support for mapping ranges as the
+    // integer type becomes wider or narrower.
+    if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
+      copyNonnullMetadata(LI, N, *NewLI);
+
     // Try to preserve nonnull metadata
-    if (TargetTy->isPointerTy())
-      NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
     V = NewLI;
 
     // If this is an integer load past the end of the slice (which means the
--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -42,4 +42,51 @@
   ret float* %ret
 }
 
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load.
+; FIXME: While this doesn't do anything actively harmful today, it really
+; should propagate the !nonnull metadata to range metadata. The irony is, it
+; *does* initially, but then we lose that !range metadata before we finish
+; SROA.
+define i8* @propagate_nonnull_to_int() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i64
+; CHECK-NEXT:    store i64 42, i64* %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
+; CHECK-NEXT:    %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
+; CHECK-NEXT:    ret i8* %[[CAST]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load and immediately promote it to an SSA
+; register. This can fail in interesting ways due to the rewrite iteration of
+; SROA, resulting in PR32902.
+define i8* @propagate_nonnull_to_int_and_promote() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
+; CHECK-NEXT:    ret i8* %[[PROMOTED_VALUE]]
+entry:
+  %a = alloca [2 x i8*], align 8
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load i8*, i8** %a.gep1, align 8, !nonnull !0
+  ret i8* %load
+}
+
 !0 = !{}
debian/patches/series (11 changed lines, vendored)
@@ -38,7 +38,6 @@ disable-llvm-symbolizer-test.diff
 clang-tidy-run-bin.diff
 #bug-30342.diff
 fix-scan-view-path.diff
-#0011-SimplifyCFG-Hoisting-invalidates-metadata.patch
 clang-fix-cmpxchg8-detection-on-i386.patch
 lldb-addversion-suffix-to-llvm-server-exec.patch
 lldb-missing-install.diff
@@ -49,3 +48,13 @@ add_symbols_versioning.patch
 ftfbs-gcc.diff
 pr81066.diff
 armhf-bitfield.diff
+# rust LLVM PR84, LLVM PR32488
+# This is actually Rust's backport of upstream RL305193 (which doesn't apply cleanly to LLVM 4)
+# https://github.com/rust-lang/llvm/commit/2b622a393ce
+rL305193-backport.diff
+# rust LLVM PR90, LLVM PR32902, PR31142
+rL298540.diff
+rL306267.diff
+rL306353.diff
+rL306358.diff
+rL306379.diff