Mirror of https://git.proxmox.com/git/llvm-toolchain (synced 2025-07-27 13:25:37 +00:00)
* Non-maintainer upload.
* Backport some patches (originally from rust, and upstreamed) to fix two failing tests in rustc.
This commit is contained in:
parent e784e58e94
commit be94a5a691
debian/changelog: 8 changes (vendored)
@@ -1,3 +1,11 @@
+llvm-toolchain-4.0 (1:4.0.1-7.1) UNRELEASED; urgency=medium
+
+  * Non-maintainer upload.
+  * Backport some patches (originally from rust, and upstreamed) to fix two
+    failing tests in rustc.
+
+ -- Ximin Luo <infinity0@debian.org>  Wed, 18 Oct 2017 15:28:20 +0200
+
 llvm-toolchain-4.0 (1:4.0.1-7) unstable; urgency=medium
 
   * Force the deactivation of ocaml until the transition is done
debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch: 84 deletions (file removed)
@@ -1,84 +0,0 @@
From eee68eafa7e8e4ce996b49f5551636639a6c331a Mon Sep 17 00:00:00 2001
From: David Majnemer <david.majnemer@gmail.com>
Date: Mon, 29 Aug 2016 17:14:08 +0000
Subject: [PATCH 11/17] [SimplifyCFG] Hoisting invalidates metadata

We forgot to remove optimization metadata when performing hosting during
FoldTwoEntryPHINode.

This fixes PR29163.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279980 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/SimplifyCFG.cpp   | 10 ++++++++--
 test/Transforms/SimplifyCFG/PR29163.ll | 31 +++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 test/Transforms/SimplifyCFG/PR29163.ll

diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 0504646..c197317 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2024,14 +2024,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
 
   // Move all 'aggressive' instructions, which are defined in the
   // conditional parts of the if's up to the dominating block.
-  if (IfBlock1)
+  if (IfBlock1) {
+    for (auto &I : *IfBlock1)
+      I.dropUnknownNonDebugMetadata();
     DomBlock->getInstList().splice(InsertPt->getIterator(),
                                    IfBlock1->getInstList(), IfBlock1->begin(),
                                    IfBlock1->getTerminator()->getIterator());
-  if (IfBlock2)
+  }
+  if (IfBlock2) {
+    for (auto &I : *IfBlock2)
+      I.dropUnknownNonDebugMetadata();
     DomBlock->getInstList().splice(InsertPt->getIterator(),
                                    IfBlock2->getInstList(), IfBlock2->begin(),
                                    IfBlock2->getTerminator()->getIterator());
+  }
 
   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
     // Change the PHI node into a select instruction.
diff --git a/test/Transforms/SimplifyCFG/PR29163.ll b/test/Transforms/SimplifyCFG/PR29163.ll
new file mode 100644
index 0000000..65f9090
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PR29163.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@GV = external constant i64*
+
+define i64* @test1(i1 %cond, i8* %P) {
+entry:
+  br i1 %cond, label %if, label %then
+
+then:
+  %bc = bitcast i8* %P to i64*
+  br label %join
+
+if:
+  %load = load i64*, i64** @GV, align 8, !dereferenceable !0
+  br label %join
+
+join:
+  %phi = phi i64* [ %bc, %then ], [ %load, %if ]
+  ret i64* %phi
+}
+
+; CHECK-LABEL: define i64* @test1(
+; CHECK: %[[bc:.*]] = bitcast i8* %P to i64*
+; CHECK: %[[load:.*]] = load i64*, i64** @GV, align 8{{$}}
+; CHECK: %[[phi:.*]] = select i1 %cond, i64* %[[load]], i64* %[[bc]]
+; CHECK: ret i64* %[[phi]]
+
+
+!0 = !{i64 8}
-- 
2.10.1
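A condensed sketch of the behavior the removed patch above tested, derived entirely from its own PR29163 test case: once FoldTwoEntryPHINode hoists both conditional arms into the dominating block and folds the phi into a select, the load executes unconditionally, so its !dereferenceable annotation (which held only on the guarded path) has to be dropped.

; before -simplifycfg: the load is guarded by %cond
;   if:   %load = load i64*, i64** @GV, align 8, !dereferenceable !0
;   join: %phi = phi i64* [ %bc, %then ], [ %load, %if ]
; after hoisting: metadata dropped, phi folded to a select
;   %bc = bitcast i8* %P to i64*
;   %load = load i64*, i64** @GV, align 8
;   %phi = select i1 %cond, i64* %load, i64* %bc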
debian/patches/rL298540.diff: 292 additions (new file, vendored)
@@ -0,0 +1,292 @@
commit 2b66aeed20b61d42cd2782b94d5419c6faca49a1
Author: Luqman Aden <me@luqman.ca>
Date:   Wed Mar 22 19:16:39 2017 +0000

    Preserve nonnull metadata on Loads through SROA & mem2reg.

    Summary:
    https://llvm.org/bugs/show_bug.cgi?id=31142 :

    SROA was dropping the nonnull metadata on loads from allocas that got optimized out. This patch simply preserves nonnull metadata on loads through SROA and mem2reg.

    Reviewers: chandlerc, efriedma

    Reviewed By: efriedma

    Subscribers: hfinkel, spatel, efriedma, arielb1, davide, llvm-commits

    Differential Revision: https://reviews.llvm.org/D27114

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298540 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2387,6 +2387,10 @@
                               LI.isVolatile(), LI.getName());
     if (LI.isVolatile())
       NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+    // Try to preserve nonnull metadata
+    if (TargetTy->isPointerTy())
+      NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
     V = NewLI;
 
     // If this is an integer load past the end of the slice (which means the
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -15,7 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
@@ -23,6 +22,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -38,6 +38,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -301,6 +302,18 @@
 
 } // end of anonymous namespace
 
+/// Given a LoadInst LI this adds assume(LI != null) after it.
+static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
+  Function *AssumeIntrinsic =
+      Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
+  ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
+                                       Constant::getNullValue(LI->getType()));
+  LoadNotNull->insertAfter(LI);
+  CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
+  CI->insertAfter(LoadNotNull);
+  AC->registerAssumption(CI);
+}
+
 static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
   // Knowing that this alloca is promotable, we know that it's safe to kill all
   // instructions except for load and store.
@@ -334,9 +347,9 @@
 /// and thus must be phi-ed with undef. We fall back to the standard alloca
 /// promotion algorithm in that case.
 static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
-                                     LargeBlockInfo &LBI,
-                                     DominatorTree &DT,
-                                     AliasSetTracker *AST) {
+                                     LargeBlockInfo &LBI, DominatorTree &DT,
+                                     AliasSetTracker *AST,
+                                     AssumptionCache *AC) {
   StoreInst *OnlyStore = Info.OnlyStore;
   bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
   BasicBlock *StoreBB = OnlyStore->getParent();
@@ -387,6 +400,14 @@
       // code.
       if (ReplVal == LI)
         ReplVal = UndefValue::get(LI->getType());
+
+      // If the load was marked as nonnull we don't want to lose
+      // that information when we erase this Load. So we preserve
+      // it with an assume.
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
       LI->replaceAllUsesWith(ReplVal);
       if (AST && LI->getType()->isPointerTy())
         AST->deleteValue(LI);
@@ -435,7 +456,9 @@
 /// }
 static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                      LargeBlockInfo &LBI,
-                                     AliasSetTracker *AST) {
+                                     AliasSetTracker *AST,
+                                     DominatorTree &DT,
+                                     AssumptionCache *AC) {
   // The trickiest case to handle is when we have large blocks. Because of this,
   // this code is optimized assuming that large blocks happen. This does not
   // significantly pessimize the small block case. This uses LargeBlockInfo to
@@ -476,10 +499,17 @@
         // There is no store before this load, bail out (load may be affected
         // by the following stores - see main comment).
         return false;
-      }
-      else
+      } else {
         // Otherwise, there was a store before this load, the load takes its value.
-        LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
+        // Note, if the load was marked as nonnull we don't want to lose that
+        // information when we erase it. So we preserve it with an assume.
+        Value *ReplVal = std::prev(I)->second->getOperand(0);
+        if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+            !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+          addAssumeNonNull(AC, LI);
+
+        LI->replaceAllUsesWith(ReplVal);
+      }
 
       if (AST && LI->getType()->isPointerTy())
         AST->deleteValue(LI);
@@ -553,7 +583,7 @@
   // If there is only a single store to this value, replace any loads of
   // it that are directly dominated by the definition with the value stored.
   if (Info.DefiningBlocks.size() == 1) {
-    if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
+    if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST, AC)) {
       // The alloca has been processed, move on.
       RemoveFromAllocasList(AllocaNum);
       ++NumSingleStore;
@@ -564,7 +594,7 @@
   // If the alloca is only read and written in one basic block, just perform a
   // linear sweep over the block to eliminate it.
   if (Info.OnlyUsedInOneBlock &&
-      promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
+      promoteSingleBlockAlloca(AI, Info, LBI, AST, DT, AC)) {
     // The alloca has been processed, move on.
     RemoveFromAllocasList(AllocaNum);
     continue;
@@ -940,6 +970,13 @@
 
       Value *V = IncomingVals[AI->second];
 
+      // If the load was marked as nonnull we don't want to lose
+      // that information when we erase this Load. So we preserve
+      // it with an assume.
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(V, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
       // Anything using the load now uses the current value.
       LI->replaceAllUsesWith(V);
       if (AST && LI->getType()->isPointerTy())
--- /dev/null
+++ b/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; This tests that mem2reg preserves the !nonnull metadata on loads
+; from allocas that get optimized out.
+
+; Check the case where the alloca in question has a single store.
+define float* @single_store(float** %arg) {
+; CHECK-LABEL: define float* @single_store
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float **%buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store but still within one basic block.
+define float* @single_block(float** %arg) {
+; CHECK-LABEL: define float* @single_block
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float **%buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store and also reads ands writes in multiple blocks.
+define float* @multi_block(float** %arg) {
+; CHECK-LABEL: define float* @multi_block
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: br label %next
+; CHECK-LABEL: next:
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  br label %next
+next:
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check that we don't add an assume if it's not
+; necessary i.e. the value is already implied to be nonnull
+define float* @no_assume(float** %arg) {
+; CHECK-LABEL: define float* @no_assume
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: %cn = icmp ne float* %arg.load, null
+; CHECK: br i1 %cn, label %next, label %fin
+; CHECK-LABEL: next:
+; CHECK-NOT: call void @llvm.assume
+; CHECK: ret float* %arg.load
+; CHECK-LABEL: fin:
+; CHECK: ret float* null
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  %cn = icmp ne float* %arg.load, null
+  br i1 %cn, label %next, label %fin
+next:
+; At this point the above nonnull check ensures that
+; the value %arg.load is nonnull in this block and thus
+; we need not add the assume.
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+fin:
+  ret float* null
+}
+
+!0 = !{}
--- /dev/null
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure that SROA doesn't lose nonnull metadata
+; on loads from allocas that get optimized out.
+
+; CHECK-LABEL: define float* @yummy_nonnull
+; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* {{.*}}[[RETURN]]
+
+define float* @yummy_nonnull(float** %arg) {
+entry-block:
+  %buf = alloca float*
+
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+!0 = !{}
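In short, the rewrite addAssumeNonNull performs: when mem2reg erases a load that carried !nonnull, the fact is re-materialized as an assume on the replacement value. A minimal before/after sketch modeled on the @single_store test above (the %nn name is illustrative, not from the patch):

; before -mem2reg:
;   %buf.load = load float*, float** %buf, !nonnull !0
;   ret float* %buf.load
; after -mem2reg: the load is gone, the nonnull fact survives as an assume
;   %nn = icmp ne float* %arg.load, null
;   call void @llvm.assume(i1 %nn)
;   ret float* %arg.load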
debian/patches/rL305193-backport.diff: 442 additions (new file, vendored)
@@ -0,0 +1,442 @@
commit 2b622a393ce80c6157d32a50bf67d6b830729469
Author: Than McIntosh <thanm@google.com>
Date:   Mon Jun 12 14:56:02 2017 +0000

    StackColoring: smarter check for slot overlap

    Summary:
    The old check for slot overlap treated 2 slots `S` and `T` as
    overlapping if there existed a CFG node in which both of the slots could
    possibly be active. That is overly conservative and caused stack blowups
    in Rust programs. Instead, check whether there is a single CFG node in
    which both of the slots are possibly active *together*.

    Fixes PR32488.

    Patch by Ariel Ben-Yehuda <ariel.byd@gmail.com>

    Reviewers: thanm, nagisa, llvm-commits, efriedma, rnk

    Reviewed By: thanm

    Subscribers: dotdash

    Differential Revision: https://reviews.llvm.org/D31583

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305193 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -87,10 +87,134 @@
 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
 STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
 
+//===----------------------------------------------------------------------===//
+// StackColoring Pass
+//===----------------------------------------------------------------------===//
+//
+// Stack Coloring reduces stack usage by merging stack slots when they
+// can't be used together. For example, consider the following C program:
+//
+//     void bar(char *, int);
+//     void foo(bool var) {
+//         A: {
+//             char z[4096];
+//             bar(z, 0);
+//         }
+//
+//         char *p;
+//         char x[4096];
+//         char y[4096];
+//         if (var) {
+//             p = x;
+//         } else {
+//             bar(y, 1);
+//             p = y + 1024;
+//         }
+//     B:
+//         bar(p, 2);
+//     }
+//
+// Naively-compiled, this program would use 12k of stack space. However, the
+// stack slot corresponding to `z` is always destroyed before either of the
+// stack slots for `x` or `y` are used, and then `x` is only used if `var`
+// is true, while `y` is only used if `var` is false. So in no time are 2
+// of the stack slots used together, and therefore we can merge them,
+// compiling the function using only a single 4k alloca:
+//
+//     void foo(bool var) { // equivalent
+//         char x[4096];
+//         char *p;
+//         bar(x, 0);
+//         if (var) {
+//             p = x;
+//         } else {
+//             bar(x, 1);
+//             p = x + 1024;
+//         }
+//         bar(p, 2);
+//     }
+//
+// This is an important optimization if we want stack space to be under
+// control in large functions, both open-coded ones and ones created by
+// inlining.
 //
 // Implementation Notes:
 // ---------------------
 //
+// An important part of the above reasoning is that `z` can't be accessed
+// while the latter 2 calls to `bar` are running. This is justified because
+// `z`'s lifetime is over after we exit from block `A:`, so any further
+// accesses to it would be UB. The way we represent this information
+// in LLVM is by having frontends delimit blocks with `lifetime.start`
+// and `lifetime.end` intrinsics.
+//
+// The effect of these intrinsics seems to be as follows (maybe I should
+// specify this in the reference?):
+//
+//   L1) at start, each stack-slot is marked as *out-of-scope*, unless no
+//       lifetime intrinsic refers to that stack slot, in which case
+//       it is marked as *in-scope*.
+//   L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
+//       the stack slot is overwritten with `undef`.
+//   L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
+//   L4) on function exit, all stack slots are marked as *out-of-scope*.
+//   L5) `lifetime.end` is a no-op when called on a slot that is already
+//       *out-of-scope*.
+//   L6) memory accesses to *out-of-scope* stack slots are UB.
+//   L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
+//       are invalidated, unless the slot is "degenerate". This is used to
+//       justify not marking slots as in-use until the pointer to them is
+//       used, but feels a bit hacky in the presence of things like LICM. See
+//       the "Degenerate Slots" section for more details.
+//
+// Now, let's ground stack coloring on these rules. We'll define a slot
+// as *in-use* at a (dynamic) point in execution if it either can be
+// written to at that point, or if it has a live and non-undef content
+// at that point.
+//
+// Obviously, slots that are never *in-use* together can be merged, and
+// in our example `foo`, the slots for `x`, `y` and `z` are never
+// in-use together (of course, sometimes slots that *are* in-use together
+// might still be mergable, but we don't care about that here).
+//
+// In this implementation, we successively merge pairs of slots that are
+// not *in-use* together. We could be smarter - for example, we could merge
+// a single large slot with 2 small slots, or we could construct the
+// interference graph and run a "smart" graph coloring algorithm, but with
+// that aside, how do we find out whether a pair of slots might be *in-use*
+// together?
+//
+// From our rules, we see that *out-of-scope* slots are never *in-use*,
+// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
+// until their address is taken. Therefore, we can approximate slot activity
+// using dataflow.
+//
+// A subtle point: naively, we might try to figure out which pairs of
+// stack-slots interfere by propagating `S in-use` through the CFG for every
+// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
+// which they are both *in-use*.
+//
+// That is sound, but overly conservative in some cases: in our (artificial)
+// example `foo`, either `x` or `y` might be in use at the label `B:`, but
+// as `x` is only in use if we came in from the `var` edge and `y` only
+// if we came from the `!var` edge, they still can't be in use together.
+// See PR32488 for an important real-life case.
+//
+// If we wanted to find all points of interference precisely, we could
+// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
+// would be precise, but requires propagating `O(n^2)` dataflow facts.
+//
+// However, we aren't interested in the *set* of points of interference
+// between 2 stack slots, only *whether* there *is* such a point. So we
+// can rely on a little trick: for `S` and `T` to be in-use together,
+// one of them needs to become in-use while the other is in-use (or
+// they might both become in use simultaneously). We can check this
+// by also keeping track of the points at which a stack slot might *start*
+// being in-use.
+//
+// Exact first use:
+// ----------------
+//
 // Consider the following motivating example:
 //
 //     int foo() {
@@ -159,6 +283,9 @@
 // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
 // byte stack (better).
 //
+// Degenerate Slots:
+// -----------------
+//
 // Relying entirely on first-use of stack slots is problematic,
 // however, due to the fact that optimizations can sometimes migrate
 // uses of a variable outside of its lifetime start/end region. Here
@@ -238,10 +365,6 @@
 // for "b" then it will appear that 'b' has a degenerate lifetime.
 //
 
-//===----------------------------------------------------------------------===//
-// StackColoring Pass
-//===----------------------------------------------------------------------===//
-
 namespace {
 /// StackColoring - A machine pass for merging disjoint stack allocations,
 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
@@ -272,8 +395,11 @@
   /// Maps basic blocks to a serial number.
   SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
 
-  /// Maps liveness intervals for each slot.
+  /// Maps slots to their use interval. Outside of this interval, slots
+  /// values are either dead or `undef` and they will not be written to.
   SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
+  /// Maps slots to the points where they can become in-use.
+  SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
   /// VNInfo is used for the construction of LiveIntervals.
   VNInfo::Allocator VNInfoAllocator;
   /// SlotIndex analysis object.
@@ -676,15 +802,22 @@
 
 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
   SmallVector<SlotIndex, 16> Starts;
-  SmallVector<SlotIndex, 16> Finishes;
+  SmallVector<bool, 16> DefinitelyInUse;
 
   // For each block, find which slots are active within this block
   // and update the live intervals.
   for (const MachineBasicBlock &MBB : *MF) {
     Starts.clear();
     Starts.resize(NumSlots);
-    Finishes.clear();
-    Finishes.resize(NumSlots);
+    DefinitelyInUse.clear();
+    DefinitelyInUse.resize(NumSlots);
+
+    // Start the interval of the slots that we previously found to be 'in-use'.
+    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
+    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
+         pos = MBBLiveness.LiveIn.find_next(pos)) {
+      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
+    }
 
     // Create the interval for the basic blocks containing lifetime begin/end.
     for (const MachineInstr &MI : MBB) {
@@ -696,68 +829,35 @@
       SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
       for (auto Slot : slots) {
         if (IsStart) {
-          if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+          // If a slot is already definitely in use, we don't have to emit
+          // a new start marker because there is already a pre-existing
+          // one.
+          if (!DefinitelyInUse[Slot]) {
+            LiveStarts[Slot].push_back(ThisIndex);
+            DefinitelyInUse[Slot] = true;
+          }
+          if (!Starts[Slot].isValid())
             Starts[Slot] = ThisIndex;
         } else {
-          if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
-            Finishes[Slot] = ThisIndex;
+          if (Starts[Slot].isValid()) {
+            VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
+            Intervals[Slot]->addSegment(
+                LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
+            Starts[Slot] = SlotIndex(); // Invalidate the start index
+            DefinitelyInUse[Slot] = false;
+          }
         }
       }
     }
 
-    // Create the interval of the blocks that we previously found to be 'alive'.
-    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
-    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
-         pos = MBBLiveness.LiveIn.find_next(pos)) {
-      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
-    }
-    for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
-         pos = MBBLiveness.LiveOut.find_next(pos)) {
-      Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
-    }
-
+    // Finish up started segments
    for (unsigned i = 0; i < NumSlots; ++i) {
-      //
-      // When LifetimeStartOnFirstUse is turned on, data flow analysis
-      // is forward (from starts to ends), not bidirectional. A
-      // consequence of this is that we can wind up in situations
-      // where Starts[i] is invalid but Finishes[i] is valid and vice
-      // versa. Example:
-      //
-      //     LIFETIME_START x
-      //     if (...) {
-      //       <use of x>
-      //       throw ...;
-      //     }
-      //     LIFETIME_END x
-      //     return 2;
-      //
-      //
-      // Here the slot for "x" will not be live into the block
-      // containing the "return 2" (since lifetimes start with first
-      // use, not at the dominating LIFETIME_START marker).
-      //
-      if (Starts[i].isValid() && !Finishes[i].isValid()) {
-        Finishes[i] = Indexes->getMBBEndIdx(&MBB);
-      }
       if (!Starts[i].isValid())
         continue;
 
-      assert(Starts[i] && Finishes[i] && "Invalid interval");
-      VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
-      SlotIndex S = Starts[i];
-      SlotIndex F = Finishes[i];
-      if (S < F) {
-        // We have a single consecutive region.
-        Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
-      } else {
-        // We have two non-consecutive regions. This happens when
-        // LIFETIME_START appears after the LIFETIME_END marker.
-        SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
-        SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
-        Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
-        Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
-      }
+      SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
+      VNInfo *VNI = Intervals[i]->getValNumInfo(0);
+      Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
     }
   }
 }
@@ -987,6 +1087,7 @@
   BasicBlockNumbering.clear();
   Markers.clear();
   Intervals.clear();
+  LiveStarts.clear();
   VNInfoAllocator.Reset();
 
   unsigned NumSlots = MFI->getObjectIndexEnd();
@@ -998,6 +1099,7 @@
   SmallVector<int, 8> SortedSlots;
   SortedSlots.reserve(NumSlots);
   Intervals.reserve(NumSlots);
+  LiveStarts.resize(NumSlots);
 
   unsigned NumMarkers = collectMarkers(NumSlots);
 
@@ -1069,6 +1171,9 @@
     return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
   });
 
+  for (auto &s : LiveStarts)
+    std::sort(s.begin(), s.end());
+
   bool Changed = true;
   while (Changed) {
     Changed = false;
@@ -1084,12 +1189,22 @@
         int SecondSlot = SortedSlots[J];
         LiveInterval *First = &*Intervals[FirstSlot];
         LiveInterval *Second = &*Intervals[SecondSlot];
+        auto &FirstS = LiveStarts[FirstSlot];
+        auto &SecondS = LiveStarts[SecondSlot];
         assert (!First->empty() && !Second->empty() && "Found an empty range");
 
-        // Merge disjoint slots.
-        if (!First->overlaps(*Second)) {
+        // Merge disjoint slots. This is a little bit tricky - see the
+        // Implementation Notes section for an explanation.
+        if (!First->isLiveAtIndexes(SecondS) &&
+            !Second->isLiveAtIndexes(FirstS)) {
           Changed = true;
           First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
+
+          int OldSize = FirstS.size();
+          FirstS.append(SecondS.begin(), SecondS.end());
+          auto Mid = FirstS.begin() + OldSize;
+          std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
+
           SlotRemap[SecondSlot] = FirstSlot;
           SortedSlots[J] = -1;
           DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
--- a/test/CodeGen/X86/StackColoring.ll
+++ b/test/CodeGen/X86/StackColoring.ll
@@ -582,12 +582,76 @@
   ret i32 %x.addr.0
 }
 
+;CHECK-LABEL: multi_segment:
+;YESCOLOR: subq $256, %rsp
+;NOFIRSTUSE: subq $256, %rsp
+;NOCOLOR: subq $512, %rsp
+define i1 @multi_segment(i1, i1)
+{
+entry-block:
+  %foo = alloca [32 x i64]
+  %bar = alloca [32 x i64]
+  %foo_i8 = bitcast [32 x i64]* %foo to i8*
+  %bar_i8 = bitcast [32 x i64]* %bar to i8*
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
+  call void @baz([32 x i64]* %foo, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  ret i1 true
+}
+
+;CHECK-LABEL: pr32488:
+;YESCOLOR: subq $256, %rsp
+;NOFIRSTUSE: subq $256, %rsp
+;NOCOLOR: subq $512, %rsp
+define i1 @pr32488(i1, i1)
+{
+entry-block:
+  %foo = alloca [32 x i64]
+  %bar = alloca [32 x i64]
+  %foo_i8 = bitcast [32 x i64]* %foo to i8*
+  %bar_i8 = bitcast [32 x i64]* %bar to i8*
+  br i1 %0, label %if_false, label %if_true
+if_false:
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 0)
+  br i1 %1, label %if_false.1, label %onerr
+if_false.1:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  br label %merge
+if_true:
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
+  call void @baz([32 x i64]* %foo, i32 1)
+  br i1 %1, label %if_true.1, label %onerr
+if_true.1:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  br label %merge
+merge:
+  ret i1 false
+onerr:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  call void @destructor()
+  ret i1 true
+}
+
+%Data = type { [32 x i64] }
+
+declare void @destructor()
+
 declare void @inita(i32*)
 
 declare void @initb(i32*,i32*,i32*)
 
 declare void @bar([100 x i32]* , [100 x i32]*) nounwind
 
+declare void @baz([32 x i64]*, i32)
+
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
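The lifetime-marker semantics that the pass's dataflow consumes (rules L2/L3 in the comment block above) can be seen in a self-contained fragment. This is a hedged sketch mirroring the intrinsic usage of the multi_segment test; @use and @slot_scope are illustrative names standing in for any call that takes the slot's address:

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
declare void @use(i8*)

define void @slot_scope() {
entry:
  %buf = alloca [4096 x i8]
  %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %buf, i64 0, i64 0
  ; slot becomes *in-scope* (rule L2); it can only become in-use from here on
  call void @llvm.lifetime.start.p0i8(i64 4096, i8* %p)
  call void @use(i8* %p)
  ; slot is *out-of-scope* again (rule L3); any later access would be UB
  call void @llvm.lifetime.end.p0i8(i64 4096, i8* %p)
  ret void
}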
debian/patches/rL306267.diff: 187 additions (new file, vendored)
@@ -0,0 +1,187 @@
commit 5a057dc8edbb63887f8c611dd8ddf1b76997f07c
Author: Chandler Carruth <chandlerc@gmail.com>
Date:   Mon Jun 26 03:31:31 2017 +0000

    [InstCombine] Factor the logic for propagating !nonnull and !range
    metadata out of InstCombine and into helpers.

    NFC, this just exposes the logic used by InstCombine when propagating
    metadata from one load instruction to another. The plan is to use this
    in SROA to address PR32902.

    If anyone has better ideas about how to factor this or name variables,
    I'm all ears, but this seemed like a pretty good start and lets us make
    progress on the PR.

    This is based on a patch by Ariel Ben-Yehuda (D34285).

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306267 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -366,6 +366,19 @@
 /// during lowering by the GC infrastructure.
 bool callsGCLeafFunction(ImmutableCallSite CS);
 
+/// Copy a nonnull metadata node to a new load instruction.
+///
+/// This handles mapping it to range metadata if the new load is an integer
+/// load instead of a pointer load.
+void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI);
+
+/// Copy a range metadata node to a new load instruction.
+///
+/// This handles mapping it to nonnull metadata if the new load is a pointer
+/// load instead of an integer load and the range doesn't cover null.
+void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N,
+                       LoadInst &NewLI);
+
 //===----------------------------------------------------------------------===//
 //  Intrinsic pattern matching
 //
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -471,21 +471,7 @@
       break;
 
     case LLVMContext::MD_nonnull:
-      // This only directly applies if the new type is also a pointer.
-      if (NewTy->isPointerTy()) {
-        NewLoad->setMetadata(ID, N);
-        break;
-      }
-      // If it's integral now, translate it to !range metadata.
-      if (NewTy->isIntegerTy()) {
-        auto *ITy = cast<IntegerType>(NewTy);
-        auto *NullInt = ConstantExpr::getPtrToInt(
-            ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
-        auto *NonNullInt =
-            ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
-        NewLoad->setMetadata(LLVMContext::MD_range,
-                             MDB.createRange(NonNullInt, NullInt));
-      }
+      copyNonnullMetadata(LI, N, *NewLoad);
       break;
     case LLVMContext::MD_align:
     case LLVMContext::MD_dereferenceable:
@@ -495,17 +481,7 @@
       NewLoad->setMetadata(ID, N);
       break;
     case LLVMContext::MD_range:
-      // FIXME: It would be nice to propagate this in some way, but the type
-      // conversions make it hard.
-
-      // If it's a pointer now and the range does not contain 0, make it !nonnull.
-      if (NewTy->isPointerTy()) {
-        unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy);
-        if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
-          MDNode *NN = MDNode::get(LI.getContext(), None);
-          NewLoad->setMetadata(LLVMContext::MD_nonnull, NN);
-        }
-      }
+      copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
       break;
     }
   }
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DataLayout.h"
@@ -1069,7 +1070,7 @@
 }
 
 /// See if there is a dbg.value intrinsic for DIVar for the PHI node.
-static bool PhiHasDebugValue(DILocalVariable *DIVar, 
+static bool PhiHasDebugValue(DILocalVariable *DIVar,
                              DIExpression *DIExpr,
                              PHINode *APN) {
   // Since we can't guarantee that the original dbg.declare instrinsic
@@ -1152,7 +1153,7 @@
     DbgValue->insertAfter(LI);
 }
 
-/// Inserts a llvm.dbg.value intrinsic after a phi 
+/// Inserts a llvm.dbg.value intrinsic after a phi
 /// that has an associated llvm.dbg.decl intrinsic.
 void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            PHINode *APN, DIBuilder &Builder) {
@@ -1723,12 +1724,12 @@
     // Preserve !invariant.group in K.
     break;
   case LLVMContext::MD_align:
-    K->setMetadata(Kind, 
+    K->setMetadata(Kind,
       MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
     break;
   case LLVMContext::MD_dereferenceable:
   case LLVMContext::MD_dereferenceable_or_null:
-    K->setMetadata(Kind, 
+    K->setMetadata(Kind,
       MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
     break;
   }
@@ -1812,6 +1813,49 @@
   return false;
 }
 
+void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
+                               LoadInst &NewLI) {
+  auto *NewTy = NewLI.getType();
+
+  // This only directly applies if the new type is also a pointer.
+  if (NewTy->isPointerTy()) {
+    NewLI.setMetadata(LLVMContext::MD_nonnull, N);
+    return;
+  }
+
+  // The only other translation we can do is to integral loads with !range
+  // metadata.
+  if (!NewTy->isIntegerTy())
+    return;
+
+  MDBuilder MDB(NewLI.getContext());
+  const Value *Ptr = OldLI.getPointerOperand();
+  auto *ITy = cast<IntegerType>(NewTy);
+  auto *NullInt = ConstantExpr::getPtrToInt(
+      ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+  auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+  NewLI.setMetadata(LLVMContext::MD_range,
+                    MDB.createRange(NonNullInt, NullInt));
+}
+
+void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
+                             MDNode *N, LoadInst &NewLI) {
+  auto *NewTy = NewLI.getType();
+
+  // Give up unless it is converted to a pointer where there is a single very
+  // valuable mapping we can do reliably.
+  // FIXME: It would be nice to propagate this in more ways, but the type
+  // conversions make it hard.
+  if (!NewTy->isPointerTy())
+    return;
+
+  unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+  if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+    MDNode *NN = MDNode::get(OldLI.getContext(), None);
+    NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+  }
+}
+
 namespace {
 /// A potential constituent of a bitreverse or bswap expression. See
 /// collectBitParts for a fuller explanation.
@@ -1933,7 +1977,7 @@
   unsigned NumMaskedBits = AndMask.countPopulation();
   if (!MatchBitReversals && NumMaskedBits % 8 != 0)
     return Result;
-  
+
   auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
                               MatchBitReversals, BPS);
   if (!Res)
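Stated in IR terms, a hedged sketch of the two mappings these helpers encode (the %p, %v, and %src names are illustrative): for a pointer-typed new load, !nonnull copies across unchanged; for an integer-typed new load, copyNonnullMetadata models the null pointer as integer 0 and attaches the wrapping range [1, 0), i.e. "any value except 0".

;   %p = load i8*, i8** %src, !nonnull !0       ; original pointer load
; rewritten as an integer load becomes:
;   %v = load i64, i64* %src.cast, !range !1
;   !1 = !{i64 1, i64 0}                        ; createRange(NullInt + 1, NullInt)
; copyRangeMetadata goes the other way: a !range excluding 0 becomes !nonnull
; when the load is rewritten back to a pointer type.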
debian/patches/rL306353.diff: 47 additions (new file, vendored)
@@ -0,0 +1,47 @@
commit 477bd758b48cb96477d3dd4cf3b36bf2706e8c1d
Author: Chandler Carruth <chandlerc@gmail.com>
Date:   Tue Jun 27 02:23:15 2017 +0000

    [SROA] Clean up a test case a bit prior to adding more testing for
    nonnull as part of fixing PR32902.

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306353 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,22 +3,20 @@
 ; Make sure that SROA doesn't lose nonnull metadata
 ; on loads from allocas that get optimized out.
 
-; CHECK-LABEL: define float* @yummy_nonnull
-; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
-; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
-; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
-; CHECK: ret float* {{.*}}[[RETURN]]
-
 define float* @yummy_nonnull(float** %arg) {
-entry-block:
-  %buf = alloca float*
-
-  %_arg_i8 = bitcast float** %arg to i8*
-  %_buf_i8 = bitcast float** %buf to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
-
-  %ret = load float*, float** %buf, align 8, !nonnull !0
-  ret float* %ret
+; CHECK-LABEL: define float* @yummy_nonnull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
+; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
+; CHECK-NEXT:    call void @llvm.assume(i1 %[[ASSUME]])
+; CHECK-NEXT:    ret float* %[[RETURN]]
+entry:
+  %buf = alloca float*
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
debian/patches/rL306358.diff: 52 additions (new file, vendored)
@@ -0,0 +1,52 @@
commit 156cc49e505986a1659adaa3a0b5a070372377c8
Author: Chandler Carruth <chandlerc@gmail.com>
Date:   Tue Jun 27 03:08:45 2017 +0000

    [SROA] Further test cleanup and add a test for the actual propagation of
    the nonnull attribute distinct from rewriting it into an assume.

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306358 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,8 +3,31 @@
 ; Make sure that SROA doesn't lose nonnull metadata
 ; on loads from allocas that get optimized out.
 
-define float* @yummy_nonnull(float** %arg) {
-; CHECK-LABEL: define float* @yummy_nonnull(
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+; Check that we do basic propagation of nonnull when rewriting.
+define i8* @propagate_nonnull(i32* %v) {
+; CHECK-LABEL: define i8* @propagate_nonnull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i8*
+; CHECK-NEXT:    %[[V_CAST:.*]] = bitcast i32* %v to i8*
+; CHECK-NEXT:    store i8* %[[V_CAST]], i8** %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0
+; CHECK-NEXT:    ret i8* %[[LOAD]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i32**
+  %a.gep1.cast = bitcast i8** %a.gep1 to i32**
+  store i32* %v, i32** %a.gep1.cast
+  store i32* null, i32** %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+define float* @turn_nonnull_into_assume(float** %arg) {
+; CHECK-LABEL: define float* @turn_nonnull_into_assume(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
 ; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
@@ -19,6 +42,4 @@
   ret float* %ret
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-
 !0 = !{}
debian/patches/rL306379.diff: 147 additions (new file, vendored)
@@ -0,0 +1,147 @@
commit 7df06519765b14e1b08d7034c82c45a0a653eb25
Author: Chandler Carruth <chandlerc@gmail.com>
Date:   Tue Jun 27 08:32:03 2017 +0000

    [SROA] Fix PR32902 by more carefully propagating !nonnull metadata.

    This is based heavily on the work done ni D34285. I mostly wanted to do
    test cleanup for the author to save them some time, but I had a really
    hard time understanding why it was so hard to write better test cases
    for these issues.

    The problem is that because SROA does a second rewrite of the loads and
    because we *don't* propagate !nonnull for non-pointer loads, we first
    introduced invalid !nonnull metadata and then stripped it back off just
    in time to avoid most ways of this PR manifesting. Moving to the more
    careful utility only fixes this by changing the predicate to look at the
    new load's type rather than the target type. However, that *does* fix
    the bug, and the utility is much nicer including adding range metadata
    to model the nonnull property after a conversion to an integer.

    However, we have bigger problems because we don't actually propagate
    *range* metadata, and the utility to do this extracted from instcombine
    isn't really in good shape to do this currently. It *only* handles the
    case of copying range metadata from an integer load to a pointer load.
    It doesn't even handle the trivial cases of propagating from one integer
    load to another when they are the same width! This utility will need to
    be beefed up prior to using in this location to get the metadata to
    fully survive.

    And even then, we need to go and teach things to turn the range metadata
    into an assume the way we do with nonnull so that when we *promote* an
    integer we don't lose the information.

    All of this will require a new test case that looks kind-of like
    `preserve-nonnull.ll` does here but focuses on range metadata. It will
    also likely require more testing because it needs to correctly handle
    changes to the integer width, especially as SROA actively tries to
    change the integer width!

    Last but not least, I'm a little worried about hooking the range
    metadata up here because the instcombine logic for converting from
    a range metadata *to* a nonnull metadata node seems broken in the face
    of non-zero address spaces where null is not mapped to the integer `0`.
    So that probably needs to get fixed with test cases both in SROA and in
    instcombine to cover it.

    But this *does* extract the core PR fix from D34285 of preventing the
    !nonnull metadata from being propagated in a broken state just long
    enough to feed into promotion and crash value tracking.

    On D34285 there is some discussion of zero-extend handling because it
    isn't necessary. First, the new load size covers all of the non-undef
    (ie, possibly initialized) bits. This may even extend past the original
    alloca if loading those bits could produce valid data. The only way its
    valid for us to zero-extend an integer load in SROA is if the original
    code had a zero extend or those bits were undef. And we get to assume
    things like undef *never* satifies nonnull, so non undef bits can
    participate here. No need to special case the zero-extend handling, it
    just falls out correctly.

    The original credit goes to Ariel Ben-Yehuda! I'm mostly landing this to
    save a few rounds of trivial edits fixing style issues and test case
    formulation.

    Differental Revision: D34285

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306379 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2388,9 +2388,20 @@
     if (LI.isVolatile())
       NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
 
+    // Any !nonnull metadata or !range metadata on the old load is also valid
+    // on the new load. This is even true in some cases even when the loads
+    // are different types, for example by mapping !nonnull metadata to
+    // !range metadata by modeling the null pointer constant converted to the
+    // integer type.
+    // FIXME: Add support for range metadata here. Currently the utilities
+    // for this don't propagate range metadata in trivial cases from one
+    // integer load to another, don't handle non-addrspace-0 null pointers
+    // correctly, and don't have any support for mapping ranges as the
+    // integer type becomes winder or narrower.
+    if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
+      copyNonnullMetadata(LI, N, *NewLI);
+
     // Try to preserve nonnull metadata
-    if (TargetTy->isPointerTy())
-      NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
     V = NewLI;
 
     // If this is an integer load past the end of the slice (which means the
--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -42,4 +42,51 @@
   ret float* %ret
 }
 
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load.
+; FIXME: While this doesn't do anythnig actively harmful today, it really
+; should propagate the !nonnull metadata to range metadata. The irony is, it
+; *does* initially, but then we lose that !range metadata before we finish
+; SROA.
+define i8* @propagate_nonnull_to_int() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i64
+; CHECK-NEXT:    store i64 42, i64* %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
+; CHECK-NEXT:    %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
+; CHECK-NEXT:    ret i8* %[[CAST]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load and immediately promote it to an SSA
+; register. This can fail in interesting ways due to the rewrite iteration of
+; SROA, resulting in PR32902.
+define i8* @propagate_nonnull_to_int_and_promote() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
+; CHECK-NEXT:    ret i8* %[[PROMOTED_VALUE]]
+entry:
+  %a = alloca [2 x i8*], align 8
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load i8*, i8** %a.gep1, align 8, !nonnull !0
+  ret i8* %load
+}
+
 !0 = !{}
debian/patches/series: 11 changes (vendored)
@@ -38,7 +38,6 @@ disable-llvm-symbolizer-test.diff
 clang-tidy-run-bin.diff
 #bug-30342.diff
 fix-scan-view-path.diff
-#0011-SimplifyCFG-Hoisting-invalidates-metadata.patch
 clang-fix-cmpxchg8-detection-on-i386.patch
 lldb-addversion-suffix-to-llvm-server-exec.patch
 lldb-missing-install.diff
@@ -49,3 +48,13 @@ add_symbols_versioning.patch
 ftfbs-gcc.diff
 pr81066.diff
 armhf-bitfield.diff
+# rust LLVM PR84, LLVM PR32488
+# This is actually Rust's backport of upstream RL305193 (which doesn't apply cleanly to LLVM 4)
+# https://github.com/rust-lang/llvm/commit/2b622a393ce
+rL305193-backport.diff
+# rust LLVM PR90, LLVM PR32902, PR31142
+rL298540.diff
+rL306267.diff
+rL306353.diff
+rL306358.diff
+rL306379.diff
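Net effect of the rL306267/rL306379 pair on the PR32902 pattern, sketched from the propagate_nonnull_to_int test above (the %load.int and %a.cast names are illustrative): checking the new load's type keeps SROA from stamping !nonnull onto an integer load, which is invalid IR that previously fed into promotion and crashed value tracking.

;   %load = load volatile i8*, i8** %a.gep1, !nonnull !0
; after the slice is rewritten as an integer, the metadata must not follow:
;   %load.int = load volatile i64, i64* %a.cast      ; no !nonnull here
;   %load = inttoptr i64 %load.int to i8*
; (per the FIXME in rL306379, a !range !{i64 1, i64 0} would ideally be attached)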