diff --git a/debian/changelog b/debian/changelog index c72bc370..0589323f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +llvm-toolchain-4.0 (1:4.0.1-7.1) UNRELEASED; urgency=medium + + * Non-maintainer upload. + * Backport some patches (originally from rust, and upstreamed) to fix two + failing tests in rustc. + + -- Ximin Luo Wed, 18 Oct 2017 15:28:20 +0200 + llvm-toolchain-4.0 (1:4.0.1-7) unstable; urgency=medium * Force the deactivation of ocaml until the transition is done diff --git a/debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch b/debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch deleted file mode 100644 index b184a402..00000000 --- a/debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch +++ /dev/null @@ -1,84 +0,0 @@ -From eee68eafa7e8e4ce996b49f5551636639a6c331a Mon Sep 17 00:00:00 2001 -From: David Majnemer -Date: Mon, 29 Aug 2016 17:14:08 +0000 -Subject: [PATCH 11/17] [SimplifyCFG] Hoisting invalidates metadata - -We forgot to remove optimization metadata when performing hosting during -FoldTwoEntryPHINode. - -This fixes PR29163. - -git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279980 91177308-0d34-0410-b5e6-96231b3b80d8 ---- - lib/Transforms/Utils/SimplifyCFG.cpp | 10 ++++++++-- - test/Transforms/SimplifyCFG/PR29163.ll | 31 +++++++++++++++++++++++++++++++ - 2 files changed, 39 insertions(+), 2 deletions(-) - create mode 100644 test/Transforms/SimplifyCFG/PR29163.ll - -diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp -index 0504646..c197317 100644 ---- a/lib/Transforms/Utils/SimplifyCFG.cpp -+++ b/lib/Transforms/Utils/SimplifyCFG.cpp -@@ -2024,14 +2024,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, - - // Move all 'aggressive' instructions, which are defined in the - // conditional parts of the if's up to the dominating block. 
-- if (IfBlock1) -+ if (IfBlock1) { -+ for (auto &I : *IfBlock1) -+ I.dropUnknownNonDebugMetadata(); - DomBlock->getInstList().splice(InsertPt->getIterator(), - IfBlock1->getInstList(), IfBlock1->begin(), - IfBlock1->getTerminator()->getIterator()); -- if (IfBlock2) -+ } -+ if (IfBlock2) { -+ for (auto &I : *IfBlock2) -+ I.dropUnknownNonDebugMetadata(); - DomBlock->getInstList().splice(InsertPt->getIterator(), - IfBlock2->getInstList(), IfBlock2->begin(), - IfBlock2->getTerminator()->getIterator()); -+ } - - while (PHINode *PN = dyn_cast(BB->begin())) { - // Change the PHI node into a select instruction. -diff --git a/test/Transforms/SimplifyCFG/PR29163.ll b/test/Transforms/SimplifyCFG/PR29163.ll -new file mode 100644 -index 0000000..65f9090 ---- /dev/null -+++ b/test/Transforms/SimplifyCFG/PR29163.ll -@@ -0,0 +1,31 @@ -+; RUN: opt -S -simplifycfg < %s | FileCheck %s -+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -+target triple = "x86_64-unknown-linux-gnu" -+ -+@GV = external constant i64* -+ -+define i64* @test1(i1 %cond, i8* %P) { -+entry: -+ br i1 %cond, label %if, label %then -+ -+then: -+ %bc = bitcast i8* %P to i64* -+ br label %join -+ -+if: -+ %load = load i64*, i64** @GV, align 8, !dereferenceable !0 -+ br label %join -+ -+join: -+ %phi = phi i64* [ %bc, %then ], [ %load, %if ] -+ ret i64* %phi -+} -+ -+; CHECK-LABEL: define i64* @test1( -+; CHECK: %[[bc:.*]] = bitcast i8* %P to i64* -+; CHECK: %[[load:.*]] = load i64*, i64** @GV, align 8{{$}} -+; CHECK: %[[phi:.*]] = select i1 %cond, i64* %[[load]], i64* %[[bc]] -+; CHECK: ret i64* %[[phi]] -+ -+ -+!0 = !{i64 8} --- -2.10.1 - diff --git a/debian/patches/rL298540.diff b/debian/patches/rL298540.diff new file mode 100644 index 00000000..23f328f6 --- /dev/null +++ b/debian/patches/rL298540.diff @@ -0,0 +1,292 @@ +commit 2b66aeed20b61d42cd2782b94d5419c6faca49a1 +Author: Luqman Aden +Date: Wed Mar 22 19:16:39 2017 +0000 + + Preserve nonnull metadata on Loads through SROA & mem2reg. 
+ + Summary: + https://llvm.org/bugs/show_bug.cgi?id=31142 : + + SROA was dropping the nonnull metadata on loads from allocas that got optimized out. This patch simply preserves nonnull metadata on loads through SROA and mem2reg. + + Reviewers: chandlerc, efriedma + + Reviewed By: efriedma + + Subscribers: hfinkel, spatel, efriedma, arielb1, davide, llvm-commits + + Differential Revision: https://reviews.llvm.org/D27114 + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298540 91177308-0d34-0410-b5e6-96231b3b80d8 + +--- a/lib/Transforms/Scalar/SROA.cpp ++++ b/lib/Transforms/Scalar/SROA.cpp +@@ -2387,6 +2387,10 @@ + LI.isVolatile(), LI.getName()); + if (LI.isVolatile()) + NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); ++ ++ // Try to preserve nonnull metadata ++ if (TargetTy->isPointerTy()) ++ NewLI->copyMetadata(LI, LLVMContext::MD_nonnull); + V = NewLI; + + // If this is an integer load past the end of the slice (which means the +--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp ++++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +@@ -15,7 +15,6 @@ + // + //===----------------------------------------------------------------------===// + +-#include "llvm/Transforms/Utils/PromoteMemToReg.h" + #include "llvm/ADT/ArrayRef.h" + #include "llvm/ADT/DenseMap.h" + #include "llvm/ADT/STLExtras.h" +@@ -23,6 +22,7 @@ + #include "llvm/ADT/SmallVector.h" + #include "llvm/ADT/Statistic.h" + #include "llvm/Analysis/AliasSetTracker.h" ++#include "llvm/Analysis/AssumptionCache.h" + #include "llvm/Analysis/InstructionSimplify.h" + #include "llvm/Analysis/IteratedDominanceFrontier.h" + #include "llvm/Analysis/ValueTracking.h" +@@ -38,6 +38,7 @@ + #include "llvm/IR/Metadata.h" + #include "llvm/IR/Module.h" + #include "llvm/Transforms/Utils/Local.h" ++#include "llvm/Transforms/Utils/PromoteMemToReg.h" + #include + using namespace llvm; + +@@ -301,6 +302,18 @@ + + } // end of anonymous namespace + ++/// Given a LoadInst LI this adds assume(LI != null) after 
it. ++static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) { ++ Function *AssumeIntrinsic = ++ Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume); ++ ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI, ++ Constant::getNullValue(LI->getType())); ++ LoadNotNull->insertAfter(LI); ++ CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull}); ++ CI->insertAfter(LoadNotNull); ++ AC->registerAssumption(CI); ++} ++ + static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { + // Knowing that this alloca is promotable, we know that it's safe to kill all + // instructions except for load and store. +@@ -334,9 +347,9 @@ + /// and thus must be phi-ed with undef. We fall back to the standard alloca + /// promotion algorithm in that case. + static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, +- LargeBlockInfo &LBI, +- DominatorTree &DT, +- AliasSetTracker *AST) { ++ LargeBlockInfo &LBI, DominatorTree &DT, ++ AliasSetTracker *AST, ++ AssumptionCache *AC) { + StoreInst *OnlyStore = Info.OnlyStore; + bool StoringGlobalVal = !isa(OnlyStore->getOperand(0)); + BasicBlock *StoreBB = OnlyStore->getParent(); +@@ -387,6 +400,14 @@ + // code. + if (ReplVal == LI) + ReplVal = UndefValue::get(LI->getType()); ++ ++ // If the load was marked as nonnull we don't want to lose ++ // that information when we erase this Load. So we preserve ++ // it with an assume. ++ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && ++ !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) ++ addAssumeNonNull(AC, LI); ++ + LI->replaceAllUsesWith(ReplVal); + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); +@@ -435,7 +456,9 @@ + /// } + static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, + LargeBlockInfo &LBI, +- AliasSetTracker *AST) { ++ AliasSetTracker *AST, ++ DominatorTree &DT, ++ AssumptionCache *AC) { + // The trickiest case to handle is when we have large blocks. 
Because of this, + // this code is optimized assuming that large blocks happen. This does not + // significantly pessimize the small block case. This uses LargeBlockInfo to +@@ -476,10 +499,17 @@ + // There is no store before this load, bail out (load may be affected + // by the following stores - see main comment). + return false; +- } +- else ++ } else { + // Otherwise, there was a store before this load, the load takes its value. +- LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0)); ++ // Note, if the load was marked as nonnull we don't want to lose that ++ // information when we erase it. So we preserve it with an assume. ++ Value *ReplVal = std::prev(I)->second->getOperand(0); ++ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && ++ !llvm::isKnownNonNullAt(ReplVal, LI, &DT)) ++ addAssumeNonNull(AC, LI); ++ ++ LI->replaceAllUsesWith(ReplVal); ++ } + + if (AST && LI->getType()->isPointerTy()) + AST->deleteValue(LI); +@@ -553,7 +583,7 @@ + // If there is only a single store to this value, replace any loads of + // it that are directly dominated by the definition with the value stored. + if (Info.DefiningBlocks.size() == 1) { +- if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) { ++ if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST, AC)) { + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + ++NumSingleStore; +@@ -564,7 +594,7 @@ + // If the alloca is only read and written in one basic block, just perform a + // linear sweep over the block to eliminate it. + if (Info.OnlyUsedInOneBlock && +- promoteSingleBlockAlloca(AI, Info, LBI, AST)) { ++ promoteSingleBlockAlloca(AI, Info, LBI, AST, DT, AC)) { + // The alloca has been processed, move on. + RemoveFromAllocasList(AllocaNum); + continue; +@@ -940,6 +970,13 @@ + + Value *V = IncomingVals[AI->second]; + ++ // If the load was marked as nonnull we don't want to lose ++ // that information when we erase this Load. So we preserve ++ // it with an assume. 
++ if (AC && LI->getMetadata(LLVMContext::MD_nonnull) && ++ !llvm::isKnownNonNullAt(V, LI, &DT)) ++ addAssumeNonNull(AC, LI); ++ + // Anything using the load now uses the current value. + LI->replaceAllUsesWith(V); + if (AST && LI->getType()->isPointerTy()) +--- /dev/null ++++ b/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll +@@ -0,0 +1,89 @@ ++; RUN: opt < %s -mem2reg -S | FileCheck %s ++ ++; This tests that mem2reg preserves the !nonnull metadata on loads ++; from allocas that get optimized out. ++ ++; Check the case where the alloca in question has a single store. ++define float* @single_store(float** %arg) { ++; CHECK-LABEL: define float* @single_store ++; CHECK: %arg.load = load float*, float** %arg, align 8 ++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null ++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]]) ++; CHECK: ret float* %arg.load ++entry: ++ %buf = alloca float* ++ %arg.load = load float*, float** %arg, align 8 ++ store float* %arg.load, float** %buf, align 8 ++ %buf.load = load float*, float **%buf, !nonnull !0 ++ ret float* %buf.load ++} ++ ++; Check the case where the alloca in question has more than one ++; store but still within one basic block. ++define float* @single_block(float** %arg) { ++; CHECK-LABEL: define float* @single_block ++; CHECK: %arg.load = load float*, float** %arg, align 8 ++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null ++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]]) ++; CHECK: ret float* %arg.load ++entry: ++ %buf = alloca float* ++ %arg.load = load float*, float** %arg, align 8 ++ store float* null, float** %buf, align 8 ++ store float* %arg.load, float** %buf, align 8 ++ %buf.load = load float*, float **%buf, !nonnull !0 ++ ret float* %buf.load ++} ++ ++; Check the case where the alloca in question has more than one ++; store and also reads ands writes in multiple blocks. 
++define float* @multi_block(float** %arg) { ++; CHECK-LABEL: define float* @multi_block ++; CHECK-LABEL: entry: ++; CHECK: %arg.load = load float*, float** %arg, align 8 ++; CHECK: br label %next ++; CHECK-LABEL: next: ++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null ++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]]) ++; CHECK: ret float* %arg.load ++entry: ++ %buf = alloca float* ++ %arg.load = load float*, float** %arg, align 8 ++ store float* null, float** %buf, align 8 ++ br label %next ++next: ++ store float* %arg.load, float** %buf, align 8 ++ %buf.load = load float*, float** %buf, !nonnull !0 ++ ret float* %buf.load ++} ++ ++; Check that we don't add an assume if it's not ++; necessary i.e. the value is already implied to be nonnull ++define float* @no_assume(float** %arg) { ++; CHECK-LABEL: define float* @no_assume ++; CHECK-LABEL: entry: ++; CHECK: %arg.load = load float*, float** %arg, align 8 ++; CHECK: %cn = icmp ne float* %arg.load, null ++; CHECK: br i1 %cn, label %next, label %fin ++; CHECK-LABEL: next: ++; CHECK-NOT: call void @llvm.assume ++; CHECK: ret float* %arg.load ++; CHECK-LABEL: fin: ++; CHECK: ret float* null ++entry: ++ %buf = alloca float* ++ %arg.load = load float*, float** %arg, align 8 ++ %cn = icmp ne float* %arg.load, null ++ br i1 %cn, label %next, label %fin ++next: ++; At this point the above nonnull check ensures that ++; the value %arg.load is nonnull in this block and thus ++; we need not add the assume. ++ store float* %arg.load, float** %buf, align 8 ++ %buf.load = load float*, float** %buf, !nonnull !0 ++ ret float* %buf.load ++fin: ++ ret float* null ++} ++ ++!0 = !{} +--- /dev/null ++++ b/test/Transforms/SROA/preserve-nonnull.ll +@@ -0,0 +1,26 @@ ++; RUN: opt < %s -sroa -S | FileCheck %s ++; ++; Make sure that SROA doesn't lose nonnull metadata ++; on loads from allocas that get optimized out. 
++ ++; CHECK-LABEL: define float* @yummy_nonnull ++; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8 ++; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null ++; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]]) ++; CHECK: ret float* {{.*}}[[RETURN]] ++ ++define float* @yummy_nonnull(float** %arg) { ++entry-block: ++ %buf = alloca float* ++ ++ %_arg_i8 = bitcast float** %arg to i8* ++ %_buf_i8 = bitcast float** %buf to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false) ++ ++ %ret = load float*, float** %buf, align 8, !nonnull !0 ++ ret float* %ret ++} ++ ++declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) ++ ++!0 = !{} diff --git a/debian/patches/rL305193-backport.diff b/debian/patches/rL305193-backport.diff new file mode 100644 index 00000000..0f6d9ff0 --- /dev/null +++ b/debian/patches/rL305193-backport.diff @@ -0,0 +1,442 @@ +commit 2b622a393ce80c6157d32a50bf67d6b830729469 +Author: Than McIntosh +Date: Mon Jun 12 14:56:02 2017 +0000 + + StackColoring: smarter check for slot overlap + + Summary: + The old check for slot overlap treated 2 slots `S` and `T` as + overlapping if there existed a CFG node in which both of the slots could + possibly be active. That is overly conservative and caused stack blowups + in Rust programs. Instead, check whether there is a single CFG node in + which both of the slots are possibly active *together*. + + Fixes PR32488. 
+ + Patch by Ariel Ben-Yehuda + + Reviewers: thanm, nagisa, llvm-commits, efriedma, rnk + + Reviewed By: thanm + + Subscribers: dotdash + + Differential Revision: https://reviews.llvm.org/D31583 + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305193 91177308-0d34-0410-b5e6-96231b3b80d8 + +--- a/lib/CodeGen/StackColoring.cpp ++++ b/lib/CodeGen/StackColoring.cpp +@@ -87,10 +87,134 @@ + STATISTIC(StackSlotMerged, "Number of stack slot merged."); + STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); + ++//===----------------------------------------------------------------------===// ++// StackColoring Pass ++//===----------------------------------------------------------------------===// ++// ++// Stack Coloring reduces stack usage by merging stack slots when they ++// can't be used together. For example, consider the following C program: ++// ++// void bar(char *, int); ++// void foo(bool var) { ++// A: { ++// char z[4096]; ++// bar(z, 0); ++// } ++// ++// char *p; ++// char x[4096]; ++// char y[4096]; ++// if (var) { ++// p = x; ++// } else { ++// bar(y, 1); ++// p = y + 1024; ++// } ++// B: ++// bar(p, 2); ++// } ++// ++// Naively-compiled, this program would use 12k of stack space. However, the ++// stack slot corresponding to `z` is always destroyed before either of the ++// stack slots for `x` or `y` are used, and then `x` is only used if `var` ++// is true, while `y` is only used if `var` is false. So in no time are 2 ++// of the stack slots used together, and therefore we can merge them, ++// compiling the function using only a single 4k alloca: ++// ++// void foo(bool var) { // equivalent ++// char x[4096]; ++// char *p; ++// bar(x, 0); ++// if (var) { ++// p = x; ++// } else { ++// bar(x, 1); ++// p = x + 1024; ++// } ++// bar(p, 2); ++// } ++// ++// This is an important optimization if we want stack space to be under ++// control in large functions, both open-coded ones and ones created by ++// inlining. 
+ // + // Implementation Notes: + // --------------------- + // ++// An important part of the above reasoning is that `z` can't be accessed ++// while the latter 2 calls to `bar` are running. This is justified because ++// `z`'s lifetime is over after we exit from block `A:`, so any further ++// accesses to it would be UB. The way we represent this information ++// in LLVM is by having frontends delimit blocks with `lifetime.start` ++// and `lifetime.end` intrinsics. ++// ++// The effect of these intrinsics seems to be as follows (maybe I should ++// specify this in the reference?): ++// ++// L1) at start, each stack-slot is marked as *out-of-scope*, unless no ++// lifetime intrinsic refers to that stack slot, in which case ++// it is marked as *in-scope*. ++// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and ++// the stack slot is overwritten with `undef`. ++// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*. ++// L4) on function exit, all stack slots are marked as *out-of-scope*. ++// L5) `lifetime.end` is a no-op when called on a slot that is already ++// *out-of-scope*. ++// L6) memory accesses to *out-of-scope* stack slots are UB. ++// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it ++// are invalidated, unless the slot is "degenerate". This is used to ++// justify not marking slots as in-use until the pointer to them is ++// used, but feels a bit hacky in the presence of things like LICM. See ++// the "Degenerate Slots" section for more details. ++// ++// Now, let's ground stack coloring on these rules. We'll define a slot ++// as *in-use* at a (dynamic) point in execution if it either can be ++// written to at that point, or if it has a live and non-undef content ++// at that point. 
++// ++// Obviously, slots that are never *in-use* together can be merged, and ++// in our example `foo`, the slots for `x`, `y` and `z` are never ++// in-use together (of course, sometimes slots that *are* in-use together ++// might still be mergable, but we don't care about that here). ++// ++// In this implementation, we successively merge pairs of slots that are ++// not *in-use* together. We could be smarter - for example, we could merge ++// a single large slot with 2 small slots, or we could construct the ++// interference graph and run a "smart" graph coloring algorithm, but with ++// that aside, how do we find out whether a pair of slots might be *in-use* ++// together? ++// ++// From our rules, we see that *out-of-scope* slots are never *in-use*, ++// and from (L7) we see that "non-degenerate" slots remain non-*in-use* ++// until their address is taken. Therefore, we can approximate slot activity ++// using dataflow. ++// ++// A subtle point: naively, we might try to figure out which pairs of ++// stack-slots interfere by propagating `S in-use` through the CFG for every ++// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in ++// which they are both *in-use*. ++// ++// That is sound, but overly conservative in some cases: in our (artificial) ++// example `foo`, either `x` or `y` might be in use at the label `B:`, but ++// as `x` is only in use if we came in from the `var` edge and `y` only ++// if we came from the `!var` edge, they still can't be in use together. ++// See PR32488 for an important real-life case. ++// ++// If we wanted to find all points of interference precisely, we could ++// propagate `S in-use` and `S&T in-use` predicates through the CFG. That ++// would be precise, but requires propagating `O(n^2)` dataflow facts. ++// ++// However, we aren't interested in the *set* of points of interference ++// between 2 stack slots, only *whether* there *is* such a point. 
So we ++// can rely on a little trick: for `S` and `T` to be in-use together, ++// one of them needs to become in-use while the other is in-use (or ++// they might both become in use simultaneously). We can check this ++// by also keeping track of the points at which a stack slot might *start* ++// being in-use. ++// ++// Exact first use: ++// ---------------- ++// + // Consider the following motivating example: + // + // int foo() { +@@ -159,6 +283,9 @@ + // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024 + // byte stack (better). + // ++// Degenerate Slots: ++// ----------------- ++// + // Relying entirely on first-use of stack slots is problematic, + // however, due to the fact that optimizations can sometimes migrate + // uses of a variable outside of its lifetime start/end region. Here +@@ -238,10 +365,6 @@ + // for "b" then it will appear that 'b' has a degenerate lifetime. + // + +-//===----------------------------------------------------------------------===// +-// StackColoring Pass +-//===----------------------------------------------------------------------===// +- + namespace { + /// StackColoring - A machine pass for merging disjoint stack allocations, + /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. +@@ -272,8 +395,11 @@ + /// Maps basic blocks to a serial number. + SmallVector BasicBlockNumbering; + +- /// Maps liveness intervals for each slot. ++ /// Maps slots to their use interval. Outside of this interval, slots ++ /// values are either dead or `undef` and they will not be written to. + SmallVector, 16> Intervals; ++ /// Maps slots to the points where they can become in-use. ++ SmallVector, 16> LiveStarts; + /// VNInfo is used for the construction of LiveIntervals. + VNInfo::Allocator VNInfoAllocator; + /// SlotIndex analysis object. 
+@@ -676,15 +802,22 @@ + + void StackColoring::calculateLiveIntervals(unsigned NumSlots) { + SmallVector Starts; +- SmallVector Finishes; ++ SmallVector DefinitelyInUse; + + // For each block, find which slots are active within this block + // and update the live intervals. + for (const MachineBasicBlock &MBB : *MF) { + Starts.clear(); + Starts.resize(NumSlots); +- Finishes.clear(); +- Finishes.resize(NumSlots); ++ DefinitelyInUse.clear(); ++ DefinitelyInUse.resize(NumSlots); ++ ++ // Start the interval of the slots that we previously found to be 'in-use'. ++ BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; ++ for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; ++ pos = MBBLiveness.LiveIn.find_next(pos)) { ++ Starts[pos] = Indexes->getMBBStartIdx(&MBB); ++ } + + // Create the interval for the basic blocks containing lifetime begin/end. + for (const MachineInstr &MI : MBB) { +@@ -696,68 +829,35 @@ + SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); + for (auto Slot : slots) { + if (IsStart) { +- if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) ++ // If a slot is already definitely in use, we don't have to emit ++ // a new start marker because there is already a pre-existing ++ // one. ++ if (!DefinitelyInUse[Slot]) { ++ LiveStarts[Slot].push_back(ThisIndex); ++ DefinitelyInUse[Slot] = true; ++ } ++ if (!Starts[Slot].isValid()) + Starts[Slot] = ThisIndex; + } else { +- if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) +- Finishes[Slot] = ThisIndex; ++ if (Starts[Slot].isValid()) { ++ VNInfo *VNI = Intervals[Slot]->getValNumInfo(0); ++ Intervals[Slot]->addSegment( ++ LiveInterval::Segment(Starts[Slot], ThisIndex, VNI)); ++ Starts[Slot] = SlotIndex(); // Invalidate the start index ++ DefinitelyInUse[Slot] = false; ++ } + } + } + } + +- // Create the interval of the blocks that we previously found to be 'alive'. 
+- BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; +- for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; +- pos = MBBLiveness.LiveIn.find_next(pos)) { +- Starts[pos] = Indexes->getMBBStartIdx(&MBB); +- } +- for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1; +- pos = MBBLiveness.LiveOut.find_next(pos)) { +- Finishes[pos] = Indexes->getMBBEndIdx(&MBB); +- } +- ++ // Finish up started segments + for (unsigned i = 0; i < NumSlots; ++i) { +- // +- // When LifetimeStartOnFirstUse is turned on, data flow analysis +- // is forward (from starts to ends), not bidirectional. A +- // consequence of this is that we can wind up in situations +- // where Starts[i] is invalid but Finishes[i] is valid and vice +- // versa. Example: +- // +- // LIFETIME_START x +- // if (...) { +- // +- // throw ...; +- // } +- // LIFETIME_END x +- // return 2; +- // +- // +- // Here the slot for "x" will not be live into the block +- // containing the "return 2" (since lifetimes start with first +- // use, not at the dominating LIFETIME_START marker). +- // +- if (Starts[i].isValid() && !Finishes[i].isValid()) { +- Finishes[i] = Indexes->getMBBEndIdx(&MBB); +- } + if (!Starts[i].isValid()) + continue; + +- assert(Starts[i] && Finishes[i] && "Invalid interval"); +- VNInfo *ValNum = Intervals[i]->getValNumInfo(0); +- SlotIndex S = Starts[i]; +- SlotIndex F = Finishes[i]; +- if (S < F) { +- // We have a single consecutive region. +- Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); +- } else { +- // We have two non-consecutive regions. This happens when +- // LIFETIME_START appears after the LIFETIME_END marker. 
+- SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB); +- SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB); +- Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); +- Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); +- } ++ SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB); ++ VNInfo *VNI = Intervals[i]->getValNumInfo(0); ++ Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI)); + } + } + } +@@ -987,6 +1087,7 @@ + BasicBlockNumbering.clear(); + Markers.clear(); + Intervals.clear(); ++ LiveStarts.clear(); + VNInfoAllocator.Reset(); + + unsigned NumSlots = MFI->getObjectIndexEnd(); +@@ -998,6 +1099,7 @@ + SmallVector SortedSlots; + SortedSlots.reserve(NumSlots); + Intervals.reserve(NumSlots); ++ LiveStarts.resize(NumSlots); + + unsigned NumMarkers = collectMarkers(NumSlots); + +@@ -1069,6 +1171,9 @@ + return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); + }); + ++ for (auto &s : LiveStarts) ++ std::sort(s.begin(), s.end()); ++ + bool Changed = true; + while (Changed) { + Changed = false; +@@ -1084,12 +1189,22 @@ + int SecondSlot = SortedSlots[J]; + LiveInterval *First = &*Intervals[FirstSlot]; + LiveInterval *Second = &*Intervals[SecondSlot]; ++ auto &FirstS = LiveStarts[FirstSlot]; ++ auto &SecondS = LiveStarts[SecondSlot]; + assert (!First->empty() && !Second->empty() && "Found an empty range"); + +- // Merge disjoint slots. +- if (!First->overlaps(*Second)) { ++ // Merge disjoint slots. This is a little bit tricky - see the ++ // Implementation Notes section for an explanation. 
++ if (!First->isLiveAtIndexes(SecondS) && ++ !Second->isLiveAtIndexes(FirstS)) { + Changed = true; + First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); ++ ++ int OldSize = FirstS.size(); ++ FirstS.append(SecondS.begin(), SecondS.end()); ++ auto Mid = FirstS.begin() + OldSize; ++ std::inplace_merge(FirstS.begin(), Mid, FirstS.end()); ++ + SlotRemap[SecondSlot] = FirstSlot; + SortedSlots[J] = -1; + DEBUG(dbgs()<<"Merging #"< +Date: Mon Jun 26 03:31:31 2017 +0000 + + [InstCombine] Factor the logic for propagating !nonnull and !range + metadata out of InstCombine and into helpers. + + NFC, this just exposes the logic used by InstCombine when propagating + metadata from one load instruction to another. The plan is to use this + in SROA to address PR32902. + + If anyone has better ideas about how to factor this or name variables, + I'm all ears, but this seemed like a pretty good start and lets us make + progress on the PR. + + This is based on a patch by Ariel Ben-Yehuda (D34285). + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306267 91177308-0d34-0410-b5e6-96231b3b80d8 + +--- a/include/llvm/Transforms/Utils/Local.h ++++ b/include/llvm/Transforms/Utils/Local.h +@@ -366,6 +366,19 @@ + /// during lowering by the GC infrastructure. + bool callsGCLeafFunction(ImmutableCallSite CS); + ++/// Copy a nonnull metadata node to a new load instruction. ++/// ++/// This handles mapping it to range metadata if the new load is an integer ++/// load instead of a pointer load. ++void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI); ++ ++/// Copy a range metadata node to a new load instruction. ++/// ++/// This handles mapping it to nonnull metadata if the new load is a pointer ++/// load instead of an integer load and the range doesn't cover null. 
++void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N, ++ LoadInst &NewLI); ++ + //===----------------------------------------------------------------------===// + // Intrinsic pattern matching + // +--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp ++++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +@@ -471,21 +471,7 @@ + break; + + case LLVMContext::MD_nonnull: +- // This only directly applies if the new type is also a pointer. +- if (NewTy->isPointerTy()) { +- NewLoad->setMetadata(ID, N); +- break; +- } +- // If it's integral now, translate it to !range metadata. +- if (NewTy->isIntegerTy()) { +- auto *ITy = cast(NewTy); +- auto *NullInt = ConstantExpr::getPtrToInt( +- ConstantPointerNull::get(cast(Ptr->getType())), ITy); +- auto *NonNullInt = +- ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1)); +- NewLoad->setMetadata(LLVMContext::MD_range, +- MDB.createRange(NonNullInt, NullInt)); +- } ++ copyNonnullMetadata(LI, N, *NewLoad); + break; + case LLVMContext::MD_align: + case LLVMContext::MD_dereferenceable: +@@ -495,17 +481,7 @@ + NewLoad->setMetadata(ID, N); + break; + case LLVMContext::MD_range: +- // FIXME: It would be nice to propagate this in some way, but the type +- // conversions make it hard. +- +- // If it's a pointer now and the range does not contain 0, make it !nonnull. 
+- if (NewTy->isPointerTy()) { +- unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy); +- if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { +- MDNode *NN = MDNode::get(LI.getContext(), None); +- NewLoad->setMetadata(LLVMContext::MD_nonnull, NN); +- } +- } ++ copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad); + break; + } + } +--- a/lib/Transforms/Utils/Local.cpp ++++ b/lib/Transforms/Utils/Local.cpp +@@ -26,6 +26,7 @@ + #include "llvm/Analysis/LazyValueInfo.h" + #include "llvm/Analysis/ValueTracking.h" + #include "llvm/IR/CFG.h" ++#include "llvm/IR/ConstantRange.h" + #include "llvm/IR/Constants.h" + #include "llvm/IR/DIBuilder.h" + #include "llvm/IR/DataLayout.h" +@@ -1069,7 +1070,7 @@ + } + + /// See if there is a dbg.value intrinsic for DIVar for the PHI node. +-static bool PhiHasDebugValue(DILocalVariable *DIVar, ++static bool PhiHasDebugValue(DILocalVariable *DIVar, + DIExpression *DIExpr, + PHINode *APN) { + // Since we can't guarantee that the original dbg.declare instrinsic +@@ -1152,7 +1153,7 @@ + DbgValue->insertAfter(LI); + } + +-/// Inserts a llvm.dbg.value intrinsic after a phi ++/// Inserts a llvm.dbg.value intrinsic after a phi + /// that has an associated llvm.dbg.decl intrinsic. + void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + PHINode *APN, DIBuilder &Builder) { +@@ -1723,12 +1724,12 @@ + // Preserve !invariant.group in K. 
+ break; + case LLVMContext::MD_align: +- K->setMetadata(Kind, ++ K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: +- K->setMetadata(Kind, ++ K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; + } +@@ -1812,6 +1813,49 @@ + return false; + } + ++void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, ++ LoadInst &NewLI) { ++ auto *NewTy = NewLI.getType(); ++ ++ // This only directly applies if the new type is also a pointer. ++ if (NewTy->isPointerTy()) { ++ NewLI.setMetadata(LLVMContext::MD_nonnull, N); ++ return; ++ } ++ ++ // The only other translation we can do is to integral loads with !range ++ // metadata. ++ if (!NewTy->isIntegerTy()) ++ return; ++ ++ MDBuilder MDB(NewLI.getContext()); ++ const Value *Ptr = OldLI.getPointerOperand(); ++ auto *ITy = cast(NewTy); ++ auto *NullInt = ConstantExpr::getPtrToInt( ++ ConstantPointerNull::get(cast(Ptr->getType())), ITy); ++ auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1)); ++ NewLI.setMetadata(LLVMContext::MD_range, ++ MDB.createRange(NonNullInt, NullInt)); ++} ++ ++void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, ++ MDNode *N, LoadInst &NewLI) { ++ auto *NewTy = NewLI.getType(); ++ ++ // Give up unless it is converted to a pointer where there is a single very ++ // valuable mapping we can do reliably. ++ // FIXME: It would be nice to propagate this in more ways, but the type ++ // conversions make it hard. ++ if (!NewTy->isPointerTy()) ++ return; ++ ++ unsigned BitWidth = DL.getTypeSizeInBits(NewTy); ++ if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) { ++ MDNode *NN = MDNode::get(OldLI.getContext(), None); ++ NewLI.setMetadata(LLVMContext::MD_nonnull, NN); ++ } ++} ++ + namespace { + /// A potential constituent of a bitreverse or bswap expression. 
See + /// collectBitParts for a fuller explanation. +@@ -1933,7 +1977,7 @@ + unsigned NumMaskedBits = AndMask.countPopulation(); + if (!MatchBitReversals && NumMaskedBits % 8 != 0) + return Result; +- ++ + auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, + MatchBitReversals, BPS); + if (!Res) diff --git a/debian/patches/rL306353.diff b/debian/patches/rL306353.diff new file mode 100644 index 00000000..fafe3235 --- /dev/null +++ b/debian/patches/rL306353.diff @@ -0,0 +1,47 @@ +commit 477bd758b48cb96477d3dd4cf3b36bf2706e8c1d +Author: Chandler Carruth +Date: Tue Jun 27 02:23:15 2017 +0000 + + [SROA] Clean up a test case a bit prior to adding more testing for + nonnull as part of fixing PR32902. + + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306353 91177308-0d34-0410-b5e6-96231b3b80d8 + +--- a/test/Transforms/SROA/preserve-nonnull.ll ++++ b/test/Transforms/SROA/preserve-nonnull.ll +@@ -3,22 +3,20 @@ + ; Make sure that SROA doesn't lose nonnull metadata + ; on loads from allocas that get optimized out. 
+ +-; CHECK-LABEL: define float* @yummy_nonnull +-; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8 +-; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null +-; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]]) +-; CHECK: ret float* {{.*}}[[RETURN]] +- + define float* @yummy_nonnull(float** %arg) { +-entry-block: +- %buf = alloca float* +- +- %_arg_i8 = bitcast float** %arg to i8* +- %_buf_i8 = bitcast float** %buf to i8* +- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false) +- +- %ret = load float*, float** %buf, align 8, !nonnull !0 +- ret float* %ret ++; CHECK-LABEL: define float* @yummy_nonnull( ++; CHECK-NEXT: entry: ++; CHECK-NEXT: %[[RETURN:.*]] = load float*, float** %arg, align 8 ++; CHECK-NEXT: %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null ++; CHECK-NEXT: call void @llvm.assume(i1 %[[ASSUME]]) ++; CHECK-NEXT: ret float* %[[RETURN]] ++entry: ++ %buf = alloca float* ++ %_arg_i8 = bitcast float** %arg to i8* ++ %_buf_i8 = bitcast float** %buf to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false) ++ %ret = load float*, float** %buf, align 8, !nonnull !0 ++ ret float* %ret + } + + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) diff --git a/debian/patches/rL306358.diff b/debian/patches/rL306358.diff new file mode 100644 index 00000000..6f9f05f5 --- /dev/null +++ b/debian/patches/rL306358.diff @@ -0,0 +1,52 @@ +commit 156cc49e505986a1659adaa3a0b5a070372377c8 +Author: Chandler Carruth +Date: Tue Jun 27 03:08:45 2017 +0000 + + [SROA] Further test cleanup and add a test for the actual propagation of + the nonnull attribute distinct from rewriting it into an assume. 
+ + git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306358 91177308-0d34-0410-b5e6-96231b3b80d8 + +--- a/test/Transforms/SROA/preserve-nonnull.ll ++++ b/test/Transforms/SROA/preserve-nonnull.ll +@@ -3,8 +3,31 @@ + ; Make sure that SROA doesn't lose nonnull metadata + ; on loads from allocas that get optimized out. + +-define float* @yummy_nonnull(float** %arg) { +-; CHECK-LABEL: define float* @yummy_nonnull( ++declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) ++ ++; Check that we do basic propagation of nonnull when rewriting. ++define i8* @propagate_nonnull(i32* %v) { ++; CHECK-LABEL: define i8* @propagate_nonnull( ++; CHECK-NEXT: entry: ++; CHECK-NEXT: %[[A:.*]] = alloca i8* ++; CHECK-NEXT: %[[V_CAST:.*]] = bitcast i32* %v to i8* ++; CHECK-NEXT: store i8* %[[V_CAST]], i8** %[[A]] ++; CHECK-NEXT: %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0 ++; CHECK-NEXT: ret i8* %[[LOAD]] ++entry: ++ %a = alloca [2 x i8*] ++ %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0 ++ %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1 ++ %a.gep0.cast = bitcast i8** %a.gep0 to i32** ++ %a.gep1.cast = bitcast i8** %a.gep1 to i32** ++ store i32* %v, i32** %a.gep1.cast ++ store i32* null, i32** %a.gep0.cast ++ %load = load volatile i8*, i8** %a.gep1, !nonnull !0 ++ ret i8* %load ++} ++ ++define float* @turn_nonnull_into_assume(float** %arg) { ++; CHECK-LABEL: define float* @turn_nonnull_into_assume( + ; CHECK-NEXT: entry: + ; CHECK-NEXT: %[[RETURN:.*]] = load float*, float** %arg, align 8 + ; CHECK-NEXT: %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null +@@ -19,6 +42,4 @@ + ret float* %ret + } + +-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) +- + !0 = !{} diff --git a/debian/patches/rL306379.diff b/debian/patches/rL306379.diff new file mode 100644 index 00000000..58a901b3 --- /dev/null +++ b/debian/patches/rL306379.diff @@ -0,0 +1,147 
@@
+commit 7df06519765b14e1b08d7034c82c45a0a653eb25
+Author: Chandler Carruth 
+Date: Tue Jun 27 08:32:03 2017 +0000
+
+ [SROA] Fix PR32902 by more carefully propagating !nonnull metadata.
+
+ This is based heavily on the work done in D34285. I mostly wanted to do
+ test cleanup for the author to save them some time, but I had a really
+ hard time understanding why it was so hard to write better test cases
+ for these issues.
+
+ The problem is that because SROA does a second rewrite of the loads and
+ because we *don't* propagate !nonnull for non-pointer loads, we first
+ introduced invalid !nonnull metadata and then stripped it back off just
+ in time to avoid most ways of this PR manifesting. Moving to the more
+ careful utility only fixes this by changing the predicate to look at the
+ new load's type rather than the target type. However, that *does* fix
+ the bug, and the utility is much nicer including adding range metadata
+ to model the nonnull property after a conversion to an integer.
+
+ However, we have bigger problems because we don't actually propagate
+ *range* metadata, and the utility to do this extracted from instcombine
+ isn't really in good shape to do this currently. It *only* handles the
+ case of copying range metadata from an integer load to a pointer load.
+ It doesn't even handle the trivial cases of propagating from one integer
+ load to another when they are the same width! This utility will need to
+ be beefed up prior to using in this location to get the metadata to
+ fully survive.
+
+ And even then, we need to go and teach things to turn the range metadata
+ into an assume the way we do with nonnull so that when we *promote* an
+ integer we don't lose the information.
+
+ All of this will require a new test case that looks kind-of like
+ `preserve-nonnull.ll` does here but focuses on range metadata.
It will
+ also likely require more testing because it needs to correctly handle
+ changes to the integer width, especially as SROA actively tries to
+ change the integer width!
+
+ Last but not least, I'm a little worried about hooking the range
+ metadata up here because the instcombine logic for converting from
+ a range metadata *to* a nonnull metadata node seems broken in the face
+ of non-zero address spaces where null is not mapped to the integer `0`.
+ So that probably needs to get fixed with test cases both in SROA and in
+ instcombine to cover it.
+
+ But this *does* extract the core PR fix from D34285 of preventing the
+ !nonnull metadata from being propagated in a broken state just long
+ enough to feed into promotion and crash value tracking.
+
+ On D34285 there is some discussion of zero-extend handling because it
+ isn't necessary. First, the new load size covers all of the non-undef
+ (ie, possibly initialized) bits. This may even extend past the original
+ alloca if loading those bits could produce valid data. The only way it's
+ valid for us to zero-extend an integer load in SROA is if the original
+ code had a zero extend or those bits were undef. And we get to assume
+ things like undef *never* satisfies nonnull, so non undef bits can
+ participate here. No need to special case the zero-extend handling, it
+ just falls out correctly.
+
+ The original credit goes to Ariel Ben-Yehuda! I'm mostly landing this to
+ save a few rounds of trivial edits fixing style issues and test case
+ formulation.
+
+ Differential Revision: D34285
+
+ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306379 91177308-0d34-0410-b5e6-96231b3b80d8
+
+--- a/lib/Transforms/Scalar/SROA.cpp
++++ b/lib/Transforms/Scalar/SROA.cpp
+@@ -2388,9 +2388,20 @@
+ if (LI.isVolatile())
+ NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
++ // Any !nonnull metadata or !range metadata on the old load is also valid
++ // on the new load.
This is even true in some cases even when the loads ++ // are different types, for example by mapping !nonnull metadata to ++ // !range metadata by modeling the null pointer constant converted to the ++ // integer type. ++ // FIXME: Add support for range metadata here. Currently the utilities ++ // for this don't propagate range metadata in trivial cases from one ++ // integer load to another, don't handle non-addrspace-0 null pointers ++ // correctly, and don't have any support for mapping ranges as the ++ // integer type becomes winder or narrower. ++ if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull)) ++ copyNonnullMetadata(LI, N, *NewLI); ++ + // Try to preserve nonnull metadata +- if (TargetTy->isPointerTy()) +- NewLI->copyMetadata(LI, LLVMContext::MD_nonnull); + V = NewLI; + + // If this is an integer load past the end of the slice (which means the +--- a/test/Transforms/SROA/preserve-nonnull.ll ++++ b/test/Transforms/SROA/preserve-nonnull.ll +@@ -42,4 +42,51 @@ + ret float* %ret + } + ++; Make sure we properly handle the !nonnull attribute when we convert ++; a pointer load to an integer load. ++; FIXME: While this doesn't do anythnig actively harmful today, it really ++; should propagate the !nonnull metadata to range metadata. The irony is, it ++; *does* initially, but then we lose that !range metadata before we finish ++; SROA. 
++define i8* @propagate_nonnull_to_int() { ++; CHECK-LABEL: define i8* @propagate_nonnull_to_int( ++; CHECK-NEXT: entry: ++; CHECK-NEXT: %[[A:.*]] = alloca i64 ++; CHECK-NEXT: store i64 42, i64* %[[A]] ++; CHECK-NEXT: %[[LOAD:.*]] = load volatile i64, i64* %[[A]] ++; CHECK-NEXT: %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8* ++; CHECK-NEXT: ret i8* %[[CAST]] ++entry: ++ %a = alloca [2 x i8*] ++ %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0 ++ %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1 ++ %a.gep0.cast = bitcast i8** %a.gep0 to i64* ++ %a.gep1.cast = bitcast i8** %a.gep1 to i64* ++ store i64 42, i64* %a.gep1.cast ++ store i64 0, i64* %a.gep0.cast ++ %load = load volatile i8*, i8** %a.gep1, !nonnull !0 ++ ret i8* %load ++} ++ ++; Make sure we properly handle the !nonnull attribute when we convert ++; a pointer load to an integer load and immediately promote it to an SSA ++; register. This can fail in interesting ways due to the rewrite iteration of ++; SROA, resulting in PR32902. 
++define i8* @propagate_nonnull_to_int_and_promote() { ++; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote( ++; CHECK-NEXT: entry: ++; CHECK-NEXT: %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8* ++; CHECK-NEXT: ret i8* %[[PROMOTED_VALUE]] ++entry: ++ %a = alloca [2 x i8*], align 8 ++ %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0 ++ %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1 ++ %a.gep0.cast = bitcast i8** %a.gep0 to i64* ++ %a.gep1.cast = bitcast i8** %a.gep1 to i64* ++ store i64 42, i64* %a.gep1.cast ++ store i64 0, i64* %a.gep0.cast ++ %load = load i8*, i8** %a.gep1, align 8, !nonnull !0 ++ ret i8* %load ++} ++ + !0 = !{} diff --git a/debian/patches/series b/debian/patches/series index afc43743..b3a3aa51 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -38,7 +38,6 @@ disable-llvm-symbolizer-test.diff clang-tidy-run-bin.diff #bug-30342.diff fix-scan-view-path.diff -#0011-SimplifyCFG-Hoisting-invalidates-metadata.patch clang-fix-cmpxchg8-detection-on-i386.patch lldb-addversion-suffix-to-llvm-server-exec.patch lldb-missing-install.diff @@ -49,3 +48,13 @@ add_symbols_versioning.patch ftfbs-gcc.diff pr81066.diff armhf-bitfield.diff +# rust LLVM PR84, LLVM PR32488 +# This is actually Rust's backport of upstream RL305193 (which doesn't apply cleanly to LLVM 4) +# https://github.com/rust-lang/llvm/commit/2b622a393ce +rL305193-backport.diff +# rust LLVM PR90, LLVM PR32902, PR31142 +rL298540.diff +rL306267.diff +rL306353.diff +rL306358.diff +rL306379.diff