Mirror of https://git.proxmox.com/git/llvm-toolchain (synced 2025-07-27 13:25:37 +00:00)
* Non-maintainer upload.
* Backport some patches (originally from rust, and upstreamed) to fix two failing tests in rustc.
This commit is contained in:
parent e784e58e94
commit be94a5a691
debian/changelog: 8 changes (vendored)
@@ -1,3 +1,11 @@
+llvm-toolchain-4.0 (1:4.0.1-7.1) UNRELEASED; urgency=medium
+
+  * Non-maintainer upload.
+  * Backport some patches (originally from rust, and upstreamed) to fix two
+    failing tests in rustc.
+
+ -- Ximin Luo <infinity0@debian.org>  Wed, 18 Oct 2017 15:28:20 +0200
+
 llvm-toolchain-4.0 (1:4.0.1-7) unstable; urgency=medium
 
   * Force the deactivation of ocaml until the transition is done
debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch: 84 deletions (file removed)
@@ -1,84 +0,0 @@
From eee68eafa7e8e4ce996b49f5551636639a6c331a Mon Sep 17 00:00:00 2001
From: David Majnemer <david.majnemer@gmail.com>
Date: Mon, 29 Aug 2016 17:14:08 +0000
Subject: [PATCH 11/17] [SimplifyCFG] Hoisting invalidates metadata

We forgot to remove optimization metadata when performing hosting during
FoldTwoEntryPHINode.

This fixes PR29163.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279980 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/SimplifyCFG.cpp   | 10 ++++++++--
 test/Transforms/SimplifyCFG/PR29163.ll | 31 +++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 test/Transforms/SimplifyCFG/PR29163.ll

diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 0504646..c197317 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2024,14 +2024,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
 
   // Move all 'aggressive' instructions, which are defined in the
   // conditional parts of the if's up to the dominating block.
-  if (IfBlock1)
+  if (IfBlock1) {
+    for (auto &I : *IfBlock1)
+      I.dropUnknownNonDebugMetadata();
     DomBlock->getInstList().splice(InsertPt->getIterator(),
                                    IfBlock1->getInstList(), IfBlock1->begin(),
                                    IfBlock1->getTerminator()->getIterator());
-  if (IfBlock2)
+  }
+  if (IfBlock2) {
+    for (auto &I : *IfBlock2)
+      I.dropUnknownNonDebugMetadata();
     DomBlock->getInstList().splice(InsertPt->getIterator(),
                                    IfBlock2->getInstList(), IfBlock2->begin(),
                                    IfBlock2->getTerminator()->getIterator());
+  }
 
   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
     // Change the PHI node into a select instruction.
diff --git a/test/Transforms/SimplifyCFG/PR29163.ll b/test/Transforms/SimplifyCFG/PR29163.ll
new file mode 100644
index 0000000..65f9090
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PR29163.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@GV = external constant i64*
+
+define i64* @test1(i1 %cond, i8* %P) {
+entry:
+  br i1 %cond, label %if, label %then
+
+then:
+  %bc = bitcast i8* %P to i64*
+  br label %join
+
+if:
+  %load = load i64*, i64** @GV, align 8, !dereferenceable !0
+  br label %join
+
+join:
+  %phi = phi i64* [ %bc, %then ], [ %load, %if ]
+  ret i64* %phi
+}
+
+; CHECK-LABEL: define i64* @test1(
+; CHECK: %[[bc:.*]] = bitcast i8* %P to i64*
+; CHECK: %[[load:.*]] = load i64*, i64** @GV, align 8{{$}}
+; CHECK: %[[phi:.*]] = select i1 %cond, i64* %[[load]], i64* %[[bc]]
+; CHECK: ret i64* %[[phi]]
+
+
+!0 = !{i64 8}
-- 
2.10.1
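A condensed sketch of the behavior the removed patch above tested, derived entirely from its own PR29163 test case: once FoldTwoEntryPHINode hoists both conditional arms into the dominating block and folds the phi into a select, the load executes unconditionally, so its !dereferenceable annotation (which held only on the guarded path) has to be dropped.

; before -simplifycfg: the load is guarded by %cond
;   if:   %load = load i64*, i64** @GV, align 8, !dereferenceable !0
;   join: %phi = phi i64* [ %bc, %then ], [ %load, %if ]
; after hoisting: metadata dropped, phi folded to a select
;   %bc = bitcast i8* %P to i64*
;   %load = load i64*, i64** @GV, align 8
;   %phi = select i1 %cond, i64* %load, i64* %bc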
debian/patches/rL298540.diff: 292 additions (new file, vendored)
@@ -0,0 +1,292 @@
commit 2b66aeed20b61d42cd2782b94d5419c6faca49a1
Author: Luqman Aden <me@luqman.ca>
Date:   Wed Mar 22 19:16:39 2017 +0000

    Preserve nonnull metadata on Loads through SROA & mem2reg.

    Summary:
    https://llvm.org/bugs/show_bug.cgi?id=31142 :

    SROA was dropping the nonnull metadata on loads from allocas that got optimized out. This patch simply preserves nonnull metadata on loads through SROA and mem2reg.

    Reviewers: chandlerc, efriedma

    Reviewed By: efriedma

    Subscribers: hfinkel, spatel, efriedma, arielb1, davide, llvm-commits

    Differential Revision: https://reviews.llvm.org/D27114

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298540 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2387,6 +2387,10 @@
                               LI.isVolatile(), LI.getName());
     if (LI.isVolatile())
       NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+    // Try to preserve nonnull metadata
+    if (TargetTy->isPointerTy())
+      NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
     V = NewLI;
 
     // If this is an integer load past the end of the slice (which means the
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -15,7 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
@@ -23,6 +22,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -38,6 +38,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -301,6 +302,18 @@
 
 } // end of anonymous namespace
 
+/// Given a LoadInst LI this adds assume(LI != null) after it.
+static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
+  Function *AssumeIntrinsic =
+      Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
+  ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
+                                       Constant::getNullValue(LI->getType()));
+  LoadNotNull->insertAfter(LI);
+  CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
+  CI->insertAfter(LoadNotNull);
+  AC->registerAssumption(CI);
+}
+
 static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
   // Knowing that this alloca is promotable, we know that it's safe to kill all
   // instructions except for load and store.
@@ -334,9 +347,9 @@
 /// and thus must be phi-ed with undef. We fall back to the standard alloca
 /// promotion algorithm in that case.
 static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
-                                     LargeBlockInfo &LBI,
-                                     DominatorTree &DT,
-                                     AliasSetTracker *AST) {
+                                     LargeBlockInfo &LBI, DominatorTree &DT,
+                                     AliasSetTracker *AST,
+                                     AssumptionCache *AC) {
   StoreInst *OnlyStore = Info.OnlyStore;
   bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
   BasicBlock *StoreBB = OnlyStore->getParent();
@@ -387,6 +400,14 @@
       // code.
       if (ReplVal == LI)
         ReplVal = UndefValue::get(LI->getType());
+
+      // If the load was marked as nonnull we don't want to lose
+      // that information when we erase this Load. So we preserve
+      // it with an assume.
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
       LI->replaceAllUsesWith(ReplVal);
       if (AST && LI->getType()->isPointerTy())
         AST->deleteValue(LI);
@@ -435,7 +456,9 @@
 /// }
 static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                      LargeBlockInfo &LBI,
-                                     AliasSetTracker *AST) {
+                                     AliasSetTracker *AST,
+                                     DominatorTree &DT,
+                                     AssumptionCache *AC) {
   // The trickiest case to handle is when we have large blocks. Because of this,
   // this code is optimized assuming that large blocks happen. This does not
   // significantly pessimize the small block case. This uses LargeBlockInfo to
@@ -476,10 +499,17 @@
         // There is no store before this load, bail out (load may be affected
         // by the following stores - see main comment).
         return false;
-      }
-      else
+      } else {
         // Otherwise, there was a store before this load, the load takes its value.
-        LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
+        // Note, if the load was marked as nonnull we don't want to lose that
+        // information when we erase it. So we preserve it with an assume.
+        Value *ReplVal = std::prev(I)->second->getOperand(0);
+        if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+            !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+          addAssumeNonNull(AC, LI);
+
+        LI->replaceAllUsesWith(ReplVal);
+      }
 
       if (AST && LI->getType()->isPointerTy())
         AST->deleteValue(LI);
@@ -553,7 +583,7 @@
   // If there is only a single store to this value, replace any loads of
   // it that are directly dominated by the definition with the value stored.
   if (Info.DefiningBlocks.size() == 1) {
-    if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
+    if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST, AC)) {
       // The alloca has been processed, move on.
       RemoveFromAllocasList(AllocaNum);
       ++NumSingleStore;
@@ -564,7 +594,7 @@
   // If the alloca is only read and written in one basic block, just perform a
   // linear sweep over the block to eliminate it.
   if (Info.OnlyUsedInOneBlock &&
-      promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
+      promoteSingleBlockAlloca(AI, Info, LBI, AST, DT, AC)) {
     // The alloca has been processed, move on.
     RemoveFromAllocasList(AllocaNum);
     continue;
@@ -940,6 +970,13 @@
 
       Value *V = IncomingVals[AI->second];
 
+      // If the load was marked as nonnull we don't want to lose
+      // that information when we erase this Load. So we preserve
+      // it with an assume.
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(V, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
       // Anything using the load now uses the current value.
       LI->replaceAllUsesWith(V);
       if (AST && LI->getType()->isPointerTy())
--- /dev/null
+++ b/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; This tests that mem2reg preserves the !nonnull metadata on loads
+; from allocas that get optimized out.
+
+; Check the case where the alloca in question has a single store.
+define float* @single_store(float** %arg) {
+; CHECK-LABEL: define float* @single_store
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float **%buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store but still within one basic block.
+define float* @single_block(float** %arg) {
+; CHECK-LABEL: define float* @single_block
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float **%buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store and also reads ands writes in multiple blocks.
+define float* @multi_block(float** %arg) {
+; CHECK-LABEL: define float* @multi_block
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: br label %next
+; CHECK-LABEL: next:
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  br label %next
+next:
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check that we don't add an assume if it's not
+; necessary i.e. the value is already implied to be nonnull
+define float* @no_assume(float** %arg) {
+; CHECK-LABEL: define float* @no_assume
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: %cn = icmp ne float* %arg.load, null
+; CHECK: br i1 %cn, label %next, label %fin
+; CHECK-LABEL: next:
+; CHECK-NOT: call void @llvm.assume
+; CHECK: ret float* %arg.load
+; CHECK-LABEL: fin:
+; CHECK: ret float* null
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  %cn = icmp ne float* %arg.load, null
+  br i1 %cn, label %next, label %fin
+next:
+; At this point the above nonnull check ensures that
+; the value %arg.load is nonnull in this block and thus
+; we need not add the assume.
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+fin:
+  ret float* null
+}
+
+!0 = !{}
--- /dev/null
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure that SROA doesn't lose nonnull metadata
+; on loads from allocas that get optimized out.
+
+; CHECK-LABEL: define float* @yummy_nonnull
+; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* {{.*}}[[RETURN]]
+
+define float* @yummy_nonnull(float** %arg) {
+entry-block:
+  %buf = alloca float*
+
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+!0 = !{}
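In short, the rewrite addAssumeNonNull performs: when mem2reg erases a load that carried !nonnull, the fact is re-materialized as an assume on the replacement value. A minimal before/after sketch modeled on the @single_store test above (the %nn name is illustrative, not from the patch):

; before -mem2reg:
;   %buf.load = load float*, float** %buf, !nonnull !0
;   ret float* %buf.load
; after -mem2reg: the load is gone, the nonnull fact survives as an assume
;   %nn = icmp ne float* %arg.load, null
;   call void @llvm.assume(i1 %nn)
;   ret float* %arg.load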
debian/patches/rL305193-backport.diff: 442 additions (new file, vendored)
@@ -0,0 +1,442 @@
commit 2b622a393ce80c6157d32a50bf67d6b830729469
Author: Than McIntosh <thanm@google.com>
Date:   Mon Jun 12 14:56:02 2017 +0000

    StackColoring: smarter check for slot overlap

    Summary:
    The old check for slot overlap treated 2 slots `S` and `T` as
    overlapping if there existed a CFG node in which both of the slots could
    possibly be active. That is overly conservative and caused stack blowups
    in Rust programs. Instead, check whether there is a single CFG node in
    which both of the slots are possibly active *together*.

    Fixes PR32488.

    Patch by Ariel Ben-Yehuda <ariel.byd@gmail.com>

    Reviewers: thanm, nagisa, llvm-commits, efriedma, rnk

    Reviewed By: thanm

    Subscribers: dotdash

    Differential Revision: https://reviews.llvm.org/D31583

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305193 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -87,10 +87,134 @@
 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
 STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
 
+//===----------------------------------------------------------------------===//
+// StackColoring Pass
+//===----------------------------------------------------------------------===//
+//
+// Stack Coloring reduces stack usage by merging stack slots when they
+// can't be used together. For example, consider the following C program:
+//
+//     void bar(char *, int);
+//     void foo(bool var) {
+//         A: {
+//             char z[4096];
+//             bar(z, 0);
+//         }
+//
+//         char *p;
+//         char x[4096];
+//         char y[4096];
+//         if (var) {
+//             p = x;
+//         } else {
+//             bar(y, 1);
+//             p = y + 1024;
+//         }
+//     B:
+//         bar(p, 2);
+//     }
+//
+// Naively-compiled, this program would use 12k of stack space. However, the
+// stack slot corresponding to `z` is always destroyed before either of the
+// stack slots for `x` or `y` are used, and then `x` is only used if `var`
+// is true, while `y` is only used if `var` is false. So in no time are 2
+// of the stack slots used together, and therefore we can merge them,
+// compiling the function using only a single 4k alloca:
+//
+//     void foo(bool var) { // equivalent
+//         char x[4096];
+//         char *p;
+//         bar(x, 0);
+//         if (var) {
+//             p = x;
+//         } else {
+//             bar(x, 1);
+//             p = x + 1024;
+//         }
+//         bar(p, 2);
+//     }
+//
+// This is an important optimization if we want stack space to be under
+// control in large functions, both open-coded ones and ones created by
+// inlining.
 //
 // Implementation Notes:
 // ---------------------
 //
+// An important part of the above reasoning is that `z` can't be accessed
+// while the latter 2 calls to `bar` are running. This is justified because
+// `z`'s lifetime is over after we exit from block `A:`, so any further
+// accesses to it would be UB. The way we represent this information
+// in LLVM is by having frontends delimit blocks with `lifetime.start`
+// and `lifetime.end` intrinsics.
+//
+// The effect of these intrinsics seems to be as follows (maybe I should
+// specify this in the reference?):
+//
+//   L1) at start, each stack-slot is marked as *out-of-scope*, unless no
+//       lifetime intrinsic refers to that stack slot, in which case
+//       it is marked as *in-scope*.
+//   L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
+//       the stack slot is overwritten with `undef`.
+//   L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
+//   L4) on function exit, all stack slots are marked as *out-of-scope*.
+//   L5) `lifetime.end` is a no-op when called on a slot that is already
+//       *out-of-scope*.
+//   L6) memory accesses to *out-of-scope* stack slots are UB.
+//   L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
+//       are invalidated, unless the slot is "degenerate". This is used to
+//       justify not marking slots as in-use until the pointer to them is
+//       used, but feels a bit hacky in the presence of things like LICM. See
+//       the "Degenerate Slots" section for more details.
+//
+// Now, let's ground stack coloring on these rules. We'll define a slot
+// as *in-use* at a (dynamic) point in execution if it either can be
+// written to at that point, or if it has a live and non-undef content
+// at that point.
+//
+// Obviously, slots that are never *in-use* together can be merged, and
+// in our example `foo`, the slots for `x`, `y` and `z` are never
+// in-use together (of course, sometimes slots that *are* in-use together
+// might still be mergable, but we don't care about that here).
+//
+// In this implementation, we successively merge pairs of slots that are
+// not *in-use* together. We could be smarter - for example, we could merge
+// a single large slot with 2 small slots, or we could construct the
+// interference graph and run a "smart" graph coloring algorithm, but with
+// that aside, how do we find out whether a pair of slots might be *in-use*
+// together?
+//
+// From our rules, we see that *out-of-scope* slots are never *in-use*,
+// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
+// until their address is taken. Therefore, we can approximate slot activity
+// using dataflow.
+//
+// A subtle point: naively, we might try to figure out which pairs of
+// stack-slots interfere by propagating `S in-use` through the CFG for every
+// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
+// which they are both *in-use*.
+//
+// That is sound, but overly conservative in some cases: in our (artificial)
+// example `foo`, either `x` or `y` might be in use at the label `B:`, but
+// as `x` is only in use if we came in from the `var` edge and `y` only
+// if we came from the `!var` edge, they still can't be in use together.
+// See PR32488 for an important real-life case.
+//
+// If we wanted to find all points of interference precisely, we could
+// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
+// would be precise, but requires propagating `O(n^2)` dataflow facts.
+//
+// However, we aren't interested in the *set* of points of interference
+// between 2 stack slots, only *whether* there *is* such a point. So we
+// can rely on a little trick: for `S` and `T` to be in-use together,
+// one of them needs to become in-use while the other is in-use (or
+// they might both become in use simultaneously). We can check this
+// by also keeping track of the points at which a stack slot might *start*
+// being in-use.
+//
+// Exact first use:
+// ----------------
+//
 // Consider the following motivating example:
 //
 //     int foo() {
@@ -159,6 +283,9 @@
 // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
 // byte stack (better).
 //
+// Degenerate Slots:
+// -----------------
+//
 // Relying entirely on first-use of stack slots is problematic,
 // however, due to the fact that optimizations can sometimes migrate
 // uses of a variable outside of its lifetime start/end region. Here
@@ -238,10 +365,6 @@
 // for "b" then it will appear that 'b' has a degenerate lifetime.
 //
 
-//===----------------------------------------------------------------------===//
-// StackColoring Pass
-//===----------------------------------------------------------------------===//
-
 namespace {
 /// StackColoring - A machine pass for merging disjoint stack allocations,
 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
@@ -272,8 +395,11 @@
   /// Maps basic blocks to a serial number.
   SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
 
-  /// Maps liveness intervals for each slot.
+  /// Maps slots to their use interval. Outside of this interval, slots
+  /// values are either dead or `undef` and they will not be written to.
   SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
+  /// Maps slots to the points where they can become in-use.
+  SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
   /// VNInfo is used for the construction of LiveIntervals.
   VNInfo::Allocator VNInfoAllocator;
   /// SlotIndex analysis object.
@@ -676,15 +802,22 @@
 
 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
   SmallVector<SlotIndex, 16> Starts;
-  SmallVector<SlotIndex, 16> Finishes;
+  SmallVector<bool, 16> DefinitelyInUse;
 
   // For each block, find which slots are active within this block
   // and update the live intervals.
   for (const MachineBasicBlock &MBB : *MF) {
     Starts.clear();
     Starts.resize(NumSlots);
-    Finishes.clear();
-    Finishes.resize(NumSlots);
+    DefinitelyInUse.clear();
+    DefinitelyInUse.resize(NumSlots);
+
+    // Start the interval of the slots that we previously found to be 'in-use'.
+    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
+    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
+         pos = MBBLiveness.LiveIn.find_next(pos)) {
+      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
+    }
 
     // Create the interval for the basic blocks containing lifetime begin/end.
     for (const MachineInstr &MI : MBB) {
@@ -696,68 +829,35 @@
       SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
       for (auto Slot : slots) {
         if (IsStart) {
-          if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+          // If a slot is already definitely in use, we don't have to emit
+          // a new start marker because there is already a pre-existing
+          // one.
+          if (!DefinitelyInUse[Slot]) {
+            LiveStarts[Slot].push_back(ThisIndex);
+            DefinitelyInUse[Slot] = true;
+          }
+          if (!Starts[Slot].isValid())
             Starts[Slot] = ThisIndex;
         } else {
-          if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
-            Finishes[Slot] = ThisIndex;
+          if (Starts[Slot].isValid()) {
+            VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
+            Intervals[Slot]->addSegment(
+                LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
+            Starts[Slot] = SlotIndex(); // Invalidate the start index
+            DefinitelyInUse[Slot] = false;
+          }
         }
       }
     }
 
-    // Create the interval of the blocks that we previously found to be 'alive'.
-    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
-    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
-         pos = MBBLiveness.LiveIn.find_next(pos)) {
-      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
-    }
-    for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
-         pos = MBBLiveness.LiveOut.find_next(pos)) {
-      Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
-    }
-
+    // Finish up started segments
    for (unsigned i = 0; i < NumSlots; ++i) {
-      //
-      // When LifetimeStartOnFirstUse is turned on, data flow analysis
-      // is forward (from starts to ends), not bidirectional. A
-      // consequence of this is that we can wind up in situations
-      // where Starts[i] is invalid but Finishes[i] is valid and vice
-      // versa. Example:
-      //
-      //     LIFETIME_START x
-      //     if (...) {
-      //       <use of x>
-      //       throw ...;
-      //     }
-      //     LIFETIME_END x
-      //     return 2;
-      //
-      //
-      // Here the slot for "x" will not be live into the block
-      // containing the "return 2" (since lifetimes start with first
-      // use, not at the dominating LIFETIME_START marker).
-      //
-      if (Starts[i].isValid() && !Finishes[i].isValid()) {
-        Finishes[i] = Indexes->getMBBEndIdx(&MBB);
-      }
       if (!Starts[i].isValid())
         continue;
 
-      assert(Starts[i] && Finishes[i] && "Invalid interval");
-      VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
-      SlotIndex S = Starts[i];
-      SlotIndex F = Finishes[i];
-      if (S < F) {
-        // We have a single consecutive region.
-        Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
-      } else {
-        // We have two non-consecutive regions. This happens when
-        // LIFETIME_START appears after the LIFETIME_END marker.
-        SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
-        SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
-        Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
-        Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
-      }
+      SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
+      VNInfo *VNI = Intervals[i]->getValNumInfo(0);
+      Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
     }
   }
 }
@@ -987,6 +1087,7 @@
   BasicBlockNumbering.clear();
   Markers.clear();
   Intervals.clear();
+  LiveStarts.clear();
   VNInfoAllocator.Reset();
 
   unsigned NumSlots = MFI->getObjectIndexEnd();
@@ -998,6 +1099,7 @@
   SmallVector<int, 8> SortedSlots;
   SortedSlots.reserve(NumSlots);
   Intervals.reserve(NumSlots);
+  LiveStarts.resize(NumSlots);
 
   unsigned NumMarkers = collectMarkers(NumSlots);
 
@@ -1069,6 +1171,9 @@
     return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
   });
 
+  for (auto &s : LiveStarts)
+    std::sort(s.begin(), s.end());
+
   bool Changed = true;
   while (Changed) {
     Changed = false;
@@ -1084,12 +1189,22 @@
         int SecondSlot = SortedSlots[J];
         LiveInterval *First = &*Intervals[FirstSlot];
         LiveInterval *Second = &*Intervals[SecondSlot];
+        auto &FirstS = LiveStarts[FirstSlot];
+        auto &SecondS = LiveStarts[SecondSlot];
         assert (!First->empty() && !Second->empty() && "Found an empty range");
 
-        // Merge disjoint slots.
-        if (!First->overlaps(*Second)) {
+        // Merge disjoint slots. This is a little bit tricky - see the
+        // Implementation Notes section for an explanation.
+        if (!First->isLiveAtIndexes(SecondS) &&
+            !Second->isLiveAtIndexes(FirstS)) {
           Changed = true;
           First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
+
+          int OldSize = FirstS.size();
+          FirstS.append(SecondS.begin(), SecondS.end());
+          auto Mid = FirstS.begin() + OldSize;
+          std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
+
           SlotRemap[SecondSlot] = FirstSlot;
           SortedSlots[J] = -1;
           DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
--- a/test/CodeGen/X86/StackColoring.ll
+++ b/test/CodeGen/X86/StackColoring.ll
@@ -582,12 +582,76 @@
   ret i32 %x.addr.0
 }
 
+;CHECK-LABEL: multi_segment:
+;YESCOLOR: subq $256, %rsp
+;NOFIRSTUSE: subq $256, %rsp
+;NOCOLOR: subq $512, %rsp
+define i1 @multi_segment(i1, i1)
+{
+entry-block:
+  %foo = alloca [32 x i64]
+  %bar = alloca [32 x i64]
+  %foo_i8 = bitcast [32 x i64]* %foo to i8*
+  %bar_i8 = bitcast [32 x i64]* %bar to i8*
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
+  call void @baz([32 x i64]* %foo, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  ret i1 true
+}
+
+;CHECK-LABEL: pr32488:
+;YESCOLOR: subq $256, %rsp
+;NOFIRSTUSE: subq $256, %rsp
+;NOCOLOR: subq $512, %rsp
+define i1 @pr32488(i1, i1)
+{
+entry-block:
+  %foo = alloca [32 x i64]
+  %bar = alloca [32 x i64]
+  %foo_i8 = bitcast [32 x i64]* %foo to i8*
+  %bar_i8 = bitcast [32 x i64]* %bar to i8*
+  br i1 %0, label %if_false, label %if_true
+if_false:
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 0)
+  br i1 %1, label %if_false.1, label %onerr
+if_false.1:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  br label %merge
+if_true:
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
+  call void @baz([32 x i64]* %foo, i32 1)
+  br i1 %1, label %if_true.1, label %onerr
+if_true.1:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  br label %merge
+merge:
+  ret i1 false
+onerr:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  call void @destructor()
+  ret i1 true
+}
+
+%Data = type { [32 x i64] }
+
+declare void @destructor()
+
 declare void @inita(i32*)
 
 declare void @initb(i32*,i32*,i32*)
 
 declare void @bar([100 x i32]* , [100 x i32]*) nounwind
 
+declare void @baz([32 x i64]*, i32)
+
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
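The lifetime-marker semantics that the pass's dataflow consumes (rules L2/L3 in the comment block above) can be seen in a self-contained fragment. This is a hedged sketch mirroring the intrinsic usage of the multi_segment test; @use and @slot_scope are illustrative names standing in for any call that takes the slot's address:

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
declare void @use(i8*)

define void @slot_scope() {
entry:
  %buf = alloca [4096 x i8]
  %p = getelementptr inbounds [4096 x i8], [4096 x i8]* %buf, i64 0, i64 0
  ; slot becomes *in-scope* (rule L2); it can only become in-use from here on
  call void @llvm.lifetime.start.p0i8(i64 4096, i8* %p)
  call void @use(i8* %p)
  ; slot is *out-of-scope* again (rule L3); any later access would be UB
  call void @llvm.lifetime.end.p0i8(i64 4096, i8* %p)
  ret void
}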
debian/patches/rL306267.diff: 187 additions (new file, vendored)
@@ -0,0 +1,187 @@
commit 5a057dc8edbb63887f8c611dd8ddf1b76997f07c
Author: Chandler Carruth <chandlerc@gmail.com>
Date:   Mon Jun 26 03:31:31 2017 +0000

    [InstCombine] Factor the logic for propagating !nonnull and !range
    metadata out of InstCombine and into helpers.

    NFC, this just exposes the logic used by InstCombine when propagating
    metadata from one load instruction to another. The plan is to use this
    in SROA to address PR32902.

    If anyone has better ideas about how to factor this or name variables,
    I'm all ears, but this seemed like a pretty good start and lets us make
    progress on the PR.

    This is based on a patch by Ariel Ben-Yehuda (D34285).

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306267 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -366,6 +366,19 @@
 /// during lowering by the GC infrastructure.
 bool callsGCLeafFunction(ImmutableCallSite CS);
 
+/// Copy a nonnull metadata node to a new load instruction.
+///
+/// This handles mapping it to range metadata if the new load is an integer
+/// load instead of a pointer load.
+void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI);
+
+/// Copy a range metadata node to a new load instruction.
+///
+/// This handles mapping it to nonnull metadata if the new load is a pointer
+/// load instead of an integer load and the range doesn't cover null.
+void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N,
+                       LoadInst &NewLI);
+
 //===----------------------------------------------------------------------===//
 //  Intrinsic pattern matching
 //
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -471,21 +471,7 @@
       break;
 
     case LLVMContext::MD_nonnull:
-      // This only directly applies if the new type is also a pointer.
-      if (NewTy->isPointerTy()) {
-        NewLoad->setMetadata(ID, N);
-        break;
-      }
-      // If it's integral now, translate it to !range metadata.
-      if (NewTy->isIntegerTy()) {
-        auto *ITy = cast<IntegerType>(NewTy);
-        auto *NullInt = ConstantExpr::getPtrToInt(
-            ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
-        auto *NonNullInt =
-            ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
-        NewLoad->setMetadata(LLVMContext::MD_range,
-                             MDB.createRange(NonNullInt, NullInt));
-      }
+      copyNonnullMetadata(LI, N, *NewLoad);
       break;
     case LLVMContext::MD_align:
     case LLVMContext::MD_dereferenceable:
@@ -495,17 +481,7 @@
       NewLoad->setMetadata(ID, N);
       break;
     case LLVMContext::MD_range:
-      // FIXME: It would be nice to propagate this in some way, but the type
-      // conversions make it hard.
-
-      // If it's a pointer now and the range does not contain 0, make it !nonnull.
-      if (NewTy->isPointerTy()) {
-        unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy);
-        if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
-          MDNode *NN = MDNode::get(LI.getContext(), None);
-          NewLoad->setMetadata(LLVMContext::MD_nonnull, NN);
-        }
-      }
+      copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
       break;
     }
   }
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DataLayout.h"
@@ -1069,7 +1070,7 @@
 }
 
 /// See if there is a dbg.value intrinsic for DIVar for the PHI node.
-static bool PhiHasDebugValue(DILocalVariable *DIVar, 
+static bool PhiHasDebugValue(DILocalVariable *DIVar,
                              DIExpression *DIExpr,
                              PHINode *APN) {
   // Since we can't guarantee that the original dbg.declare instrinsic
@@ -1152,7 +1153,7 @@
     DbgValue->insertAfter(LI);
 }
 
-/// Inserts a llvm.dbg.value intrinsic after a phi 
+/// Inserts a llvm.dbg.value intrinsic after a phi
 /// that has an associated llvm.dbg.decl intrinsic.
 void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            PHINode *APN, DIBuilder &Builder) {
@@ -1723,12 +1724,12 @@
     // Preserve !invariant.group in K.
     break;
   case LLVMContext::MD_align:
-    K->setMetadata(Kind, 
+    K->setMetadata(Kind,
       MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
     break;
   case LLVMContext::MD_dereferenceable:
   case LLVMContext::MD_dereferenceable_or_null:
-    K->setMetadata(Kind, 
+    K->setMetadata(Kind,
       MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
     break;
   }
@@ -1812,6 +1813,49 @@
   return false;
 }
 
+void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
+                               LoadInst &NewLI) {
+  auto *NewTy = NewLI.getType();
+
+  // This only directly applies if the new type is also a pointer.
+  if (NewTy->isPointerTy()) {
+    NewLI.setMetadata(LLVMContext::MD_nonnull, N);
+    return;
+  }
+
+  // The only other translation we can do is to integral loads with !range
+  // metadata.
+  if (!NewTy->isIntegerTy())
+    return;
+
+  MDBuilder MDB(NewLI.getContext());
+  const Value *Ptr = OldLI.getPointerOperand();
+  auto *ITy = cast<IntegerType>(NewTy);
+  auto *NullInt = ConstantExpr::getPtrToInt(
+      ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+  auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+  NewLI.setMetadata(LLVMContext::MD_range,
+                    MDB.createRange(NonNullInt, NullInt));
+}
+
+void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
+                             MDNode *N, LoadInst &NewLI) {
+  auto *NewTy = NewLI.getType();
+
+  // Give up unless it is converted to a pointer where there is a single very
+  // valuable mapping we can do reliably.
+  // FIXME: It would be nice to propagate this in more ways, but the type
+  // conversions make it hard.
+  if (!NewTy->isPointerTy())
+    return;
+
+  unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+  if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+    MDNode *NN = MDNode::get(OldLI.getContext(), None);
+    NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+  }
+}
+
 namespace {
 /// A potential constituent of a bitreverse or bswap expression. See
 /// collectBitParts for a fuller explanation.
@@ -1933,7 +1977,7 @@
   unsigned NumMaskedBits = AndMask.countPopulation();
   if (!MatchBitReversals && NumMaskedBits % 8 != 0)
     return Result;
-  
+
   auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
                               MatchBitReversals, BPS);
   if (!Res)
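Stated in IR terms, a hedged sketch of the two mappings these helpers encode (the %p, %v, and %src names are illustrative): for a pointer-typed new load, !nonnull copies across unchanged; for an integer-typed new load, copyNonnullMetadata models the null pointer as integer 0 and attaches the wrapping range [1, 0), i.e. "any value except 0".

;   %p = load i8*, i8** %src, !nonnull !0       ; original pointer load
; rewritten as an integer load becomes:
;   %v = load i64, i64* %src.cast, !range !1
;   !1 = !{i64 1, i64 0}                        ; createRange(NullInt + 1, NullInt)
; copyRangeMetadata goes the other way: a !range excluding 0 becomes !nonnull
; when the load is rewritten back to a pointer type.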
debian/patches/rL306353.diff: 47 additions (new file, vendored)
@@ -0,0 +1,47 @@
commit 477bd758b48cb96477d3dd4cf3b36bf2706e8c1d
Author: Chandler Carruth <chandlerc@gmail.com>
Date:   Tue Jun 27 02:23:15 2017 +0000

    [SROA] Clean up a test case a bit prior to adding more testing for
    nonnull as part of fixing PR32902.

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306353 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,22 +3,20 @@
 ; Make sure that SROA doesn't lose nonnull metadata
 ; on loads from allocas that get optimized out.
 
-; CHECK-LABEL: define float* @yummy_nonnull
-; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
-; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
-; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
-; CHECK: ret float* {{.*}}[[RETURN]]
-
 define float* @yummy_nonnull(float** %arg) {
-entry-block:
-  %buf = alloca float*
-
-  %_arg_i8 = bitcast float** %arg to i8*
-  %_buf_i8 = bitcast float** %buf to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
-
-  %ret = load float*, float** %buf, align 8, !nonnull !0
-  ret float* %ret
+; CHECK-LABEL: define float* @yummy_nonnull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
+; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
+; CHECK-NEXT:    call void @llvm.assume(i1 %[[ASSUME]])
+; CHECK-NEXT:    ret float* %[[RETURN]]
+entry:
+  %buf = alloca float*
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
debian/patches/rL306358.diff: 52 additions (new file, vendored)
@@ -0,0 +1,52 @@
commit 156cc49e505986a1659adaa3a0b5a070372377c8
Author: Chandler Carruth <chandlerc@gmail.com>
Date:   Tue Jun 27 03:08:45 2017 +0000

    [SROA] Further test cleanup and add a test for the actual propagation of
    the nonnull attribute distinct from rewriting it into an assume.

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306358 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,8 +3,31 @@
 ; Make sure that SROA doesn't lose nonnull metadata
 ; on loads from allocas that get optimized out.
 
-define float* @yummy_nonnull(float** %arg) {
-; CHECK-LABEL: define float* @yummy_nonnull(
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+; Check that we do basic propagation of nonnull when rewriting.
+define i8* @propagate_nonnull(i32* %v) {
+; CHECK-LABEL: define i8* @propagate_nonnull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i8*
+; CHECK-NEXT:    %[[V_CAST:.*]] = bitcast i32* %v to i8*
+; CHECK-NEXT:    store i8* %[[V_CAST]], i8** %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0
+; CHECK-NEXT:    ret i8* %[[LOAD]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i32**
+  %a.gep1.cast = bitcast i8** %a.gep1 to i32**
+  store i32* %v, i32** %a.gep1.cast
+  store i32* null, i32** %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+define float* @turn_nonnull_into_assume(float** %arg) {
+; CHECK-LABEL: define float* @turn_nonnull_into_assume(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
 ; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
@@ -19,6 +42,4 @@
   ret float* %ret
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-
 !0 = !{}
debian/patches/rL306379.diff: 147 additions (new file, vendored)
@@ -0,0 +1,147 @@
commit 7df06519765b14e1b08d7034c82c45a0a653eb25
Author: Chandler Carruth <chandlerc@gmail.com>
Date:   Tue Jun 27 08:32:03 2017 +0000

    [SROA] Fix PR32902 by more carefully propagating !nonnull metadata.

    This is based heavily on the work done ni D34285. I mostly wanted to do
    test cleanup for the author to save them some time, but I had a really
    hard time understanding why it was so hard to write better test cases
    for these issues.

    The problem is that because SROA does a second rewrite of the loads and
    because we *don't* propagate !nonnull for non-pointer loads, we first
    introduced invalid !nonnull metadata and then stripped it back off just
    in time to avoid most ways of this PR manifesting. Moving to the more
    careful utility only fixes this by changing the predicate to look at the
    new load's type rather than the target type. However, that *does* fix
    the bug, and the utility is much nicer including adding range metadata
    to model the nonnull property after a conversion to an integer.

    However, we have bigger problems because we don't actually propagate
    *range* metadata, and the utility to do this extracted from instcombine
    isn't really in good shape to do this currently. It *only* handles the
    case of copying range metadata from an integer load to a pointer load.
    It doesn't even handle the trivial cases of propagating from one integer
    load to another when they are the same width! This utility will need to
    be beefed up prior to using in this location to get the metadata to
    fully survive.

    And even then, we need to go and teach things to turn the range metadata
    into an assume the way we do with nonnull so that when we *promote* an
    integer we don't lose the information.

    All of this will require a new test case that looks kind-of like
    `preserve-nonnull.ll` does here but focuses on range metadata. It will
    also likely require more testing because it needs to correctly handle
    changes to the integer width, especially as SROA actively tries to
    change the integer width!

    Last but not least, I'm a little worried about hooking the range
    metadata up here because the instcombine logic for converting from
    a range metadata *to* a nonnull metadata node seems broken in the face
    of non-zero address spaces where null is not mapped to the integer `0`.
    So that probably needs to get fixed with test cases both in SROA and in
    instcombine to cover it.

    But this *does* extract the core PR fix from D34285 of preventing the
    !nonnull metadata from being propagated in a broken state just long
    enough to feed into promotion and crash value tracking.

    On D34285 there is some discussion of zero-extend handling because it
    isn't necessary. First, the new load size covers all of the non-undef
    (ie, possibly initialized) bits. This may even extend past the original
    alloca if loading those bits could produce valid data. The only way its
    valid for us to zero-extend an integer load in SROA is if the original
    code had a zero extend or those bits were undef. And we get to assume
    things like undef *never* satifies nonnull, so non undef bits can
    participate here. No need to special case the zero-extend handling, it
    just falls out correctly.

    The original credit goes to Ariel Ben-Yehuda! I'm mostly landing this to
    save a few rounds of trivial edits fixing style issues and test case
    formulation.

    Differental Revision: D34285

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306379 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2388,9 +2388,20 @@
     if (LI.isVolatile())
       NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
 
+    // Any !nonnull metadata or !range metadata on the old load is also valid
+    // on the new load. This is even true in some cases even when the loads
+    // are different types, for example by mapping !nonnull metadata to
+    // !range metadata by modeling the null pointer constant converted to the
+    // integer type.
+    // FIXME: Add support for range metadata here. Currently the utilities
+    // for this don't propagate range metadata in trivial cases from one
+    // integer load to another, don't handle non-addrspace-0 null pointers
+    // correctly, and don't have any support for mapping ranges as the
+    // integer type becomes winder or narrower.
+    if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
+      copyNonnullMetadata(LI, N, *NewLI);
+
     // Try to preserve nonnull metadata
-    if (TargetTy->isPointerTy())
-      NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
     V = NewLI;
 
     // If this is an integer load past the end of the slice (which means the
--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -42,4 +42,51 @@
   ret float* %ret
 }
 
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load.
+; FIXME: While this doesn't do anythnig actively harmful today, it really
+; should propagate the !nonnull metadata to range metadata. The irony is, it
+; *does* initially, but then we lose that !range metadata before we finish
+; SROA.
+define i8* @propagate_nonnull_to_int() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i64
+; CHECK-NEXT:    store i64 42, i64* %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
+; CHECK-NEXT:    %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
+; CHECK-NEXT:    ret i8* %[[CAST]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load and immediately promote it to an SSA
+; register. This can fail in interesting ways due to the rewrite iteration of
+; SROA, resulting in PR32902.
+define i8* @propagate_nonnull_to_int_and_promote() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
+; CHECK-NEXT:    ret i8* %[[PROMOTED_VALUE]]
+entry:
+  %a = alloca [2 x i8*], align 8
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load i8*, i8** %a.gep1, align 8, !nonnull !0
+  ret i8* %load
+}
+
 !0 = !{}
debian/patches/series: 11 changes (vendored)
@@ -38,7 +38,6 @@ disable-llvm-symbolizer-test.diff
 clang-tidy-run-bin.diff
 #bug-30342.diff
 fix-scan-view-path.diff
-#0011-SimplifyCFG-Hoisting-invalidates-metadata.patch
 clang-fix-cmpxchg8-detection-on-i386.patch
 lldb-addversion-suffix-to-llvm-server-exec.patch
 lldb-missing-install.diff
@@ -49,3 +48,13 @@ add_symbols_versioning.patch
 ftfbs-gcc.diff
 pr81066.diff
 armhf-bitfield.diff
+# rust LLVM PR84, LLVM PR32488
+# This is actually Rust's backport of upstream RL305193 (which doesn't apply cleanly to LLVM 4)
+# https://github.com/rust-lang/llvm/commit/2b622a393ce
+rL305193-backport.diff
+# rust LLVM PR90, LLVM PR32902, PR31142
+rL298540.diff
+rL306267.diff
+rL306353.diff
+rL306358.diff
+rL306379.diff
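Net effect of the rL306267/rL306379 pair on the PR32902 pattern, sketched from the propagate_nonnull_to_int test above (the %load.int and %a.cast names are illustrative): checking the new load's type keeps SROA from stamping !nonnull onto an integer load, which is invalid IR that previously fed into promotion and crashed value tracking.

;   %load = load volatile i8*, i8** %a.gep1, !nonnull !0
; after the slice is rewritten as an integer, the metadata must not follow:
;   %load.int = load volatile i64, i64* %a.cast      ; no !nonnull here
;   %load = inttoptr i64 %load.int to i8*
; (per the FIXME in rL306379, a !range !{i64 1, i64 0} would ideally be attached)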