mirror of https://git.proxmox.com/git/llvm-toolchain
synced 2025-07-27 11:00:08 +00:00

* Non-maintainer upload.
* Backport some patches (originally from rust, and upstreamed) to fix two failing tests in rustc.

parent e784e58e94
commit be94a5a691
debian/changelog (8 changed lines, vendored)
@@ -1,3 +1,11 @@
+llvm-toolchain-4.0 (1:4.0.1-7.1) UNRELEASED; urgency=medium
+
+  * Non-maintainer upload.
+  * Backport some patches (originally from rust, and upstreamed) to fix two
+    failing tests in rustc.
+
+ -- Ximin Luo <infinity0@debian.org>  Wed, 18 Oct 2017 15:28:20 +0200
+
 llvm-toolchain-4.0 (1:4.0.1-7) unstable; urgency=medium
 
   * Force the deactivation of ocaml until the transition is done
debian/patches/0011-SimplifyCFG-Hoisting-invalidates-metadata.patch (deleted, 84 lines, vendored)
@@ -1,84 +0,0 @@
From eee68eafa7e8e4ce996b49f5551636639a6c331a Mon Sep 17 00:00:00 2001
From: David Majnemer <david.majnemer@gmail.com>
Date: Mon, 29 Aug 2016 17:14:08 +0000
Subject: [PATCH 11/17] [SimplifyCFG] Hoisting invalidates metadata

We forgot to remove optimization metadata when performing hoisting during
FoldTwoEntryPHINode.

This fixes PR29163.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@279980 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Transforms/Utils/SimplifyCFG.cpp   | 10 ++++++++--
 test/Transforms/SimplifyCFG/PR29163.ll | 31 +++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 test/Transforms/SimplifyCFG/PR29163.ll

diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 0504646..c197317 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2024,14 +2024,20 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
 
   // Move all 'aggressive' instructions, which are defined in the
   // conditional parts of the if's up to the dominating block.
-  if (IfBlock1)
+  if (IfBlock1) {
+    for (auto &I : *IfBlock1)
+      I.dropUnknownNonDebugMetadata();
     DomBlock->getInstList().splice(InsertPt->getIterator(),
                                    IfBlock1->getInstList(), IfBlock1->begin(),
                                    IfBlock1->getTerminator()->getIterator());
-  if (IfBlock2)
+  }
+  if (IfBlock2) {
+    for (auto &I : *IfBlock2)
+      I.dropUnknownNonDebugMetadata();
     DomBlock->getInstList().splice(InsertPt->getIterator(),
                                    IfBlock2->getInstList(), IfBlock2->begin(),
                                    IfBlock2->getTerminator()->getIterator());
+  }
 
   while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
     // Change the PHI node into a select instruction.
diff --git a/test/Transforms/SimplifyCFG/PR29163.ll b/test/Transforms/SimplifyCFG/PR29163.ll
new file mode 100644
index 0000000..65f9090
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PR29163.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@GV = external constant i64*
+
+define i64* @test1(i1 %cond, i8* %P) {
+entry:
+  br i1 %cond, label %if, label %then
+
+then:
+  %bc = bitcast i8* %P to i64*
+  br label %join
+
+if:
+  %load = load i64*, i64** @GV, align 8, !dereferenceable !0
+  br label %join
+
+join:
+  %phi = phi i64* [ %bc, %then ], [ %load, %if ]
+  ret i64* %phi
+}
+
+; CHECK-LABEL: define i64* @test1(
+; CHECK: %[[bc:.*]] = bitcast i8* %P to i64*
+; CHECK: %[[load:.*]] = load i64*, i64** @GV, align 8{{$}}
+; CHECK: %[[phi:.*]] = select i1 %cond, i64* %[[load]], i64* %[[bc]]
+; CHECK: ret i64* %[[phi]]
+
+
+!0 = !{i64 8}
--
2.10.1
debian/patches/rL298540.diff (new file, 292 lines, vendored)
@@ -0,0 +1,292 @@
commit 2b66aeed20b61d42cd2782b94d5419c6faca49a1
Author: Luqman Aden <me@luqman.ca>
Date: Wed Mar 22 19:16:39 2017 +0000

    Preserve nonnull metadata on Loads through SROA & mem2reg.

    Summary:
    https://llvm.org/bugs/show_bug.cgi?id=31142 :

    SROA was dropping the nonnull metadata on loads from allocas that got
    optimized out. This patch simply preserves nonnull metadata on loads
    through SROA and mem2reg.

    Reviewers: chandlerc, efriedma

    Reviewed By: efriedma

    Subscribers: hfinkel, spatel, efriedma, arielb1, davide, llvm-commits

    Differential Revision: https://reviews.llvm.org/D27114

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298540 91177308-0d34-0410-b5e6-96231b3b80d8
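For orientation, a minimal before/after sketch of what this change does. The IR below is illustrative only (modeled on the new test cases further down, with assumed value names such as %buf and %v); it is not part of the patch:

    ; before mem2reg: the nonnull fact lives on the load of the alloca slot
    %buf = alloca float*
    store float* %v, float** %buf
    %buf.load = load float*, float** %buf, !nonnull !0
    ret float* %buf.load

    ; after mem2reg: the load is erased, so the fact is re-encoded as an assume
    %cmp = icmp ne float* %v, null
    call void @llvm.assume(i1 %cmp)
    ret float* %v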
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2387,6 +2387,10 @@
                              LI.isVolatile(), LI.getName());
     if (LI.isVolatile())
       NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+    // Try to preserve nonnull metadata
+    if (TargetTy->isPointerTy())
+      NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
     V = NewLI;
 
     // If this is an integer load past the end of the slice (which means the
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -15,7 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
@@ -23,6 +22,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -38,6 +38,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -301,6 +302,18 @@
 
 } // end of anonymous namespace
 
+/// Given a LoadInst LI this adds assume(LI != null) after it.
+static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
+  Function *AssumeIntrinsic =
+      Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
+  ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
+                                       Constant::getNullValue(LI->getType()));
+  LoadNotNull->insertAfter(LI);
+  CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
+  CI->insertAfter(LoadNotNull);
+  AC->registerAssumption(CI);
+}
+
 static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
   // Knowing that this alloca is promotable, we know that it's safe to kill all
   // instructions except for load and store.
@@ -334,9 +347,9 @@
 /// and thus must be phi-ed with undef. We fall back to the standard alloca
 /// promotion algorithm in that case.
 static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
-                                     LargeBlockInfo &LBI,
-                                     DominatorTree &DT,
-                                     AliasSetTracker *AST) {
+                                     LargeBlockInfo &LBI, DominatorTree &DT,
+                                     AliasSetTracker *AST,
+                                     AssumptionCache *AC) {
   StoreInst *OnlyStore = Info.OnlyStore;
   bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
   BasicBlock *StoreBB = OnlyStore->getParent();
@@ -387,6 +400,14 @@
     // code.
     if (ReplVal == LI)
       ReplVal = UndefValue::get(LI->getType());
+
+    // If the load was marked as nonnull we don't want to lose
+    // that information when we erase this Load. So we preserve
+    // it with an assume.
+    if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+        !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+      addAssumeNonNull(AC, LI);
+
     LI->replaceAllUsesWith(ReplVal);
     if (AST && LI->getType()->isPointerTy())
       AST->deleteValue(LI);
@@ -435,7 +456,9 @@
 /// }
 static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                      LargeBlockInfo &LBI,
-                                     AliasSetTracker *AST) {
+                                     AliasSetTracker *AST,
+                                     DominatorTree &DT,
+                                     AssumptionCache *AC) {
   // The trickiest case to handle is when we have large blocks. Because of this,
   // this code is optimized assuming that large blocks happen. This does not
   // significantly pessimize the small block case. This uses LargeBlockInfo to
@@ -476,10 +499,17 @@
       // There is no store before this load, bail out (load may be affected
       // by the following stores - see main comment).
       return false;
-    }
-    else
+    } else {
       // Otherwise, there was a store before this load, the load takes its value.
-      LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
+      // Note, if the load was marked as nonnull we don't want to lose that
+      // information when we erase it. So we preserve it with an assume.
+      Value *ReplVal = std::prev(I)->second->getOperand(0);
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
+      LI->replaceAllUsesWith(ReplVal);
+    }
 
     if (AST && LI->getType()->isPointerTy())
       AST->deleteValue(LI);
@@ -553,7 +583,7 @@
   // If there is only a single store to this value, replace any loads of
   // it that are directly dominated by the definition with the value stored.
   if (Info.DefiningBlocks.size() == 1) {
-    if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
+    if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST, AC)) {
       // The alloca has been processed, move on.
       RemoveFromAllocasList(AllocaNum);
       ++NumSingleStore;
@@ -564,7 +594,7 @@
   // If the alloca is only read and written in one basic block, just perform a
   // linear sweep over the block to eliminate it.
   if (Info.OnlyUsedInOneBlock &&
-      promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
+      promoteSingleBlockAlloca(AI, Info, LBI, AST, DT, AC)) {
     // The alloca has been processed, move on.
     RemoveFromAllocasList(AllocaNum);
     continue;
@@ -940,6 +970,13 @@
 
       Value *V = IncomingVals[AI->second];
 
+      // If the load was marked as nonnull we don't want to lose
+      // that information when we erase this Load. So we preserve
+      // it with an assume.
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(V, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
       // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
       if (AST && LI->getType()->isPointerTy())
--- /dev/null
+++ b/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; This tests that mem2reg preserves the !nonnull metadata on loads
+; from allocas that get optimized out.
+
+; Check the case where the alloca in question has a single store.
+define float* @single_store(float** %arg) {
+; CHECK-LABEL: define float* @single_store
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float **%buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store but still within one basic block.
+define float* @single_block(float** %arg) {
+; CHECK-LABEL: define float* @single_block
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float **%buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store and also reads and writes in multiple blocks.
+define float* @multi_block(float** %arg) {
+; CHECK-LABEL: define float* @multi_block
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: br label %next
+; CHECK-LABEL: next:
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  br label %next
+next:
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check that we don't add an assume if it's not
+; necessary i.e. the value is already implied to be nonnull
+define float* @no_assume(float** %arg) {
+; CHECK-LABEL: define float* @no_assume
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: %cn = icmp ne float* %arg.load, null
+; CHECK: br i1 %cn, label %next, label %fin
+; CHECK-LABEL: next:
+; CHECK-NOT: call void @llvm.assume
+; CHECK: ret float* %arg.load
+; CHECK-LABEL: fin:
+; CHECK: ret float* null
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  %cn = icmp ne float* %arg.load, null
+  br i1 %cn, label %next, label %fin
+next:
+; At this point the above nonnull check ensures that
+; the value %arg.load is nonnull in this block and thus
+; we need not add the assume.
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+fin:
+  ret float* null
+}
+
+!0 = !{}
--- /dev/null
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure that SROA doesn't lose nonnull metadata
+; on loads from allocas that get optimized out.
+
+; CHECK-LABEL: define float* @yummy_nonnull
+; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* {{.*}}[[RETURN]]
+
+define float* @yummy_nonnull(float** %arg) {
+entry-block:
+  %buf = alloca float*
+
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+!0 = !{}
debian/patches/rL305193-backport.diff (new file, 442 lines, vendored)
@@ -0,0 +1,442 @@
commit 2b622a393ce80c6157d32a50bf67d6b830729469
Author: Than McIntosh <thanm@google.com>
Date: Mon Jun 12 14:56:02 2017 +0000

    StackColoring: smarter check for slot overlap

    Summary:
    The old check for slot overlap treated 2 slots `S` and `T` as
    overlapping if there existed a CFG node in which both of the slots could
    possibly be active. That is overly conservative and caused stack blowups
    in Rust programs. Instead, check whether there is a single CFG node in
    which both of the slots are possibly active *together*.

    Fixes PR32488.

    Patch by Ariel Ben-Yehuda <ariel.byd@gmail.com>

    Reviewers: thanm, nagisa, llvm-commits, efriedma, rnk

    Reviewed By: thanm

    Subscribers: dotdash

    Differential Revision: https://reviews.llvm.org/D31583

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305193 91177308-0d34-0410-b5e6-96231b3b80d8
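For orientation, a minimal IR sketch of the CFG shape this fixes (illustrative only, modeled on the pr32488 test added below; the function and value names are ours). Both slots may be live on entry to %join, so the old per-block check made them interfere; but neither slot ever *starts* being in-use while the other is in-use, so the new check still merges them into a single 4k slot:

    define void @sketch(i1 %var) {
    entry:
      %x = alloca [4096 x i8]
      %y = alloca [4096 x i8]
      %xp = bitcast [4096 x i8]* %x to i8*
      %yp = bitcast [4096 x i8]* %y to i8*
      br i1 %var, label %left, label %right
    left:                                   ; only %x becomes in-use here
      call void @llvm.lifetime.start.p0i8(i64 4096, i8* %xp)
      call void @use(i8* %xp)
      br label %join
    right:                                  ; only %y becomes in-use here
      call void @llvm.lifetime.start.p0i8(i64 4096, i8* %yp)
      call void @use(i8* %yp)
      br label %join
    join:                                   ; %x or %y may be live, never both
      call void @llvm.lifetime.end.p0i8(i64 4096, i8* %xp)
      call void @llvm.lifetime.end.p0i8(i64 4096, i8* %yp)
      ret void
    }

    declare void @use(i8*)
    declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
    declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)

(A lifetime.end on an already-dead slot is a no-op, per rule L5 in the comment block below, so the unconditional ends in %join are legal.)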
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -87,10 +87,134 @@
 STATISTIC(StackSlotMerged, "Number of stack slot merged.");
 STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
 
+//===----------------------------------------------------------------------===//
+// StackColoring Pass
+//===----------------------------------------------------------------------===//
+//
+// Stack Coloring reduces stack usage by merging stack slots when they
+// can't be used together. For example, consider the following C program:
+//
+//     void bar(char *, int);
+//     void foo(bool var) {
+//         A: {
+//             char z[4096];
+//             bar(z, 0);
+//         }
+//
+//         char *p;
+//         char x[4096];
+//         char y[4096];
+//         if (var) {
+//             p = x;
+//         } else {
+//             bar(y, 1);
+//             p = y + 1024;
+//         }
+//     B:
+//         bar(p, 2);
+//     }
+//
+// Naively-compiled, this program would use 12k of stack space. However, the
+// stack slot corresponding to `z` is always destroyed before either of the
+// stack slots for `x` or `y` are used, and then `x` is only used if `var`
+// is true, while `y` is only used if `var` is false. So in no time are 2
+// of the stack slots used together, and therefore we can merge them,
+// compiling the function using only a single 4k alloca:
+//
+//     void foo(bool var) { // equivalent
+//         char x[4096];
+//         char *p;
+//         bar(x, 0);
+//         if (var) {
+//             p = x;
+//         } else {
+//             bar(x, 1);
+//             p = x + 1024;
+//         }
+//         bar(p, 2);
+//     }
+//
+// This is an important optimization if we want stack space to be under
+// control in large functions, both open-coded ones and ones created by
+// inlining.
 //
 // Implementation Notes:
 // ---------------------
 //
+// An important part of the above reasoning is that `z` can't be accessed
+// while the latter 2 calls to `bar` are running. This is justified because
+// `z`'s lifetime is over after we exit from block `A:`, so any further
+// accesses to it would be UB. The way we represent this information
+// in LLVM is by having frontends delimit blocks with `lifetime.start`
+// and `lifetime.end` intrinsics.
+//
+// The effect of these intrinsics seems to be as follows (maybe I should
+// specify this in the reference?):
+//
+//   L1) at start, each stack-slot is marked as *out-of-scope*, unless no
+//       lifetime intrinsic refers to that stack slot, in which case
+//       it is marked as *in-scope*.
+//   L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
+//       the stack slot is overwritten with `undef`.
+//   L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
+//   L4) on function exit, all stack slots are marked as *out-of-scope*.
+//   L5) `lifetime.end` is a no-op when called on a slot that is already
+//       *out-of-scope*.
+//   L6) memory accesses to *out-of-scope* stack slots are UB.
+//   L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
+//       are invalidated, unless the slot is "degenerate". This is used to
+//       justify not marking slots as in-use until the pointer to them is
+//       used, but feels a bit hacky in the presence of things like LICM. See
+//       the "Degenerate Slots" section for more details.
+//
+// Now, let's ground stack coloring on these rules. We'll define a slot
+// as *in-use* at a (dynamic) point in execution if it either can be
+// written to at that point, or if it has a live and non-undef content
+// at that point.
+//
+// Obviously, slots that are never *in-use* together can be merged, and
+// in our example `foo`, the slots for `x`, `y` and `z` are never
+// in-use together (of course, sometimes slots that *are* in-use together
+// might still be mergable, but we don't care about that here).
+//
+// In this implementation, we successively merge pairs of slots that are
+// not *in-use* together. We could be smarter - for example, we could merge
+// a single large slot with 2 small slots, or we could construct the
+// interference graph and run a "smart" graph coloring algorithm, but with
+// that aside, how do we find out whether a pair of slots might be *in-use*
+// together?
+//
+// From our rules, we see that *out-of-scope* slots are never *in-use*,
+// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
+// until their address is taken. Therefore, we can approximate slot activity
+// using dataflow.
+//
+// A subtle point: naively, we might try to figure out which pairs of
+// stack-slots interfere by propagating `S in-use` through the CFG for every
+// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
+// which they are both *in-use*.
+//
+// That is sound, but overly conservative in some cases: in our (artificial)
+// example `foo`, either `x` or `y` might be in use at the label `B:`, but
+// as `x` is only in use if we came in from the `var` edge and `y` only
+// if we came from the `!var` edge, they still can't be in use together.
+// See PR32488 for an important real-life case.
+//
+// If we wanted to find all points of interference precisely, we could
+// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
+// would be precise, but requires propagating `O(n^2)` dataflow facts.
+//
+// However, we aren't interested in the *set* of points of interference
+// between 2 stack slots, only *whether* there *is* such a point. So we
+// can rely on a little trick: for `S` and `T` to be in-use together,
+// one of them needs to become in-use while the other is in-use (or
+// they might both become in use simultaneously). We can check this
+// by also keeping track of the points at which a stack slot might *start*
+// being in-use.
+//
+// Exact first use:
+// ----------------
+//
 // Consider the following motivating example:
 //
 //     int foo() {
@@ -159,6 +283,9 @@
 // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
 // byte stack (better).
 //
+// Degenerate Slots:
+// -----------------
+//
 // Relying entirely on first-use of stack slots is problematic,
 // however, due to the fact that optimizations can sometimes migrate
 // uses of a variable outside of its lifetime start/end region. Here
@@ -238,10 +365,6 @@
 // for "b" then it will appear that 'b' has a degenerate lifetime.
 //
 
-//===----------------------------------------------------------------------===//
-// StackColoring Pass
-//===----------------------------------------------------------------------===//
-
 namespace {
 /// StackColoring - A machine pass for merging disjoint stack allocations,
 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
@@ -272,8 +395,11 @@
   /// Maps basic blocks to a serial number.
   SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
 
-  /// Maps liveness intervals for each slot.
+  /// Maps slots to their use interval. Outside of this interval, slots
+  /// values are either dead or `undef` and they will not be written to.
   SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
+  /// Maps slots to the points where they can become in-use.
+  SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts;
   /// VNInfo is used for the construction of LiveIntervals.
   VNInfo::Allocator VNInfoAllocator;
   /// SlotIndex analysis object.
@@ -676,15 +802,22 @@
 
 void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
   SmallVector<SlotIndex, 16> Starts;
-  SmallVector<SlotIndex, 16> Finishes;
+  SmallVector<bool, 16> DefinitelyInUse;
 
   // For each block, find which slots are active within this block
   // and update the live intervals.
   for (const MachineBasicBlock &MBB : *MF) {
     Starts.clear();
     Starts.resize(NumSlots);
-    Finishes.clear();
-    Finishes.resize(NumSlots);
+    DefinitelyInUse.clear();
+    DefinitelyInUse.resize(NumSlots);
+
+    // Start the interval of the slots that we previously found to be 'in-use'.
+    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
+    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
+         pos = MBBLiveness.LiveIn.find_next(pos)) {
+      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
+    }
 
     // Create the interval for the basic blocks containing lifetime begin/end.
     for (const MachineInstr &MI : MBB) {
@@ -696,68 +829,35 @@
       SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
       for (auto Slot : slots) {
         if (IsStart) {
-          if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex)
+          // If a slot is already definitely in use, we don't have to emit
+          // a new start marker because there is already a pre-existing
+          // one.
+          if (!DefinitelyInUse[Slot]) {
+            LiveStarts[Slot].push_back(ThisIndex);
+            DefinitelyInUse[Slot] = true;
+          }
+          if (!Starts[Slot].isValid())
             Starts[Slot] = ThisIndex;
         } else {
-          if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex)
-            Finishes[Slot] = ThisIndex;
+          if (Starts[Slot].isValid()) {
+            VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
+            Intervals[Slot]->addSegment(
+                LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
+            Starts[Slot] = SlotIndex(); // Invalidate the start index
+            DefinitelyInUse[Slot] = false;
+          }
         }
       }
     }
 
-    // Create the interval of the blocks that we previously found to be 'alive'.
-    BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
-    for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
-         pos = MBBLiveness.LiveIn.find_next(pos)) {
-      Starts[pos] = Indexes->getMBBStartIdx(&MBB);
-    }
-    for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1;
-         pos = MBBLiveness.LiveOut.find_next(pos)) {
-      Finishes[pos] = Indexes->getMBBEndIdx(&MBB);
-    }
-
+    // Finish up started segments
     for (unsigned i = 0; i < NumSlots; ++i) {
-      //
-      // When LifetimeStartOnFirstUse is turned on, data flow analysis
-      // is forward (from starts to ends), not bidirectional. A
-      // consequence of this is that we can wind up in situations
-      // where Starts[i] is invalid but Finishes[i] is valid and vice
-      // versa. Example:
-      //
-      //     LIFETIME_START x
-      //     if (...) {
-      //       <use of x>
-      //       throw ...;
-      //     }
-      //     LIFETIME_END x
-      //     return 2;
-      //
-      //
-      // Here the slot for "x" will not be live into the block
-      // containing the "return 2" (since lifetimes start with first
-      // use, not at the dominating LIFETIME_START marker).
-      //
-      if (Starts[i].isValid() && !Finishes[i].isValid()) {
-        Finishes[i] = Indexes->getMBBEndIdx(&MBB);
-      }
       if (!Starts[i].isValid())
         continue;
 
-      assert(Starts[i] && Finishes[i] && "Invalid interval");
-      VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
-      SlotIndex S = Starts[i];
-      SlotIndex F = Finishes[i];
-      if (S < F) {
-        // We have a single consecutive region.
-        Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
-      } else {
-        // We have two non-consecutive regions. This happens when
-        // LIFETIME_START appears after the LIFETIME_END marker.
-        SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
-        SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
-        Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
-        Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
-      }
+      SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
+      VNInfo *VNI = Intervals[i]->getValNumInfo(0);
+      Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
     }
   }
 }
@@ -987,6 +1087,7 @@
   BasicBlockNumbering.clear();
   Markers.clear();
   Intervals.clear();
+  LiveStarts.clear();
   VNInfoAllocator.Reset();
 
   unsigned NumSlots = MFI->getObjectIndexEnd();
@@ -998,6 +1099,7 @@
   SmallVector<int, 8> SortedSlots;
   SortedSlots.reserve(NumSlots);
   Intervals.reserve(NumSlots);
+  LiveStarts.resize(NumSlots);
 
   unsigned NumMarkers = collectMarkers(NumSlots);
 
@@ -1069,6 +1171,9 @@
     return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
   });
 
+  for (auto &s : LiveStarts)
+    std::sort(s.begin(), s.end());
+
   bool Changed = true;
   while (Changed) {
     Changed = false;
@@ -1084,12 +1189,22 @@
       int SecondSlot = SortedSlots[J];
       LiveInterval *First = &*Intervals[FirstSlot];
       LiveInterval *Second = &*Intervals[SecondSlot];
+      auto &FirstS = LiveStarts[FirstSlot];
+      auto &SecondS = LiveStarts[SecondSlot];
       assert (!First->empty() && !Second->empty() && "Found an empty range");
 
-      // Merge disjoint slots.
-      if (!First->overlaps(*Second)) {
+      // Merge disjoint slots. This is a little bit tricky - see the
+      // Implementation Notes section for an explanation.
+      if (!First->isLiveAtIndexes(SecondS) &&
+          !Second->isLiveAtIndexes(FirstS)) {
         Changed = true;
         First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
+
+        int OldSize = FirstS.size();
+        FirstS.append(SecondS.begin(), SecondS.end());
+        auto Mid = FirstS.begin() + OldSize;
+        std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
+
         SlotRemap[SecondSlot] = FirstSlot;
         SortedSlots[J] = -1;
         DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
--- a/test/CodeGen/X86/StackColoring.ll
+++ b/test/CodeGen/X86/StackColoring.ll
@@ -582,12 +582,76 @@
   ret i32 %x.addr.0
 }
 
+;CHECK-LABEL: multi_segment:
+;YESCOLOR: subq $256, %rsp
+;NOFIRSTUSE: subq $256, %rsp
+;NOCOLOR: subq $512, %rsp
+define i1 @multi_segment(i1, i1)
+{
+entry-block:
+  %foo = alloca [32 x i64]
+  %bar = alloca [32 x i64]
+  %foo_i8 = bitcast [32 x i64]* %foo to i8*
+  %bar_i8 = bitcast [32 x i64]* %bar to i8*
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
+  call void @baz([32 x i64]* %foo, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 1)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  ret i1 true
+}
+
+;CHECK-LABEL: pr32488:
+;YESCOLOR: subq $256, %rsp
+;NOFIRSTUSE: subq $256, %rsp
+;NOCOLOR: subq $512, %rsp
+define i1 @pr32488(i1, i1)
+{
+entry-block:
+  %foo = alloca [32 x i64]
+  %bar = alloca [32 x i64]
+  %foo_i8 = bitcast [32 x i64]* %foo to i8*
+  %bar_i8 = bitcast [32 x i64]* %bar to i8*
+  br i1 %0, label %if_false, label %if_true
+if_false:
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %bar_i8)
+  call void @baz([32 x i64]* %bar, i32 0)
+  br i1 %1, label %if_false.1, label %onerr
+if_false.1:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  br label %merge
+if_true:
+  call void @llvm.lifetime.start.p0i8(i64 256, i8* %foo_i8)
+  call void @baz([32 x i64]* %foo, i32 1)
+  br i1 %1, label %if_true.1, label %onerr
+if_true.1:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  br label %merge
+merge:
+  ret i1 false
+onerr:
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %foo_i8)
+  call void @llvm.lifetime.end.p0i8(i64 256, i8* %bar_i8)
+  call void @destructor()
+  ret i1 true
+}
+
+%Data = type { [32 x i64] }
+
+declare void @destructor()
+
 declare void @inita(i32*)
 
 declare void @initb(i32*,i32*,i32*)
 
 declare void @bar([100 x i32]* , [100 x i32]*) nounwind
 
+declare void @baz([32 x i64]*, i32)
+
 declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
 
 declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
debian/patches/rL306267.diff (new file, 187 lines, vendored)
@@ -0,0 +1,187 @@
commit 5a057dc8edbb63887f8c611dd8ddf1b76997f07c
Author: Chandler Carruth <chandlerc@gmail.com>
Date: Mon Jun 26 03:31:31 2017 +0000

    [InstCombine] Factor the logic for propagating !nonnull and !range
    metadata out of InstCombine and into helpers.

    NFC, this just exposes the logic used by InstCombine when propagating
    metadata from one load instruction to another. The plan is to use this
    in SROA to address PR32902.

    If anyone has better ideas about how to factor this or name variables,
    I'm all ears, but this seemed like a pretty good start and lets us make
    progress on the PR.

    This is based on a patch by Ariel Ben-Yehuda (D34285).

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306267 91177308-0d34-0410-b5e6-96231b3b80d8
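For orientation, a small IR sketch (ours, not part of the patch; %slot and %slot.cast are assumed names) of the two mappings these helpers perform when a rewritten load changes between pointer and integer type:

    ; copyNonnullMetadata: a pointer load carrying !nonnull, rewritten as an
    ; integer load, keeps the fact as a !range that excludes zero
    %p = load i8*, i8** %slot, !nonnull !0
    %i = load i64, i64* %slot.cast, !range !1

    ; copyRangeMetadata: an integer load whose !range excludes zero,
    ; rewritten as a pointer load, gets !nonnull back
    !0 = !{}
    !1 = !{i64 1, i64 0} ; wrapping range [1, 0): every i64 value except 0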
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -366,6 +366,19 @@
 /// during lowering by the GC infrastructure.
 bool callsGCLeafFunction(ImmutableCallSite CS);
 
+/// Copy a nonnull metadata node to a new load instruction.
+///
+/// This handles mapping it to range metadata if the new load is an integer
+/// load instead of a pointer load.
+void copyNonnullMetadata(const LoadInst &OldLI, MDNode *N, LoadInst &NewLI);
+
+/// Copy a range metadata node to a new load instruction.
+///
+/// This handles mapping it to nonnull metadata if the new load is a pointer
+/// load instead of an integer load and the range doesn't cover null.
+void copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI, MDNode *N,
+                       LoadInst &NewLI);
+
 //===----------------------------------------------------------------------===//
 //  Intrinsic pattern matching
 //
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -471,21 +471,7 @@
       break;
 
     case LLVMContext::MD_nonnull:
-      // This only directly applies if the new type is also a pointer.
-      if (NewTy->isPointerTy()) {
-        NewLoad->setMetadata(ID, N);
-        break;
-      }
-      // If it's integral now, translate it to !range metadata.
-      if (NewTy->isIntegerTy()) {
-        auto *ITy = cast<IntegerType>(NewTy);
-        auto *NullInt = ConstantExpr::getPtrToInt(
-            ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
-        auto *NonNullInt =
-            ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
-        NewLoad->setMetadata(LLVMContext::MD_range,
-                             MDB.createRange(NonNullInt, NullInt));
-      }
+      copyNonnullMetadata(LI, N, *NewLoad);
       break;
     case LLVMContext::MD_align:
     case LLVMContext::MD_dereferenceable:
@@ -495,17 +481,7 @@
       NewLoad->setMetadata(ID, N);
       break;
     case LLVMContext::MD_range:
-      // FIXME: It would be nice to propagate this in some way, but the type
-      // conversions make it hard.
-
-      // If it's a pointer now and the range does not contain 0, make it !nonnull.
-      if (NewTy->isPointerTy()) {
-        unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy);
-        if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
-          MDNode *NN = MDNode::get(LI.getContext(), None);
-          NewLoad->setMetadata(LLVMContext::MD_nonnull, NN);
-        }
-      }
+      copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
       break;
     }
   }
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DataLayout.h"
@@ -1069,7 +1070,7 @@
 }
 
 /// See if there is a dbg.value intrinsic for DIVar for the PHI node.
-static bool PhiHasDebugValue(DILocalVariable *DIVar, 
+static bool PhiHasDebugValue(DILocalVariable *DIVar,
                              DIExpression *DIExpr,
                              PHINode *APN) {
   // Since we can't guarantee that the original dbg.declare intrinsic
@@ -1152,7 +1153,7 @@
   DbgValue->insertAfter(LI);
 }
 
-/// Inserts a llvm.dbg.value intrinsic after a phi 
+/// Inserts a llvm.dbg.value intrinsic after a phi
 /// that has an associated llvm.dbg.decl intrinsic.
 void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
                                            PHINode *APN, DIBuilder &Builder) {
@@ -1723,12 +1724,12 @@
     // Preserve !invariant.group in K.
     break;
   case LLVMContext::MD_align:
-    K->setMetadata(Kind, 
+    K->setMetadata(Kind,
         MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
     break;
   case LLVMContext::MD_dereferenceable:
   case LLVMContext::MD_dereferenceable_or_null:
-    K->setMetadata(Kind, 
+    K->setMetadata(Kind,
         MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
     break;
   }
@@ -1812,6 +1813,49 @@
   return false;
 }
 
+void llvm::copyNonnullMetadata(const LoadInst &OldLI, MDNode *N,
+                               LoadInst &NewLI) {
+  auto *NewTy = NewLI.getType();
+
+  // This only directly applies if the new type is also a pointer.
+  if (NewTy->isPointerTy()) {
+    NewLI.setMetadata(LLVMContext::MD_nonnull, N);
+    return;
+  }
+
+  // The only other translation we can do is to integral loads with !range
+  // metadata.
+  if (!NewTy->isIntegerTy())
+    return;
+
+  MDBuilder MDB(NewLI.getContext());
+  const Value *Ptr = OldLI.getPointerOperand();
+  auto *ITy = cast<IntegerType>(NewTy);
+  auto *NullInt = ConstantExpr::getPtrToInt(
+      ConstantPointerNull::get(cast<PointerType>(Ptr->getType())), ITy);
+  auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(ITy, 1));
+  NewLI.setMetadata(LLVMContext::MD_range,
+                    MDB.createRange(NonNullInt, NullInt));
+}
+
+void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
+                             MDNode *N, LoadInst &NewLI) {
+  auto *NewTy = NewLI.getType();
+
+  // Give up unless it is converted to a pointer where there is a single very
+  // valuable mapping we can do reliably.
+  // FIXME: It would be nice to propagate this in more ways, but the type
+  // conversions make it hard.
+  if (!NewTy->isPointerTy())
+    return;
+
+  unsigned BitWidth = DL.getTypeSizeInBits(NewTy);
+  if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+    MDNode *NN = MDNode::get(OldLI.getContext(), None);
+    NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
+  }
+}
+
 namespace {
 /// A potential constituent of a bitreverse or bswap expression. See
 /// collectBitParts for a fuller explanation.
@@ -1933,7 +1977,7 @@
   unsigned NumMaskedBits = AndMask.countPopulation();
   if (!MatchBitReversals && NumMaskedBits % 8 != 0)
     return Result;
-  
+
   auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps,
                               MatchBitReversals, BPS);
   if (!Res)
debian/patches/rL306353.diff (new file, 47 lines, vendored)
@@ -0,0 +1,47 @@
commit 477bd758b48cb96477d3dd4cf3b36bf2706e8c1d
Author: Chandler Carruth <chandlerc@gmail.com>
Date: Tue Jun 27 02:23:15 2017 +0000

    [SROA] Clean up a test case a bit prior to adding more testing for
    nonnull as part of fixing PR32902.

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306353 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,22 +3,20 @@
 ; Make sure that SROA doesn't lose nonnull metadata
 ; on loads from allocas that get optimized out.
 
-; CHECK-LABEL: define float* @yummy_nonnull
-; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
-; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
-; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
-; CHECK: ret float* {{.*}}[[RETURN]]
-
 define float* @yummy_nonnull(float** %arg) {
-entry-block:
-  %buf = alloca float*
-
-  %_arg_i8 = bitcast float** %arg to i8*
-  %_buf_i8 = bitcast float** %buf to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
-
-  %ret = load float*, float** %buf, align 8, !nonnull !0
-  ret float* %ret
+; CHECK-LABEL: define float* @yummy_nonnull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
+; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
+; CHECK-NEXT:    call void @llvm.assume(i1 %[[ASSUME]])
+; CHECK-NEXT:    ret float* %[[RETURN]]
+entry:
+  %buf = alloca float*
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
 }
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
debian/patches/rL306358.diff (new file, 52 lines, vendored)
@@ -0,0 +1,52 @@
commit 156cc49e505986a1659adaa3a0b5a070372377c8
Author: Chandler Carruth <chandlerc@gmail.com>
Date: Tue Jun 27 03:08:45 2017 +0000

    [SROA] Further test cleanup and add a test for the actual propagation of
    the nonnull attribute distinct from rewriting it into an assume.

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306358 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -3,8 +3,31 @@
 ; Make sure that SROA doesn't lose nonnull metadata
 ; on loads from allocas that get optimized out.
 
-define float* @yummy_nonnull(float** %arg) {
-; CHECK-LABEL: define float* @yummy_nonnull(
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+; Check that we do basic propagation of nonnull when rewriting.
+define i8* @propagate_nonnull(i32* %v) {
+; CHECK-LABEL: define i8* @propagate_nonnull(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i8*
+; CHECK-NEXT:    %[[V_CAST:.*]] = bitcast i32* %v to i8*
+; CHECK-NEXT:    store i8* %[[V_CAST]], i8** %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i8*, i8** %[[A]], !nonnull !0
+; CHECK-NEXT:    ret i8* %[[LOAD]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i32**
+  %a.gep1.cast = bitcast i8** %a.gep1 to i32**
+  store i32* %v, i32** %a.gep1.cast
+  store i32* null, i32** %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+define float* @turn_nonnull_into_assume(float** %arg) {
+; CHECK-LABEL: define float* @turn_nonnull_into_assume(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    %[[RETURN:.*]] = load float*, float** %arg, align 8
 ; CHECK-NEXT:    %[[ASSUME:.*]] = icmp ne float* %[[RETURN]], null
@@ -19,6 +42,4 @@
   ret float* %ret
 }
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-
 !0 = !{}
debian/patches/rL306379.diff (new file, 147 lines, vendored)
@@ -0,0 +1,147 @@
commit 7df06519765b14e1b08d7034c82c45a0a653eb25
Author: Chandler Carruth <chandlerc@gmail.com>
Date: Tue Jun 27 08:32:03 2017 +0000

    [SROA] Fix PR32902 by more carefully propagating !nonnull metadata.

    This is based heavily on the work done in D34285. I mostly wanted to do
    test cleanup for the author to save them some time, but I had a really
    hard time understanding why it was so hard to write better test cases
    for these issues.

    The problem is that because SROA does a second rewrite of the loads and
    because we *don't* propagate !nonnull for non-pointer loads, we first
    introduced invalid !nonnull metadata and then stripped it back off just
    in time to avoid most ways of this PR manifesting. Moving to the more
    careful utility only fixes this by changing the predicate to look at the
    new load's type rather than the target type. However, that *does* fix
    the bug, and the utility is much nicer including adding range metadata
    to model the nonnull property after a conversion to an integer.

    However, we have bigger problems because we don't actually propagate
    *range* metadata, and the utility to do this extracted from instcombine
    isn't really in good shape to do this currently. It *only* handles the
    case of copying range metadata from an integer load to a pointer load.
    It doesn't even handle the trivial cases of propagating from one integer
    load to another when they are the same width! This utility will need to
    be beefed up prior to using in this location to get the metadata to
    fully survive.

    And even then, we need to go and teach things to turn the range metadata
    into an assume the way we do with nonnull so that when we *promote* an
    integer we don't lose the information.

    All of this will require a new test case that looks kind-of like
    `preserve-nonnull.ll` does here but focuses on range metadata. It will
    also likely require more testing because it needs to correctly handle
    changes to the integer width, especially as SROA actively tries to
    change the integer width!

    Last but not least, I'm a little worried about hooking the range
    metadata up here because the instcombine logic for converting from
    a range metadata *to* a nonnull metadata node seems broken in the face
    of non-zero address spaces where null is not mapped to the integer `0`.
    So that probably needs to get fixed with test cases both in SROA and in
    instcombine to cover it.

    But this *does* extract the core PR fix from D34285 of preventing the
    !nonnull metadata from being propagated in a broken state just long
    enough to feed into promotion and crash value tracking.

    On D34285 there is some discussion of zero-extend handling because it
    isn't necessary. First, the new load size covers all of the non-undef
    (ie, possibly initialized) bits. This may even extend past the original
    alloca if loading those bits could produce valid data. The only way it's
    valid for us to zero-extend an integer load in SROA is if the original
    code had a zero extend or those bits were undef. And we get to assume
    things like undef *never* satisfies nonnull, so non undef bits can
    participate here. No need to special case the zero-extend handling, it
    just falls out correctly.

    The original credit goes to Ariel Ben-Yehuda! I'm mostly landing this to
    save a few rounds of trivial edits fixing style issues and test case
    formulation.

    Differential Revision: D34285

    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306379 91177308-0d34-0410-b5e6-96231b3b80d8

--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2388,9 +2388,20 @@
     if (LI.isVolatile())
       NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
 
+    // Any !nonnull metadata or !range metadata on the old load is also valid
+    // on the new load. This is even true in some cases even when the loads
+    // are different types, for example by mapping !nonnull metadata to
+    // !range metadata by modeling the null pointer constant converted to the
+    // integer type.
+    // FIXME: Add support for range metadata here. Currently the utilities
+    // for this don't propagate range metadata in trivial cases from one
+    // integer load to another, don't handle non-addrspace-0 null pointers
+    // correctly, and don't have any support for mapping ranges as the
+    // integer type becomes wider or narrower.
+    if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
+      copyNonnullMetadata(LI, N, *NewLI);
+
     // Try to preserve nonnull metadata
-    if (TargetTy->isPointerTy())
-      NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
     V = NewLI;
 
     // If this is an integer load past the end of the slice (which means the
--- a/test/Transforms/SROA/preserve-nonnull.ll
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -42,4 +42,51 @@
   ret float* %ret
 }
 
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load.
+; FIXME: While this doesn't do anything actively harmful today, it really
+; should propagate the !nonnull metadata to range metadata. The irony is, it
+; *does* initially, but then we lose that !range metadata before we finish
+; SROA.
+define i8* @propagate_nonnull_to_int() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[A:.*]] = alloca i64
+; CHECK-NEXT:    store i64 42, i64* %[[A]]
+; CHECK-NEXT:    %[[LOAD:.*]] = load volatile i64, i64* %[[A]]
+; CHECK-NEXT:    %[[CAST:.*]] = inttoptr i64 %[[LOAD]] to i8*
+; CHECK-NEXT:    ret i8* %[[CAST]]
+entry:
+  %a = alloca [2 x i8*]
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load volatile i8*, i8** %a.gep1, !nonnull !0
+  ret i8* %load
+}
+
+; Make sure we properly handle the !nonnull attribute when we convert
+; a pointer load to an integer load and immediately promote it to an SSA
+; register. This can fail in interesting ways due to the rewrite iteration of
+; SROA, resulting in PR32902.
+define i8* @propagate_nonnull_to_int_and_promote() {
+; CHECK-LABEL: define i8* @propagate_nonnull_to_int_and_promote(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    %[[PROMOTED_VALUE:.*]] = inttoptr i64 42 to i8*
+; CHECK-NEXT:    ret i8* %[[PROMOTED_VALUE]]
+entry:
+  %a = alloca [2 x i8*], align 8
+  %a.gep0 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 0
+  %a.gep1 = getelementptr [2 x i8*], [2 x i8*]* %a, i32 0, i32 1
+  %a.gep0.cast = bitcast i8** %a.gep0 to i64*
+  %a.gep1.cast = bitcast i8** %a.gep1 to i64*
+  store i64 42, i64* %a.gep1.cast
+  store i64 0, i64* %a.gep0.cast
+  %load = load i8*, i8** %a.gep1, align 8, !nonnull !0
+  ret i8* %load
+}
+
 !0 = !{}
debian/patches/series (11 changed lines, vendored)
@@ -38,7 +38,6 @@ disable-llvm-symbolizer-test.diff
 clang-tidy-run-bin.diff
 #bug-30342.diff
 fix-scan-view-path.diff
-#0011-SimplifyCFG-Hoisting-invalidates-metadata.patch
 clang-fix-cmpxchg8-detection-on-i386.patch
 lldb-addversion-suffix-to-llvm-server-exec.patch
 lldb-missing-install.diff
@@ -49,3 +48,13 @@ add_symbols_versioning.patch
 ftfbs-gcc.diff
 pr81066.diff
 armhf-bitfield.diff
+# rust LLVM PR84, LLVM PR32488
+# This is actually Rust's backport of upstream RL305193 (which doesn't apply cleanly to LLVM 4)
+# https://github.com/rust-lang/llvm/commit/2b622a393ce
+rL305193-backport.diff
+# rust LLVM PR90, LLVM PR32902, PR31142
+rL298540.diff
+rL306267.diff
+rL306353.diff
+rL306358.diff
+rL306379.diff