Merge remote-tracking branch 'origin/6.0' into 7

2025-12-25 08:36:21 +00:00 · 2019-02-09 17:27:05 +01:00 · 2019-02-09 17:27:05 +01:00 · 3a82f68a05
commit 3a82f68a05
parent 4d5381ce58 408f329cd8
12 changed files with 8610 additions and 3 deletions
--- a/debian/changelog
+++ b/debian/changelog
@ -671,6 +671,15 @@ llvm-toolchain-snapshot (1:7~svn322880-1) unstable; urgency=medium

 -- Sylvestre Ledru <sylvestre@debian.org>  Thu, 18 Jan 2018 20:50:03 +0100

+llvm-toolchain-6.0 (1:6.0.1-11) unstable; urgency=medium
+
+  * Remove 'Multi-Arch: same' in libclang
+    (Closes: #874248)
+  * Cherry-pick various llvm fixes for Julia
+    (Closes: #919628)
+
+ -- Sylvestre Ledru <sylvestre@debian.org>  Sat, 09 Feb 2019 17:22:59 +0100
+
 llvm-toolchain-6.0 (1:6.0.1-10) unstable; urgency=medium

  * Fix a baseline violation on armhf (Closes: #914268)
--- a/debian/orig-tar.sh
+++ b/debian/orig-tar.sh
@ -18,18 +18,18 @@ set -e
 # To create an rc1 release:
 # sh 4.0/debian/orig-tar.sh RELEASE_40 rc1

-SVN_BASE_URL=http://llvm.org/svn/llvm-project/
+SVN_BASE_URL=https://llvm.org/svn/llvm-project/
 MAJOR_VERSION=7
 CURRENT_VERSION=7.1.0 # Should be changed to 3.5.1 later

 if test -n "$1"; then
-# http://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/branches/google/stable/
+# https://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/branches/google/stable/
 # For example: sh 4.0/debian/orig-tar.sh release_400
    BRANCH=$1
 fi

 if test -n "$1" -a -n "$2"; then
-# http://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/tags/RELEASE_34/rc1/
+# https://llvm.org/svn/llvm-project/{cfe,llvm,compiler-rt,...}/tags/RELEASE_34/rc1/
 # For example: sh 4.0/debian/orig-tar.sh RELEASE_401 rc3 4.0.1
    BRANCH=$1
    TAG=$2
--- a/debian/patches/julia/llvm-D27629-AArch64-large_model_4.0.patch
+++ b/debian/patches/julia/llvm-D27629-AArch64-large_model_4.0.patch
@ -0,0 +1,72 @@
+From 6e7b660ee185445640110c80d80aafd436682fca Mon Sep 17 00:00:00 2001
+From: Yichao Yu <yyc1992@gmail.com>
+Date: Fri, 9 Dec 2016 15:59:46 -0500
+Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
+
+---
+ lib/MC/MCObjectFileInfo.cpp                            |  2 ++
+ .../AArch64/ELF_ARM64_BE-large-relocations.s           | 18 ++++++++++++++++++
+ .../RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s  | 18 ++++++++++++++++++
+ 3 files changed, 38 insertions(+)
+ create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
+ create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
+
+Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
+===================================================================
+--- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp
+++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
+@@ -328,6 +328,8 @@ void MCObjectFileInfo::initELFMCObjectFi
+                               dwarf::DW_EH_PE_sdata4
+                         : dwarf::DW_EH_PE_absptr;
+     break;
+  case Triple::aarch64:
+  case Triple::aarch64_be:
+   case Triple::x86_64:
+     if (PositionIndependent) {
+       PersonalityEncoding =
+Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
+===================================================================
+--- /dev/null
+++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_BE-large-relocations.s
+@@ -0,0 +1,18 @@
+# RUN: llvm-mc -triple=aarch64_be-none-linux-gnu -code-model=large -filetype=obj -o %T/be-large-reloc.o %s
+# RUN: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section be-large-reloc.o,.eh_frame=0x10000 -map-section be-large-reloc.o,.text=0xffff000000000000 -check=%s %T/be-large-reloc.o
+
+        .text
+        .globl  g
+        .p2align        2
+        .type   g,@function
+g:
+        .cfi_startproc
+        mov      x0, xzr
+        ret
+        .Lfunc_end0:
+        .size   g, .Lfunc_end0-g
+        .cfi_endproc
+
+# Skip the CIE and load the 8 bytes PC begin pointer.
+# Assuming the CIE and the FDE length are both 4 bytes.
+# rtdyld-check: *{8}(section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(be-large-reloc.o, .eh_frame) + (*{4}(section_addr(be-large-reloc.o, .eh_frame))) + 0xc)
+Index: llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
+===================================================================
+--- /dev/null
+++ llvm-toolchain-6.0-6.0.1/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
+@@ -0,0 +1,18 @@
+# RUN: llvm-mc -triple=arm64-none-linux-gnu -code-model=large -filetype=obj -o %T/large-reloc.o %s
+# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o
+
+        .text
+        .globl  g
+        .p2align        2
+        .type   g,@function
+g:
+        .cfi_startproc
+        mov      x0, xzr
+        ret
+        .Lfunc_end0:
+        .size   g, .Lfunc_end0-g
+        .cfi_endproc
+
+# Skip the CIE and load the 8 bytes PC begin pointer.
+# Assuming the CIE and the FDE length are both 4 bytes.
+# rtdyld-check: *{8}(section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc)
--- a/debian/patches/julia/llvm-D27629-AArch64-large_model_6.0.1.patch
+++ b/debian/patches/julia/llvm-D27629-AArch64-large_model_6.0.1.patch
@ -0,0 +1,24 @@
+From f76abe65e6d07fea5e838c4f8c9a9421c16debb0 Mon Sep 17 00:00:00 2001
+From: Valentin Churavy <v.churavy@gmail.com>
+Date: Thu, 5 Jul 2018 12:37:50 -0400
+Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
+
+---
+ lib/MC/MCObjectFileInfo.cpp                   |  2 ++
+ .../AArch64/ELF_ARM64_large-relocations.s     | 20 +++++++++++++++++++
+ 2 files changed, 22 insertions(+)
+ create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
+
+Index: llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
+===================================================================
+--- llvm-toolchain-6.0-6.0.1.orig/lib/MC/MCObjectFileInfo.cpp
+++ llvm-toolchain-6.0-6.0.1/lib/MC/MCObjectFileInfo.cpp
+@@ -291,6 +291,8 @@ void MCObjectFileInfo::initELFMCObjectFi
+     break;
+   case Triple::ppc64:
+   case Triple::ppc64le:
+  case Triple::aarch64:
+  case Triple::aarch64_be:
+   case Triple::x86_64:
+     FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
+                      (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
--- a/debian/patches/julia/llvm-D34078-vectorize-fdiv.patch
+++ b/debian/patches/julia/llvm-D34078-vectorize-fdiv.patch
@ -0,0 +1,53 @@
+From f94d12b6108b944199b715f31f25a022f75d2feb Mon Sep 17 00:00:00 2001
+From: Yichao Yu <yyc1992@gmail.com>
+Date: Sat, 10 Jun 2017 08:45:13 -0400
+Subject: [PATCH 4/4] Enable support for floating-point division reductions
+
+Similar to fsub, fdiv can also be vectorized using fmul.
+---
+ lib/Transforms/Utils/LoopUtils.cpp               |  1 +
+ test/Transforms/LoopVectorize/float-reduction.ll | 22 ++++++++++++++++++++++
+ 2 files changed, 23 insertions(+)
+
+Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp
+===================================================================
+--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Utils/LoopUtils.cpp
+++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Utils/LoopUtils.cpp
+@@ -513,6 +513,7 @@ RecurrenceDescriptor::isRecurrenceInstr(
+     return InstDesc(Kind == RK_IntegerOr, I);
+   case Instruction::Xor:
+     return InstDesc(Kind == RK_IntegerXor, I);
+  case Instruction::FDiv:
+   case Instruction::FMul:
+     return InstDesc(Kind == RK_FloatMult, I, UAI);
+   case Instruction::FSub:
+Index: llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll
+===================================================================
+--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/LoopVectorize/float-reduction.ll
+++ llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/float-reduction.ll
+@@ -44,3 +44,25 @@ for.body:
+ for.end:                                          ; preds = %for.body
+   ret float %sub
+ }
+
+;CHECK-LABEL: @foodiv(
+;CHECK: fdiv fast <4 x float>
+;CHECK: ret
+define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  %0 = load float, float* %arrayidx, align 4
+  %sub = fdiv fast float %sum.04, %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 200
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret float %sub
+}
--- a/debian/patches/julia/llvm-D42262-jumpthreading-not-i1.patch
+++ b/debian/patches/julia/llvm-D42262-jumpthreading-not-i1.patch
@ -0,0 +1,82 @@
+commit 6a311a7a804831fea43cfb2f61322adcb407a1af
+Author: Keno Fischer <keno@juliacomputing.com>
+Date:   Thu Jan 18 15:57:05 2018 -0500
+
+    [JumpThreading] Don't restrict cast-traversal to i1
+    
+    Summary:
+    In D17663, JumpThreading learned to look trough simple cast instructions,
+    but only if the source of those cast instructions was a phi/cmp i1
+    (in an effort to limit compile time effects). I think this condition
+    is too restrictive. For switches with limited value range, InstCombine
+    will readily introduce an extra `trunc` instruction to a smaller
+    integer type (e.g. from i8 to i2), leaving us in the somewhat perverse
+    situation that jump-threading would work before running instcombine,
+    but not after. Since instcombine produces this pattern, I think we
+    need to consider it canonical and support it in JumpThreading.
+    In general, for limiting recursion, I think the existing restriction
+    to phi and cmp nodes should be sufficient to avoid looking through
+    unprofitable chains of instructions.
+    
+    Reviewers: haicheng, gberry, bmakam, mcrosier
+    
+    Subscribers: llvm-commits
+    
+    Differential Revision: https://reviews.llvm.org/D42262
+
+Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp
+===================================================================
+--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Scalar/JumpThreading.cpp
+++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/JumpThreading.cpp
+@@ -656,11 +656,9 @@ bool JumpThreadingPass::ComputeValueKnow
+   }
+ 
+   // Handle Cast instructions.  Only see through Cast when the source operand is
+-  // PHI or Cmp and the source type is i1 to save the compilation time.
+  // PHI or Cmp to save the compilation time.
+   if (CastInst *CI = dyn_cast<CastInst>(I)) {
+     Value *Source = CI->getOperand(0);
+-    if (!Source->getType()->isIntegerTy(1))
+-      return false;
+     if (!isa<PHINode>(Source) && !isa<CmpInst>(Source))
+       return false;
+     ComputeValueKnownInPredecessors(Source, BB, Result, Preference, CxtI);
+Index: llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll
+===================================================================
+--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/JumpThreading/basic.ll
+++ llvm-toolchain-6.0-6.0.1/test/Transforms/JumpThreading/basic.ll
+@@ -547,6 +547,34 @@ l5:
+ ; CHECK: }
+ }
+ 
+define i1 @trunc_switch(i1 %arg) {
+; CHECK-LABEL: @trunc_switch
+top:
+; CHECK: br i1 %arg, label %exitA, label %exitB
+  br i1 %arg, label %common, label %B
+
+B:
+  br label %common
+
+common:
+  %phi = phi i8 [ 2, %B ], [ 1, %top ]
+  %trunc = trunc i8 %phi to i2
+; CHECK-NOT: switch
+  switch i2 %trunc, label %unreach [
+    i2 1, label %exitA
+    i2 -2, label %exitB
+  ]
+
+unreach:
+  unreachable
+
+exitA:
+  ret i1 true
+
+exitB:
+  ret i1 false
+}
+
+ ; CHECK-LABEL: define void @h_con(i32 %p) {
+ define void @h_con(i32 %p) {
+   %x = icmp ult i32 %p, 5
--- a/debian/patches/julia/llvm-D44892-Perf-integration.patch
+++ b/debian/patches/julia/llvm-D44892-Perf-integration.patch
@ -0,0 +1,677 @@
+From 45bc0f0badbdbabaed7d204757c2aad7ab49a3fe Mon Sep 17 00:00:00 2001
+From: DokFaust <rodia@autistici.org>
+Date: Mon, 11 Jun 2018 12:59:42 +0200
+Subject: [PATCH] PerfJITEventListener integration, requires compile flag
+ LLVM_USE_PERF
+
+---
+ CMakeLists.txt                                |  13 +
+ include/llvm/Config/config.h.cmake            |   3 +
+ include/llvm/Config/llvm-config.h.cmake       |   3 +
+ .../llvm/ExecutionEngine/JITEventListener.h   |   9 +
+ lib/ExecutionEngine/CMakeLists.txt            |   4 +
+ lib/ExecutionEngine/LLVMBuild.txt             |   2 +-
+ lib/ExecutionEngine/Orc/LLVMBuild.txt         |   2 +-
+ .../PerfJITEvents/CMakeLists.txt              |   5 +
+ .../PerfJITEvents/LLVMBuild.txt               |  23 +
+ .../PerfJITEvents/PerfJITEventListener.cpp    | 492 ++++++++++++++++++
+ 10 files changed, 554 insertions(+), 2 deletions(-)
+ create mode 100644 lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
+ create mode 100644 lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
+ create mode 100644 lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index f8da6cf9211..fb92c825a46 100644
+--- a/CMakeLists.txt
+++ b/CMakeLists.txt
+@@ -426,6 +426,16 @@ if( LLVM_USE_OPROFILE )
+   endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+ endif( LLVM_USE_OPROFILE )
+
+option(LLVM_USE_PERF
+  "Use perf JIT interface to inform perf about JIT code" OFF)
+
+# If enabled, verify we are on a platform that supports perf.
+if( LLVM_USE_PERF )
+  if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+    message(FATAL_ERROR "perf support is available on Linux only.")
+  endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+endif( LLVM_USE_PERF )
+
+ set(LLVM_USE_SANITIZER "" CACHE STRING
+   "Define the sanitizer used to build binaries and tests.")
+ set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
+@@ -634,6 +644,9 @@ endif (LLVM_USE_INTEL_JITEVENTS)
+ if (LLVM_USE_OPROFILE)
+   set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT)
+ endif (LLVM_USE_OPROFILE)
+if (LLVM_USE_PERF)
+    set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents)
+endif (LLVM_USE_PERF)
+
+ message(STATUS "Constructing LLVMBuild project information")
+ execute_process(
+diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
+index 940f8420304..17787ed779b 100644
+--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
+@@ -377,6 +377,9 @@
+ /* Define if we have the oprofile JIT-support library */
+ #cmakedefine01 LLVM_USE_OPROFILE
+
+/* Define if we have the perf JIT-support library */
+#cmakedefine01 LLVM_USE_PERF
+
+ /* LLVM version information */
+ #cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}"
+
+diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
+index 4daa00f3bc4..8d9c3b24d52 100644
+--- a/include/llvm/Config/llvm-config.h.cmake
+++ b/include/llvm/Config/llvm-config.h.cmake
+@@ -65,6 +65,9 @@
+ /* Define if we have the oprofile JIT-support library */
+ #cmakedefine01 LLVM_USE_OPROFILE
+
+/* Define if we have the perf JIT-support library */
+#cmakedefine01 LLVM_USE_PERF
+
+ /* Major version of the LLVM API */
+ #define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
+
+diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
+index ff7840f00a4..1cc2c423a8b 100644
+--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
+@@ -115,6 +115,15 @@ public:
+   }
+ #endif // USE_OPROFILE
+
+#if LLVM_USE_PERF // #cmakedefine01 always defines it (0 or 1): test the value
+  static JITEventListener *createPerfJITEventListener();
+#else
+  static JITEventListener *createPerfJITEventListener()
+  {
+    return nullptr; // perf support not compiled in
+  }
+#endif // LLVM_USE_PERF
+
+ private:
+   virtual void anchor();
+ };
+diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
+index 84b34919e44..893d113a685 100644
+--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
+@@ -30,3 +30,7 @@ endif( LLVM_USE_OPROFILE )
+ if( LLVM_USE_INTEL_JITEVENTS )
+   add_subdirectory(IntelJITEvents)
+ endif( LLVM_USE_INTEL_JITEVENTS )
+
+if( LLVM_USE_PERF )
+    add_subdirectory(PerfJITEvents)
+endif( LLVM_USE_PERF )
+diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
+index 9d29a41f504..b6e1bda6a51 100644
+--- a/lib/ExecutionEngine/LLVMBuild.txt
+++ b/lib/ExecutionEngine/LLVMBuild.txt
+@@ -16,7 +16,7 @@
+ ;===------------------------------------------------------------------------===;
+
+ [common]
+-subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc
+subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents
+
+ [component_0]
+ type = Library
+diff --git a/lib/ExecutionEngine/Orc/LLVMBuild.txt b/lib/ExecutionEngine/Orc/LLVMBuild.txt
+index 8f05172e77a..ef4ae64e823 100644
+--- a/lib/ExecutionEngine/Orc/LLVMBuild.txt
+++ b/lib/ExecutionEngine/Orc/LLVMBuild.txt
+@@ -19,4 +19,4 @@
+ type = Library
+ name = OrcJIT
+ parent = ExecutionEngine
+-required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
+required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils
+diff --git a/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
+new file mode 100644
+index 00000000000..136cc429d02
+--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
+@@ -0,0 +1,5 @@
+add_llvm_library(LLVMPerfJITEvents
+  PerfJITEventListener.cpp
+  )
+
+add_dependencies(LLVMPerfJITEvents LLVMCodeGen)
+diff --git a/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
+new file mode 100644
+index 00000000000..b1958a69260
+--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
+@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = OptionalLibrary
+name = PerfJITEvents
+parent = ExecutionEngine
+required_libraries = CodeGen Core DebugInfoDWARF ExecutionEngine Object Support TransformUtils
+
+diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+new file mode 100644
+index 00000000000..c2b97dd59f3
+--- /dev/null
+++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+@@ -0,0 +1,492 @@
+//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object that tells perf about JITted
+// functions, including source line information.
+//
+// Documentation for perf jit integration is available at:
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <sys/mman.h>  // mmap()
+#include <sys/types.h> // getpid()
+#include <time.h>      // clock_gettime(), time(), localtime_r() */
+#include <unistd.h>    // for getpid(), read(), close()
+
+using namespace llvm;
+using namespace llvm::object;
+typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
+
+namespace {
+
+// language identifier (XXX: should we generate something better from debug
+// info?)
+#define JIT_LANG "llvm-IR"
+#define LLVM_PERF_JIT_MAGIC                                                    \
+  ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 |            \
+   (uint32_t)'D')
+#define LLVM_PERF_JIT_VERSION 1
+
+// bit 0: set if the jitdump file is using an architecture-specific timestamp
+// clock source
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0)
+
+struct LLVMPerfJitHeader;
+
+class PerfJITEventListener : public JITEventListener {
+public:
+  PerfJITEventListener();
+  ~PerfJITEventListener() {
+    if (MarkerAddr)
+      CloseMarker();
+  }
+
+  void NotifyObjectEmitted(const ObjectFile &Obj,
+                           const RuntimeDyld::LoadedObjectInfo &L) override;
+  void NotifyFreeingObject(const ObjectFile &Obj) override;
+
+private:
+  bool InitDebuggingDir();
+  bool OpenMarker();
+  void CloseMarker();
+  static bool FillMachine(LLVMPerfJitHeader &hdr);
+
+  void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr,
+                  uint64_t CodeSize);
+  void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
+
+  // cache lookups
+  pid_t Pid;
+
+  // base directory for output data
+  std::string JitPath;
+
+  // file descriptor of the dump file; owned and closed by Dumpstream
+  int DumpFd = -1;
+
+  // output data stream
+  std::unique_ptr<raw_fd_ostream> Dumpstream;
+
+  // prevent concurrent dumps from messing up the output file
+  sys::Mutex Mutex;
+
+  // perf mmap marker
+  void *MarkerAddr = NULL;
+
+  // perf support ready
+  bool SuccessfullyInitialized = false;
+
+  // identifier for functions, primarily to identify when moving them around
+  uint64_t CodeGeneration = 1;
+};
+
+// The following are POD struct definitions from the perf jit specification
+
+enum LLVMPerfJitRecordType {
+  JIT_CODE_LOAD = 0,
+  JIT_CODE_MOVE = 1, // not emitted, code isn't moved
+  JIT_CODE_DEBUG_INFO = 2,
+  JIT_CODE_CLOSE = 3,          // not emitted, unnecessary
+  JIT_CODE_UNWINDING_INFO = 4, // not emitted
+
+  JIT_CODE_MAX
+};
+
+struct LLVMPerfJitHeader {
+  uint32_t Magic;     // characters "JiTD"
+  uint32_t Version;   // header version
+  uint32_t TotalSize; // total size of header
+  uint32_t ElfMach;   // elf mach target
+  uint32_t Pad1;      // reserved
+  uint32_t Pid;
+  uint64_t Timestamp; // timestamp
+  uint64_t Flags;     // flags
+};
+
+// record prefix (mandatory in each record)
+struct LLVMPerfJitRecordPrefix {
+  uint32_t Id; // record type identifier
+  uint32_t TotalSize;
+  uint64_t Timestamp;
+};
+
+struct LLVMPerfJitRecordCodeLoad {
+  LLVMPerfJitRecordPrefix Prefix;
+
+  uint32_t Pid;
+  uint32_t Tid;
+  uint64_t Vma;
+  uint64_t CodeAddr;
+  uint64_t CodeSize;
+  uint64_t CodeIndex;
+};
+
+struct LLVMPerfJitDebugEntry {
+  uint64_t Addr;
+  int Lineno;  // source line number starting at 1
+  int Discrim; // column discriminator, 0 is default
+  // followed by null terminated filename, \xff\0 if same as previous entry
+};
+
+struct LLVMPerfJitRecordDebugInfo {
+  LLVMPerfJitRecordPrefix Prefix;
+
+  uint64_t CodeAddr;
+  uint64_t NrEntry;
+  // followed by NrEntry LLVMPerfJitDebugEntry records
+};
+
+static inline uint64_t timespec_to_ns(const struct timespec *ts) {
+  const uint64_t NanoSecPerSec = 1000000000;
+  return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
+}
+
+static inline uint64_t perf_get_timestamp(void) {
+  struct timespec ts;
+  int ret;
+
+  ret = clock_gettime(CLOCK_MONOTONIC, &ts);
+  if (ret)
+    return 0;
+
+  return timespec_to_ns(&ts);
+}
+
+PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) {
+  // check if clock-source is supported
+  if (!perf_get_timestamp()) {
+    errs() << "kernel does not support CLOCK_MONOTONIC\n";
+    return;
+  }
+
+  if (!InitDebuggingDir()) {
+    errs() << "could not initialize debugging directory\n";
+    return;
+  }
+
+  std::string Filename;
+  raw_string_ostream FilenameBuf(Filename);
+  FilenameBuf << JitPath << "/jit-" << Pid << ".dump";
+
+  // Need to open ourselves, because we need to hand the FD to OpenMarker() and
+  // raw_fd_ostream doesn't expose the FD.
+  using sys::fs::openFileForWrite;
+  if (auto EC =
+          openFileForWrite(FilenameBuf.str(), DumpFd, sys::fs::F_RW, 0666)) {
+    errs() << "could not open JIT dump file " << FilenameBuf.str() << ": "
+           << EC.message() << "\n";
+    return;
+  }
+
+  Dumpstream = make_unique<raw_fd_ostream>(DumpFd, true);
+
+  LLVMPerfJitHeader Header = {0};
+  if (!FillMachine(Header))
+    return;
+
+  // signal this process emits JIT information
+  if (!OpenMarker())
+    return;
+
+  // emit dumpstream header
+  Header.Magic = LLVM_PERF_JIT_MAGIC;
+  Header.Version = LLVM_PERF_JIT_VERSION;
+  Header.TotalSize = sizeof(Header);
+  Header.Pid = Pid;
+  Header.Timestamp = perf_get_timestamp();
+  Dumpstream->write(reinterpret_cast<const char *>(&Header), sizeof(Header));
+
+  // Everything initialized, can do profiling now.
+  if (!Dumpstream->has_error())
+    SuccessfullyInitialized = true;
+}
+
+void PerfJITEventListener::NotifyObjectEmitted(
+    const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) {
+
+  if (!SuccessfullyInitialized)
+    return;
+
+  OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
+  const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
+
+  // Get the address of the object image for use as a unique identifier
+  std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj);
+
+  // Use symbol info to iterate over functions in the object.
+  for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
+    SymbolRef Sym = P.first;
+    std::string SourceFileName;
+
+    Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
+    if (!SymTypeOrErr) {
+      // There's not much we can do with errors here
+      consumeError(SymTypeOrErr.takeError());
+      continue;
+    }
+    SymbolRef::Type SymType = *SymTypeOrErr;
+    if (SymType != SymbolRef::ST_Function)
+      continue;
+
+    Expected<StringRef> Name = Sym.getName();
+    if (!Name) {
+      consumeError(Name.takeError());
+      continue;
+    }
+
+    Expected<uint64_t> AddrOrErr = Sym.getAddress();
+    if (!AddrOrErr) {
+      consumeError(AddrOrErr.takeError());
+      continue;
+    }
+    uint64_t Addr = *AddrOrErr;
+    uint64_t Size = P.second;
+
+    // According to the spec, debugging info has to be emitted before the
+    // corresponding code load record.
+    DILineInfoTable Lines = Context->getLineInfoForAddressRange(
+        Addr, Size, FileLineInfoKind::AbsoluteFilePath);
+
+    NotifyDebug(Addr, Lines);
+    NotifyCode(Name, Addr, Size);
+  }
+
+  Dumpstream->flush();
+}
+
+void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
+  // perf currently doesn't have an interface for unloading. But munmap()ing the
+  // code section does, so that's ok.
+}
+
+bool PerfJITEventListener::InitDebuggingDir() {
+  time_t Time;
+  struct tm LocalTime;
+  char TimeBuffer[sizeof("YYYYMMDD")];
+  SmallString<64> Path;
+
+  // search for location to dump data to
+  if (const char *BaseDir = getenv("JITDUMPDIR"))
+    Path.append(BaseDir);
+  else if (!sys::path::home_directory(Path))
+    Path = ".";
+
+  // create debug directory
+  Path += "/.debug/jit/";
+  if (auto EC = sys::fs::create_directories(Path)) {
+    errs() << "could not create jit cache directory " << Path << ": "
+           << EC.message() << "\n";
+    return false;
+  }
+
+  // create unique directory for dump data related to this process
+  time(&Time);
+  localtime_r(&Time, &LocalTime);
+  strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
+  Path += JIT_LANG "-jit-";
+  Path += TimeBuffer;
+
+  SmallString<128> UniqueDebugDir;
+
+  using sys::fs::createUniqueDirectory;
+  if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
+    errs() << "could not create unique jit cache directory " << UniqueDebugDir
+           << ": " << EC.message() << "\n";
+    return false;
+  }
+
+  JitPath = UniqueDebugDir.str();
+
+  return true;
+}
+
+bool PerfJITEventListener::OpenMarker() {
+  // We mmap the jitdump to create an MMAP RECORD in perf.data file.  The mmap
+  // is captured either live (perf record running when we mmap) or in deferred
+  // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
+  // file for more meta data info about the jitted code. Perf report/annotate
+  // detect this special filename and process the jitdump file.
+  //
+  // Mapping must be PROT_EXEC to ensure it is captured by perf record
+  // even when not using -d option.
+  MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC,
+                      MAP_PRIVATE, DumpFd, 0);
+
+  if (MarkerAddr == MAP_FAILED) {
+    errs() << "could not mmap JIT marker\n";
+    return false;
+  }
+  return true;
+}
+
+void PerfJITEventListener::CloseMarker() {
+  // OpenMarker() leaves MAP_FAILED (non-null) behind when mmap() fails; only
+  // unmap an address that was actually mapped.
+  if (MarkerAddr && MarkerAddr != MAP_FAILED)
+    munmap(MarkerAddr, sys::Process::getPageSize());
+  MarkerAddr = nullptr;
+}
+
+bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
+  char id[16];
+  struct {
+    uint16_t e_type;
+    uint16_t e_machine;
+  } info;
+
+  size_t RequiredMemory = sizeof(id) + sizeof(info);
+
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+    MemoryBuffer::getFileSlice("/proc/self/exe",
+			       RequiredMemory,
+			       0);
+
+  // This'll not guarantee that enough data was actually read from the
+  // underlying file. Instead the trailing part of the buffer would be
+  // zeroed. Given the ELF signature check below that seems ok though,
+  // it's unlikely that the file ends just after that, and the
+  // consequence would just be that perf wouldn't recognize the
+  // signature.
+  if (auto EC = MB.getError()) {
+    errs() << "could not open /proc/self/exe: " << EC.message() << "\n";
+    return false;
+  }
+
+  memcpy(&id, (*MB)->getBufferStart(), sizeof(id));
+  memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info));
+
+  // check ELF signature
+  if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
+    errs() << "invalid elf signature\n";
+    return false;
+  }
+
+  hdr.ElfMach = info.e_machine;
+
+  return true;
+}
+
+void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
+                                      uint64_t CodeAddr, uint64_t CodeSize) {
+  assert(SuccessfullyInitialized);
+
+  // 0 length functions can't have samples.
+  if (CodeSize == 0)
+    return;
+
+  LLVMPerfJitRecordCodeLoad rec;
+  rec.Prefix.Id = JIT_CODE_LOAD;
+  rec.Prefix.TotalSize = sizeof(rec) +        // code load record itself
+                         Symbol->size() + 1 + // symbol name plus terminator
+                         CodeSize;            // and the raw code bytes
+  rec.Prefix.Timestamp = perf_get_timestamp();
+
+  rec.CodeSize = CodeSize;
+  rec.Vma = 0;
+  rec.CodeAddr = CodeAddr;
+  rec.Pid = Pid;
+  rec.Tid = get_threadid();
+
+  // take the lock so records from concurrent callers are not interleaved
+  MutexGuard Guard(Mutex);
+
+  rec.CodeIndex = CodeGeneration++; // under lock!
+
+  Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
+  Dumpstream->write(Symbol->data(), Symbol->size() + 1); // NOTE(review): assumes a NUL follows the name
+  Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
+}
+
+void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr,
+                                       DILineInfoTable Lines) {
+  assert(SuccessfullyInitialized);
+
+  // Didn't get useful debug info.
+  if (Lines.empty())
+    return;
+
+  LLVMPerfJitRecordDebugInfo rec;
+  rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
+  rec.Prefix.TotalSize = sizeof(rec); // will be increased further
+  rec.Prefix.Timestamp = perf_get_timestamp();
+  rec.CodeAddr = CodeAddr;
+  rec.NrEntry = Lines.size();
+
+  // compute total size of record (variable due to filenames)
+  DILineInfoTable::iterator Begin = Lines.begin();
+  DILineInfoTable::iterator End = Lines.end();
+  for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+    DILineInfo &line = It->second;
+    rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
+    rec.Prefix.TotalSize += line.FileName.size() + 1;
+  }
+
+  // The debug_entry describes the source line information. It is defined as
+  // follows in order:
+  // * uint64_t code_addr: address of function for which the debug information
+  // is generated
+  // * uint32_t line     : source file line number (starting at 1)
+  // * uint32_t discrim  : column discriminator, 0 is default
+  // * char name[n]      : source file name in ASCII, including null termination
+
+  // avoid interspersing output
+  MutexGuard Guard(Mutex);
+
+  Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
+
+  for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
+    LLVMPerfJitDebugEntry LineInfo;
+    DILineInfo &Line = It->second;
+
+    LineInfo.Addr = It->first;
+    // The function re-created by perf is preceded by a elf
+    // header. Need to adjust for that, otherwise the results are
+    // wrong.
+    LineInfo.Addr += 0x40;
+    LineInfo.Lineno = Line.Line;
+    LineInfo.Discrim = Line.Discriminator;
+
+    Dumpstream->write(reinterpret_cast<const char *>(&LineInfo),
+                      sizeof(LineInfo));
+    Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
+  }
+}
+
+// There should be only a single event listener per process, otherwise perf gets
+// confused.
+llvm::ManagedStatic<PerfJITEventListener> PerfListener;
+
+} // end anonymous namespace
+
+namespace llvm {
+JITEventListener *JITEventListener::createPerfJITEventListener() {
+  return &*PerfListener;
+}
+
+} // namespace llvm
+
+--
+2.17.1
+
--- a/debian/patches/julia/llvm-D50010-VNCoercion-ni.patch
+++ b/debian/patches/julia/llvm-D50010-VNCoercion-ni.patch
@ -0,0 +1,89 @@
+commit 8eb2b102a203d83fb713f3bf79acf235dabdd8cd
+Author: Keno Fischer <keno@juliacomputing.com>
+Date:   Mon Jul 30 16:59:08 2018 -0400
+
+    [VNCoercion] Disallow coercion between different ni addrspaces
+    
+    Summary:
+    I'm not sure if it would be legal by the IR reference to introduce
+    an addrspacecast here, since the IR reference is a bit vague on
+    the exact semantics, but at least for our usage of it (and I
+    suspect for many others' usage) it is not. For us, addrspacecasts
+    between non-integral address spaces carry frontend information that the
+    optimizer cannot deduce afterwards in a generic way (though we
+    have frontend-specific passes in our pipeline that do propagate
+    these). In any case, I'm sure nobody is using it this way at
+    the moment, since it would have introduced inttoptrs, which
+    are definitely illegal.
+    
+    Fixes PR38375
+    
+    Reviewers: sanjoy, reames, dberlin
+    
+    Subscribers: llvm-commits
+    
+    Differential Revision: https://reviews.llvm.org/D50010
+
+diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
+index c3feea6a0a4..735d1e7b792 100644
+--- a/lib/Transforms/Utils/VNCoercion.cpp
+++ b/lib/Transforms/Utils/VNCoercion.cpp
+@@ -20,14 +20,21 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
+       StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
+     return false;
+ 
+  Type *StoredValTy = StoredVal->getType();
+
+   // The store has to be at least as big as the load.
+   if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy))
+     return false;
+ 
+-  // Don't coerce non-integral pointers to integers or vice versa.
+-  if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
+-      DL.isNonIntegralPointerType(LoadTy))
+  bool StoredNI = DL.isNonIntegralPointerType(StoredValTy);
+  bool LoadNI = DL.isNonIntegralPointerType(LoadTy);
+  if (StoredNI != LoadNI) {
+     return false;
+  } else if (StoredNI && LoadNI &&
+             cast<PointerType>(StoredValTy)->getAddressSpace() !=
+                 cast<PointerType>(LoadTy)->getAddressSpace()) {
+    return false;
+  }
+ 
+   return true;
+ }
+diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
+index 9ae4132231d..5217fc1a06a 100644
+--- a/test/Transforms/GVN/non-integral-pointers.ll
+++ b/test/Transforms/GVN/non-integral-pointers.ll
+@@ -1,6 +1,6 @@
+ ; RUN: opt -gvn -S < %s | FileCheck %s
+ 
+-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
+ target triple = "x86_64-unknown-linux-gnu"
+ 
+ define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
+@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
+  alwaysTaken:
+   ret i64 42
+ }
+
+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
+ ; CHECK-LABEL: @multini(
+ ; CHECK-NOT: inttoptr
+ ; CHECK-NOT: ptrtoint
+ ; CHECK-NOT: addrspacecast
+  entry:
+   store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
+   br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
+
+  neverTaken:
+   %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
+   %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
+   ret i8 addrspace(5)* %differentas
+
+  alwaysTaken:
+   ret i8 addrspace(5)* null
+ }
--- a/debian/patches/julia/llvm-D50167-scev-umin.patch
+++ b/debian/patches/julia/llvm-D50167-scev-umin.patch
--- a/debian/patches/julia/llvm-PPC-addrspaces.patch
+++ b/debian/patches/julia/llvm-PPC-addrspaces.patch
@ -0,0 +1,26 @@
+From 15899eaab58e96bb7bbe7a14099674e255656a50 Mon Sep 17 00:00:00 2001
+From: Valentin Churavy <v.churavy@gmail.com>
+Date: Fri, 23 Feb 2018 14:41:20 -0500
+Subject: [PATCH] Make AddrSpaceCast noops on PPC
+
+PPC, like AArch64, doesn't have address spaces, so we can drop them in the backend
+---
+ lib/Target/PowerPC/PPCISelLowering.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+Index: llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h
+===================================================================
+--- llvm-toolchain-6.0-6.0.1.orig/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm-toolchain-6.0-6.0.1/lib/Target/PowerPC/PPCISelLowering.h
+@@ -889,6 +889,11 @@ namespace llvm {
+       return true;
+     }
+ 
+    bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+      // Addrspacecasts are always noops.
+      return true;
+    }
+
+     bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
+                              SelectionDAG &DAG,
+                              ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
--- a/debian/patches/julia/llvm-rL326967-aligned-load.patch
+++ b/debian/patches/julia/llvm-rL326967-aligned-load.patch
@ -0,0 +1,301 @@
+commit b398d8e1fa5a5a914957fa22d0a64db97f6c265e
+Author: Craig Topper <craig.topper@intel.com>
+Date:   Thu Mar 8 00:21:17 2018 +0000
+
+    [X86] Fix some isel patterns that used aligned vector load instructions with unaligned predicates.
+    
+    These patterns weren't checking the alignment of the load, but were using the aligned instructions. This will cause a GP fault if the data isn't aligned.
+    
+    I believe these were introduced in r312450.
+    
+    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@326967 91177308-0d34-0410-b5e6-96231b3b80d8
+
+diff --git a/lib/Target/X86/X86InstrVecCompiler.td b/lib/Target/X86/X86InstrVecCompiler.td
+index db3dfe56531..50c7763a2c3 100644
+--- a/lib/Target/X86/X86InstrVecCompiler.td
+++ b/lib/Target/X86/X86InstrVecCompiler.td
+@@ -261,10 +261,10 @@ let Predicates = [HasVLX] in {
+ // will zero the upper bits.
+ // TODO: Is there a safe way to detect whether the producing instruction
+ // already zeroed the upper bits?
+-multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
+-                                   ValueType DstTy, ValueType SrcTy,
+-                                   ValueType ZeroTy, PatFrag memop,
+-                                   SubRegIndex SubIdx> {
+multiclass subvector_zero_lowering<string MoveStr, string LoadStr,
+                                   RegisterClass RC, ValueType DstTy,
+                                   ValueType SrcTy, ValueType ZeroTy,
+                                   PatFrag memop, SubRegIndex SubIdx> {
+   def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
+                                      (SrcTy RC:$src), (iPTR 0))),
+             (SUBREG_TO_REG (i64 0),
+@@ -274,91 +274,91 @@ multiclass subvector_zero_lowering<string MoveStr, RegisterClass RC,
+                                      (SrcTy (bitconvert (memop addr:$src))),
+                                      (iPTR 0))),
+             (SUBREG_TO_REG (i64 0),
+-             (!cast<Instruction>("VMOV"#MoveStr#"rm") addr:$src), SubIdx)>;
+             (!cast<Instruction>("VMOV"#LoadStr#"rm") addr:$src), SubIdx)>;
+ }
+ 
+ let Predicates = [HasAVX, NoVLX] in {
+-  defm : subvector_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, loadv2f64,
+-                                 sub_xmm>;
+-  defm : subvector_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, loadv4f32,
+-                                 sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, loadv2i64,
+-                                 sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, loadv2i64,
+-                                 sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, loadv2i64,
+-                                 sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, loadv2i64,
+-                                 sub_xmm>;
+-}
+-
+-let Predicates = [HasVLX] in {
+-  defm : subvector_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32,
+  defm : subvector_zero_lowering<"APD", "UPD", VR128, v4f64, v2f64, v8i32,
+                                  loadv2f64, sub_xmm>;
+-  defm : subvector_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32,
+  defm : subvector_zero_lowering<"APS", "UPS", VR128, v8f32, v4f32, v8i32,
+                                  loadv4f32, sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32,
+  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v4i64, v2i64, v8i32,
+                                  loadv2i64, sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32,
+  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i32, v4i32, v8i32,
+                                  loadv2i64, sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32,
+  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i16, v8i16, v8i32,
+                                  loadv2i64, sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32,
+-                                 loadv2i64, sub_xmm>;
+-
+-  defm : subvector_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32,
+-                                 loadv2f64, sub_xmm>;
+-  defm : subvector_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32,
+-                                 loadv4f32, sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32,
+-                                 loadv2i64, sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32,
+-                                 loadv2i64, sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32,
+-                                 loadv2i64, sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32,
+  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i8, v16i8, v8i32,
+                                  loadv2i64, sub_xmm>;
+}
+ 
+-  defm : subvector_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32,
+-                                 loadv4f64, sub_ymm>;
+-  defm : subvector_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32,
+-                                 loadv8f32, sub_ymm>;
+-  defm : subvector_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32,
+-                                 loadv4i64, sub_ymm>;
+-  defm : subvector_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32,
+-                                 loadv4i64, sub_ymm>;
+-  defm : subvector_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32,
+-                                 loadv4i64, sub_ymm>;
+-  defm : subvector_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32,
+-                                 loadv4i64, sub_ymm>;
+let Predicates = [HasVLX] in {
+  defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v4f64,
+                                 v2f64, v8i32, loadv2f64, sub_xmm>;
+  defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v8f32,
+                                 v4f32, v8i32, loadv4f32, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v4i64,
+                                 v2i64, v8i32, loadv2i64, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i32,
+                                 v4i32, v8i32, loadv2i64, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i16,
+                                 v8i16, v8i32, loadv2i64, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i8,
+                                 v16i8, v8i32, loadv2i64, sub_xmm>;
+
+  defm : subvector_zero_lowering<"APDZ128", "UPDZ128", VR128X, v8f64,
+                                 v2f64, v16i32, loadv2f64, sub_xmm>;
+  defm : subvector_zero_lowering<"APSZ128", "UPSZ128", VR128X, v16f32,
+                                 v4f32, v16i32, loadv4f32, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v8i64,
+                                 v2i64, v16i32, loadv2i64, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v16i32,
+                                 v4i32, v16i32, loadv2i64, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v32i16,
+                                 v8i16, v16i32, loadv2i64, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA64Z128", "DQU64Z128", VR128X, v64i8,
+                                 v16i8, v16i32, loadv2i64, sub_xmm>;
+
+  defm : subvector_zero_lowering<"APDZ256", "UPDZ256", VR256X, v8f64,
+                                 v4f64, v16i32, loadv4f64, sub_ymm>;
+  defm : subvector_zero_lowering<"APSZ256", "UPSZ256", VR256X, v16f32,
+                                 v8f32, v16i32, loadv8f32, sub_ymm>;
+  defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v8i64,
+                                 v4i64, v16i32, loadv4i64, sub_ymm>;
+  defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v16i32,
+                                 v8i32, v16i32, loadv4i64, sub_ymm>;
+  defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v32i16,
+                                 v16i16, v16i32, loadv4i64, sub_ymm>;
+  defm : subvector_zero_lowering<"DQA64Z256", "DQU64Z256", VR256X, v64i8,
+                                 v32i8, v16i32, loadv4i64, sub_ymm>;
+ }
+ 
+ let Predicates = [HasAVX512, NoVLX] in {
+-  defm : subvector_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, loadv2f64,
+-                                 sub_xmm>;
+-  defm : subvector_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, loadv4f32,
+-                                 sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, loadv2i64,
+-                                 sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, loadv2i64,
+-                                 sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, loadv2i64,
+-                                 sub_xmm>;
+-  defm : subvector_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, loadv2i64,
+-                                 sub_xmm>;
+-
+-  defm : subvector_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32,
+-                                 loadv4f64, sub_ymm>;
+-  defm : subvector_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32,
+-                                 loadv8f32, sub_ymm>;
+-  defm : subvector_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32,
+-                                 loadv4i64, sub_ymm>;
+-  defm : subvector_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32,
+-                                 loadv4i64, sub_ymm>;
+-  defm : subvector_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32,
+-                                 loadv4i64, sub_ymm>;
+-  defm : subvector_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32,
+-                                 loadv4i64, sub_ymm>;
+  defm : subvector_zero_lowering<"APD", "UPD", VR128, v8f64, v2f64,
+                                 v16i32,loadv2f64, sub_xmm>;
+  defm : subvector_zero_lowering<"APS", "UPS", VR128, v16f32, v4f32,
+                                 v16i32, loadv4f32, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v8i64, v2i64,
+                                 v16i32, loadv2i64, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v16i32, v4i32,
+                                 v16i32, loadv2i64, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v32i16, v8i16,
+                                 v16i32, loadv2i64, sub_xmm>;
+  defm : subvector_zero_lowering<"DQA", "DQU", VR128, v64i8, v16i8,
+                                 v16i32, loadv2i64, sub_xmm>;
+
+  defm : subvector_zero_lowering<"APDY", "UPDY", VR256, v8f64, v4f64,
+                                 v16i32, loadv4f64, sub_ymm>;
+  defm : subvector_zero_lowering<"APSY", "UPSY", VR256, v16f32, v8f32,
+                                 v16i32, loadv8f32, sub_ymm>;
+  defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v8i64, v4i64,
+                                 v16i32, loadv4i64, sub_ymm>;
+  defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v16i32, v8i32,
+                                 v16i32, loadv4i64, sub_ymm>;
+  defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v32i16, v16i16,
+                                 v16i32, loadv4i64, sub_ymm>;
+  defm : subvector_zero_lowering<"DQAY", "DQUY", VR256, v64i8, v32i8,
+                                 v16i32, loadv4i64, sub_ymm>;
+ }
+ 
+ // List of opcodes that guaranteed to zero the upper elements of vector regs.
+diff --git a/test/CodeGen/X86/merge-consecutive-loads-256.ll b/test/CodeGen/X86/merge-consecutive-loads-256.ll
+index 6ecd8116443..0f2cf594b1c 100644
+--- a/test/CodeGen/X86/merge-consecutive-loads-256.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-256.ll
+@@ -28,13 +28,13 @@ define <4 x double> @merge_4f64_2f64_23(<2 x double>* %ptr) nounwind uwtable noi
+ define <4 x double> @merge_4f64_2f64_2z(<2 x double>* %ptr) nounwind uwtable noinline ssp {
+ ; AVX-LABEL: merge_4f64_2f64_2z:
+ ; AVX:       # %bb.0:
+-; AVX-NEXT:    vmovaps 32(%rdi), %xmm0
+; AVX-NEXT:    vmovups 32(%rdi), %xmm0
+ ; AVX-NEXT:    retq
+ ;
+ ; X32-AVX-LABEL: merge_4f64_2f64_2z:
+ ; X32-AVX:       # %bb.0:
+ ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+-; X32-AVX-NEXT:    vmovaps 32(%eax), %xmm0
+; X32-AVX-NEXT:    vmovups 32(%eax), %xmm0
+ ; X32-AVX-NEXT:    retl
+   %ptr0 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 2
+   %val0 = load <2 x double>, <2 x double>* %ptr0
+@@ -109,13 +109,13 @@ define <4 x double> @merge_4f64_f64_34uu(double* %ptr) nounwind uwtable noinline
+ define <4 x double> @merge_4f64_f64_45zz(double* %ptr) nounwind uwtable noinline ssp {
+ ; AVX-LABEL: merge_4f64_f64_45zz:
+ ; AVX:       # %bb.0:
+-; AVX-NEXT:    vmovaps 32(%rdi), %xmm0
+; AVX-NEXT:    vmovups 32(%rdi), %xmm0
+ ; AVX-NEXT:    retq
+ ;
+ ; X32-AVX-LABEL: merge_4f64_f64_45zz:
+ ; X32-AVX:       # %bb.0:
+ ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+-; X32-AVX-NEXT:    vmovaps 32(%eax), %xmm0
+; X32-AVX-NEXT:    vmovups 32(%eax), %xmm0
+ ; X32-AVX-NEXT:    retl
+   %ptr0 = getelementptr inbounds double, double* %ptr, i64 4
+   %ptr1 = getelementptr inbounds double, double* %ptr, i64 5
+@@ -155,13 +155,13 @@ define <4 x double> @merge_4f64_f64_34z6(double* %ptr) nounwind uwtable noinline
+ define <4 x i64> @merge_4i64_2i64_3z(<2 x i64>* %ptr) nounwind uwtable noinline ssp {
+ ; AVX-LABEL: merge_4i64_2i64_3z:
+ ; AVX:       # %bb.0:
+-; AVX-NEXT:    vmovaps 48(%rdi), %xmm0
+; AVX-NEXT:    vmovups 48(%rdi), %xmm0
+ ; AVX-NEXT:    retq
+ ;
+ ; X32-AVX-LABEL: merge_4i64_2i64_3z:
+ ; X32-AVX:       # %bb.0:
+ ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+-; X32-AVX-NEXT:    vmovaps 48(%eax), %xmm0
+; X32-AVX-NEXT:    vmovups 48(%eax), %xmm0
+ ; X32-AVX-NEXT:    retl
+   %ptr0 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 3
+   %val0 = load <2 x i64>, <2 x i64>* %ptr0
+@@ -217,13 +217,13 @@ define <4 x i64> @merge_4i64_i64_1zzu(i64* %ptr) nounwind uwtable noinline ssp {
+ define <4 x i64> @merge_4i64_i64_23zz(i64* %ptr) nounwind uwtable noinline ssp {
+ ; AVX-LABEL: merge_4i64_i64_23zz:
+ ; AVX:       # %bb.0:
+-; AVX-NEXT:    vmovaps 16(%rdi), %xmm0
+; AVX-NEXT:    vmovups 16(%rdi), %xmm0
+ ; AVX-NEXT:    retq
+ ;
+ ; X32-AVX-LABEL: merge_4i64_i64_23zz:
+ ; X32-AVX:       # %bb.0:
+ ; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+-; X32-AVX-NEXT:    vmovaps 16(%eax), %xmm0
+; X32-AVX-NEXT:    vmovups 16(%eax), %xmm0
+ ; X32-AVX-NEXT:    retl
+   %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 2
+   %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 3
+diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll
+index 62102eb382c..3c6eaf65292 100644
+--- a/test/CodeGen/X86/merge-consecutive-loads-512.ll
+++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll
+@@ -106,13 +106,13 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin
+ define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
+ ; ALL-LABEL: merge_8f64_f64_12zzuuzz:
+ ; ALL:       # %bb.0:
+-; ALL-NEXT:    vmovaps 8(%rdi), %xmm0
+; ALL-NEXT:    vmovups 8(%rdi), %xmm0
+ ; ALL-NEXT:    retq
+ ;
+ ; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
+ ; X32-AVX512F:       # %bb.0:
+ ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
+-; X32-AVX512F-NEXT:    vmovaps 8(%eax), %xmm0
+; X32-AVX512F-NEXT:    vmovups 8(%eax), %xmm0
+ ; X32-AVX512F-NEXT:    retl
+   %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
+   %ptr1 = getelementptr inbounds double, double* %ptr, i64 2
+@@ -190,7 +190,7 @@ define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline
+ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
+ ; ALL-LABEL: merge_8i64_i64_56zz9uzz:
+ ; ALL:       # %bb.0:
+-; ALL-NEXT:    vmovaps 40(%rdi), %xmm0
+; ALL-NEXT:    vmovups 40(%rdi), %xmm0
+ ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+ ; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+ ; ALL-NEXT:    retq
+@@ -198,7 +198,7 @@ define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline s
+ ; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
+ ; X32-AVX512F:       # %bb.0:
+ ; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
+-; X32-AVX512F-NEXT:    vmovaps 40(%eax), %xmm0
+; X32-AVX512F-NEXT:    vmovups 40(%eax), %xmm0
+ ; X32-AVX512F-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+ ; X32-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+ ; X32-AVX512F-NEXT:    retl
--- a/debian/patches/julia/llvm-rL327898.patch
+++ b/debian/patches/julia/llvm-rL327898.patch