From fdcd4d5e0c7d498f54af3906a7c76735686deef1 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 28 Aug 2018 15:33:00 +0200 Subject: [PATCH 01/12] rename the file for something more explicit --- debian/patches/{D51108.diff => D51108-rust-powerpc.diff} | 0 debian/patches/series | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename debian/patches/{D51108.diff => D51108-rust-powerpc.diff} (100%) diff --git a/debian/patches/D51108.diff b/debian/patches/D51108-rust-powerpc.diff similarity index 100% rename from debian/patches/D51108.diff rename to debian/patches/D51108-rust-powerpc.diff diff --git a/debian/patches/series b/debian/patches/series index c901e764..e3703458 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -60,4 +60,4 @@ D42717-JumpThreading-backport-2.diff llvm-D49832-SCEVPred.patch llvm-rL323946-LSRTy.patch PowerPC-Make-AddrSpaceCast-noop.diff -D51108.diff +D51108-rust-powerpc.diff From ae7f542c3a0e429f5609ccc93ba621da314f49ae Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 28 Aug 2018 15:35:09 +0200 Subject: [PATCH 02/12] patch lto+pgo --- debian/patches/pr38663-pgo-lto-crash.patch | 1571 ++++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 1572 insertions(+) create mode 100644 debian/patches/pr38663-pgo-lto-crash.patch diff --git a/debian/patches/pr38663-pgo-lto-crash.patch b/debian/patches/pr38663-pgo-lto-crash.patch new file mode 100644 index 00000000..bdf10725 --- /dev/null +++ b/debian/patches/pr38663-pgo-lto-crash.patch @@ -0,0 +1,1571 @@ +--- llvm-toolchain-6.0-6.0.1~+rc1.orig/lib/CodeGen/PeepholeOptimizer.cpp 2018/03/31 11:38:16 331838 ++++ llvm-toolchain-6.0-6.0.1~+rc1/lib/CodeGen/PeepholeOptimizer.cpp 2018/05/20 16:03:21 333926 +@@ -98,6 +98,8 @@ + #include + + using namespace llvm; ++using RegSubRegPair = TargetInstrInfo::RegSubRegPair; ++using RegSubRegPairAndIdx = TargetInstrInfo::RegSubRegPairAndIdx; + + #define DEBUG_TYPE "peephole-opt" + +@@ -110,6 +112,9 @@ + 
DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), + cl::desc("Disable the peephole optimizer")); + ++/// Specifiy whether or not the value tracking looks through ++/// complex instructions. When this is true, the value tracker ++/// bails on everything that is not a copy or a bitcast. + static cl::opt + DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), + cl::desc("Disable advanced copy optimization")); +@@ -132,11 +137,11 @@ + "of commuting operands")); + + +-STATISTIC(NumReuse, "Number of extension results reused"); +-STATISTIC(NumCmps, "Number of compares eliminated"); +-STATISTIC(NumImmFold, "Number of move immediate folded"); +-STATISTIC(NumLoadFold, "Number of loads folded"); +-STATISTIC(NumSelects, "Number of selects optimized"); ++STATISTIC(NumReuse, "Number of extension results reused"); ++STATISTIC(NumCmps, "Number of compares eliminated"); ++STATISTIC(NumImmFold, "Number of move immediate folded"); ++STATISTIC(NumLoadFold, "Number of loads folded"); ++STATISTIC(NumSelects, "Number of selects optimized"); + STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); + STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); + STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); +@@ -149,9 +154,9 @@ + class PeepholeOptimizer : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; +- MachineRegisterInfo *MRI; +- MachineDominatorTree *DT; // Machine dominator tree +- MachineLoopInfo *MLI; ++ MachineRegisterInfo *MRI; ++ MachineDominatorTree *DT; // Machine dominator tree ++ MachineLoopInfo *MLI; + + public: + static char ID; // Pass identification +@@ -173,31 +178,28 @@ + } + } + +- /// \brief Track Def -> Use info used for rewriting copies. +- using RewriteMapTy = +- SmallDenseMap; ++ /// Track Def -> Use info used for rewriting copies. 
++ using RewriteMapTy = SmallDenseMap; + +- /// \brief Sequence of instructions that formulate recurrence cycle. ++ /// Sequence of instructions that formulate recurrence cycle. + using RecurrenceCycle = SmallVector; + + private: +- bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); +- bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, ++ bool optimizeCmpInstr(MachineInstr &MI); ++ bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, + SmallPtrSetImpl &LocalMIs); +- bool optimizeSelect(MachineInstr *MI, ++ bool optimizeSelect(MachineInstr &MI, + SmallPtrSetImpl &LocalMIs); +- bool optimizeCondBranch(MachineInstr *MI); +- bool optimizeCoalescableCopy(MachineInstr *MI); +- bool optimizeUncoalescableCopy(MachineInstr *MI, ++ bool optimizeCondBranch(MachineInstr &MI); ++ bool optimizeCoalescableCopy(MachineInstr &MI); ++ bool optimizeUncoalescableCopy(MachineInstr &MI, + SmallPtrSetImpl &LocalMIs); + bool optimizeRecurrence(MachineInstr &PHI); +- bool findNextSource(unsigned Reg, unsigned SubReg, +- RewriteMapTy &RewriteMap); +- bool isMoveImmediate(MachineInstr *MI, ++ bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap); ++ bool isMoveImmediate(MachineInstr &MI, + SmallSet &ImmDefRegs, + DenseMap &ImmDefMIs); +- bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, +- SmallSet &ImmDefRegs, ++ bool foldImmediate(MachineInstr &MI, SmallSet &ImmDefRegs, + DenseMap &ImmDefMIs); + + /// \brief Finds recurrence cycles, but only ones that formulated around +@@ -212,11 +214,11 @@ + /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was + /// previously seen as a copy, replace the uses of this copy with the + /// previously seen copy's destination register. +- bool foldRedundantCopy(MachineInstr *MI, ++ bool foldRedundantCopy(MachineInstr &MI, + SmallSet &CopySrcRegs, + DenseMap &CopyMIs); + +- /// \brief Is the register \p Reg a non-allocatable physical register? 
++ /// Is the register \p Reg a non-allocatable physical register? + bool isNAPhysCopy(unsigned Reg); + + /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical +@@ -224,11 +226,10 @@ + /// non-allocatable physical register was previously copied to a virtual + /// registered and hasn't been clobbered, the virt->phys copy can be + /// deleted. +- bool foldRedundantNAPhysCopy( +- MachineInstr *MI, ++ bool foldRedundantNAPhysCopy(MachineInstr &MI, + DenseMap &NAPhysToVirtMIs); + +- bool isLoadFoldable(MachineInstr *MI, ++ bool isLoadFoldable(MachineInstr &MI, + SmallSet &FoldAsLoadDefCandidates); + + /// \brief Check whether \p MI is understood by the register coalescer +@@ -249,10 +250,13 @@ + (MI.isRegSequenceLike() || MI.isInsertSubregLike() || + MI.isExtractSubregLike())); + } ++ ++ MachineInstr &rewriteSource(MachineInstr &CopyLike, ++ RegSubRegPair Def, RewriteMapTy &RewriteMap); + }; + +- /// \brief Helper class to hold instructions that are inside recurrence +- /// cycles. The recurrence cycle is formulated around 1) a def operand and its ++ /// Helper class to hold instructions that are inside recurrence cycles. ++ /// The recurrence cycle is formulated around 1) a def operand and its + /// tied use operand, or 2) a def operand and a use operand that is commutable + /// with another use operand which is tied to the def operand. In the latter + /// case, index of the tied use operand and the commutable use operand are +@@ -273,13 +277,13 @@ + Optional CommutePair; + }; + +- /// \brief Helper class to hold a reply for ValueTracker queries. Contains the +- /// returned sources for a given search and the instructions where the sources +- /// were tracked from. ++ /// Helper class to hold a reply for ValueTracker queries. ++ /// Contains the returned sources for a given search and the instructions ++ /// where the sources were tracked from. + class ValueTrackerResult { + private: + /// Track all sources found by one ValueTracker query. 
+- SmallVector RegSrcs; ++ SmallVector RegSrcs; + + /// Instruction using the sources in 'RegSrcs'. + const MachineInstr *Inst = nullptr; +@@ -302,16 +306,20 @@ + } + + void addSource(unsigned SrcReg, unsigned SrcSubReg) { +- RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg)); ++ RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg)); + } + + void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) { + assert(Idx < getNumSources() && "Reg pair source out of index"); +- RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg); ++ RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg); + } + + int getNumSources() const { return RegSrcs.size(); } + ++ RegSubRegPair getSrc(int Idx) const { ++ return RegSrcs[Idx]; ++ } ++ + unsigned getSrcReg(int Idx) const { + assert(Idx < getNumSources() && "Reg source out of index"); + return RegSrcs[Idx].Reg; +@@ -367,59 +375,41 @@ + /// The register where the value can be found. + unsigned Reg; + +- /// Specifiy whether or not the value tracking looks through +- /// complex instructions. When this is false, the value tracker +- /// bails on everything that is not a copy or a bitcast. +- /// +- /// Note: This could have been implemented as a specialized version of +- /// the ValueTracker class but that would have complicated the code of +- /// the users of this class. +- bool UseAdvancedTracking; +- + /// MachineRegisterInfo used to perform tracking. + const MachineRegisterInfo &MRI; + +- /// Optional TargetInstrInfo used to perform some complex +- /// tracking. ++ /// Optional TargetInstrInfo used to perform some complex tracking. + const TargetInstrInfo *TII; + +- /// \brief Dispatcher to the right underlying implementation of +- /// getNextSource. ++ /// Dispatcher to the right underlying implementation of getNextSource. + ValueTrackerResult getNextSourceImpl(); + +- /// \brief Specialized version of getNextSource for Copy instructions. ++ /// Specialized version of getNextSource for Copy instructions. 
+ ValueTrackerResult getNextSourceFromCopy(); + +- /// \brief Specialized version of getNextSource for Bitcast instructions. ++ /// Specialized version of getNextSource for Bitcast instructions. + ValueTrackerResult getNextSourceFromBitcast(); + +- /// \brief Specialized version of getNextSource for RegSequence +- /// instructions. ++ /// Specialized version of getNextSource for RegSequence instructions. + ValueTrackerResult getNextSourceFromRegSequence(); + +- /// \brief Specialized version of getNextSource for InsertSubreg +- /// instructions. ++ /// Specialized version of getNextSource for InsertSubreg instructions. + ValueTrackerResult getNextSourceFromInsertSubreg(); + +- /// \brief Specialized version of getNextSource for ExtractSubreg +- /// instructions. ++ /// Specialized version of getNextSource for ExtractSubreg instructions. + ValueTrackerResult getNextSourceFromExtractSubreg(); + +- /// \brief Specialized version of getNextSource for SubregToReg +- /// instructions. ++ /// Specialized version of getNextSource for SubregToReg instructions. + ValueTrackerResult getNextSourceFromSubregToReg(); + +- /// \brief Specialized version of getNextSource for PHI instructions. ++ /// Specialized version of getNextSource for PHI instructions. + ValueTrackerResult getNextSourceFromPHI(); + + public: +- /// \brief Create a ValueTracker instance for the value defined by \p Reg. ++ /// Create a ValueTracker instance for the value defined by \p Reg. + /// \p DefSubReg represents the sub register index the value tracker will + /// track. It does not need to match the sub register index used in the + /// definition of \p Reg. +- /// \p UseAdvancedTracking specifies whether or not the value tracker looks +- /// through complex instructions. By default (false), it handles only copy +- /// and bitcast instructions. + /// If \p Reg is a physical register, a value tracker constructed with + /// this constructor will not find any alternative source. 
+ /// Indeed, when \p Reg is a physical register that constructor does not +@@ -427,46 +417,20 @@ + /// Use the next constructor to track a physical register. + ValueTracker(unsigned Reg, unsigned DefSubReg, + const MachineRegisterInfo &MRI, +- bool UseAdvancedTracking = false, + const TargetInstrInfo *TII = nullptr) +- : DefSubReg(DefSubReg), Reg(Reg), +- UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { ++ : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) { + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + Def = MRI.getVRegDef(Reg); + DefIdx = MRI.def_begin(Reg).getOperandNo(); + } + } + +- /// \brief Create a ValueTracker instance for the value defined by +- /// the pair \p MI, \p DefIdx. +- /// Unlike the other constructor, the value tracker produced by this one +- /// may be able to find a new source when the definition is a physical +- /// register. +- /// This could be useful to rewrite target specific instructions into +- /// generic copy instructions. +- ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, +- const MachineRegisterInfo &MRI, +- bool UseAdvancedTracking = false, +- const TargetInstrInfo *TII = nullptr) +- : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), +- UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { +- assert(DefIdx < Def->getDesc().getNumDefs() && +- Def->getOperand(DefIdx).isReg() && "Invalid definition"); +- Reg = Def->getOperand(DefIdx).getReg(); +- } +- + /// \brief Following the use-def chain, get the next available source + /// for the tracked value. + /// \return A ValueTrackerResult containing a set of registers + /// and sub registers with tracked values. A ValueTrackerResult with + /// an empty set of registers means no source was found. + ValueTrackerResult getNextSource(); +- +- /// \brief Get the last register where the initial value can be found. +- /// Initially this is the register of the definition. 
+- /// Then, after each successful call to getNextSource, this is the +- /// register of the last source. +- unsigned getReg() const { return Reg; } + }; + + } // end anonymous namespace +@@ -476,11 +440,11 @@ + char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; + + INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, +- "Peephole Optimizations", false, false) ++ "Peephole Optimizations", false, false) + INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) + INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) + INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE, +- "Peephole Optimizations", false, false) ++ "Peephole Optimizations", false, false) + + /// If instruction is a copy-like instruction, i.e. it reads a single register + /// and writes a single register and it does not modify the source, and if the +@@ -491,10 +455,10 @@ + /// the code. Since this code does not currently share EXTRACTs, just ignore all + /// debug uses. + bool PeepholeOptimizer:: +-optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, ++optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, + SmallPtrSetImpl &LocalMIs) { + unsigned SrcReg, DstReg, SubIdx; +- if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) ++ if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx)) + return false; + + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || +@@ -535,7 +499,7 @@ + bool ExtendLife = true; + for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { + MachineInstr *UseMI = UseMO.getParent(); +- if (UseMI == MI) ++ if (UseMI == &MI) + continue; + + if (UseMI->isPHI()) { +@@ -568,7 +532,7 @@ + continue; + + MachineBasicBlock *UseMBB = UseMI->getParent(); +- if (UseMBB == MBB) { ++ if (UseMBB == &MBB) { + // Local uses that come after the extension. + if (!LocalMIs.count(UseMI)) + Uses.push_back(&UseMO); +@@ -576,7 +540,7 @@ + // Non-local uses where the result of the extension is used. Always + // replace these unless it's a PHI. 
+ Uses.push_back(&UseMO); +- } else if (Aggressive && DT->dominates(MBB, UseMBB)) { ++ } else if (Aggressive && DT->dominates(&MBB, UseMBB)) { + // We may want to extend the live range of the extension result in order + // to replace these uses. + ExtendedUses.push_back(&UseMO); +@@ -640,19 +604,18 @@ + /// against already sets (or could be modified to set) the same flag as the + /// compare, then we can remove the comparison and use the flag from the + /// previous instruction. +-bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, +- MachineBasicBlock *MBB) { ++bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) { + // If this instruction is a comparison against zero and isn't comparing a + // physical register, we can try to optimize it. + unsigned SrcReg, SrcReg2; + int CmpMask, CmpValue; +- if (!TII->analyzeCompare(*MI, SrcReg, SrcReg2, CmpMask, CmpValue) || ++ if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || + TargetRegisterInfo::isPhysicalRegister(SrcReg) || + (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) + return false; + + // Attempt to optimize the comparison instruction. +- if (TII->optimizeCompareInstr(*MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { ++ if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { + ++NumCmps; + return true; + } +@@ -661,27 +624,26 @@ + } + + /// Optimize a select instruction. 
+-bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI, ++bool PeepholeOptimizer::optimizeSelect(MachineInstr &MI, + SmallPtrSetImpl &LocalMIs) { + unsigned TrueOp = 0; + unsigned FalseOp = 0; + bool Optimizable = false; + SmallVector Cond; +- if (TII->analyzeSelect(*MI, Cond, TrueOp, FalseOp, Optimizable)) ++ if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) + return false; + if (!Optimizable) + return false; +- if (!TII->optimizeSelect(*MI, LocalMIs)) ++ if (!TII->optimizeSelect(MI, LocalMIs)) + return false; +- MI->eraseFromParent(); ++ MI.eraseFromParent(); + ++NumSelects; + return true; + } + +-/// \brief Check if a simpler conditional branch can be +-/// generated +-bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { +- return TII->optimizeCondBranch(*MI); ++/// Check if a simpler conditional branch can be generated. ++bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) { ++ return TII->optimizeCondBranch(MI); + } + + /// \brief Try to find the next source that share the same register file +@@ -695,30 +657,29 @@ + /// share the same register file as \p Reg and \p SubReg. The client should + /// then be capable to rewrite all intermediate PHIs to get the next source. + /// \return False if no alternative sources are available. True otherwise. +-bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, ++bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, + RewriteMapTy &RewriteMap) { + // Do not try to find a new source for a physical register. + // So far we do not have any motivating example for doing that. + // Thus, instead of maintaining untested code, we will revisit that if + // that changes at some point. 
++ unsigned Reg = RegSubReg.Reg; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return false; + const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); + +- SmallVector SrcToLook; +- TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg); ++ SmallVector SrcToLook; ++ RegSubRegPair CurSrcPair = RegSubReg; + SrcToLook.push_back(CurSrcPair); + + unsigned PHICount = 0; +- while (!SrcToLook.empty() && PHICount < RewritePHILimit) { +- TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val(); ++ do { ++ CurSrcPair = SrcToLook.pop_back_val(); + // As explained above, do not handle physical registers +- if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg)) ++ if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) + return false; + +- CurSrcPair = Pair; +- ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, +- !DisableAdvCopyOpt, TII); ++ ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII); + + // Follow the chain of copies until we find a more suitable source, a phi + // or have to abort. +@@ -747,14 +708,17 @@ + unsigned NumSrcs = Res.getNumSources(); + if (NumSrcs > 1) { + PHICount++; ++ if (PHICount >= RewritePHILimit) { ++ DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); ++ return false; ++ } ++ + for (unsigned i = 0; i < NumSrcs; ++i) +- SrcToLook.push_back(TargetInstrInfo::RegSubRegPair( +- Res.getSrcReg(i), Res.getSrcSubReg(i))); ++ SrcToLook.push_back(Res.getSrc(i)); + break; + } + +- CurSrcPair.Reg = Res.getSrcReg(0); +- CurSrcPair.SubReg = Res.getSrcSubReg(0); ++ CurSrcPair = Res.getSrc(0); + // Do not extend the live-ranges of physical registers as they add + // constraints to the register allocator. Moreover, if we want to extend + // the live-range of a physical register, unlike SSA virtual register, +@@ -764,7 +728,8 @@ + + // Keep following the chain if the value isn't any better yet. 
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); +- if (!TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, CurSrcPair.SubReg)) ++ if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC, ++ CurSrcPair.SubReg)) + continue; + + // We currently cannot deal with subreg operands on PHI instructions +@@ -775,7 +740,7 @@ + // We found a suitable source, and are done with this chain. + break; + } +- } ++ } while (!SrcToLook.empty()); + + // If we did not find a more suitable source, there is nothing to optimize. + return CurSrcPair.Reg != Reg; +@@ -786,54 +751,50 @@ + /// successfully traverse a PHI instruction and find suitable sources coming + /// from its edges. By inserting a new PHI, we provide a rewritten PHI def + /// suitable to be used in a new COPY instruction. +-static MachineInstr * +-insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, +- const SmallVectorImpl &SrcRegs, +- MachineInstr *OrigPHI) { ++static MachineInstr & ++insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII, ++ const SmallVectorImpl &SrcRegs, ++ MachineInstr &OrigPHI) { + assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); + +- const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg); ++ const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg); + // NewRC is only correct if no subregisters are involved. findNextSource() + // should have rejected those cases already. 
+ assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand"); +- unsigned NewVR = MRI->createVirtualRegister(NewRC); +- MachineBasicBlock *MBB = OrigPHI->getParent(); +- MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(), +- TII->get(TargetOpcode::PHI), NewVR); ++ unsigned NewVR = MRI.createVirtualRegister(NewRC); ++ MachineBasicBlock *MBB = OrigPHI.getParent(); ++ MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(), ++ TII.get(TargetOpcode::PHI), NewVR); + + unsigned MBBOpIdx = 2; +- for (auto RegPair : SrcRegs) { ++ for (const RegSubRegPair &RegPair : SrcRegs) { + MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); +- MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB()); ++ MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB()); + // Since we're extended the lifetime of RegPair.Reg, clear the + // kill flags to account for that and make RegPair.Reg reaches + // the new PHI. +- MRI->clearKillFlags(RegPair.Reg); ++ MRI.clearKillFlags(RegPair.Reg); + MBBOpIdx += 2; + } + +- return MIB; ++ return *MIB; + } + + namespace { + +-/// \brief Helper class to rewrite the arguments of a copy-like instruction. +-class CopyRewriter { ++/// Interface to query instructions amenable to copy rewriting. ++class Rewriter { + protected: +- /// The copy-like instruction. + MachineInstr &CopyLike; +- +- /// The index of the source being rewritten. +- unsigned CurrentSrcIdx = 0; +- ++ unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten. + public: +- CopyRewriter(MachineInstr &MI) : CopyLike(MI) {} +- virtual ~CopyRewriter() = default; ++ Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {} ++ virtual ~Rewriter() {} + + /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and +- /// the related value that it affects (TrackReg, TrackSubReg). ++ /// the related value that it affects (DstReg, DstSubReg). 
+ /// A source is considered rewritable if its register class and the +- /// register class of the related TrackReg may not be register ++ /// register class of the related DstReg may not be register + /// coalescer friendly. In other words, given a copy-like instruction + /// not all the arguments may be returned at rewritable source, since + /// some arguments are none to be register coalescer friendly. +@@ -848,137 +809,72 @@ + /// the only source this instruction has: + /// (SrcReg, SrcSubReg) = (src, srcSubIdx). + /// This source defines the whole definition, i.e., +- /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). ++ /// (DstReg, DstSubReg) = (dst, dstSubIdx). + /// + /// The second and subsequent calls will return false, as there is only one + /// rewritable source. + /// + /// \return True if a rewritable source has been found, false otherwise. + /// The output arguments are valid if and only if true is returned. +- virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, +- unsigned &TrackReg, +- unsigned &TrackSubReg) { +- // If CurrentSrcIdx == 1, this means this function has already been called +- // once. CopyLike has one definition and one argument, thus, there is +- // nothing else to rewrite. +- if (!CopyLike.isCopy() || CurrentSrcIdx == 1) ++ virtual bool getNextRewritableSource(RegSubRegPair &Src, ++ RegSubRegPair &Dst) = 0; ++ ++ /// Rewrite the current source with \p NewReg and \p NewSubReg if possible. ++ /// \return True if the rewriting was possible, false otherwise. ++ virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) = 0; ++}; ++ ++/// Rewriter for COPY instructions. 
++class CopyRewriter : public Rewriter { ++public: ++ CopyRewriter(MachineInstr &MI) : Rewriter(MI) { ++ assert(MI.isCopy() && "Expected copy instruction"); ++ } ++ virtual ~CopyRewriter() = default; ++ ++ bool getNextRewritableSource(RegSubRegPair &Src, ++ RegSubRegPair &Dst) override { ++ // CurrentSrcIdx > 0 means this function has already been called. ++ if (CurrentSrcIdx > 0) + return false; + // This is the first call to getNextRewritableSource. + // Move the CurrentSrcIdx to remember that we made that call. + CurrentSrcIdx = 1; + // The rewritable source is the argument. + const MachineOperand &MOSrc = CopyLike.getOperand(1); +- SrcReg = MOSrc.getReg(); +- SrcSubReg = MOSrc.getSubReg(); ++ Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg()); + // What we track are the alternative sources of the definition. + const MachineOperand &MODef = CopyLike.getOperand(0); +- TrackReg = MODef.getReg(); +- TrackSubReg = MODef.getSubReg(); ++ Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); + return true; + } + +- /// \brief Rewrite the current source with \p NewReg and \p NewSubReg +- /// if possible. +- /// \return True if the rewriting was possible, false otherwise. +- virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { +- if (!CopyLike.isCopy() || CurrentSrcIdx != 1) ++ bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { ++ if (CurrentSrcIdx != 1) + return false; + MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); + MOSrc.setReg(NewReg); + MOSrc.setSubReg(NewSubReg); + return true; + } +- +- /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find +- /// the new source to use for rewrite. If \p HandleMultipleSources is true and +- /// multiple sources for a given \p Def are found along the way, we found a +- /// PHI instructions that needs to be rewritten. +- /// TODO: HandleMultipleSources should be removed once we test PHI handling +- /// with coalescable copies. 
+- TargetInstrInfo::RegSubRegPair +- getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, +- TargetInstrInfo::RegSubRegPair Def, +- PeepholeOptimizer::RewriteMapTy &RewriteMap, +- bool HandleMultipleSources = true) { +- TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); +- do { +- ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); +- // If there are no entries on the map, LookupSrc is the new source. +- if (!Res.isValid()) +- return LookupSrc; +- +- // There's only one source for this definition, keep searching... +- unsigned NumSrcs = Res.getNumSources(); +- if (NumSrcs == 1) { +- LookupSrc.Reg = Res.getSrcReg(0); +- LookupSrc.SubReg = Res.getSrcSubReg(0); +- continue; +- } +- +- // TODO: Remove once multiple srcs w/ coalescable copies are supported. +- if (!HandleMultipleSources) +- break; +- +- // Multiple sources, recurse into each source to find a new source +- // for it. Then, rewrite the PHI accordingly to its new edges. +- SmallVector NewPHISrcs; +- for (unsigned i = 0; i < NumSrcs; ++i) { +- TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i), +- Res.getSrcSubReg(i)); +- NewPHISrcs.push_back( +- getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); +- } +- +- // Build the new PHI node and return its def register as the new source. +- MachineInstr *OrigPHI = const_cast(Res.getInst()); +- MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI); +- DEBUG(dbgs() << "-- getNewSource\n"); +- DEBUG(dbgs() << " Replacing: " << *OrigPHI); +- DEBUG(dbgs() << " With: " << *NewPHI); +- const MachineOperand &MODef = NewPHI->getOperand(0); +- return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg()); +- +- } while (true); +- +- return TargetInstrInfo::RegSubRegPair(0, 0); +- } +- +- /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap +- /// and create a new COPY instruction. More info about RewriteMap in +- /// PeepholeOptimizer::findNextSource. 
Right now this is only used to handle +- /// Uncoalescable copies, since they are copy like instructions that aren't +- /// recognized by the register allocator. +- virtual MachineInstr * +- RewriteSource(TargetInstrInfo::RegSubRegPair Def, +- PeepholeOptimizer::RewriteMapTy &RewriteMap) { +- return nullptr; +- } + }; + + /// \brief Helper class to rewrite uncoalescable copy like instructions + /// into new COPY (coalescable friendly) instructions. +-class UncoalescableRewriter : public CopyRewriter { +-protected: +- const TargetInstrInfo &TII; +- MachineRegisterInfo &MRI; +- +- /// The number of defs in the bitcast +- unsigned NumDefs; ++class UncoalescableRewriter : public Rewriter { ++ unsigned NumDefs; ///< Number of defs in the bitcast. + + public: +- UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII, +- MachineRegisterInfo &MRI) +- : CopyRewriter(MI), TII(TII), MRI(MRI) { ++ UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) { + NumDefs = MI.getDesc().getNumDefs(); + } + +- /// \brief Get the next rewritable def source (TrackReg, TrackSubReg) ++ /// \see See Rewriter::getNextRewritableSource() + /// All such sources need to be considered rewritable in order to + /// rewrite a uncoalescable copy-like instruction. This method return + /// each definition that must be checked if rewritable. +- bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, +- unsigned &TrackReg, +- unsigned &TrackSubReg) override { ++ bool getNextRewritableSource(RegSubRegPair &Src, ++ RegSubRegPair &Dst) override { + // Find the next non-dead definition and continue from there. + if (CurrentSrcIdx == NumDefs) + return false; +@@ -990,64 +886,27 @@ + } + + // What we track are the alternative sources of the definition. 
++ Src = RegSubRegPair(0, 0); + const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); +- TrackReg = MODef.getReg(); +- TrackSubReg = MODef.getSubReg(); ++ Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); + + CurrentSrcIdx++; + return true; + } + +- /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap +- /// and create a new COPY instruction. More info about RewriteMap in +- /// PeepholeOptimizer::findNextSource. Right now this is only used to handle +- /// Uncoalescable copies, since they are copy like instructions that aren't +- /// recognized by the register allocator. +- MachineInstr * +- RewriteSource(TargetInstrInfo::RegSubRegPair Def, +- PeepholeOptimizer::RewriteMapTy &RewriteMap) override { +- assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && +- "We do not rewrite physical registers"); +- +- // Find the new source to use in the COPY rewrite. +- TargetInstrInfo::RegSubRegPair NewSrc = +- getNewSource(&MRI, &TII, Def, RewriteMap); +- +- // Insert the COPY. +- const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg); +- unsigned NewVR = MRI.createVirtualRegister(DefRC); +- +- MachineInstr *NewCopy = +- BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), +- TII.get(TargetOpcode::COPY), NewVR) +- .addReg(NewSrc.Reg, 0, NewSrc.SubReg); +- +- NewCopy->getOperand(0).setSubReg(Def.SubReg); +- if (Def.SubReg) +- NewCopy->getOperand(0).setIsUndef(); +- +- DEBUG(dbgs() << "-- RewriteSource\n"); +- DEBUG(dbgs() << " Replacing: " << CopyLike); +- DEBUG(dbgs() << " With: " << *NewCopy); +- MRI.replaceRegWith(Def.Reg, NewVR); +- MRI.clearKillFlags(NewVR); +- +- // We extended the lifetime of NewSrc.Reg, clear the kill flags to +- // account for that. +- MRI.clearKillFlags(NewSrc.Reg); +- +- return NewCopy; ++ bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { ++ return false; + } + }; + +-/// \brief Specialized rewriter for INSERT_SUBREG instruction. 
+-class InsertSubregRewriter : public CopyRewriter { ++/// Specialized rewriter for INSERT_SUBREG instruction. ++class InsertSubregRewriter : public Rewriter { + public: +- InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { ++ InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) { + assert(MI.isInsertSubreg() && "Invalid instruction"); + } + +- /// \brief See CopyRewriter::getNextRewritableSource. ++ /// \see See Rewriter::getNextRewritableSource() + /// Here CopyLike has the following form: + /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. + /// Src1 has the same register class has dst, hence, there is +@@ -1055,29 +914,27 @@ + /// Src2.src2SubIdx, may not be register coalescer friendly. + /// Therefore, the first call to this method returns: + /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). +- /// (TrackReg, TrackSubReg) = (dst, subIdx). ++ /// (DstReg, DstSubReg) = (dst, subIdx). + /// + /// Subsequence calls will return false. +- bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, +- unsigned &TrackReg, +- unsigned &TrackSubReg) override { ++ bool getNextRewritableSource(RegSubRegPair &Src, ++ RegSubRegPair &Dst) override { + // If we already get the only source we can rewrite, return false. + if (CurrentSrcIdx == 2) + return false; + // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. + CurrentSrcIdx = 2; + const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); +- SrcReg = MOInsertedReg.getReg(); +- SrcSubReg = MOInsertedReg.getSubReg(); ++ Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg()); + const MachineOperand &MODef = CopyLike.getOperand(0); + + // We want to track something that is compatible with the + // partial definition. +- TrackReg = MODef.getReg(); + if (MODef.getSubReg()) + // Bail if we have to compose sub-register indices. 
+ return false; +- TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); ++ Dst = RegSubRegPair(MODef.getReg(), ++ (unsigned)CopyLike.getOperand(3).getImm()); + return true; + } + +@@ -1092,41 +949,39 @@ + } + }; + +-/// \brief Specialized rewriter for EXTRACT_SUBREG instruction. +-class ExtractSubregRewriter : public CopyRewriter { ++/// Specialized rewriter for EXTRACT_SUBREG instruction. ++class ExtractSubregRewriter : public Rewriter { + const TargetInstrInfo &TII; + + public: + ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) +- : CopyRewriter(MI), TII(TII) { ++ : Rewriter(MI), TII(TII) { + assert(MI.isExtractSubreg() && "Invalid instruction"); + } + +- /// \brief See CopyRewriter::getNextRewritableSource. ++ /// \see Rewriter::getNextRewritableSource() + /// Here CopyLike has the following form: + /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. + /// There is only one rewritable source: Src.subIdx, + /// which defines dst.dstSubIdx. +- bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, +- unsigned &TrackReg, +- unsigned &TrackSubReg) override { ++ bool getNextRewritableSource(RegSubRegPair &Src, ++ RegSubRegPair &Dst) override { + // If we already get the only source we can rewrite, return false. + if (CurrentSrcIdx == 1) + return false; + // We are looking at v1 = EXTRACT_SUBREG v0, sub0. + CurrentSrcIdx = 1; + const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); +- SrcReg = MOExtractedReg.getReg(); + // If we have to compose sub-register indices, bail out. + if (MOExtractedReg.getSubReg()) + return false; + +- SrcSubReg = CopyLike.getOperand(2).getImm(); ++ Src = RegSubRegPair(MOExtractedReg.getReg(), ++ CopyLike.getOperand(2).getImm()); + + // We want to track something that is compatible with the definition. 
+ const MachineOperand &MODef = CopyLike.getOperand(0); +- TrackReg = MODef.getReg(); +- TrackSubReg = MODef.getSubReg(); ++ Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); + return true; + } + +@@ -1156,14 +1011,14 @@ + } + }; + +-/// \brief Specialized rewriter for REG_SEQUENCE instruction. +-class RegSequenceRewriter : public CopyRewriter { ++/// Specialized rewriter for REG_SEQUENCE instruction. ++class RegSequenceRewriter : public Rewriter { + public: +- RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { ++ RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) { + assert(MI.isRegSequence() && "Invalid instruction"); + } + +- /// \brief See CopyRewriter::getNextRewritableSource. ++ /// \see Rewriter::getNextRewritableSource() + /// Here CopyLike has the following form: + /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. + /// Each call will return a different source, walking all the available +@@ -1171,17 +1026,16 @@ + /// + /// The first call returns: + /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). +- /// (TrackReg, TrackSubReg) = (dst, subIdx1). ++ /// (DstReg, DstSubReg) = (dst, subIdx1). + /// + /// The second call returns: + /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). +- /// (TrackReg, TrackSubReg) = (dst, subIdx2). ++ /// (DstReg, DstSubReg) = (dst, subIdx2). + /// + /// And so on, until all the sources have been traversed, then + /// it returns false. +- bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, +- unsigned &TrackReg, +- unsigned &TrackSubReg) override { ++ bool getNextRewritableSource(RegSubRegPair &Src, ++ RegSubRegPair &Dst) override { + // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. + + // If this is the first call, move to the first argument. 
+@@ -1194,17 +1048,17 @@ + return false; + } + const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); +- SrcReg = MOInsertedReg.getReg(); ++ Src.Reg = MOInsertedReg.getReg(); + // If we have to compose sub-register indices, bail out. +- if ((SrcSubReg = MOInsertedReg.getSubReg())) ++ if ((Src.SubReg = MOInsertedReg.getSubReg())) + return false; + + // We want to track something that is compatible with the related + // partial definition. +- TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); ++ Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); + + const MachineOperand &MODef = CopyLike.getOperand(0); +- TrackReg = MODef.getReg(); ++ Dst.Reg = MODef.getReg(); + // If we have to compose sub-registers, bail. + return MODef.getSubReg() == 0; + } +@@ -1224,16 +1078,14 @@ + + } // end anonymous namespace + +-/// \brief Get the appropriated CopyRewriter for \p MI. +-/// \return A pointer to a dynamically allocated CopyRewriter or nullptr +-/// if no rewriter works for \p MI. +-static CopyRewriter *getCopyRewriter(MachineInstr &MI, +- const TargetInstrInfo &TII, +- MachineRegisterInfo &MRI) { ++/// Get the appropriated Rewriter for \p MI. ++/// \return A pointer to a dynamically allocated Rewriter or nullptr if no ++/// rewriter works for \p MI. ++static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) { + // Handle uncoalescable copy-like instructions. 
+- if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() || +- MI.isExtractSubregLike())) +- return new UncoalescableRewriter(MI, TII, MRI); ++ if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() || ++ MI.isExtractSubregLike()) ++ return new UncoalescableRewriter(MI); + + switch (MI.getOpcode()) { + default: +@@ -1247,53 +1099,102 @@ + case TargetOpcode::REG_SEQUENCE: + return new RegSequenceRewriter(MI); + } +- llvm_unreachable(nullptr); + } + +-/// \brief Optimize generic copy instructions to avoid cross +-/// register bank copy. The optimization looks through a chain of +-/// copies and tries to find a source that has a compatible register +-/// class. +-/// Two register classes are considered to be compatible if they share +-/// the same register bank. ++/// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find ++/// the new source to use for rewrite. If \p HandleMultipleSources is true and ++/// multiple sources for a given \p Def are found along the way, we found a ++/// PHI instructions that needs to be rewritten. ++/// TODO: HandleMultipleSources should be removed once we test PHI handling ++/// with coalescable copies. ++static RegSubRegPair ++getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, ++ RegSubRegPair Def, ++ const PeepholeOptimizer::RewriteMapTy &RewriteMap, ++ bool HandleMultipleSources = true) { ++ RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); ++ while (true) { ++ ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); ++ // If there are no entries on the map, LookupSrc is the new source. ++ if (!Res.isValid()) ++ return LookupSrc; ++ ++ // There's only one source for this definition, keep searching... ++ unsigned NumSrcs = Res.getNumSources(); ++ if (NumSrcs == 1) { ++ LookupSrc.Reg = Res.getSrcReg(0); ++ LookupSrc.SubReg = Res.getSrcSubReg(0); ++ continue; ++ } ++ ++ // TODO: Remove once multiple srcs w/ coalescable copies are supported. 
++ if (!HandleMultipleSources) ++ break; ++ ++ // Multiple sources, recurse into each source to find a new source ++ // for it. Then, rewrite the PHI accordingly to its new edges. ++ SmallVector NewPHISrcs; ++ for (unsigned i = 0; i < NumSrcs; ++i) { ++ RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i)); ++ NewPHISrcs.push_back( ++ getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); ++ } ++ ++ // Build the new PHI node and return its def register as the new source. ++ MachineInstr &OrigPHI = const_cast(*Res.getInst()); ++ MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI); ++ DEBUG(dbgs() << "-- getNewSource\n"); ++ DEBUG(dbgs() << " Replacing: " << OrigPHI); ++ DEBUG(dbgs() << " With: " << NewPHI); ++ const MachineOperand &MODef = NewPHI.getOperand(0); ++ return RegSubRegPair(MODef.getReg(), MODef.getSubReg()); ++ } ++ ++ return RegSubRegPair(0, 0); ++} ++ ++/// Optimize generic copy instructions to avoid cross register bank copy. ++/// The optimization looks through a chain of copies and tries to find a source ++/// that has a compatible register class. ++/// Two register classes are considered to be compatible if they share the same ++/// register bank. + /// New copies issued by this optimization are register allocator + /// friendly. This optimization does not remove any copy as it may + /// overconstrain the register allocator, but replaces some operands + /// when possible. + /// \pre isCoalescableCopy(*MI) is true. + /// \return True, when \p MI has been rewritten. False otherwise. 
+-bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { +- assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); +- assert(MI->getDesc().getNumDefs() == 1 && ++bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { ++ assert(isCoalescableCopy(MI) && "Invalid argument"); ++ assert(MI.getDesc().getNumDefs() == 1 && + "Coalescer can understand multiple defs?!"); +- const MachineOperand &MODef = MI->getOperand(0); ++ const MachineOperand &MODef = MI.getOperand(0); + // Do not rewrite physical definitions. + if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) + return false; + + bool Changed = false; + // Get the right rewriter for the current copy. +- std::unique_ptr CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); ++ std::unique_ptr CpyRewriter(getCopyRewriter(MI, *TII)); + // If none exists, bail out. + if (!CpyRewriter) + return false; + // Rewrite each rewritable source. +- unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; +- while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, +- TrackSubReg)) { ++ RegSubRegPair Src; ++ RegSubRegPair TrackPair; ++ while (CpyRewriter->getNextRewritableSource(Src, TrackPair)) { + // Keep track of PHI nodes and its incoming edges when looking for sources. + RewriteMapTy RewriteMap; + // Try to find a more suitable source. If we failed to do so, or get the + // actual source, move to the next source. +- if (!findNextSource(TrackReg, TrackSubReg, RewriteMap)) ++ if (!findNextSource(TrackPair, RewriteMap)) + continue; + + // Get the new source to rewrite. TODO: Only enable handling of multiple + // sources (PHIs) once we have a motivating example and testcases for it. 
+- TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg); +- TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource( +- MRI, TII, TrackPair, RewriteMap, false /* multiple sources */); +- if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0) ++ RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap, ++ /*HandleMultipleSources=*/false); ++ if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0) + continue; + + // Rewrite source. +@@ -1312,6 +1213,47 @@ + return Changed; + } + ++/// \brief Rewrite the source found through \p Def, by using the \p RewriteMap ++/// and create a new COPY instruction. More info about RewriteMap in ++/// PeepholeOptimizer::findNextSource. Right now this is only used to handle ++/// Uncoalescable copies, since they are copy like instructions that aren't ++/// recognized by the register allocator. ++MachineInstr & ++PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike, ++ RegSubRegPair Def, RewriteMapTy &RewriteMap) { ++ assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && ++ "We do not rewrite physical registers"); ++ ++ // Find the new source to use in the COPY rewrite. ++ RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap); ++ ++ // Insert the COPY. ++ const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); ++ unsigned NewVReg = MRI->createVirtualRegister(DefRC); ++ ++ MachineInstr *NewCopy = ++ BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), ++ TII->get(TargetOpcode::COPY), NewVReg) ++ .addReg(NewSrc.Reg, 0, NewSrc.SubReg); ++ ++ if (Def.SubReg) { ++ NewCopy->getOperand(0).setSubReg(Def.SubReg); ++ NewCopy->getOperand(0).setIsUndef(); ++ } ++ ++ DEBUG(dbgs() << "-- RewriteSource\n"); ++ DEBUG(dbgs() << " Replacing: " << CopyLike); ++ DEBUG(dbgs() << " With: " << *NewCopy); ++ MRI->replaceRegWith(Def.Reg, NewVReg); ++ MRI->clearKillFlags(NewVReg); ++ ++ // We extended the lifetime of NewSrc.Reg, clear the kill flags to ++ // account for that. 
++ MRI->clearKillFlags(NewSrc.Reg); ++ ++ return *NewCopy; ++} ++ + /// \brief Optimize copy-like instructions to create + /// register coalescer friendly instruction. + /// The optimization tries to kill-off the \p MI by looking +@@ -1324,48 +1266,40 @@ + /// been removed from its parent. + /// All COPY instructions created, are inserted in \p LocalMIs. + bool PeepholeOptimizer::optimizeUncoalescableCopy( +- MachineInstr *MI, SmallPtrSetImpl &LocalMIs) { +- assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); +- +- // Check if we can rewrite all the values defined by this instruction. +- SmallVector RewritePairs; +- // Get the right rewriter for the current copy. +- std::unique_ptr CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); +- // If none exists, bail out. +- if (!CpyRewriter) +- return false; ++ MachineInstr &MI, SmallPtrSetImpl &LocalMIs) { ++ assert(isUncoalescableCopy(MI) && "Invalid argument"); ++ UncoalescableRewriter CpyRewriter(MI); + + // Rewrite each rewritable source by generating new COPYs. This works + // differently from optimizeCoalescableCopy since it first makes sure that all + // definitions can be rewritten. + RewriteMapTy RewriteMap; +- unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg; +- while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg, +- CopyDefSubReg)) { ++ RegSubRegPair Src; ++ RegSubRegPair Def; ++ SmallVector RewritePairs; ++ while (CpyRewriter.getNextRewritableSource(Src, Def)) { + // If a physical register is here, this is probably for a good reason. + // Do not rewrite that. +- if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg)) ++ if (TargetRegisterInfo::isPhysicalRegister(Def.Reg)) + return false; + + // If we do not know how to rewrite this definition, there is no point + // in trying to kill this instruction. 
+- TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg); +- if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap)) ++ if (!findNextSource(Def, RewriteMap)) + return false; + + RewritePairs.push_back(Def); + } + + // The change is possible for all defs, do it. +- for (const auto &Def : RewritePairs) { ++ for (const RegSubRegPair &Def : RewritePairs) { + // Rewrite the "copy" in a way the register coalescer understands. +- MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap); +- assert(NewCopy && "Should be able to always generate a new copy"); +- LocalMIs.insert(NewCopy); ++ MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap); ++ LocalMIs.insert(&NewCopy); + } + + // MI is now dead. +- MI->eraseFromParent(); ++ MI.eraseFromParent(); + ++NumUncoalescableCopies; + return true; + } +@@ -1374,18 +1308,18 @@ + /// We only fold loads to virtual registers and the virtual register defined + /// has a single use. + bool PeepholeOptimizer::isLoadFoldable( +- MachineInstr *MI, SmallSet &FoldAsLoadDefCandidates) { +- if (!MI->canFoldAsLoad() || !MI->mayLoad()) ++ MachineInstr &MI, SmallSet &FoldAsLoadDefCandidates) { ++ if (!MI.canFoldAsLoad() || !MI.mayLoad()) + return false; +- const MCInstrDesc &MCID = MI->getDesc(); ++ const MCInstrDesc &MCID = MI.getDesc(); + if (MCID.getNumDefs() != 1) + return false; + +- unsigned Reg = MI->getOperand(0).getReg(); ++ unsigned Reg = MI.getOperand(0).getReg(); + // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting + // loads. It should be checked when processing uses of the load, since + // uses can be removed during peephole. 
+- if (!MI->getOperand(0).getSubReg() && ++ if (!MI.getOperand(0).getSubReg() && + TargetRegisterInfo::isVirtualRegister(Reg) && + MRI->hasOneNonDBGUse(Reg)) { + FoldAsLoadDefCandidates.insert(Reg); +@@ -1395,16 +1329,16 @@ + } + + bool PeepholeOptimizer::isMoveImmediate( +- MachineInstr *MI, SmallSet &ImmDefRegs, ++ MachineInstr &MI, SmallSet &ImmDefRegs, + DenseMap &ImmDefMIs) { +- const MCInstrDesc &MCID = MI->getDesc(); +- if (!MI->isMoveImmediate()) ++ const MCInstrDesc &MCID = MI.getDesc(); ++ if (!MI.isMoveImmediate()) + return false; + if (MCID.getNumDefs() != 1) + return false; +- unsigned Reg = MI->getOperand(0).getReg(); ++ unsigned Reg = MI.getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { +- ImmDefMIs.insert(std::make_pair(Reg, MI)); ++ ImmDefMIs.insert(std::make_pair(Reg, &MI)); + ImmDefRegs.insert(Reg); + return true; + } +@@ -1415,11 +1349,11 @@ + /// Try folding register operands that are defined by move immediate + /// instructions, i.e. a trivial constant folding optimization, if + /// and only if the def and use are in the same BB. +-bool PeepholeOptimizer::foldImmediate( +- MachineInstr *MI, MachineBasicBlock *MBB, SmallSet &ImmDefRegs, ++bool PeepholeOptimizer::foldImmediate(MachineInstr &MI, ++ SmallSet &ImmDefRegs, + DenseMap &ImmDefMIs) { +- for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { +- MachineOperand &MO = MI->getOperand(i); ++ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { ++ MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + // Ignore dead implicit defs. 
+@@ -1432,7 +1366,7 @@ + continue; + DenseMap::iterator II = ImmDefMIs.find(Reg); + assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); +- if (TII->FoldImmediate(*MI, *II->second, Reg, MRI)) { ++ if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) { + ++NumImmFold; + return true; + } +@@ -1454,28 +1388,28 @@ + // %2 = COPY %0:sub1 + // + // Should replace %2 uses with %1:sub1 +-bool PeepholeOptimizer::foldRedundantCopy( +- MachineInstr *MI, SmallSet &CopySrcRegs, ++bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI, ++ SmallSet &CopySrcRegs, + DenseMap &CopyMIs) { +- assert(MI->isCopy() && "expected a COPY machine instruction"); ++ assert(MI.isCopy() && "expected a COPY machine instruction"); + +- unsigned SrcReg = MI->getOperand(1).getReg(); ++ unsigned SrcReg = MI.getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + +- unsigned DstReg = MI->getOperand(0).getReg(); ++ unsigned DstReg = MI.getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + return false; + + if (CopySrcRegs.insert(SrcReg).second) { + // First copy of this reg seen. +- CopyMIs.insert(std::make_pair(SrcReg, MI)); ++ CopyMIs.insert(std::make_pair(SrcReg, &MI)); + return false; + } + + MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second; + +- unsigned SrcSubReg = MI->getOperand(1).getSubReg(); ++ unsigned SrcSubReg = MI.getOperand(1).getSubReg(); + unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg(); + + // Can't replace different subregister extracts. 
+@@ -1504,19 +1438,19 @@ + } + + bool PeepholeOptimizer::foldRedundantNAPhysCopy( +- MachineInstr *MI, DenseMap &NAPhysToVirtMIs) { +- assert(MI->isCopy() && "expected a COPY machine instruction"); ++ MachineInstr &MI, DenseMap &NAPhysToVirtMIs) { ++ assert(MI.isCopy() && "expected a COPY machine instruction"); + + if (DisableNAPhysCopyOpt) + return false; + +- unsigned DstReg = MI->getOperand(0).getReg(); +- unsigned SrcReg = MI->getOperand(1).getReg(); ++ unsigned DstReg = MI.getOperand(0).getReg(); ++ unsigned SrcReg = MI.getOperand(1).getReg(); + if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { + // %vreg = COPY %physreg + // Avoid using a datastructure which can track multiple live non-allocatable + // phys->virt copies since LLVM doesn't seem to do this. +- NAPhysToVirtMIs.insert({SrcReg, MI}); ++ NAPhysToVirtMIs.insert({SrcReg, &MI}); + return false; + } + +@@ -1528,8 +1462,7 @@ + if (PrevCopy == NAPhysToVirtMIs.end()) { + // We can't remove the copy: there was an intervening clobber of the + // non-allocatable physical register after the copy to virtual. +- DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI +- << '\n'); ++ DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << MI); + return false; + } + +@@ -1537,7 +1470,7 @@ + if (PrevDstReg == SrcReg) { + // Remove the virt->phys copy: we saw the virtual register definition, and + // the non-allocatable physical register's state hasn't changed since then. +- DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n'); ++ DEBUG(dbgs() << "NAPhysCopy: erasing " << MI); + ++NumNAPhysCopies; + return true; + } +@@ -1546,7 +1479,7 @@ + // register get a copy of the non-allocatable physical register, and we only + // track one such copy. Avoid getting confused by this new non-allocatable + // physical register definition, and remove it from the tracked copies. 
+- DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n'); ++ DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI); + NAPhysToVirtMIs.erase(PrevCopy); + return false; + } +@@ -1611,11 +1544,11 @@ + return false; + } + +-/// \brief Phi instructions will eventually be lowered to copy instructions. If +-/// phi is in a loop header, a recurrence may formulated around the source and +-/// destination of the phi. For such case commuting operands of the instructions +-/// in the recurrence may enable coalescing of the copy instruction generated +-/// from the phi. For example, if there is a recurrence of ++/// Phi instructions will eventually be lowered to copy instructions. ++/// If phi is in a loop header, a recurrence may formulated around the source ++/// and destination of the phi. For such case commuting operands of the ++/// instructions in the recurrence may enable coalescing of the copy instruction ++/// generated from the phi. For example, if there is a recurrence of + /// + /// LoopHeader: + /// %1 = phi(%0, %100) +@@ -1725,27 +1658,25 @@ + } + + if (!MI->isCopy()) { +- for (const auto &Op : MI->operands()) { ++ for (const MachineOperand &MO : MI->operands()) { + // Visit all operands: definitions can be implicit or explicit. +- if (Op.isReg()) { +- unsigned Reg = Op.getReg(); +- if (Op.isDef() && isNAPhysCopy(Reg)) { ++ if (MO.isReg()) { ++ unsigned Reg = MO.getReg(); ++ if (MO.isDef() && isNAPhysCopy(Reg)) { + const auto &Def = NAPhysToVirtMIs.find(Reg); + if (Def != NAPhysToVirtMIs.end()) { + // A new definition of the non-allocatable physical register + // invalidates previous copies. 
+- DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI +- << '\n'); ++ DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI); + NAPhysToVirtMIs.erase(Def); + } + } +- } else if (Op.isRegMask()) { +- const uint32_t *RegMask = Op.getRegMask(); ++ } else if (MO.isRegMask()) { ++ const uint32_t *RegMask = MO.getRegMask(); + for (auto &RegMI : NAPhysToVirtMIs) { + unsigned Def = RegMI.first; + if (MachineOperand::clobbersPhysReg(RegMask, Def)) { +- DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI +- << '\n'); ++ DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI); + NAPhysToVirtMIs.erase(Def); + } + } +@@ -1761,58 +1692,57 @@ + // don't know what's correct anymore. + // + // FIXME: handle explicit asm clobbers. +- DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI +- << '\n'); ++ DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI); + NAPhysToVirtMIs.clear(); + } + + if ((isUncoalescableCopy(*MI) && +- optimizeUncoalescableCopy(MI, LocalMIs)) || +- (MI->isCompare() && optimizeCmpInstr(MI, &MBB)) || +- (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { ++ optimizeUncoalescableCopy(*MI, LocalMIs)) || ++ (MI->isCompare() && optimizeCmpInstr(*MI)) || ++ (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) { + // MI is deleted. + LocalMIs.erase(MI); + Changed = true; + continue; + } + +- if (MI->isConditionalBranch() && optimizeCondBranch(MI)) { ++ if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) { + Changed = true; + continue; + } + +- if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { ++ if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) { + // MI is just rewritten. 
+ Changed = true; + continue; + } + + if (MI->isCopy() && +- (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) || +- foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) { ++ (foldRedundantCopy(*MI, CopySrcRegs, CopySrcMIs) || ++ foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) { + LocalMIs.erase(MI); + MI->eraseFromParent(); + Changed = true; + continue; + } + +- if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { ++ if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) { + SeenMoveImm = true; + } else { +- Changed |= optimizeExtInstr(MI, &MBB, LocalMIs); ++ Changed |= optimizeExtInstr(*MI, MBB, LocalMIs); + // optimizeExtInstr might have created new instructions after MI + // and before the already incremented MII. Adjust MII so that the + // next iteration sees the new instructions. + MII = MI; + ++MII; + if (SeenMoveImm) +- Changed |= foldImmediate(MI, &MBB, ImmDefRegs, ImmDefMIs); ++ Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs); + } + + // Check whether MI is a load candidate for folding into a later + // instruction. If MI is not a candidate, check whether we can fold an + // earlier load into MI. +- if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && ++ if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) && + !FoldAsLoadDefCandidates.empty()) { + + // We visit each operand even after successfully folding a previous +@@ -1861,7 +1791,7 @@ + // the load candidates. Note: We might be able to fold *into* this + // instruction, so this needs to be after the folding logic. + if (MI->isLoadFoldBarrier()) { +- DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n"); ++ DEBUG(dbgs() << "Encountered load fold barrier on " << *MI); + FoldAsLoadDefCandidates.clear(); + } + } +@@ -1954,14 +1884,14 @@ + // duplicate the code from the generic TII. 
+ return ValueTrackerResult(); + +- SmallVector RegSeqInputRegs; ++ SmallVector RegSeqInputRegs; + if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) + return ValueTrackerResult(); + + // We are looking at: + // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... + // Check if one of the operand defines the subreg we are interested in. +- for (auto &RegSeqInput : RegSeqInputRegs) { ++ for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) { + if (RegSeqInput.SubIdx == DefSubReg) { + if (RegSeqInput.SubReg) + // Bail if we have to compose sub registers. +@@ -1992,8 +1922,8 @@ + // duplicate the code from the generic TII. + return ValueTrackerResult(); + +- TargetInstrInfo::RegSubRegPair BaseReg; +- TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; ++ RegSubRegPair BaseReg; ++ RegSubRegPairAndIdx InsertedReg; + if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) + return ValueTrackerResult(); + +@@ -2046,7 +1976,7 @@ + // duplicate the code from the generic TII. + return ValueTrackerResult(); + +- TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; ++ RegSubRegPairAndIdx ExtractSubregInputReg; + if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) + return ValueTrackerResult(); + +@@ -2079,7 +2009,7 @@ + Def->getOperand(3).getImm()); + } + +-/// \brief Explore each PHI incoming operand and return its sources ++/// Explore each PHI incoming operand and return its sources. + ValueTrackerResult ValueTracker::getNextSourceFromPHI() { + assert(Def->isPHI() && "Invalid definition"); + ValueTrackerResult Res; +@@ -2091,7 +2021,7 @@ + + // Return all register sources for PHI instructions. 
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { +- auto &MO = Def->getOperand(i); ++ const MachineOperand &MO = Def->getOperand(i); + assert(MO.isReg() && "Invalid PHI instruction"); + Res.addSource(MO.getReg(), MO.getSubReg()); + } +@@ -2113,7 +2043,7 @@ + return getNextSourceFromBitcast(); + // All the remaining cases involve "complex" instructions. + // Bail if we did not ask for the advanced tracking. +- if (!UseAdvancedTracking) ++ if (DisableAdvCopyOpt) + return ValueTrackerResult(); + if (Def->isRegSequence() || Def->isRegSequenceLike()) + return getNextSourceFromRegSequence(); diff --git a/debian/patches/series b/debian/patches/series index e3703458..95e1e2eb 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -61,3 +61,4 @@ llvm-D49832-SCEVPred.patch llvm-rL323946-LSRTy.patch PowerPC-Make-AddrSpaceCast-noop.diff D51108-rust-powerpc.diff +pr38663-pgo-lto-crash.patch From 848f86d417ce6438d81b206f3581c1b138beacb0 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 30 Aug 2018 11:25:26 +0200 Subject: [PATCH 03/12] * Cherry pick a patch from upstream to fix a crash when doing PGO + LTO See upstream bug 38663 * Fix an alignment issue See upstream bug 38707 (Closes: #907622) --- debian/changelog | 10 ++ debian/patches/D51335-alignment-issue.diff | 103 +++++++++++++++++++++ debian/patches/series | 1 + 3 files changed, 114 insertions(+) create mode 100644 debian/patches/D51335-alignment-issue.diff diff --git a/debian/changelog b/debian/changelog index 0141d40a..a6208e34 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,13 @@ +llvm-toolchain-6.0 (1:6.0.1-7) unstable; urgency=medium + + * Cherry pick a patch from upstream to fix a crash + when doing PGO + LTO + See upstream bug 38663 + * Fix an alignment issue + See upstream bug 38707 (Closes: #907622) + + -- + llvm-toolchain-6.0 (1:6.0.1-6) unstable; urgency=medium * Remove libtool flex, bison, dejagnu, tcl, expect, diff --git 
a/debian/patches/D51335-alignment-issue.diff b/debian/patches/D51335-alignment-issue.diff new file mode 100644 index 00000000..aeca7e8d --- /dev/null +++ b/debian/patches/D51335-alignment-issue.diff @@ -0,0 +1,103 @@ +--- llvm-toolchain-6.0-6.0.1~+rc1.orig/lib/Transforms/Scalar/SROA.cpp ++++ llvm-toolchain-6.0-6.0.1~+rc1/lib/Transforms/Scalar/SROA.cpp +@@ -3046,6 +3046,42 @@ + return true; + } + ++ void fixLoadStoreAlign(Instruction &Root) { ++ // This algorithm implements the same visitor loop as ++ // hasUnsafePHIOrSelectUse, and fixes the alignment of each load ++ // or store found. ++ SmallPtrSet Visited; ++ SmallVector Uses; ++ Visited.insert(&Root); ++ Uses.push_back(&Root); ++ do { ++ Instruction *I = Uses.pop_back_val(); ++ ++ if (LoadInst *LI = dyn_cast(I)) { ++ unsigned LoadAlign = LI->getAlignment(); ++ if (!LoadAlign) ++ LoadAlign = DL.getABITypeAlignment(LI->getType()); ++ LI->setAlignment(std::min(LoadAlign, getSliceAlign())); ++ continue; ++ } ++ if (StoreInst *SI = dyn_cast(I)) { ++ unsigned StoreAlign = SI->getAlignment(); ++ if (!StoreAlign) { ++ Value *Op = SI->getOperand(0); ++ StoreAlign = DL.getABITypeAlignment(Op->getType()); ++ } ++ SI->setAlignment(std::min(StoreAlign, getSliceAlign())); ++ continue; ++ } ++ ++ assert(isa(I) || isa(I) || ++ isa(I) || isa(I)); ++ for (User *U : I->users()) ++ if (Visited.insert(cast(U)).second) ++ Uses.push_back(cast(U)); ++ } while (!Uses.empty()); ++ } ++ + bool visitPHINode(PHINode &PN) { + LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); + assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable"); +@@ -3069,6 +3105,9 @@ + LLVM_DEBUG(dbgs() << " to: " << PN << "\n"); + deleteIfTriviallyDead(OldPtr); + ++ // Fix the alignment of any loads or stores using this PHI node. ++ fixLoadStoreAlign(PN); ++ + // PHIs can't be promoted on their own, but often can be speculated. We + // check the speculation outside of the rewriter so that we see the + // fully-rewritten alloca. 
+@@ -3093,6 +3132,9 @@ + LLVM_DEBUG(dbgs() << " to: " << SI << "\n"); + deleteIfTriviallyDead(OldPtr); + ++ // Fix the alignment of any loads or stores using this select. ++ fixLoadStoreAlign(SI); ++ + // Selects can't be promoted on their own, but often can be speculated. We + // check the speculation outside of the rewriter so that we see the + // fully-rewritten alloca. +--- llvm-toolchain-6.0-6.0.1~+rc1.orig/test/Transforms/SROA/phi-and-select.ll ++++ llvm-toolchain-6.0-6.0.1~+rc1/test/Transforms/SROA/phi-and-select.ll +@@ -600,3 +600,35 @@ + store %struct.S undef, %struct.S* %f1, align 4 + ret void + } ++ ++define i32 @phi_align(i32* %z) { ++; CHECK-LABEL: @phi_align( ++entry: ++ %a = alloca [8 x i8], align 8 ++; CHECK: alloca [7 x i8] ++ ++ %a0x = getelementptr [8 x i8], [8 x i8]* %a, i64 0, i32 1 ++ %a0 = bitcast i8* %a0x to i32* ++ %a1x = getelementptr [8 x i8], [8 x i8]* %a, i64 0, i32 4 ++ %a1 = bitcast i8* %a1x to i32* ++; CHECK: store i32 0, {{.*}}, align 1 ++ store i32 0, i32* %a0, align 1 ++; CHECK: store i32 1, {{.*}}, align 1 ++ store i32 1, i32* %a1, align 4 ++; CHECK: load {{.*}}, align 1 ++ %v0 = load i32, i32* %a0, align 1 ++; CHECK: load {{.*}}, align 1 ++ %v1 = load i32, i32* %a1, align 4 ++ %cond = icmp sle i32 %v0, %v1 ++ br i1 %cond, label %then, label %exit ++ ++then: ++ br label %exit ++ ++exit: ++; CHECK: %phi = phi i32* [ {{.*}}, %then ], [ %z, %entry ] ++; CHECK-NEXT: %result = load i32, i32* %phi, align 1 ++ %phi = phi i32* [ %a1, %then ], [ %z, %entry ] ++ %result = load i32, i32* %phi, align 4 ++ ret i32 %result ++} diff --git a/debian/patches/series b/debian/patches/series index 95e1e2eb..5cdd2645 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -62,3 +62,4 @@ llvm-rL323946-LSRTy.patch PowerPC-Make-AddrSpaceCast-noop.diff D51108-rust-powerpc.diff pr38663-pgo-lto-crash.patch +D51335-alignment-issue.diff From 7f1481186f687dfdb934b7bcd8626bb535f1349e Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 30 Aug 
2018 11:26:30 +0200 Subject: [PATCH 04/12] refresh of the patch --- debian/patches/pr38663-pgo-lto-crash.patch | 116 +++++++++++---------- 1 file changed, 59 insertions(+), 57 deletions(-) diff --git a/debian/patches/pr38663-pgo-lto-crash.patch b/debian/patches/pr38663-pgo-lto-crash.patch index bdf10725..16db4086 100644 --- a/debian/patches/pr38663-pgo-lto-crash.patch +++ b/debian/patches/pr38663-pgo-lto-crash.patch @@ -1,5 +1,7 @@ ---- llvm-toolchain-6.0-6.0.1~+rc1.orig/lib/CodeGen/PeepholeOptimizer.cpp 2018/03/31 11:38:16 331838 -+++ llvm-toolchain-6.0-6.0.1~+rc1/lib/CodeGen/PeepholeOptimizer.cpp 2018/05/20 16:03:21 333926 +Index: llvm-toolchain-6.0-6.0.1/lib/CodeGen/PeepholeOptimizer.cpp +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/lib/CodeGen/PeepholeOptimizer.cpp ++++ llvm-toolchain-6.0-6.0.1/lib/CodeGen/PeepholeOptimizer.cpp @@ -98,6 +98,8 @@ #include @@ -9,7 +11,7 @@ #define DEBUG_TYPE "peephole-opt" -@@ -110,6 +112,9 @@ +@@ -110,6 +112,9 @@ static cl::opt DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); @@ -19,7 +21,7 @@ static cl::opt DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization")); -@@ -132,11 +137,11 @@ +@@ -132,11 +137,11 @@ static cl::opt MaxRecurrenceCh "of commuting operands")); @@ -36,7 +38,7 @@ STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); -@@ -149,9 +154,9 @@ +@@ -149,9 +154,9 @@ namespace { class PeepholeOptimizer : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -49,7 +51,7 @@ public: static char ID; // Pass identification -@@ -173,31 +178,28 @@ +@@ -173,31 +178,28 @@ namespace { } } @@ -93,7 +95,7 @@ DenseMap &ImmDefMIs); /// \brief Finds 
recurrence cycles, but only ones that formulated around -@@ -212,11 +214,11 @@ +@@ -212,11 +214,11 @@ namespace { /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was /// previously seen as a copy, replace the uses of this copy with the /// previously seen copy's destination register. @@ -107,7 +109,7 @@ bool isNAPhysCopy(unsigned Reg); /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical -@@ -224,11 +226,10 @@ +@@ -224,11 +226,10 @@ namespace { /// non-allocatable physical register was previously copied to a virtual /// registered and hasn't been clobbered, the virt->phys copy can be /// deleted. @@ -121,7 +123,7 @@ SmallSet &FoldAsLoadDefCandidates); /// \brief Check whether \p MI is understood by the register coalescer -@@ -249,10 +250,13 @@ +@@ -249,10 +250,13 @@ namespace { (MI.isRegSequenceLike() || MI.isInsertSubregLike() || MI.isExtractSubregLike())); } @@ -137,7 +139,7 @@ /// tied use operand, or 2) a def operand and a use operand that is commutable /// with another use operand which is tied to the def operand. In the latter /// case, index of the tied use operand and the commutable use operand are -@@ -273,13 +277,13 @@ +@@ -273,13 +277,13 @@ namespace { Optional CommutePair; }; @@ -155,7 +157,7 @@ /// Instruction using the sources in 'RegSrcs'. const MachineInstr *Inst = nullptr; -@@ -302,16 +306,20 @@ +@@ -302,16 +306,20 @@ namespace { } void addSource(unsigned SrcReg, unsigned SrcSubReg) { @@ -178,7 +180,7 @@ unsigned getSrcReg(int Idx) const { assert(Idx < getNumSources() && "Reg source out of index"); return RegSrcs[Idx].Reg; -@@ -367,59 +375,41 @@ +@@ -367,59 +375,41 @@ namespace { /// The register where the value can be found. unsigned Reg; @@ -248,7 +250,7 @@ /// If \p Reg is a physical register, a value tracker constructed with /// this constructor will not find any alternative source. 
/// Indeed, when \p Reg is a physical register that constructor does not -@@ -427,46 +417,20 @@ +@@ -427,46 +417,20 @@ namespace { /// Use the next constructor to track a physical register. ValueTracker(unsigned Reg, unsigned DefSubReg, const MachineRegisterInfo &MRI, @@ -296,7 +298,7 @@ }; } // end anonymous namespace -@@ -476,11 +440,11 @@ +@@ -476,11 +440,11 @@ char PeepholeOptimizer::ID = 0; char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE, @@ -310,7 +312,7 @@ /// If instruction is a copy-like instruction, i.e. it reads a single register /// and writes a single register and it does not modify the source, and if the -@@ -491,10 +455,10 @@ +@@ -491,10 +455,10 @@ INITIALIZE_PASS_END(PeepholeOptimizer, D /// the code. Since this code does not currently share EXTRACTs, just ignore all /// debug uses. bool PeepholeOptimizer:: @@ -323,7 +325,7 @@ return false; if (TargetRegisterInfo::isPhysicalRegister(DstReg) || -@@ -535,7 +499,7 @@ +@@ -535,7 +499,7 @@ optimizeExtInstr(MachineInstr *MI, Machi bool ExtendLife = true; for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { MachineInstr *UseMI = UseMO.getParent(); @@ -332,7 +334,7 @@ continue; if (UseMI->isPHI()) { -@@ -568,7 +532,7 @@ +@@ -568,7 +532,7 @@ optimizeExtInstr(MachineInstr *MI, Machi continue; MachineBasicBlock *UseMBB = UseMI->getParent(); @@ -341,7 +343,7 @@ // Local uses that come after the extension. if (!LocalMIs.count(UseMI)) Uses.push_back(&UseMO); -@@ -576,7 +540,7 @@ +@@ -576,7 +540,7 @@ optimizeExtInstr(MachineInstr *MI, Machi // Non-local uses where the result of the extension is used. Always // replace these unless it's a PHI. Uses.push_back(&UseMO); @@ -350,7 +352,7 @@ // We may want to extend the live range of the extension result in order // to replace these uses. 
ExtendedUses.push_back(&UseMO); -@@ -640,19 +604,18 @@ +@@ -640,19 +604,18 @@ optimizeExtInstr(MachineInstr *MI, Machi /// against already sets (or could be modified to set) the same flag as the /// compare, then we can remove the comparison and use the flag from the /// previous instruction. @@ -373,7 +375,7 @@ ++NumCmps; return true; } -@@ -661,27 +624,26 @@ +@@ -661,27 +624,26 @@ bool PeepholeOptimizer::optimizeCmpInstr } /// Optimize a select instruction. @@ -408,7 +410,7 @@ } /// \brief Try to find the next source that share the same register file -@@ -695,30 +657,29 @@ +@@ -695,30 +657,29 @@ bool PeepholeOptimizer::optimizeCondBran /// share the same register file as \p Reg and \p SubReg. The client should /// then be capable to rewrite all intermediate PHIs to get the next source. /// \return False if no alternative sources are available. True otherwise. @@ -447,7 +449,7 @@ // Follow the chain of copies until we find a more suitable source, a phi // or have to abort. -@@ -747,14 +708,17 @@ +@@ -747,14 +708,17 @@ bool PeepholeOptimizer::findNextSource(u unsigned NumSrcs = Res.getNumSources(); if (NumSrcs > 1) { PHICount++; @@ -469,7 +471,7 @@ // Do not extend the live-ranges of physical registers as they add // constraints to the register allocator. Moreover, if we want to extend // the live-range of a physical register, unlike SSA virtual register, -@@ -764,7 +728,8 @@ +@@ -764,7 +728,8 @@ bool PeepholeOptimizer::findNextSource(u // Keep following the chain if the value isn't any better yet. const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); @@ -479,7 +481,7 @@ continue; // We currently cannot deal with subreg operands on PHI instructions -@@ -775,7 +740,7 @@ +@@ -775,7 +740,7 @@ bool PeepholeOptimizer::findNextSource(u // We found a suitable source, and are done with this chain. break; } @@ -488,7 +490,7 @@ // If we did not find a more suitable source, there is nothing to optimize. 
return CurSrcPair.Reg != Reg; -@@ -786,54 +751,50 @@ +@@ -786,54 +751,50 @@ bool PeepholeOptimizer::findNextSource(u /// successfully traverse a PHI instruction and find suitable sources coming /// from its edges. By inserting a new PHI, we provide a rewritten PHI def /// suitable to be used in a new COPY instruction. @@ -563,7 +565,7 @@ /// coalescer friendly. In other words, given a copy-like instruction /// not all the arguments may be returned at rewritable source, since /// some arguments are none to be register coalescer friendly. -@@ -848,137 +809,72 @@ +@@ -848,137 +809,72 @@ public: /// the only source this instruction has: /// (SrcReg, SrcSubReg) = (src, srcSubIdx). /// This source defines the whole definition, i.e., @@ -732,7 +734,7 @@ // Find the next non-dead definition and continue from there. if (CurrentSrcIdx == NumDefs) return false; -@@ -990,64 +886,27 @@ +@@ -990,64 +886,27 @@ public: } // What we track are the alternative sources of the definition. @@ -805,7 +807,7 @@ /// Here CopyLike has the following form: /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. /// Src1 has the same register class has dst, hence, there is -@@ -1055,29 +914,27 @@ +@@ -1055,29 +914,27 @@ public: /// Src2.src2SubIdx, may not be register coalescer friendly. /// Therefore, the first call to this method returns: /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). @@ -841,7 +843,7 @@ return true; } -@@ -1092,41 +949,39 @@ +@@ -1092,41 +949,39 @@ public: } }; @@ -892,7 +894,7 @@ return true; } -@@ -1156,14 +1011,14 @@ +@@ -1156,14 +1011,14 @@ public: } }; @@ -911,7 +913,7 @@ /// Here CopyLike has the following form: /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. /// Each call will return a different source, walking all the available -@@ -1171,17 +1026,16 @@ +@@ -1171,17 +1026,16 @@ public: /// /// The first call returns: /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). 
@@ -933,7 +935,7 @@ // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. // If this is the first call, move to the first argument. -@@ -1194,17 +1048,17 @@ +@@ -1194,17 +1048,17 @@ public: return false; } const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); @@ -955,7 +957,7 @@ // If we have to compose sub-registers, bail. return MODef.getSubReg() == 0; } -@@ -1224,16 +1078,14 @@ +@@ -1224,16 +1078,14 @@ public: } // end anonymous namespace @@ -979,7 +981,7 @@ switch (MI.getOpcode()) { default: -@@ -1247,53 +1099,102 @@ +@@ -1247,53 +1099,102 @@ static CopyRewriter *getCopyRewriter(Mac case TargetOpcode::REG_SEQUENCE: return new RegSequenceRewriter(MI); } @@ -1102,7 +1104,7 @@ continue; // Rewrite source. -@@ -1312,6 +1213,47 @@ +@@ -1312,6 +1213,47 @@ bool PeepholeOptimizer::optimizeCoalesca return Changed; } @@ -1150,7 +1152,7 @@ /// \brief Optimize copy-like instructions to create /// register coalescer friendly instruction. /// The optimization tries to kill-off the \p MI by looking -@@ -1324,48 +1266,40 @@ +@@ -1324,48 +1266,40 @@ bool PeepholeOptimizer::optimizeCoalesca /// been removed from its parent. /// All COPY instructions created, are inserted in \p LocalMIs. bool PeepholeOptimizer::optimizeUncoalescableCopy( @@ -1212,7 +1214,7 @@ ++NumUncoalescableCopies; return true; } -@@ -1374,18 +1308,18 @@ +@@ -1374,18 +1308,18 @@ bool PeepholeOptimizer::optimizeUncoales /// We only fold loads to virtual registers and the virtual register defined /// has a single use. 
bool PeepholeOptimizer::isLoadFoldable( @@ -1236,7 +1238,7 @@ TargetRegisterInfo::isVirtualRegister(Reg) && MRI->hasOneNonDBGUse(Reg)) { FoldAsLoadDefCandidates.insert(Reg); -@@ -1395,16 +1329,16 @@ +@@ -1395,16 +1329,16 @@ bool PeepholeOptimizer::isLoadFoldable( } bool PeepholeOptimizer::isMoveImmediate( @@ -1258,7 +1260,7 @@ ImmDefRegs.insert(Reg); return true; } -@@ -1415,11 +1349,11 @@ +@@ -1415,11 +1349,11 @@ bool PeepholeOptimizer::isMoveImmediate( /// Try folding register operands that are defined by move immediate /// instructions, i.e. a trivial constant folding optimization, if /// and only if the def and use are in the same BB. @@ -1274,7 +1276,7 @@ if (!MO.isReg() || MO.isDef()) continue; // Ignore dead implicit defs. -@@ -1432,7 +1366,7 @@ +@@ -1432,7 +1366,7 @@ bool PeepholeOptimizer::foldImmediate( continue; DenseMap::iterator II = ImmDefMIs.find(Reg); assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); @@ -1283,7 +1285,7 @@ ++NumImmFold; return true; } -@@ -1454,28 +1388,28 @@ +@@ -1454,28 +1388,28 @@ bool PeepholeOptimizer::foldImmediate( // %2 = COPY %0:sub1 // // Should replace %2 uses with %1:sub1 @@ -1319,7 +1321,7 @@ unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg(); // Can't replace different subregister extracts. -@@ -1504,19 +1438,19 @@ +@@ -1504,19 +1438,19 @@ bool PeepholeOptimizer::isNAPhysCopy(uns } bool PeepholeOptimizer::foldRedundantNAPhysCopy( @@ -1344,7 +1346,7 @@ return false; } -@@ -1528,8 +1462,7 @@ +@@ -1528,8 +1462,7 @@ bool PeepholeOptimizer::foldRedundantNAP if (PrevCopy == NAPhysToVirtMIs.end()) { // We can't remove the copy: there was an intervening clobber of the // non-allocatable physical register after the copy to virtual. 
@@ -1354,7 +1356,7 @@ return false; } -@@ -1537,7 +1470,7 @@ +@@ -1537,7 +1470,7 @@ bool PeepholeOptimizer::foldRedundantNAP if (PrevDstReg == SrcReg) { // Remove the virt->phys copy: we saw the virtual register definition, and // the non-allocatable physical register's state hasn't changed since then. @@ -1363,7 +1365,7 @@ ++NumNAPhysCopies; return true; } -@@ -1546,7 +1479,7 @@ +@@ -1546,7 +1479,7 @@ bool PeepholeOptimizer::foldRedundantNAP // register get a copy of the non-allocatable physical register, and we only // track one such copy. Avoid getting confused by this new non-allocatable // physical register definition, and remove it from the tracked copies. @@ -1372,7 +1374,7 @@ NAPhysToVirtMIs.erase(PrevCopy); return false; } -@@ -1611,11 +1544,11 @@ +@@ -1611,11 +1544,11 @@ bool PeepholeOptimizer::findTargetRecurr return false; } @@ -1389,7 +1391,7 @@ /// /// LoopHeader: /// %1 = phi(%0, %100) -@@ -1725,27 +1658,25 @@ +@@ -1725,27 +1658,25 @@ bool PeepholeOptimizer::runOnMachineFunc } if (!MI->isCopy()) { @@ -1425,7 +1427,7 @@ NAPhysToVirtMIs.erase(Def); } } -@@ -1761,58 +1692,57 @@ +@@ -1761,58 +1692,57 @@ bool PeepholeOptimizer::runOnMachineFunc // don't know what's correct anymore. // // FIXME: handle explicit asm clobbers. @@ -1496,7 +1498,7 @@ !FoldAsLoadDefCandidates.empty()) { // We visit each operand even after successfully folding a previous -@@ -1861,7 +1791,7 @@ +@@ -1861,7 +1791,7 @@ bool PeepholeOptimizer::runOnMachineFunc // the load candidates. Note: We might be able to fold *into* this // instruction, so this needs to be after the folding logic. if (MI->isLoadFoldBarrier()) { @@ -1505,7 +1507,7 @@ FoldAsLoadDefCandidates.clear(); } } -@@ -1954,14 +1884,14 @@ +@@ -1958,14 +1888,14 @@ ValueTrackerResult ValueTracker::getNext // duplicate the code from the generic TII. return ValueTrackerResult(); @@ -1522,7 +1524,7 @@ if (RegSeqInput.SubIdx == DefSubReg) { if (RegSeqInput.SubReg) // Bail if we have to compose sub registers. 
-@@ -1992,8 +1922,8 @@ +@@ -1996,8 +1926,8 @@ ValueTrackerResult ValueTracker::getNext // duplicate the code from the generic TII. return ValueTrackerResult(); @@ -1533,7 +1535,7 @@ if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) return ValueTrackerResult(); -@@ -2046,7 +1976,7 @@ +@@ -2050,7 +1980,7 @@ ValueTrackerResult ValueTracker::getNext // duplicate the code from the generic TII. return ValueTrackerResult(); @@ -1542,7 +1544,7 @@ if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) return ValueTrackerResult(); -@@ -2079,7 +2009,7 @@ +@@ -2083,7 +2013,7 @@ ValueTrackerResult ValueTracker::getNext Def->getOperand(3).getImm()); } @@ -1551,16 +1553,16 @@ ValueTrackerResult ValueTracker::getNextSourceFromPHI() { assert(Def->isPHI() && "Invalid definition"); ValueTrackerResult Res; -@@ -2091,7 +2021,7 @@ +@@ -2095,7 +2025,7 @@ ValueTrackerResult ValueTracker::getNext // Return all register sources for PHI instructions. for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { - auto &MO = Def->getOperand(i); + const MachineOperand &MO = Def->getOperand(i); assert(MO.isReg() && "Invalid PHI instruction"); - Res.addSource(MO.getReg(), MO.getSubReg()); - } -@@ -2113,7 +2043,7 @@ + // We have no code to deal with undef operands. They shouldn't happen in + // normal programs anyway. +@@ -2121,7 +2051,7 @@ ValueTrackerResult ValueTracker::getNext return getNextSourceFromBitcast(); // All the remaining cases involve "complex" instructions. // Bail if we did not ask for the advanced tracking. 
From ea146c9ff132efdea9bb55ebb7ba7ff9bf3f305a Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 30 Aug 2018 18:28:17 +0200 Subject: [PATCH 05/12] rebase of the patch --- debian/patches/D51335-alignment-issue.diff | 26 +++++++++++++--------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/debian/patches/D51335-alignment-issue.diff b/debian/patches/D51335-alignment-issue.diff index aeca7e8d..6c8589cd 100644 --- a/debian/patches/D51335-alignment-issue.diff +++ b/debian/patches/D51335-alignment-issue.diff @@ -1,6 +1,8 @@ ---- llvm-toolchain-6.0-6.0.1~+rc1.orig/lib/Transforms/Scalar/SROA.cpp -+++ llvm-toolchain-6.0-6.0.1~+rc1/lib/Transforms/Scalar/SROA.cpp -@@ -3046,6 +3046,42 @@ +Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/SROA.cpp +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Scalar/SROA.cpp ++++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Scalar/SROA.cpp +@@ -2987,6 +2987,42 @@ private: return true; } @@ -41,10 +43,10 @@ + } + bool visitPHINode(PHINode &PN) { - LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); + DEBUG(dbgs() << " original: " << PN << "\n"); assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable"); -@@ -3069,6 +3105,9 @@ - LLVM_DEBUG(dbgs() << " to: " << PN << "\n"); +@@ -3010,6 +3046,9 @@ private: + DEBUG(dbgs() << " to: " << PN << "\n"); deleteIfTriviallyDead(OldPtr); + // Fix the alignment of any loads or stores using this PHI node. @@ -53,8 +55,8 @@ // PHIs can't be promoted on their own, but often can be speculated. We // check the speculation outside of the rewriter so that we see the // fully-rewritten alloca. -@@ -3093,6 +3132,9 @@ - LLVM_DEBUG(dbgs() << " to: " << SI << "\n"); +@@ -3034,6 +3073,9 @@ private: + DEBUG(dbgs() << " to: " << SI << "\n"); deleteIfTriviallyDead(OldPtr); + // Fix the alignment of any loads or stores using this select. 
@@ -63,9 +65,11 @@ // Selects can't be promoted on their own, but often can be speculated. We // check the speculation outside of the rewriter so that we see the // fully-rewritten alloca. ---- llvm-toolchain-6.0-6.0.1~+rc1.orig/test/Transforms/SROA/phi-and-select.ll -+++ llvm-toolchain-6.0-6.0.1~+rc1/test/Transforms/SROA/phi-and-select.ll -@@ -600,3 +600,35 @@ +Index: llvm-toolchain-6.0-6.0.1/test/Transforms/SROA/phi-and-select.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/SROA/phi-and-select.ll ++++ llvm-toolchain-6.0-6.0.1/test/Transforms/SROA/phi-and-select.ll +@@ -600,3 +600,35 @@ if.then5: store %struct.S undef, %struct.S* %f1, align 4 ret void } From 79ca35379082363265f1328f11b2c2029c6fa40c Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 6 Sep 2018 20:51:50 +0200 Subject: [PATCH 06/12] Fix an optimization issues (Closes: #907649) See upstream bug 38786 --- debian/changelog | 4 ++- debian/patches/D51639-optim-issue.diff | 46 ++++++++++++++++++++++++++ debian/patches/series | 1 + 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 debian/patches/D51639-optim-issue.diff diff --git a/debian/changelog b/debian/changelog index a6208e34..eec039c8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -5,8 +5,10 @@ llvm-toolchain-6.0 (1:6.0.1-7) unstable; urgency=medium See upstream bug 38663 * Fix an alignment issue See upstream bug 38707 (Closes: #907622) + * Fix an optimization issues (Closes: #907649) + See upstream bug 38786 - -- + -- Sylvestre Ledru Thu, 06 Sep 2018 20:51:24 +0200 llvm-toolchain-6.0 (1:6.0.1-6) unstable; urgency=medium diff --git a/debian/patches/D51639-optim-issue.diff b/debian/patches/D51639-optim-issue.diff new file mode 100644 index 00000000..791ef595 --- /dev/null +++ b/debian/patches/D51639-optim-issue.diff @@ -0,0 +1,46 @@ +Index: llvm-toolchain-6.0-6.0.1/lib/Transforms/Vectorize/LoopVectorize.cpp 
+=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/lib/Transforms/Vectorize/LoopVectorize.cpp ++++ llvm-toolchain-6.0-6.0.1/lib/Transforms/Vectorize/LoopVectorize.cpp +@@ -4529,6 +4529,11 @@ + // isOutOfScope operands cannot be uniform instructions. + if (isOutOfScope(OV)) + continue; ++ // First order recurrence Phi's should typically be considered ++ // non-uniform. ++ auto *OP = dyn_cast(OV); ++ if (OP && Legal->isFirstOrderRecurrence(OP)) ++ continue; + // If all the users of the operand are uniform, then add the + // operand into the uniform worklist. + auto *OI = cast(OV); +Index: llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/X86/uniform-phi.ll +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/Transforms/LoopVectorize/X86/uniform-phi.ll ++++ llvm-toolchain-6.0-6.0.1/test/Transforms/LoopVectorize/X86/uniform-phi.ll +@@ -75,3 +75,25 @@ + ret i64 %retval + } + ++; CHECK-LABEL: PR38786 ++; Check that first order recurrence phis (%phi32 and %phi64) are not uniform. 
++; CHECK-NOT: LV: Found uniform instruction: %phi ++define void @PR38786(double* %y, double* %x, i64 %n) { ++entry: ++ br label %for.body ++ ++for.body: ++ %phi32 = phi i32 [ 0, %entry ], [ %i32next, %for.body ] ++ %phi64 = phi i64 [ 0, %entry ], [ %i64next, %for.body ] ++ %i32next = add i32 %phi32, 1 ++ %i64next = zext i32 %i32next to i64 ++ %xip = getelementptr inbounds double, double* %x, i64 %i64next ++ %yip = getelementptr inbounds double, double* %y, i64 %phi64 ++ %xi = load double, double* %xip, align 8 ++ store double %xi, double* %yip, align 8 ++ %cmp = icmp slt i64 %i64next, %n ++ br i1 %cmp, label %for.body, label %for.end ++ ++for.end: ++ ret void ++} diff --git a/debian/patches/series b/debian/patches/series index 5cdd2645..0e732a84 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -63,3 +63,4 @@ PowerPC-Make-AddrSpaceCast-noop.diff D51108-rust-powerpc.diff pr38663-pgo-lto-crash.patch D51335-alignment-issue.diff +D51639-optim-issue.diff From 27ee005bc9f38b418fa44293a5ea58fdab713751 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 7 Sep 2018 14:37:21 +0200 Subject: [PATCH 07/12] fix a build issue with openmp --- debian/rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/rules b/debian/rules index bc0ef238..7a259da6 100755 --- a/debian/rules +++ b/debian/rules @@ -435,7 +435,7 @@ build_doc: LD_LIBRARY_PATH=$(DEB_INST)/usr/lib/llvm-$(LLVM_VERSION)/lib/:/usr/lib/*/libfakeroot help2man --no-info --version-string=$(LLVM_VERSION) $(TARGET_BUILD)/bin/$$f > debian/man/$$f-$(LLVM_VERSION).1; \ done if test "$(OPENMP_ENABLE)" = yes; then \ - cd openmp/runtime && doxygen doc/doxygen/config; \ + cd openmp/runtime && doxygen doc/doxygen/config; cd -; \ cd openmp/runtime/doc/doxygen/generated/html/ && rm jquery.js && ln -s /usr/share/javascript/jquery/jquery.js; \ fi From 8337dca8061e3ef218ef0044d7ddd22e3e3e6797 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 10 Sep 2018 16:57:56 +0200 Subject: [PATCH 
08/12] Cherry pick an upstream issue with x86 mentionned here: https://lists.llvm.org/pipermail/llvm-dev/2018-August/125111.html "A very subtle miscompile due to a bug in EFLAGS copy lowering for X86 was fixed. Chandler suggests maintainers of out-of-tree branches using the X86 backend may want to cherry-pick this fix." https://reviews.llvm.org/rL338481 --- debian/changelog | 6 + ...-cherry-pick-really-subtle-miscompile.diff | 153 ++++++++++++++++++ debian/patches/series | 1 + 3 files changed, 160 insertions(+) create mode 100644 debian/patches/rL338481-cherry-pick-really-subtle-miscompile.diff diff --git a/debian/changelog b/debian/changelog index eec039c8..898c96e6 100644 --- a/debian/changelog +++ b/debian/changelog @@ -7,6 +7,12 @@ llvm-toolchain-6.0 (1:6.0.1-7) unstable; urgency=medium See upstream bug 38707 (Closes: #907622) * Fix an optimization issues (Closes: #907649) See upstream bug 38786 + * Cherry pick an upstream issue with x86 mentionned here: + https://lists.llvm.org/pipermail/llvm-dev/2018-August/125111.html + "A very subtle miscompile due to a bug in EFLAGS copy lowering + for X86 was fixed. Chandler suggests maintainers of out-of-tree + branches using the X86 backend may want to cherry-pick this fix." 
+ https://reviews.llvm.org/rL338481 -- Sylvestre Ledru Thu, 06 Sep 2018 20:51:24 +0200 diff --git a/debian/patches/rL338481-cherry-pick-really-subtle-miscompile.diff b/debian/patches/rL338481-cherry-pick-really-subtle-miscompile.diff new file mode 100644 index 00000000..07653e53 --- /dev/null +++ b/debian/patches/rL338481-cherry-pick-really-subtle-miscompile.diff @@ -0,0 +1,153 @@ +Index: llvm-toolchain-6.0-6.0.1/test/CodeGen/X86/flags-copy-lowering.mir +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/test/CodeGen/X86/flags-copy-lowering.mir ++++ llvm-toolchain-6.0-6.0.1/test/CodeGen/X86/flags-copy-lowering.mir +@@ -72,6 +72,18 @@ + call void @foo() + ret void + } ++ ++ define i32 @test_existing_setcc(i64 %a, i64 %b) { ++ entry: ++ call void @foo() ++ ret i32 0 ++ } ++ ++ define i32 @test_existing_setcc_memory(i64 %a, i64 %b) { ++ entry: ++ call void @foo() ++ ret i32 0 ++ } + ... + --- + name: test_branch +@@ -553,3 +565,110 @@ body: | + RET 0 + + ... 
++--- ++name: test_existing_setcc ++# CHECK-LABEL: name: test_existing_setcc ++liveins: ++ - { reg: '$rdi', virtual-reg: '%0' } ++ - { reg: '$rsi', virtual-reg: '%1' } ++body: | ++ bb.0: ++ successors: %bb.1, %bb.2, %bb.3 ++ liveins: $rdi, $rsi ++ ++ %0:gr64 = COPY $rdi ++ %1:gr64 = COPY $rsi ++ CMP64rr %0, %1, implicit-def $eflags ++ %2:gr8 = SETAr implicit $eflags ++ %3:gr8 = SETAEr implicit $eflags ++ %4:gr64 = COPY $eflags ++ ; CHECK: CMP64rr %0, %1, implicit-def $eflags ++ ; CHECK-NEXT: %[[A_REG:[^:]*]]:gr8 = SETAr implicit $eflags ++ ; CHECK-NEXT: %[[AE_REG:[^:]*]]:gr8 = SETAEr implicit $eflags ++ ; CHECK-NOT: COPY{{( killed)?}} $eflags ++ ++ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ++ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax ++ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ++ ++ $eflags = COPY %4 ++ JA_1 %bb.1, implicit $eflags ++ JB_1 %bb.2, implicit $eflags ++ JMP_1 %bb.3 ++ ; CHECK-NOT: $eflags = ++ ; ++ ; CHECK: TEST8rr %[[A_REG]], %[[A_REG]], implicit-def $eflags ++ ; CHECK-NEXT: JNE_1 %bb.1, implicit killed $eflags ++ ; CHECK-SAME: {{$[[:space:]]}} ++ ; CHECK-NEXT: bb.4: ++ ; CHECK-NEXT: successors: {{.*$}} ++ ; CHECK-SAME: {{$[[:space:]]}} ++ ; CHECK-NEXT: TEST8rr %[[AE_REG]], %[[AE_REG]], implicit-def $eflags ++ ; CHECK-NEXT: JE_1 %bb.2, implicit killed $eflags ++ ; CHECK-NEXT: JMP_1 %bb.3 ++ ++ bb.1: ++ %5:gr32 = MOV32ri64 42 ++ $eax = COPY %5 ++ RET 0, $eax ++ ++ bb.2: ++ %6:gr32 = MOV32ri64 43 ++ $eax = COPY %6 ++ RET 0, $eax ++ ++ bb.3: ++ %7:gr32 = MOV32r0 implicit-def dead $eflags ++ $eax = COPY %7 ++ RET 0, $eax ++ ++... 
++--- ++name: test_existing_setcc_memory ++# CHECK-LABEL: name: test_existing_setcc_memory ++liveins: ++ - { reg: '$rdi', virtual-reg: '%0' } ++ - { reg: '$rsi', virtual-reg: '%1' } ++body: | ++ bb.0: ++ successors: %bb.1, %bb.2 ++ liveins: $rdi, $rsi ++ ++ %0:gr64 = COPY $rdi ++ %1:gr64 = COPY $rsi ++ CMP64rr %0, %1, implicit-def $eflags ++ SETEm %0, 1, $noreg, -16, $noreg, implicit $eflags ++ %2:gr64 = COPY $eflags ++ ; CHECK: CMP64rr %0, %1, implicit-def $eflags ++ ; We cannot reuse this SETE because it stores the flag directly to memory, ++ ; so we have two SETEs here. FIXME: It'd be great if something could fold ++ ; these automatically. If not, maybe we want to unfold SETcc instructions ++ ; writing to memory so we can reuse them. ++ ; CHECK-NEXT: SETEm {{.*}} implicit $eflags ++ ; CHECK-NEXT: %[[E_REG:[^:]*]]:gr8 = SETEr implicit $eflags ++ ; CHECK-NOT: COPY{{( killed)?}} $eflags ++ ++ ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ++ CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax ++ ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ++ ++ $eflags = COPY %2 ++ JE_1 %bb.1, implicit $eflags ++ JMP_1 %bb.2 ++ ; CHECK-NOT: $eflags = ++ ; ++ ; CHECK: TEST8rr %[[E_REG]], %[[E_REG]], implicit-def $eflags ++ ; CHECK-NEXT: JNE_1 %bb.1, implicit killed $eflags ++ ; CHECK-NEXT: JMP_1 %bb.2 ++ ++ bb.1: ++ %3:gr32 = MOV32ri64 42 ++ $eax = COPY %3 ++ RET 0, $eax ++ ++ bb.2: ++ %4:gr32 = MOV32ri64 43 ++ $eax = COPY %4 ++ RET 0, $eax ++ ++... 
+Index: llvm-toolchain-6.0-6.0.1/lib/Target/X86/X86FlagsCopyLowering.cpp +=================================================================== +--- llvm-toolchain-6.0-6.0.1.orig/lib/Target/X86/X86FlagsCopyLowering.cpp ++++ llvm-toolchain-6.0-6.0.1/lib/Target/X86/X86FlagsCopyLowering.cpp +@@ -608,9 +608,12 @@ X86FlagsCopyLoweringPass::collectCondsIn + for (MachineInstr &MI : llvm::reverse( + llvm::make_range(MBB.instr_begin(), CopyDefI.getIterator()))) { + X86::CondCode Cond = X86::getCondFromSETOpc(MI.getOpcode()); +- if (Cond != X86::COND_INVALID && MI.getOperand(0).isReg() && +- TRI->isVirtualRegister(MI.getOperand(0).getReg())) ++ if (Cond != X86::COND_INVALID && !MI.mayStore() && MI.getOperand(0).isReg() && ++ TRI->isVirtualRegister(MI.getOperand(0).getReg())) { ++ assert(MI.getOperand(0).isDef() && ++ "A non-storing SETcc should always define a register!"); + CondRegs[Cond] = MI.getOperand(0).getReg(); ++ } + + // Stop scanning when we see the first definition of the EFLAGS as prior to + // this we would potentially capture the wrong flag state. 
diff --git a/debian/patches/series b/debian/patches/series index 0e732a84..5f3ca0bd 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -64,3 +64,4 @@ D51108-rust-powerpc.diff pr38663-pgo-lto-crash.patch D51335-alignment-issue.diff D51639-optim-issue.diff +rL338481-cherry-pick-really-subtle-miscompile.diff From a09a33c62e0305e1c3a74d1d5c55304851ff8e9f Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 10 Sep 2018 17:29:11 +0200 Subject: [PATCH 09/12] New snapshot release --- debian/changelog | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index accc41ff..571a4309 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,10 +1,13 @@ -llvm-toolchain-7 (1:7~+rc2-1~exp4) UNRELEASED; urgency=medium +llvm-toolchain-7 (1:7~+rc3-1) unstable; urgency=medium [ John Paul Adrian Glaubitz ] * Disable OpenMP on unsupported architectures powerpc, powerpcspe, riscv64 and sparc64 (Closes: #907912) - -- John Paul Adrian Glaubitz Tue, 04 Sep 2018 10:20:57 +0200 + [ Sylvestre Ledru ] + * New snapshot release + + -- Sylvestre Ledru Mon, 10 Sep 2018 17:01:25 +0200 llvm-toolchain-7 (1:7~+rc2-1~exp3) experimental; urgency=medium From 815439705690b9d41f80b343619508062be4666e Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 10 Sep 2018 23:29:49 +0200 Subject: [PATCH 10/12] disable a patch which doesn't apply --- debian/changelog | 2 -- debian/patches/series | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/debian/changelog b/debian/changelog index 898c96e6..b735495f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -5,8 +5,6 @@ llvm-toolchain-6.0 (1:6.0.1-7) unstable; urgency=medium See upstream bug 38663 * Fix an alignment issue See upstream bug 38707 (Closes: #907622) - * Fix an optimization issues (Closes: #907649) - See upstream bug 38786 * Cherry pick an upstream issue with x86 mentionned here: https://lists.llvm.org/pipermail/llvm-dev/2018-August/125111.html "A very 
subtle miscompile due to a bug in EFLAGS copy lowering diff --git a/debian/patches/series b/debian/patches/series index 5f3ca0bd..533ea6c5 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -63,5 +63,6 @@ PowerPC-Make-AddrSpaceCast-noop.diff D51108-rust-powerpc.diff pr38663-pgo-lto-crash.patch D51335-alignment-issue.diff -D51639-optim-issue.diff +# Doesn't fully apply +# D51639-optim-issue.diff rL338481-cherry-pick-really-subtle-miscompile.diff From 1c3d927f815441c1853a8f48054f43ab24099c86 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 11 Sep 2018 11:30:38 +0200 Subject: [PATCH 11/12] add a test case for bug 900440 --- debian/qualify-clang.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/debian/qualify-clang.sh b/debian/qualify-clang.sh index e8bd3d27..cc96184a 100644 --- a/debian/qualify-clang.sh +++ b/debian/qualify-clang.sh @@ -205,6 +205,24 @@ echo "Test: CMake find LLVM and Clang in explicit prefix path" (cd cmaketest/explicit && CC=clang-$VERSION CXX=clang++-$VERSION CMAKE_PREFIX_PATH=/usr/lib/llvm-$VERSION cmake ..) rm -rf cmaketest +# Test case for bug #900440 +rm -rf cmaketest && mkdir cmaketest +cat > cmaketest/CMakeLists.txt </dev/null 2>/dev/null || { printf "Usage:\n%s CLANGEXE [ARGS]\n" "$0" 1>&2; exit 1; } #shift From 66affa62a8016479f967d57e0970ad9661f0e1b4 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Tue, 11 Sep 2018 11:40:44 +0200 Subject: [PATCH 12/12] Improve the management of the merge on debian/changelog --- .gitattributes | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..c0bb3fe0 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +debian/changelog merge=dpkg-mergechangelogs + +# ~/.gitconfig should have +# [merge "dpkg-mergechangelogs"] +# name = debian/changelog merge driver +# driver = dpkg-mergechangelogs -m %O %A %B %A