From ccab486c34bda2d0b2905d5dfcda3890e9099e71 Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Thu, 14 Feb 2019 16:24:58 +0100 Subject: [PATCH] Update D54583-powerpcspe-double-parameter.diff to latest revision --- debian/changelog | 1 + .../D54583-powerpcspe-double-parameter.diff | 420 ++++++++++++++++++ .../D54584-powerpcspe-double-parameter.diff | 217 --------- debian/patches/series | 2 +- 4 files changed, 422 insertions(+), 218 deletions(-) create mode 100644 debian/patches/powerpcspe/D54583-powerpcspe-double-parameter.diff delete mode 100644 debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff diff --git a/debian/changelog b/debian/changelog index c76ddd74..2284eba4 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,7 @@ llvm-toolchain-snapshot (1:9~svn353106-1~exp2) UNRELEASED; urgency=medium [ John Paul Adrian Glaubitz ] * Update patches for powerpcspe: - D49754-powerpcspe-clang.diff + - D54583-powerpcspe-double-parameter.diff -- John Paul Adrian Glaubitz Thu, 14 Feb 2019 16:21:28 +0100 diff --git a/debian/patches/powerpcspe/D54583-powerpcspe-double-parameter.diff b/debian/patches/powerpcspe/D54583-powerpcspe-double-parameter.diff new file mode 100644 index 00000000..969f0eb8 --- /dev/null +++ b/debian/patches/powerpcspe/D54583-powerpcspe-double-parameter.diff @@ -0,0 +1,420 @@ +Description: PowerPC: Optimize SPE double parameter calling setup +Author: Justin Hibbits +Origin: https://reviews.llvm.org/D54583 +Last-Update: 2019-02-14 + +--- llvm-toolchain-snapshot-9~svn351420.orig/lib/Target/PowerPC/PPCCallingConv.td ++++ llvm-toolchain-snapshot-9~svn351420/lib/Target/PowerPC/PPCCallingConv.td +@@ -90,7 +90,7 @@ def RetCC_PPC : CallingConv<[ + CCIfSubtarget<"hasSPE()", + CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, + CCIfSubtarget<"hasSPE()", +- CCIfType<[f64], CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>, ++ CCIfType<[f64], CCCustom<"CC_PPC32_SPE_RetF64">>>, + + // For P9, f128 are passed in vector registers. + CCIfType<[f128], +@@ -179,6 +179,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[ + CCIfType<[i32], + CCIfSplit>>>, ++ CCIfType<[f64], ++ CCIfSubtarget<"hasSPE()", ++ CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>, + CCIfSplit>>>, +@@ -199,7 +202,7 @@ def CC_PPC32_SVR4_Common : CallingConv<[ + CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>>, + CCIfType<[f64], + CCIfSubtarget<"hasSPE()", +- CCAssignToReg<[S3, S4, S5, S6, S7, S8, S9, S10]>>>, ++ CCCustom<"CC_PPC32_SPE_CustomSplitFP64">>>, + CCIfType<[f32], + CCIfSubtarget<"hasSPE()", + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>>, +--- llvm-toolchain-snapshot-9~svn351420.orig/lib/Target/PowerPC/PPCISelLowering.cpp ++++ llvm-toolchain-snapshot-9~svn351420/lib/Target/PowerPC/PPCISelLowering.cpp +@@ -1232,22 +1232,6 @@ unsigned PPCTargetLowering::getByValType + return Align; + } + +-unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, +- CallingConv:: ID CC, +- EVT VT) const { +- if (Subtarget.hasSPE() && VT == MVT::f64) +- return 2; +- return PPCTargetLowering::getNumRegisters(Context, VT); +-} +- +-MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, +- CallingConv:: ID CC, +- EVT VT) const { +- if (Subtarget.hasSPE() && VT == MVT::f64) +- return MVT::i32; +- return PPCTargetLowering::getRegisterType(Context, VT); +-} +- + bool PPCTargetLowering::useSoftFloat() const { + return Subtarget.useSoftFloat(); + } +@@ -1365,6 +1349,8 @@ const char *PPCTargetLowering::getTarget + case PPCISD::QBFLT: return "PPCISD::QBFLT"; + case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; + case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; ++ case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64"; ++ case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE"; + case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; + } + return nullptr; +@@ -3162,6 +3148,58 @@ bool llvm::CC_PPC32_SVR4_Custom_Dummy(un + return true; + } + ++bool llvm::CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT, ++ MVT &LocVT, ++ CCValAssign::LocInfo &LocInfo, ++ ISD::ArgFlagsTy &ArgFlags, ++ CCState &State) { ++ static const MCPhysReg HiRegList[] = { PPC::R3, PPC::R5, PPC::R7, PPC::R9 }; ++ static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 }; ++ ++ // Try to get the first register. ++ unsigned Reg = State.AllocateReg(HiRegList); ++ if (!Reg) ++ return false; ++ ++ unsigned i; ++ for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i) ++ if (HiRegList[i] == Reg) ++ break; ++ ++ unsigned T = State.AllocateReg(LoRegList[i]); ++ (void)T; ++ assert(T == LoRegList[i] && "Could not allocate register"); ++ ++ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); ++ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], ++ LocVT, LocInfo)); ++ return true; ++} ++ ++bool llvm::CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT, ++ MVT &LocVT, ++ CCValAssign::LocInfo &LocInfo, ++ ISD::ArgFlagsTy &ArgFlags, ++ CCState &State) { ++ static const MCPhysReg HiRegList[] = { PPC::R3, PPC::R5, PPC::R7, PPC::R9 }; ++ static const MCPhysReg LoRegList[] = { PPC::R4, PPC::R6, PPC::R8, PPC::R10 }; ++ ++ // Try to get the first register. ++ unsigned Reg = State.AllocateReg(HiRegList); ++ if (!Reg) ++ return false; ++ ++ unsigned i; ++ for (i = 0; i < sizeof(HiRegList) / sizeof(HiRegList[0]); ++i) ++ if (HiRegList[i] == Reg) ++ break; ++ ++ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); ++ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], ++ LocVT, LocInfo)); ++ return true; ++} ++ + bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, +@@ -3449,7 +3487,7 @@ SDValue PPCTargetLowering::LowerFormalAr + // Reserve space for the linkage area on the stack. + unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); + CCInfo.AllocateStack(LinkageSize, PtrByteSize); +- if (useSoftFloat() || hasSPE()) ++ if (useSoftFloat()) + CCInfo.PreAnalyzeFormalArguments(Ins); + + CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); +@@ -3482,7 +3520,8 @@ SDValue PPCTargetLowering::LowerFormalAr + if (Subtarget.hasVSX()) + RC = &PPC::VSFRCRegClass; + else if (Subtarget.hasSPE()) +- RC = &PPC::SPERCRegClass; ++ // SPE passes doubles in GPR pairs. ++ RC = &PPC::GPRCRegClass; + else + RC = &PPC::F8RCRegClass; + break; +@@ -3506,13 +3545,30 @@ SDValue PPCTargetLowering::LowerFormalAr + break; + } + +- // Transform the arguments stored in physical registers into virtual ones. +- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); +- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, +- ValVT == MVT::i1 ? MVT::i32 : ValVT); ++ SDValue ArgValue; ++ if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) { ++ // Transform the arguments stored in physical registers into ++ // virtual ones. ++ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ++ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); ++ ++ SDValue ArgValue2; ++ Reg = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC); ++ ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); ++ if (!Subtarget.isLittleEndian()) ++ std::swap (ArgValue, ArgValue2); ++ ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValue, ++ ArgValue2); ++ } else { + +- if (ValVT == MVT::i1) +- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); ++ // Transform the arguments stored in physical registers into ++ // virtual ones. ++ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ++ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ++ ValVT == MVT::i1 ? MVT::i32 : ValVT); ++ if (ValVT == MVT::i1) ++ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); ++ } + + InVals.push_back(ArgValue); + } else { +@@ -5129,10 +5185,27 @@ SDValue PPCTargetLowering::LowerCallResu + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + +- SDValue Val = DAG.getCopyFromReg(Chain, dl, +- VA.getLocReg(), VA.getLocVT(), InFlag); +- Chain = Val.getValue(1); +- InFlag = Val.getValue(2); ++ SDValue Val; ++ ++ if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) { ++ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, ++ InFlag); ++ Chain = Lo.getValue(1); ++ InFlag = Lo.getValue(2); ++ VA = RVLocs[++i]; // skip ahead to next loc ++ SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, ++ InFlag); ++ Chain = Hi.getValue(1); ++ InFlag = Hi.getValue(2); ++ if (!Subtarget.isLittleEndian()) ++ std::swap (Lo, Hi); ++ Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi); ++ } else { ++ Val = DAG.getCopyFromReg(Chain, dl, ++ VA.getLocReg(), VA.getLocVT(), InFlag); ++ Chain = Val.getValue(1); ++ InFlag = Val.getValue(2); ++ } + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); +@@ -5444,12 +5517,12 @@ SDValue PPCTargetLowering::LowerCall_32S + + bool seenFloatArg = false; + // Walk the register/memloc assignments, inserting copies/loads. +- for (unsigned i = 0, j = 0, e = ArgLocs.size(); ++ for (unsigned i = 0, realI = 0, j = 0, e = ArgLocs.size(); + i != e; +- ++i) { ++ ++i, ++realI) { + CCValAssign &VA = ArgLocs[i]; +- SDValue Arg = OutVals[i]; +- ISD::ArgFlagsTy Flags = Outs[i].Flags; ++ SDValue Arg = OutVals[realI]; ++ ISD::ArgFlagsTy Flags = Outs[realI].Flags; + + if (Flags.isByVal()) { + // Argument is an aggregate which is passed by value, thus we need to +@@ -5498,7 +5571,18 @@ SDValue PPCTargetLowering::LowerCall_32S + if (VA.isRegLoc()) { + seenFloatArg |= VA.getLocVT().isFloatingPoint(); + // Put argument in a physical register. +- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); ++ if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) { ++ unsigned id = Subtarget.isLittleEndian() ? 0 : 1; ++ SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, ++ DAG.getIntPtrConstant(id, dl)); ++ RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0))); ++ SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, ++ DAG.getIntPtrConstant(1 - id, dl)); ++ ++ RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(), ++ SVal.getValue(0))); ++ } else ++ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + // Put argument in the parameter list area of the current stack frame. + assert(VA.isMemLoc()); +@@ -6644,11 +6728,11 @@ PPCTargetLowering::LowerReturn(SDValue C + SmallVector RetOps(1, Chain); + + // Copy the result values into the output registers. +- for (unsigned i = 0; i != RVLocs.size(); ++i) { ++ for (unsigned i = 0, realI = 0; i != RVLocs.size(); ++i, ++realI) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + +- SDValue Arg = OutVals[i]; ++ SDValue Arg = OutVals[realI]; + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); +@@ -6663,8 +6747,21 @@ PPCTargetLowering::LowerReturn(SDValue C + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + } +- +- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); ++ if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) { ++ bool isLittleEndian = Subtarget.isLittleEndian(); ++ // Legalize ret f64 -> ret 2 x i32. ++ SDValue SVal = ++ DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, ++ DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl)); ++ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag); ++ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); ++ SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, ++ DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl)); ++ Flag = Chain.getValue(1); ++ VA = RVLocs[++i]; // skip ahead to next loc ++ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag); ++ } else ++ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } +--- llvm-toolchain-snapshot-9~svn351420.orig/lib/Target/PowerPC/PPCISelLowering.h ++++ llvm-toolchain-snapshot-9~svn351420/lib/Target/PowerPC/PPCISelLowering.h +@@ -196,6 +196,15 @@ namespace llvm { + /// Direct move of 2 consective GPR to a VSX register. + BUILD_FP128, + ++ /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and ++ /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is ++ /// unsupported for this target. ++ /// Merge 2 GPRs to a single SPE register. ++ BUILD_SPE64, ++ ++ /// Extract SPE register component, second argument is high or low. ++ EXTRACT_SPE, ++ + /// Extract a subvector from signed integer vector and convert to FP. + /// It is primarily used to convert a (widened) illegal integer vector + /// type to a legal floating point vector type. +@@ -898,14 +907,6 @@ namespace llvm { + unsigned JTI, + MCContext &Ctx) const override; + +- unsigned getNumRegistersForCallingConv(LLVMContext &Context, +- CallingConv:: ID CC, +- EVT VT) const override; +- +- MVT getRegisterTypeForCallingConv(LLVMContext &Context, +- CallingConv:: ID CC, +- EVT VT) const override; +- + private: + struct ReuseLoadInfo { + SDValue Ptr; +@@ -1110,6 +1111,7 @@ namespace llvm { + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; ++ SDValue LowerEXTRACT_ELEMENT(SDValue Op, SelectionDAG &DAG) const; + + SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; +@@ -1187,6 +1189,17 @@ namespace llvm { + ISD::ArgFlagsTy &ArgFlags, + CCState &State); + ++ bool CC_PPC32_SPE_CustomSplitFP64(unsigned &ValNo, MVT &ValVT, ++ MVT &LocVT, ++ CCValAssign::LocInfo &LocInfo, ++ ISD::ArgFlagsTy &ArgFlags, ++ CCState &State); ++ bool CC_PPC32_SPE_RetF64(unsigned &ValNo, MVT &ValVT, ++ MVT &LocVT, ++ CCValAssign::LocInfo &LocInfo, ++ ISD::ArgFlagsTy &ArgFlags, ++ CCState &State); ++ + bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, +--- llvm-toolchain-snapshot-9~svn351420.orig/lib/Target/PowerPC/PPCInstrInfo.td ++++ llvm-toolchain-snapshot-9~svn351420/lib/Target/PowerPC/PPCInstrInfo.td +@@ -231,6 +231,17 @@ def PPCbuild_fp128: SDNode<"PPCISD::BUIL + SDTCisSameAs<1,2>]>, + []>; + ++def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64", ++ SDTypeProfile<1, 2, ++ [SDTCisFP<0>, SDTCisSameSizeAs<1,2>, ++ SDTCisSameAs<1,2>]>, ++ []>; ++ ++def PPCextract_spe : SDNode<"PPCISD::EXTRACT_SPE", ++ SDTypeProfile<1, 2, ++ [SDTCisInt<0>, SDTCisFP<1>, SDTCisPtrTy<2>]>, ++ []>; ++ + // These are target-independent nodes, but have target-specific formats. + def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +--- llvm-toolchain-snapshot-9~svn351420.orig/lib/Target/PowerPC/PPCInstrSPE.td ++++ llvm-toolchain-snapshot-9~svn351420/lib/Target/PowerPC/PPCInstrSPE.td +@@ -512,7 +512,7 @@ def EVLWWSPLATX : EVXForm_1<792, (out + + def EVMERGEHI : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergehi $RT, $RA, $RB", IIC_VecGeneral, []>; +-def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), ++def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins gprc:$RA, gprc:$RB), + "evmergelo $RT, $RA, $RB", IIC_VecGeneral, []>; + def EVMERGEHILO : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), + "evmergehilo $RT, $RA, $RB", IIC_VecGeneral, []>; +@@ -887,4 +887,14 @@ def : Pat<(f64 (selectcc i1:$lhs, i1:$rh + (SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>; + def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), + (SELECT_SPE (CRXOR $lhs, $rhs), $tval, $fval)>; ++ ++ ++def : Pat<(f64 (PPCbuild_spe64 i32:$rB, i32:$rA)), ++ (f64 (COPY_TO_REGCLASS (EVMERGELO $rA, $rB), SPERC))>; ++ ++def : Pat<(i32 (PPCextract_spe f64:$rA, 1)), ++ (i32 (EXTRACT_SUBREG (EVMERGEHI $rA, $rA), sub_32))>; ++def : Pat<(i32 (PPCextract_spe f64:$rA, 0)), ++ (i32 (EXTRACT_SUBREG $rA, sub_32))>; ++ + } +--- llvm-toolchain-snapshot-9~svn351420.orig/test/CodeGen/PowerPC/spe.ll ++++ llvm-toolchain-snapshot-9~svn351420/test/CodeGen/PowerPC/spe.ll +@@ -472,10 +472,8 @@ entry: + ; CHECK-LABEL: test_dselect + ; CHECK: andi. + ; CHECK: bc +-; CHECK: evldd +-; CHECK: b +-; CHECK: evldd +-; CHECK: evstdd ++; CHECK: evor ++; CHECK: evmergehi + ; CHECK: blr + } + +@@ -519,7 +517,7 @@ entry: + %1 = call i32 asm sideeffect "efdctsi $0, $1", "=d,d"(double %0) + ret i32 %1 + ; CHECK-LABEL: test_dasmconst +-; CHECK: evldd ++; CHECK: evmergelo + ; CHECK: #APP + ; CHECK: efdctsi + ; CHECK: #NO_APP diff --git a/debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff b/debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff deleted file mode 100644 index b0386dcc..00000000 --- a/debian/patches/powerpcspe/D54584-powerpcspe-double-parameter.diff +++ /dev/null @@ -1,217 +0,0 @@ -Description: PowerPC: Optimize SPE double parameter calling setup -Author: Justin Hibbits -Origin: https://reviews.llvm.org/D54583 -Last-Update: 2018-12-04 - -Index: llvm-toolchain-snapshot_9~svn352610/lib/Target/PowerPC/PPCISelLowering.cpp -=================================================================== ---- llvm-toolchain-snapshot_9~svn352610.orig/lib/Target/PowerPC/PPCISelLowering.cpp -+++ llvm-toolchain-snapshot_9~svn352610/lib/Target/PowerPC/PPCISelLowering.cpp -@@ -384,8 +384,16 @@ PPCTargetLowering::PPCTargetLowering(con - } else { - setOperationAction(ISD::BITCAST, MVT::f32, Expand); - setOperationAction(ISD::BITCAST, MVT::i32, Expand); -- setOperationAction(ISD::BITCAST, MVT::i64, Expand); - setOperationAction(ISD::BITCAST, MVT::f64, Expand); -+ if (Subtarget.hasSPE()) { -+ setOperationAction(ISD::BITCAST, MVT::i64, Custom); -+ } else { -+ setOperationAction(ISD::BITCAST, MVT::i64, Expand); -+ } -+ } -+ -+ if (Subtarget.hasSPE()) { -+ setOperationAction(ISD::EXTRACT_ELEMENT, MVT::i64, Custom); - } - - // We cannot sextinreg(i1). Expand to shifts. -@@ -1365,6 +1373,9 @@ const char *PPCTargetLowering::getTarget - case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; - case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; - case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; -+ case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64"; -+ case PPCISD::EXTRACT_SPE_LO: return "PPCISD::EXTRACT_SPE_LO"; -+ case PPCISD::EXTRACT_SPE_HI: return "PPCISD::EXTRACT_SPE_HI"; - } - return nullptr; - } -@@ -7790,6 +7801,15 @@ SDValue PPCTargetLowering::LowerBITCAST( - SDLoc dl(Op); - SDValue Op0 = Op->getOperand(0); - -+ if (Subtarget.hasSPE()) { -+ if (Op.getValueType() == MVT::f64 && -+ Op0.getOpcode() == ISD::BUILD_PAIR && -+ (Op0.getOperand(1).getValueType() == MVT::i32) && -+ (Op0.getOperand(0).getValueType() == MVT::i32)) -+ return DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Op0.getOperand(0), -+ Op0.getOperand(1)); -+ } -+ - if (!EnableQuadPrecision || - (Op.getValueType() != MVT::f128 ) || - (Op0.getOpcode() != ISD::BUILD_PAIR) || -@@ -7801,6 +7821,26 @@ SDValue PPCTargetLowering::LowerBITCAST( - Op0.getOperand(1)); - } - -+// Lower EXTRACT_ELEMENT (i64 BITCAST f64), 0/1 to evmerge* -+SDValue PPCTargetLowering::LowerEXTRACT_ELEMENT(SDValue Op, SelectionDAG &DAG) const { -+ -+ SDLoc dl(Op); -+ SDValue Op0 = Op->getOperand(0); -+ -+ if (!Subtarget.hasSPE()) -+ return SDValue(); -+ -+ if (!(Op.getValueType() == MVT::i32 && -+ Op0.getOpcode() == ISD::BITCAST)) -+ return SDValue(); -+ -+ assert(Op0.getNumOperands() > 0 && "WTF?"); -+ if (Op->getConstantOperandVal(1) == 0) -+ return DAG.getNode(PPCISD::EXTRACT_SPE_LO, dl, MVT::i32, Op0.getOperand(0)); -+ -+ return DAG.getNode(PPCISD::EXTRACT_SPE_HI, dl, MVT::i32, Op0.getOperand(0)); -+} -+ - // If this is a case we can't handle, return null and let the default - // expansion code take care of it. If we CAN select this case, and if it - // selects to a single instruction, return Op. Otherwise, if we can codegen -@@ -9584,6 +9624,8 @@ SDValue PPCTargetLowering::LowerOperatio - return LowerBSWAP(Op, DAG); - case ISD::ATOMIC_CMP_SWAP: - return LowerATOMIC_CMP_SWAP(Op, DAG); -+ case ISD::EXTRACT_ELEMENT: -+ return LowerEXTRACT_ELEMENT(Op, DAG); - } - } - -Index: llvm-toolchain-snapshot_9~svn352610/lib/Target/PowerPC/PPCISelLowering.h -=================================================================== ---- llvm-toolchain-snapshot_9~svn352610.orig/lib/Target/PowerPC/PPCISelLowering.h -+++ llvm-toolchain-snapshot_9~svn352610/lib/Target/PowerPC/PPCISelLowering.h -@@ -195,6 +195,15 @@ namespace llvm { - /// Direct move of 2 consective GPR to a VSX register. - BUILD_FP128, - -+ /// Merge 2 GPRs to a single SPE register -+ BUILD_SPE64, -+ -+ /// Extract high SPE register component -+ EXTRACT_SPE_HI, -+ -+ /// Extract low SPE register component -+ EXTRACT_SPE_LO, -+ - /// Extract a subvector from signed integer vector and convert to FP. - /// It is primarily used to convert a (widened) illegal integer vector - /// type to a legal floating point vector type. -@@ -1109,6 +1118,7 @@ namespace llvm { - SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; -+ SDValue LowerEXTRACT_ELEMENT(SDValue Op, SelectionDAG &DAG) const; - - SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; -Index: llvm-toolchain-snapshot_9~svn352610/lib/Target/PowerPC/PPCInstrInfo.td -=================================================================== ---- llvm-toolchain-snapshot_9~svn352610.orig/lib/Target/PowerPC/PPCInstrInfo.td -+++ llvm-toolchain-snapshot_9~svn352610/lib/Target/PowerPC/PPCInstrInfo.td -@@ -230,6 +230,22 @@ def PPCbuild_fp128: SDNode<"PPCISD::BUIL - SDTCisSameAs<1,2>]>, - []>; - -+def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64", -+ SDTypeProfile<1, 2, -+ [SDTCisFP<0>, SDTCisSameSizeAs<1,2>, -+ SDTCisSameAs<1,2>]>, -+ []>; -+ -+def PPCextract_spe_hi : SDNode<"PPCISD::EXTRACT_SPE_HI", -+ SDTypeProfile<1, 1, -+ [SDTCisInt<0>, SDTCisFP<1>]>, -+ []>; -+ -+def PPCextract_spe_lo : SDNode<"PPCISD::EXTRACT_SPE_LO", -+ SDTypeProfile<1, 1, -+ [SDTCisInt<0>, SDTCisFP<1>]>, -+ []>; -+ - // These are target-independent nodes, but have target-specific formats. - def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; -Index: llvm-toolchain-snapshot_9~svn352610/lib/Target/PowerPC/PPCInstrSPE.td -=================================================================== ---- llvm-toolchain-snapshot_9~svn352610.orig/lib/Target/PowerPC/PPCInstrSPE.td -+++ llvm-toolchain-snapshot_9~svn352610/lib/Target/PowerPC/PPCInstrSPE.td -@@ -511,7 +511,7 @@ def EVLWWSPLATX : EVXForm_1<792, (out - - def EVMERGEHI : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), - "evmergehi $RT, $RA, $RB", IIC_VecGeneral, []>; --def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), -+def EVMERGELO : EVXForm_1<557, (outs sperc:$RT), (ins gprc:$RA, gprc:$RB), - "evmergelo $RT, $RA, $RB", IIC_VecGeneral, []>; - def EVMERGEHILO : EVXForm_1<558, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB), - "evmergehilo $RT, $RA, $RB", IIC_VecGeneral, []>; -@@ -886,4 +886,15 @@ def : Pat<(f64 (selectcc i1:$lhs, i1:$rh - (SELECT_SPE (CRANDC $lhs, $rhs), $tval, $fval)>; - def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)), - (SELECT_SPE (CRXOR $lhs, $rhs), $tval, $fval)>; -+ -+ -+def : Pat<(f64 (PPCbuild_spe64 i32:$rB, i32:$rA)), -+ (f64 (COPY_TO_REGCLASS (EVMERGELO $rA, $rB), SPERC))>; -+ -+def : Pat<(i32 (PPCextract_spe_hi f64:$rA)), -+ (i32 (EXTRACT_SUBREG (EVMERGEHI $rA, $rA), sub_32))>; -+ -+def : Pat<(i32 (PPCextract_spe_lo f64:$rA)), -+ (i32 (EXTRACT_SUBREG $rA, sub_32))>; -+ - } -Index: llvm-toolchain-snapshot_9~svn352610/test/CodeGen/PowerPC/spe.ll -=================================================================== ---- llvm-toolchain-snapshot_9~svn352610.orig/test/CodeGen/PowerPC/spe.ll -+++ llvm-toolchain-snapshot_9~svn352610/test/CodeGen/PowerPC/spe.ll -@@ -472,10 +472,8 @@ entry: - ; CHECK-LABEL: test_dselect - ; CHECK: andi. - ; CHECK: bc --; CHECK: evldd --; CHECK: b --; CHECK: evldd --; CHECK: evstdd -+; CHECK: evor -+; CHECK: evmergehi - ; CHECK: blr - } - -@@ -519,7 +517,7 @@ entry: - %1 = call i32 asm sideeffect "efdctsi $0, $1", "=d,d"(double %0) - ret i32 %1 - ; CHECK-LABEL: test_dasmconst --; CHECK: evldd -+; CHECK: evmergelo - ; CHECK: #APP - ; CHECK: efdctsi - ; CHECK: #NO_APP -@@ -541,7 +539,7 @@ entry: - %a4.addr = alloca i32*, align 4 - %a5.addr = alloca i32*, align 4 - %ptr = alloca i32*, align 4 -- %v1 = alloca [8 x i32], align 4 -+ %v1 = alloca [9 x i32], align 4 - %v2 = alloca [7 x i32], align 4 - %v3 = alloca [5 x i32], align 4 - store i32 %a1, i32* %a1.addr, align 4 -@@ -554,7 +552,7 @@ entry: - call void asm sideeffect "","~{s0},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind - %1 = fadd double %0, 3.14159 - %2 = load i32*, i32** %ptr, align 4 -- %3 = bitcast [8 x i32]* %v1 to i8* -+ %3 = bitcast [9 x i32]* %v1 to i8* - call void @llvm.memset.p0i8.i32(i8* align 4 %3, i8 0, i32 24, i1 true) - %4 = load i32*, i32** %a5.addr, align 4 - store i32 0, i32* %4, align 4 diff --git a/debian/patches/series b/debian/patches/series index 112646e1..6894b525 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -109,7 +109,7 @@ hurd/impl-path-hurd.diff # powerpcspe powerpcspe/D49754-powerpcspe-clang.diff powerpcspe/D54409-powerpcspe-register-spilling.diff -powerpcspe/D54584-powerpcspe-double-parameter.diff +powerpcspe/D54583-powerpcspe-double-parameter.diff # kfreebsd kfreebsd/clang_lib_Basic_Targets.diff