llvm-toolchain/debian/patches/revert-D103865-s390x-crash.patch
2021-07-09 09:10:27 +02:00

535 lines
23 KiB
Diff

commit 37a92f3b03bf165245a9d0dc4830dcc6fed7c253
Author: Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Date: Fri Jun 4 19:32:03 2021 -0500
[SystemZ] Generate XC loop for memset 0 of variable length.
Benchmarking has shown that it is worthwhile to implement a variable length
memset of 0 with XC (exclusive or) like gcc does, instead of using a libcall.
This requires the use of the EXecute Relative Long (EXRL) instruction which
can now be done in a framework that can also be used with other target
instructions (not just XC).
Review: Ulrich Weigand
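Differential Revision: https://reviews.llvm.org/D103865
Note: this patch REVERTS the commit described above. The hunks below undo its changes, including deleting the memset-05.ll test; the patch file name indicates the revert is due to a crash on s390x.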
Index: llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
===================================================================
--- llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c.orig/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -541,30 +541,6 @@ void SystemZAsmPrinter::emitInstruction(
LowerPATCHPOINT(*MI, Lower);
return;
- case SystemZ::EXRL_Pseudo: {
- unsigned TargetInsOpc = MI->getOperand(0).getImm();
- Register LenMinus1Reg = MI->getOperand(1).getReg();
- Register DestReg = MI->getOperand(2).getReg();
- int64_t DestDisp = MI->getOperand(3).getImm();
- Register SrcReg = MI->getOperand(4).getReg();
- int64_t SrcDisp = MI->getOperand(5).getImm();
-
- MCSymbol *DotSym = nullptr;
- MCInst ET = MCInstBuilder(TargetInsOpc).addReg(DestReg)
- .addImm(DestDisp).addImm(1).addReg(SrcReg).addImm(SrcDisp);
- MCInstSTIPair ET_STI(ET, &MF->getSubtarget());
- EXRLT2SymMap::iterator I = EXRLTargets2Sym.find(ET_STI);
- if (I != EXRLTargets2Sym.end())
- DotSym = I->second;
- else
- EXRLTargets2Sym[ET_STI] = DotSym = OutContext.createTempSymbol();
- const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext);
- EmitToStreamer(
- *OutStreamer,
- MCInstBuilder(SystemZ::EXRL).addReg(LenMinus1Reg).addExpr(Dot));
- return;
- }
-
default:
Lower.lower(MI, LoweredMI);
break;
@@ -722,19 +698,6 @@ void SystemZAsmPrinter::LowerPATCHPOINT(
getSubtargetInfo());
}
-void SystemZAsmPrinter::emitEXRLTargetInstructions() {
- if (EXRLTargets2Sym.empty())
- return;
- // Switch to the .text section.
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
- for (auto &I : EXRLTargets2Sym) {
- OutStreamer->emitLabel(I.second);
- const MCInstSTIPair &MCI_STI = I.first;
- OutStreamer->emitInstruction(MCI_STI.first, *MCI_STI.second);
- }
- EXRLTargets2Sym.clear();
-}
-
// Convert a SystemZ-specific constant pool modifier into the associated
// MCSymbolRefExpr variant kind.
static MCSymbolRefExpr::VariantKind
@@ -790,7 +753,6 @@ bool SystemZAsmPrinter::PrintAsmMemoryOp
}
void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
- emitEXRLTargetInstructions();
emitStackMaps(SM);
}
Index: llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
===================================================================
--- llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c.orig/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -9,11 +9,10 @@
#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZASMPRINTER_H
-#include "SystemZMCInstLower.h"
#include "SystemZTargetMachine.h"
+#include "SystemZMCInstLower.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/StackMaps.h"
-#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -27,33 +26,6 @@ class LLVM_LIBRARY_VISIBILITY SystemZAsm
private:
StackMaps SM;
- typedef std::pair<MCInst, const MCSubtargetInfo *> MCInstSTIPair;
- struct CmpMCInst {
- bool operator()(const MCInstSTIPair &MCI_STI_A,
- const MCInstSTIPair &MCI_STI_B) const {
- if (MCI_STI_A.second != MCI_STI_B.second)
- return uintptr_t(MCI_STI_A.second) < uintptr_t(MCI_STI_B.second);
- const MCInst &A = MCI_STI_A.first;
- const MCInst &B = MCI_STI_B.first;
- assert(A.getNumOperands() == B.getNumOperands() &&
- A.getNumOperands() == 5 && A.getOperand(2).getImm() == 1 &&
- B.getOperand(2).getImm() == 1 && "Unexpected EXRL target MCInst");
- if (A.getOpcode() != B.getOpcode())
- return A.getOpcode() < B.getOpcode();
- if (A.getOperand(0).getReg() != B.getOperand(0).getReg())
- return A.getOperand(0).getReg() < B.getOperand(0).getReg();
- if (A.getOperand(1).getImm() != B.getOperand(1).getImm())
- return A.getOperand(1).getImm() < B.getOperand(1).getImm();
- if (A.getOperand(3).getReg() != B.getOperand(3).getReg())
- return A.getOperand(3).getReg() < B.getOperand(3).getReg();
- if (A.getOperand(4).getImm() != B.getOperand(4).getImm())
- return A.getOperand(4).getImm() < B.getOperand(4).getImm();
- return false;
- }
- };
- typedef std::map<MCInstSTIPair, MCSymbol *, CmpMCInst> EXRLT2SymMap;
- EXRLT2SymMap EXRLTargets2Sym;
-
public:
SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
@@ -77,7 +49,6 @@ private:
void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL);
void LowerSTACKMAP(const MachineInstr &MI);
void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower);
- void emitEXRLTargetInstructions();
};
} // end namespace llvm
Index: llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c.orig/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -7795,89 +7795,43 @@ MachineBasicBlock *SystemZTargetLowering
uint64_t DestDisp = MI.getOperand(1).getImm();
MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
uint64_t SrcDisp = MI.getOperand(3).getImm();
- MachineOperand &LengthMO = MI.getOperand(4);
- uint64_t ImmLength = LengthMO.isImm() ? LengthMO.getImm() : 0;
- Register LenMinus1Reg =
- LengthMO.isReg() ? LengthMO.getReg() : SystemZ::NoRegister;
+ uint64_t Length = MI.getOperand(4).getImm();
// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
- MachineBasicBlock *EndMBB = (ImmLength > 256 && Opcode == SystemZ::CLC
- ? SystemZ::splitBlockAfter(MI, MBB)
- : nullptr);
+ MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
+ SystemZ::splitBlockAfter(MI, MBB) : nullptr);
// Check for the loop form, in which operand 5 is the trip count.
if (MI.getNumExplicitOperands() > 5) {
- Register StartCountReg = MI.getOperand(5).getReg();
-
- MachineBasicBlock *StartMBB = nullptr;
- MachineBasicBlock *LoopMBB = nullptr;
- MachineBasicBlock *NextMBB = nullptr;
- MachineBasicBlock *DoneMBB = nullptr;
- MachineBasicBlock *AllDoneMBB = nullptr;
-
bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
- Register StartSrcReg = forceReg(MI, SrcBase, TII);
- Register StartDestReg =
- (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
+
+ Register StartCountReg = MI.getOperand(5).getReg();
+ Register StartSrcReg = forceReg(MI, SrcBase, TII);
+ Register StartDestReg = (HaveSingleBase ? StartSrcReg :
+ forceReg(MI, DestBase, TII));
const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
Register ThisSrcReg = MRI.createVirtualRegister(RC);
- Register ThisDestReg =
- (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
+ Register ThisDestReg = (HaveSingleBase ? ThisSrcReg :
+ MRI.createVirtualRegister(RC));
Register NextSrcReg = MRI.createVirtualRegister(RC);
- Register NextDestReg =
- (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
+ Register NextDestReg = (HaveSingleBase ? NextSrcReg :
+ MRI.createVirtualRegister(RC));
+
RC = &SystemZ::GR64BitRegClass;
Register ThisCountReg = MRI.createVirtualRegister(RC);
Register NextCountReg = MRI.createVirtualRegister(RC);
- if (LengthMO.isReg()) {
- AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
- StartMBB = SystemZ::emitBlockAfter(MBB);
- LoopMBB = SystemZ::emitBlockAfter(StartMBB);
- NextMBB = LoopMBB;
- DoneMBB = SystemZ::emitBlockAfter(LoopMBB);
-
- // MBB:
- // # Jump to AllDoneMBB if LenMinus1Reg is -1, or fall thru to StartMBB.
- BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
- .addReg(LenMinus1Reg).addImm(-1);
- BuildMI(MBB, DL, TII->get(SystemZ::BRC))
- .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
- .addMBB(AllDoneMBB);
- MBB->addSuccessor(AllDoneMBB);
- MBB->addSuccessor(StartMBB);
-
- // StartMBB:
- // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
- MBB = StartMBB;
- BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
- .addReg(StartCountReg).addImm(0);
- BuildMI(MBB, DL, TII->get(SystemZ::BRC))
- .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
- .addMBB(DoneMBB);
- MBB->addSuccessor(DoneMBB);
- MBB->addSuccessor(LoopMBB);
- }
- else {
- StartMBB = MBB;
- DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
- LoopMBB = SystemZ::emitBlockAfter(StartMBB);
- NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
-
- // StartMBB:
- // # fall through to LoopMBB
- MBB->addSuccessor(LoopMBB);
-
- DestBase = MachineOperand::CreateReg(NextDestReg, false);
- SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
- ImmLength &= 255;
- if (EndMBB && !ImmLength)
- // If the loop handled the whole CLC range, DoneMBB will be empty with
- // CC live-through into EndMBB, so add it as live-in.
- DoneMBB->addLiveIn(SystemZ::CC);
- }
+ MachineBasicBlock *StartMBB = MBB;
+ MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
+ MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
+ MachineBasicBlock *NextMBB =
+ (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
+
+ // StartMBB:
+ // # fall through to LoopMBB
+ MBB->addSuccessor(LoopMBB);
// LoopMBB:
// %ThisDestReg = phi [ %StartDestReg, StartMBB ],
@@ -7892,6 +7846,7 @@ MachineBasicBlock *SystemZTargetLowering
//
// The prefetch is used only for MVC. The JLH is used only for CLC.
MBB = LoopMBB;
+
BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
.addReg(StartDestReg).addMBB(StartMBB)
.addReg(NextDestReg).addMBB(NextMBB);
@@ -7927,6 +7882,7 @@ MachineBasicBlock *SystemZTargetLowering
//
// The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
MBB = NextMBB;
+
BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
.addReg(ThisDestReg).addImm(256).addReg(0);
if (!HaveSingleBase)
@@ -7942,39 +7898,18 @@ MachineBasicBlock *SystemZTargetLowering
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
+ DestBase = MachineOperand::CreateReg(NextDestReg, false);
+ SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
+ Length &= 255;
+ if (EndMBB && !Length)
+ // If the loop handled the whole CLC range, DoneMBB will be empty with
+ // CC live-through into EndMBB, so add it as live-in.
+ DoneMBB->addLiveIn(SystemZ::CC);
MBB = DoneMBB;
- if (LengthMO.isReg()) {
- // DoneMBB:
- // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
- // # Use EXecute Relative Long for the remainder of the bytes. The target
- // instruction of the EXRL will have a length field of 1 since 0 is an
- // illegal value. The number of bytes processed becomes (%LenMinus1Reg &
- // 0xff) + 1.
- // # Fall through to AllDoneMBB.
- Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
- Register RemDestReg = HaveSingleBase ? RemSrcReg
- : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
- BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
- .addReg(StartDestReg).addMBB(StartMBB)
- .addReg(NextDestReg).addMBB(LoopMBB);
- if (!HaveSingleBase)
- BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
- .addReg(StartSrcReg).addMBB(StartMBB)
- .addReg(NextSrcReg).addMBB(LoopMBB);
- MRI.constrainRegClass(LenMinus1Reg, &SystemZ::ADDR64BitRegClass);
- BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
- .addImm(Opcode)
- .addReg(LenMinus1Reg)
- .addReg(RemDestReg).addImm(DestDisp)
- .addReg(RemSrcReg).addImm(SrcDisp);
- MBB->addSuccessor(AllDoneMBB);
- MBB = AllDoneMBB;
- }
}
-
// Handle any remaining bytes with straight-line code.
- while (ImmLength > 0) {
- uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
+ while (Length > 0) {
+ uint64_t ThisLength = std::min(Length, uint64_t(256));
// The previous iteration might have created out-of-range displacements.
// Apply them using LAY if so.
if (!isUInt<12>(DestDisp)) {
@@ -8004,10 +7939,10 @@ MachineBasicBlock *SystemZTargetLowering
.setMemRefs(MI.memoperands());
DestDisp += ThisLength;
SrcDisp += ThisLength;
- ImmLength -= ThisLength;
+ Length -= ThisLength;
// If there's another CLC to go, branch to the end if a difference
// was found.
- if (EndMBB && ImmLength > 0) {
+ if (EndMBB && Length > 0) {
MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
@@ -8498,7 +8433,6 @@ MachineBasicBlock *SystemZTargetLowering
return emitMemMemWrapper(MI, MBB, SystemZ::OC);
case SystemZ::XCSequence:
case SystemZ::XCLoop:
- case SystemZ::XCLoopVarLen:
return emitMemMemWrapper(MI, MBB, SystemZ::XC);
case SystemZ::CLCSequence:
case SystemZ::CLCLoop:
Index: llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
===================================================================
--- llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c.orig/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -5253,7 +5253,6 @@ multiclass CondUnaryRSYPseudoAndMemFold<
// The Sequence form uses a straight-line sequence of instructions and
// the Loop form uses a loop of length-256 instructions followed by
// another instruction to handle the excess.
-// The LoopVarLen form is for a loop with a non-constant length parameter.
multiclass MemorySS<string mnemonic, bits<8> opcode,
SDPatternOperator sequence, SDPatternOperator loop> {
def "" : SideEffectBinarySSa<mnemonic, opcode>;
@@ -5266,10 +5265,6 @@ multiclass MemorySS<string mnemonic, bit
imm64:$length, GR64:$count256),
[(loop bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length, GR64:$count256)]>;
- def LoopVarLen : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- GR64:$length, GR64:$count256),
- [(loop bdaddr12only:$dest, bdaddr12only:$src,
- GR64:$length, GR64:$count256)]>;
}
}
Index: llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
===================================================================
--- llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c.orig/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -2165,12 +2165,8 @@ let Predicates = [FeatureDeflateConversi
// Execute.
let hasSideEffects = 1 in {
- def EX : SideEffectBinaryRX<"ex", 0x44, ADDR64>;
- def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, ADDR64>;
- let hasNoSchedulingInfo = 1 in
- def EXRL_Pseudo : Pseudo<(outs), (ins i64imm:$TargetOpc, ADDR64:$lenMinus1,
- bdaddr12only:$bdl1, bdaddr12only:$bd2),
- []>;
+ def EX : SideEffectBinaryRX<"ex", 0x44, GR64>;
+ def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, GR64>;
}
//===----------------------------------------------------------------------===//
Index: llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
===================================================================
--- llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c.orig/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -81,12 +81,11 @@ SDValue SystemZSelectionDAGInfo::EmitTar
if (IsVolatile)
return SDValue();
- auto *CByte = dyn_cast<ConstantSDNode>(Byte);
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
uint64_t Bytes = CSize->getZExtValue();
if (Bytes == 0)
return SDValue();
- if (CByte) {
+ if (auto *CByte = dyn_cast<ConstantSDNode>(Byte)) {
// Handle cases that can be done using at most two of
// MVI, MVHI, MVHHI and MVGHI. The latter two can only be
// used if ByteVal is all zeros or all ones; in other casees,
@@ -126,6 +125,7 @@ SDValue SystemZSelectionDAGInfo::EmitTar
assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already");
// Handle the special case of a memset of 0, which can use XC.
+ auto *CByte = dyn_cast<ConstantSDNode>(Byte);
if (CByte && CByte->getZExtValue() == 0)
return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
Chain, Dst, Dst, Bytes);
@@ -138,18 +138,6 @@ SDValue SystemZSelectionDAGInfo::EmitTar
return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
Chain, DstPlus1, Dst, Bytes - 1);
}
-
- // Variable length
- if (CByte && CByte->getZExtValue() == 0) {
- // Handle the special case of a variable length memset of 0 with XC.
- SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
- DAG.getZExtOrTrunc(Size, DL, MVT::i64),
- DAG.getConstant(-1, DL, MVT::i64));
- SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1,
- DAG.getConstant(8, DL, MVT::i64));
- return DAG.getNode(SystemZISD::XC_LOOP, DL, MVT::Other, Chain, Dst, Dst,
- LenMinus1, TripC);
- }
return SDValue();
}
Index: llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c/llvm/test/CodeGen/SystemZ/memset-05.ll
===================================================================
--- llvm-toolchain-snapshot_13~++20210709090531+88326bbce38c.orig/llvm/test/CodeGen/SystemZ/memset-05.ll
+++ /dev/null
@@ -1,101 +0,0 @@
-; Test memset 0 with variable length
-;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-
-define void @fun0(i8* %Addr, i64 %Len) {
-; CHECK-LABEL: fun0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: aghi %r3, -1
-; CHECK-NEXT: cgibe %r3, -1, 0(%r14)
-; CHECK-NEXT: .LBB0_1:
-; CHECK-NEXT: srlg %r0, %r3, 8
-; CHECK-NEXT: cgije %r0, 0, .LBB0_3
-; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
-; CHECK-NEXT: la %r2, 256(%r2)
-; CHECK-NEXT: brctg %r0, .LBB0_2
-; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: exrl %r3, .Ltmp0
-; CHECK-NEXT: br %r14
- tail call void @llvm.memset.p0i8.i64(i8* %Addr, i8 0, i64 %Len, i1 false)
- ret void
-}
-
-define void @fun1(i8* %Addr, i32 %Len) {
-; CHECK-LABEL: fun1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: llgfr %r1, %r3
-; CHECK-NEXT: aghi %r1, -1
-; CHECK-NEXT: cgibe %r1, -1, 0(%r14)
-; CHECK-NEXT: .LBB1_1:
-; CHECK-NEXT: srlg %r0, %r1, 8
-; CHECK-NEXT: cgije %r0, 0, .LBB1_3
-; CHECK-NEXT: .LBB1_2: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
-; CHECK-NEXT: la %r2, 256(%r2)
-; CHECK-NEXT: brctg %r0, .LBB1_2
-; CHECK-NEXT: .LBB1_3:
-; CHECK-NEXT: exrl %r1, .Ltmp0
-; CHECK-NEXT: br %r14
- tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
- ret void
-}
-
-; Test that identical target instructions get reused.
-define void @fun2(i8* %Addr, i32 %Len) {
-; CHECK-LABEL: fun2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: llgfr %r1, %r3
-; CHECK-NEXT: aghi %r1, -1
-; CHECK-NEXT: srlg %r0, %r1, 8
-; CHECK-NEXT: cgije %r1, -1, .LBB2_5
-; CHECK-NEXT: # %bb.1:
-; CHECK-NEXT: lgr %r3, %r2
-; CHECK-NEXT: cgije %r0, 0, .LBB2_4
-; CHECK-NEXT: # %bb.2:
-; CHECK-NEXT: lgr %r3, %r2
-; CHECK-NEXT: lgr %r4, %r0
-; CHECK-NEXT: .LBB2_3: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: xc 0(256,%r3), 0(%r3)
-; CHECK-NEXT: la %r3, 256(%r3)
-; CHECK-NEXT: brctg %r4, .LBB2_3
-; CHECK-NEXT: .LBB2_4:
-; CHECK-NEXT: exrl %r1, .Ltmp1
-; CHECK-NEXT: .LBB2_5:
-; CHECK-NEXT: cgije %r1, -1, .LBB2_10
-; CHECK-NEXT: # %bb.6:
-; CHECK-NEXT: lgr %r3, %r2
-; CHECK-NEXT: cgije %r0, 0, .LBB2_9
-; CHECK-NEXT: # %bb.7:
-; CHECK-NEXT: lgr %r3, %r2
-; CHECK-NEXT: lgr %r4, %r0
-; CHECK-NEXT: .LBB2_8: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: xc 0(256,%r3), 0(%r3)
-; CHECK-NEXT: la %r3, 256(%r3)
-; CHECK-NEXT: brctg %r4, .LBB2_8
-; CHECK-NEXT: .LBB2_9:
-; CHECK-NEXT: exrl %r1, .Ltmp1
-; CHECK-NEXT: .LBB2_10:
-; CHECK-NEXT: cgibe %r1, -1, 0(%r14)
-; CHECK-NEXT: .LBB2_11:
-; CHECK-NEXT: cgije %r0, 0, .LBB2_13
-; CHECK-NEXT: .LBB2_12: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: xc 0(256,%r2), 0(%r2)
-; CHECK-NEXT: la %r2, 256(%r2)
-; CHECK-NEXT: brctg %r0, .LBB2_12
-; CHECK-NEXT: .LBB2_13:
-; CHECK-NEXT: exrl %r1, .Ltmp0
-; CHECK-NEXT: br %r14
- tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
- tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
- tail call void @llvm.memset.p0i8.i32(i8* %Addr, i8 0, i32 %Len, i1 false)
- ret void
-}
-
-; CHECK: .Ltmp0:
-; CHECK-NEXT: xc 0(1,%r2), 0(%r2)
-; CHECK-NEXT: .Ltmp1:
-; CHECK-NEXT: xc 0(1,%r3), 0(%r3)
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
-declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)