From bb9b8d9801800fad17c36d377a5c22ff34057073 Mon Sep 17 00:00:00 2001 From: chenli Date: Thu, 7 Dec 2023 10:17:29 +0800 Subject: [PATCH] [LoongArch] Release 15.0.7-1.0.4 --- ...port-compiler-rt-and-fix-some-issues.patch | 20199 ++++++++++++++++ llvm.spec | 7 +- 2 files changed, 20205 insertions(+), 1 deletion(-) create mode 100644 0003-LoongArch-support-compiler-rt-and-fix-some-issues.patch diff --git a/0003-LoongArch-support-compiler-rt-and-fix-some-issues.patch b/0003-LoongArch-support-compiler-rt-and-fix-some-issues.patch new file mode 100644 index 0000000..5da32f6 --- /dev/null +++ b/0003-LoongArch-support-compiler-rt-and-fix-some-issues.patch @@ -0,0 +1,20199 @@ +diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h +index 6d4f6222a..226601b20 100644 +--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h ++++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h +@@ -5099,7 +5099,7 @@ template <> + struct FloatData + { + #if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \ +- defined(__wasm__) || defined(__riscv) ++ defined(__wasm__) || defined(__riscv) || defined(__loongarch__) + static const size_t mangled_size = 32; + #elif defined(__arm__) || defined(__mips__) || defined(__hexagon__) + static const size_t mangled_size = 16; +diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc +index 5e008069d..6735b74c5 100644 +--- a/llvm/lib/Support/Unix/Memory.inc ++++ b/llvm/lib/Support/Unix/Memory.inc +@@ -241,8 +241,9 @@ void Memory::InvalidateInstructionCache(const void *Addr, + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("icbi 0, %0" : : "r"(Line)); + asm volatile("isync"); +-# elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \ +- defined(__GNUC__) ++#elif (defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || \ ++ defined(__mips__)) && \ ++ defined(__GNUC__) + // FIXME: Can we safely always call this for __GNUC__ everywhere? 
+ const char *Start = static_cast(Addr); + const char *End = Start + Len; +diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +index 2d35dfd0c..61d455518 100644 +--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp ++++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +@@ -239,6 +239,7 @@ public: + MCAsmParserExtension::Initialize(parser); + + parser.addAliasForDirective(".asciiz", ".asciz"); ++ parser.addAliasForDirective(".half", ".2byte"); + parser.addAliasForDirective(".hword", ".2byte"); + parser.addAliasForDirective(".word", ".4byte"); + parser.addAliasForDirective(".dword", ".8byte"); +diff --git a/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td +index e85fce2fd..aa297c837 100644 +--- a/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArch32InstrInfo.td +@@ -363,6 +363,10 @@ def : LoongArchPat<(brcond RC:$cond, bb:$dst), + + defm : BrcondPats, GPR_64; + ++defm atomic_cmp_swap_8 : ternary_atomic_op_failure_ord; ++defm atomic_cmp_swap_16 : ternary_atomic_op_failure_ord; ++defm atomic_cmp_swap_32 : ternary_atomic_op_failure_ord; ++ + let usesCustomInserter = 1 in { + def ATOMIC_LOAD_ADD_I8 : Atomic2Ops; + def ATOMIC_LOAD_ADD_I16 : Atomic2Ops; +@@ -387,9 +391,9 @@ let usesCustomInserter = 1 in { + def ATOMIC_SWAP_I16 : Atomic2Ops; + def ATOMIC_SWAP_I32 : Atomic2Ops; + +- def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap; +- def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap; +- def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap; ++ defm I8_ : AtomicCmpSwapInstrs<"8", GPR32>; ++ defm I16_ : AtomicCmpSwapInstrs<"16", GPR32>; ++ defm I32_ : AtomicCmpSwapInstrs<"32", GPR32>; + + def ATOMIC_LOAD_MAX_I8 : Atomic2Ops; + def ATOMIC_LOAD_MAX_I16 : Atomic2Ops; +@@ -465,8 +469,29 @@ def : LoongArchPat<(atomic_store_32 addrimm14lsl2:$a, GPR32:$v), + def : LoongArchPat<(atomic_store_32 addr:$a, GPR32:$v), + (ST_W32 GPR32:$v, addr:$a)>; + +-def : LoongArchPat<(LoongArchDBAR (i32 immz)), +- (DBAR 0)>; ++// DBAR hint encoding for LA664 and later micro-architectures, paraphrased from ++// the Linux patch revealing it [1]: ++// ++// - Bit 4: kind of constraint (0: completion, 1: ordering) ++// - Bit 3: barrier for previous read (0: true, 1: false) ++// - Bit 2: barrier for previous write (0: true, 1: false) ++// - Bit 1: barrier for succeeding read (0: true, 1: false) ++// - Bit 0: barrier for succeeding write (0: true, 1: false) ++// ++// Hint 0x700: barrier for "read after read" from the same address, which is ++// e.g. needed by LL-SC loops on older models. (DBAR 0x700 behaves the same as ++// nop if such reordering is disabled on supporting newer models.) ++// ++// [1]: https://lore.kernel.org/loongarch/20230516124536.535343-1-chenhuacai@loongson.cn/ ++// ++// Implementations without support for the finer-granularity hints simply treat ++// all as the full barrier (DBAR 0), so we can unconditionally start emiting the ++// more precise hints right away. 
++ ++def : Pat<(atomic_fence 4, timm), (DBAR 0b10100)>; // acquire ++def : Pat<(atomic_fence 5, timm), (DBAR 0b10010)>; // release ++def : Pat<(atomic_fence 6, timm), (DBAR 0b10000)>; // acqrel ++def : Pat<(atomic_fence 7, timm), (DBAR 0b10000)>; // seqcst + + def : LoongArchPat<(i32 (extloadi1 addr:$src)), (LD_BU32 addr:$src)>; + def : LoongArchPat<(i32 (extloadi8 addr:$src)), (LD_BU32 addr:$src)>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp +index 0f33e1db6..2e13e5b83 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudo.cpp +@@ -93,18 +93,6 @@ namespace { + char LoongArchExpandPseudo::ID = 0; + } + +-static bool hasDbar(MachineBasicBlock *MBB) { +- +- for (MachineBasicBlock::iterator MBBb = MBB->begin(), MBBe = MBB->end(); +- MBBb != MBBe; ++MBBb) { +- if (MBBb->getOpcode() == LoongArch::DBAR) +- return true; +- if (MBBb->mayLoad() || MBBb->mayStore()) +- break; +- } +- return false; +-} +- + bool LoongArchExpandPseudo::expandAtomicCmpSwapSubword( + MachineBasicBlock &BB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &NMBBI) { +@@ -129,8 +117,8 @@ bool LoongArchExpandPseudo::expandAtomicCmpSwapSubword( + unsigned Mask2 = I->getOperand(4).getReg(); + unsigned ShiftNewVal = I->getOperand(5).getReg(); + unsigned ShiftAmnt = I->getOperand(6).getReg(); +- unsigned Scratch = I->getOperand(7).getReg(); +- unsigned Scratch2 = I->getOperand(8).getReg(); ++ unsigned Scratch = I->getOperand(8).getReg(); ++ unsigned Scratch2 = I->getOperand(9).getReg(); + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); +@@ -201,10 +189,21 @@ bool LoongArchExpandPseudo::expandAtomicCmpSwapSubword( + + BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest); + +- if (!hasDbar(sinkMBB)) { +- MachineBasicBlock::iterator Pos = sinkMBB->begin(); +- BuildMI(*sinkMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ AtomicOrdering Ordering = ++ static_cast(I->getOperand(7).getImm()); ++ int hint; ++ switch (Ordering) { ++ case AtomicOrdering::Acquire: ++ case AtomicOrdering::AcquireRelease: ++ case AtomicOrdering::SequentiallyConsistent: ++ // acquire ++ hint = 0b10100; ++ break; ++ default: ++ hint = 0x700; + } ++ MachineBasicBlock::iterator Pos = sinkMBB->begin(); ++ BuildMI(*sinkMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(hint); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); +@@ -250,7 +249,7 @@ bool LoongArchExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, + unsigned Ptr = I->getOperand(1).getReg(); + unsigned OldVal = I->getOperand(2).getReg(); + unsigned NewVal = I->getOperand(3).getReg(); +- unsigned Scratch = I->getOperand(4).getReg(); ++ unsigned Scratch = I->getOperand(5).getReg(); + + // insert new blocks after the current block + const BasicBlock *LLVM_BB = BB.getBasicBlock(); +@@ -295,10 +294,21 @@ bool LoongArchExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, + BuildMI(loop2MBB, DL, TII->get(BEQ)) + .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB); + +- if (!hasDbar(exitMBB)) { +- MachineBasicBlock::iterator Pos = exitMBB->begin(); +- BuildMI(*exitMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); ++ AtomicOrdering Ordering = ++ static_cast(I->getOperand(4).getImm()); ++ int hint; ++ switch (Ordering) { ++ case AtomicOrdering::Acquire: ++ case AtomicOrdering::AcquireRelease: ++ case AtomicOrdering::SequentiallyConsistent: ++ // 
TODO: acquire ++ hint = 0; ++ break; ++ default: ++ hint = 0x700; + } ++ MachineBasicBlock::iterator Pos = exitMBB->begin(); ++ BuildMI(*exitMBB, Pos, DL, TII->get(LoongArch::DBAR)).addImm(hint); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); +@@ -1938,7 +1948,8 @@ bool LoongArchExpandPseudo::expandAtomicBinOp(MachineBasicBlock &BB, + + BB.addSuccessor(loopMBB, BranchProbability::getOne()); + loopMBB->addSuccessor(exitMBB); +- loopMBB->addSuccessor(loopMBB); ++ if (!Opcode && IsNand) ++ loopMBB->addSuccessor(loopMBB); + loopMBB->normalizeSuccProbs(); + + assert((OldVal != Ptr) && "Clobbered the wrong ptr reg!"); +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +index 4c5f3ffd8..4dcac7a09 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +@@ -157,7 +157,6 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { + case LoongArchISD::CMovFP_T: return "LoongArchISD::CMovFP_T"; + case LoongArchISD::CMovFP_F: return "LoongArchISD::CMovFP_F"; + case LoongArchISD::TruncIntFP: return "LoongArchISD::TruncIntFP"; +- case LoongArchISD::DBAR: return "LoongArchISD::DBAR"; + case LoongArchISD::BSTRPICK: return "LoongArchISD::BSTRPICK"; + case LoongArchISD::BSTRINS: return "LoongArchISD::BSTRINS"; + case LoongArchISD::VALL_ZERO: +@@ -3619,13 +3618,28 @@ LoongArchTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + case LoongArch::ATOMIC_LOAD_UMIN_I64: + return emitAtomicBinary(MI, BB); + +- case LoongArch::ATOMIC_CMP_SWAP_I8: ++ case LoongArch::I8_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I8_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I8_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I8_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I8_ATOMIC_CMP_SWAP_SEQ_CST: + return emitAtomicCmpSwapPartword(MI, BB, 1); +- case LoongArch::ATOMIC_CMP_SWAP_I16: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_SEQ_CST: + return emitAtomicCmpSwapPartword(MI, BB, 2); +- case LoongArch::ATOMIC_CMP_SWAP_I32: +- return emitAtomicCmpSwap(MI, BB); +- case LoongArch::ATOMIC_CMP_SWAP_I64: ++ case LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_SEQ_CST: + return emitAtomicCmpSwap(MI, BB); + + case LoongArch::PseudoSELECT_I: +@@ -4024,11 +4038,6 @@ LoongArchTargetLowering::emitAtomicBinary(MachineInstr &MI, + .addReg(Scratch, RegState::Define | RegState::EarlyClobber | + RegState::Implicit | RegState::Dead); + +- if(MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I32 +- || MI.getOpcode() == LoongArch::ATOMIC_LOAD_NAND_I64){ +- BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); +- } +- + MI.eraseFromParent(); + + return BB; +@@ -4220,7 +4229,6 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicBinaryPartword( + // emitAtomicBinary. 
In summary, we need a scratch register which is going to + // be undef, that is unique among registers chosen for the instruction. + +- BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(BB, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(AlignedAddr) +@@ -4252,11 +4260,26 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicBinaryPartword( + MachineBasicBlock * + LoongArchTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, + MachineBasicBlock *BB) const { +- assert((MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 || +- MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I64) && ++ unsigned Op = MI.getOpcode(); ++ assert((Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST || ++ Op == LoongArch::I64_ATOMIC_CMP_SWAP_ACQUIRE || ++ Op == LoongArch::I64_ATOMIC_CMP_SWAP_ACQ_REL || ++ Op == LoongArch::I64_ATOMIC_CMP_SWAP_MONOTONIC || ++ Op == LoongArch::I64_ATOMIC_CMP_SWAP_RELEASE || ++ Op == LoongArch::I64_ATOMIC_CMP_SWAP_SEQ_CST) && + "Unsupported atomic psseudo for EmitAtomicCmpSwap."); + +- const unsigned Size = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 ? 4 : 8; ++ const unsigned Size = (Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE || ++ Op == LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST) ++ ? 4 ++ : 8; + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); +@@ -4264,9 +4287,8 @@ LoongArchTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + +- unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I32 +- ? LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA +- : LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA; ++ unsigned AtomicOp = Size == 4 ? LoongArch::ATOMIC_CMP_SWAP_I32_POSTRA ++ : LoongArch::ATOMIC_CMP_SWAP_I64_POSTRA; + unsigned Dest = MI.getOperand(0).getReg(); + unsigned Ptr = MI.getOperand(1).getReg(); + unsigned OldVal = MI.getOperand(2).getReg(); +@@ -4288,21 +4310,43 @@ LoongArchTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), OldValCopy).addReg(OldVal); + BuildMI(*BB, II, DL, TII->get(LoongArch::COPY), NewValCopy).addReg(NewVal); + ++ AtomicOrdering Ordering; ++ switch (Op) { ++ case LoongArch::I32_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_ACQUIRE: ++ Ordering = AtomicOrdering::Acquire; ++ break; ++ case LoongArch::I32_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_ACQ_REL: ++ Ordering = AtomicOrdering::AcquireRelease; ++ break; ++ case LoongArch::I32_ATOMIC_CMP_SWAP_SEQ_CST: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_SEQ_CST: ++ Ordering = AtomicOrdering::SequentiallyConsistent; ++ break; ++ case LoongArch::I32_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_RELEASE: ++ Ordering = AtomicOrdering::Release; ++ break; ++ case LoongArch::I32_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I64_ATOMIC_CMP_SWAP_MONOTONIC: ++ Ordering = AtomicOrdering::Monotonic; ++ break; ++ } ++ + // The purposes of the flags on the scratch registers is explained in + // emitAtomicBinary. 
In summary, we need a scratch register which is going to + // be undef, that is unique among registers chosen for the instruction. + +- BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(*BB, II, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(PtrCopy, RegState::Kill) + .addReg(OldValCopy, RegState::Kill) + .addReg(NewValCopy, RegState::Kill) ++ .addImm(static_cast(Ordering)) + .addReg(Scratch, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit); + +- BuildMI(*BB, II, DL, TII->get(LoongArch::DBAR)).addImm(DBAR_HINT); +- + MI.eraseFromParent(); // The instruction is gone now. + + return BB; +@@ -4312,6 +4356,18 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword( + MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { + assert((Size == 1 || Size == 2) && + "Unsupported size for EmitAtomicCmpSwapPartial."); ++ unsigned Op = MI.getOpcode(); ++ assert((Op == LoongArch::I8_ATOMIC_CMP_SWAP_ACQUIRE || ++ Op == LoongArch::I8_ATOMIC_CMP_SWAP_ACQ_REL || ++ Op == LoongArch::I8_ATOMIC_CMP_SWAP_MONOTONIC || ++ Op == LoongArch::I8_ATOMIC_CMP_SWAP_RELEASE || ++ Op == LoongArch::I8_ATOMIC_CMP_SWAP_SEQ_CST || ++ Op == LoongArch::I16_ATOMIC_CMP_SWAP_ACQUIRE || ++ Op == LoongArch::I16_ATOMIC_CMP_SWAP_ACQ_REL || ++ Op == LoongArch::I16_ATOMIC_CMP_SWAP_MONOTONIC || ++ Op == LoongArch::I16_ATOMIC_CMP_SWAP_RELEASE || ++ Op == LoongArch::I16_ATOMIC_CMP_SWAP_SEQ_CST) && ++ "Unsupported atomic psseudo for EmitAtomicCmpSwapPartword."); + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); +@@ -4340,9 +4396,8 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword( + unsigned Mask3 = RegInfo.createVirtualRegister(RC); + unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC); + unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC); +- unsigned AtomicOp = MI.getOpcode() == LoongArch::ATOMIC_CMP_SWAP_I8 +- ? LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA +- : LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA; ++ unsigned AtomicOp = Size == 1 ? LoongArch::ATOMIC_CMP_SWAP_I8_POSTRA ++ : LoongArch::ATOMIC_CMP_SWAP_I16_POSTRA; + + // The scratch registers here with the EarlyClobber | Define | Dead | Implicit + // flags are used to coerce the register allocator and the machine verifier to +@@ -4427,11 +4482,33 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword( + BuildMI(BB, DL, TII->get(LoongArch::SLL_W), ShiftedNewVal) + .addReg(MaskedNewVal).addReg(ShiftAmt); + ++ AtomicOrdering Ordering; ++ switch (Op) { ++ case LoongArch::I8_ATOMIC_CMP_SWAP_ACQUIRE: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_ACQUIRE: ++ Ordering = AtomicOrdering::Acquire; ++ break; ++ case LoongArch::I8_ATOMIC_CMP_SWAP_ACQ_REL: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_ACQ_REL: ++ Ordering = AtomicOrdering::AcquireRelease; ++ break; ++ case LoongArch::I8_ATOMIC_CMP_SWAP_SEQ_CST: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_SEQ_CST: ++ Ordering = AtomicOrdering::SequentiallyConsistent; ++ break; ++ case LoongArch::I8_ATOMIC_CMP_SWAP_RELEASE: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_RELEASE: ++ Ordering = AtomicOrdering::Release; ++ break; ++ case LoongArch::I8_ATOMIC_CMP_SWAP_MONOTONIC: ++ case LoongArch::I16_ATOMIC_CMP_SWAP_MONOTONIC: ++ Ordering = AtomicOrdering::Monotonic; ++ break; ++ } + // The purposes of the flags on the scratch registers are explained in + // emitAtomicBinary. 
In summary, we need a scratch register which is going to + // be undef, that is unique among the register chosen for the instruction. + +- BuildMI(BB, DL, TII->get(LoongArch::DBAR)).addImm(0); + BuildMI(BB, DL, TII->get(AtomicOp)) + .addReg(Dest, RegState::Define | RegState::EarlyClobber) + .addReg(AlignedAddr) +@@ -4440,6 +4517,7 @@ MachineBasicBlock *LoongArchTargetLowering::emitAtomicCmpSwapPartword( + .addReg(Mask2) + .addReg(ShiftedNewVal) + .addReg(ShiftAmt) ++ .addImm(static_cast(Ordering)) + .addReg(Scratch, RegState::EarlyClobber | RegState::Define | + RegState::Dead | RegState::Implicit) + .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | +@@ -4735,13 +4813,9 @@ SDValue LoongArchTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) + } + + SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op, +- SelectionDAG &DAG) const { +- // FIXME: Need pseudo-fence for 'singlethread' fences +- // FIXME: Set SType for weaker fences where supported/appropriate. +- unsigned SType = 0; +- SDLoc DL(Op); +- return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op.getOperand(0), +- DAG.getConstant(SType, DL, MVT::i32)); ++ SelectionDAG &DAG) const { ++ // TODO: handle SyncScope::SingleThread. ++ return Op; + } + + SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, +diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +index 64e06b53f..ea23b6350 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h ++++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +@@ -93,8 +93,6 @@ class TargetRegisterClass; + // Software Exception Return. + EH_RETURN, + +- DBAR, +- + BSTRPICK, + BSTRINS, + +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +index 53191a94d..272e1e25e 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +@@ -17,8 +17,6 @@ + #ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H + #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHINSTRINFO_H + +-#define DBAR_HINT 0x700 +- + #include "MCTargetDesc/LoongArchMCTargetDesc.h" + #include "LoongArch.h" + #include "LoongArchRegisterInfo.h" +diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +index 2d505ee25..96eb554c9 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +@@ -17,13 +17,13 @@ def SDT_Bstrins : SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>, SDTCisSameAs<2, 3>, + SDTCisSameAs<0, 4>]>; + ++def SDT_REVBD : SDTypeProfile<1, 1, [SDTCisInt<0>]>; ++def LoongArchREVBD : SDNode<"LoongArchISD::REVBD", SDT_REVBD>; ++ + def LoongArchBstrpick : SDNode<"LoongArchISD::BSTRPICK", SDT_Bstrpick>; + + def LoongArchBstrins : SDNode<"LoongArchISD::BSTRINS", SDT_Bstrins>; + +-def SDT_DBAR : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +-def LoongArchDBAR : SDNode<"LoongArchISD::DBAR", SDT_DBAR, [SDNPHasChain,SDNPSideEffect]>; +- + def SDT_LoongArchEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; + + def LoongArchehret : SDNode<"LoongArchISD::EH_RETURN", SDT_LoongArchEHRET, +@@ -1032,15 +1032,73 @@ class AtomicCmpSwap : + LoongArchPseudo<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap), + [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>; + ++// These atomic cmpxchg PatFrags only care about the failure ordering. 
++// In llvm <= 13, the PatFrags defined by multiclass `ternary_atomic_op_ord` ++// in TargetSelectionDAG.td only care about the success ordering while llvm > 13 ++// care about the `merged` ordering which is the stronger one of success and ++// failure. See https://reviews.llvm.org/D106729. But for LoongArch LL-SC we ++// only need to care about the failure ordering as explained in ++// https://github.com/llvm/llvm-project/pull/67391. So we defined these ++// PatFrags. ++multiclass ternary_atomic_op_failure_ord { ++ def NAME#_failure_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Monotonic; ++ }]>; ++ def NAME#_failure_acquire : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Acquire; ++ }]>; ++ def NAME#_failure_release : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::Release; ++ }]>; ++ def NAME#_failure_acq_rel : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::AcquireRelease; ++ }]>; ++ def NAME#_failure_seq_cst : PatFrag<(ops node:$ptr, node:$cmp, node:$val), ++ (!cast(NAME) node:$ptr, node:$cmp, node:$val), [{ ++ AtomicOrdering Ordering = cast(N)->getFailureOrdering(); ++ return Ordering == AtomicOrdering::SequentiallyConsistent; ++ }]>; ++} ++ ++defm atomic_cmp_swap_64 : ternary_atomic_op_failure_ord; ++ ++multiclass AtomicCmpSwapInstrs { ++ def ATOMIC_CMP_SWAP_MONOTONIC : ++ AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_monotonic"), ++ RC>; ++ def ATOMIC_CMP_SWAP_ACQUIRE : ++ AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_acquire"), ++ RC>; ++ def ATOMIC_CMP_SWAP_RELEASE : ++ AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_release"), ++ RC>; ++ def ATOMIC_CMP_SWAP_ACQ_REL : ++ AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_acq_rel"), ++ RC>; ++ def ATOMIC_CMP_SWAP_SEQ_CST : ++ AtomicCmpSwap("atomic_cmp_swap_"#Bits#"_failure_seq_cst"), ++ RC>; ++} ++ + class AtomicCmpSwapPostRA : +- LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$cmp, RC:$swap), []> { ++ LoongArchPseudo<(outs RC:$dst), ++ (ins PtrRC:$ptr, RC:$cmp, RC:$swap, i32imm:$ordering), []> { + let mayLoad = 1; + let mayStore = 1; + } + + class AtomicCmpSwapSubwordPostRA : +- LoongArchPseudo<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal, +- RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []> { ++ LoongArchPseudo<(outs RC:$dst), ++ (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal, RC:$mask2, ++ RC:$ShiftNewVal, RC:$ShiftAmt, i32imm:$ordering), []> { + let mayLoad = 1; + let mayStore = 1; + } +@@ -1062,7 +1120,7 @@ def CTZ_D : Int_Reg2<"ctz.d", GPR64Opnd, cttz>, R2I<0b01011>; + + def REVB_4H : Int_Reg2<"revb.4h", GPR64Opnd>, R2I<0b01101>; //[] + def REVB_2W : Int_Reg2<"revb.2w", GPR64Opnd>, R2I<0b01110>; +-def REVB_D : Int_Reg2<"revb.d", GPR64Opnd>, R2I<0b01111>; ++def REVB_D : Int_Reg2<"revb.d", GPR64Opnd, LoongArchREVBD>, R2I<0b01111>; + def REVH_2W : Int_Reg2<"revh.2w", GPR64Opnd>, R2I<0b10000>; + def REVH_D : Int_Reg2<"revh.d", GPR64Opnd>, R2I<0b10001>; //[] + +@@ -1450,7 +1508,8 @@ let usesCustomInserter = 1 in 
{ + def ATOMIC_LOAD_XOR_I64 : Atomic2Ops; + def ATOMIC_LOAD_NAND_I64 : Atomic2Ops; + def ATOMIC_SWAP_I64 : Atomic2Ops; +- def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap; ++ ++ defm I64_ : AtomicCmpSwapInstrs<"64", GPR64>; + + def ATOMIC_LOAD_MAX_I64 : Atomic2Ops; + def ATOMIC_LOAD_MIN_I64 : Atomic2Ops; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +index 2677a79fa..7ba77a88a 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +@@ -4734,213 +4734,8 @@ def : LASXPat<(srl + + + +-def : LASXPat<(sra +- (v32i8 (add +- (v32i8 (add (v32i8 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v32i8 (add LASX256B:$a, LASX256B:$b)) +- )), +- (v32i8 (srl +- (v32i8 ( add (v32i8( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v32i8 (add LASX256B:$a, LASX256B:$b)) +- )), +- (v32i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7)) +- ) +- ) +- ) +- ) +- ), +- (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- ))), +- (XVAVGR_B (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; +- +- +-def : LASXPat<(sra +- (v16i16 (add +- (v16i16 (add (v16i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i16 (add LASX256H:$a, LASX256H:$b)) +- )), +- (v16i16 (srl +- (v16i16 (add (v16i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i16 (add LASX256H:$a, LASX256H:$b)) +- )), +- (v16i16 (build_vector +- (i32 15),(i32 15),(i32 15),(i32 15), +- (i32 15),(i32 15),(i32 15),(i32 15), +- (i32 15),(i32 15),(i32 15),(i32 15), +- (i32 15),(i32 15),(i32 15),(i32 15)) +- ) +- ) +- ) +- ) +- ), +- (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- ))), +- (XVAVGR_H (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; +- +- +-def : LASXPat<(sra +- (v8i32 (add +- (v8i32 (add (v8i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i32 (add LASX256W:$a, LASX256W:$b)) +- )), +- (v8i32 (srl +- (v8i32 (add (v8i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i32 (add LASX256W:$a, LASX256W:$b)) +- )), +- (v8i32 (build_vector +- (i32 31),(i32 31),(i32 
31),(i32 31), +- (i32 31),(i32 31),(i32 31),(i32 31) +- ) +- ) +- ) +- ) +- ) +- ), +- (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)))), +- (XVAVGR_W (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; +- +-def : LASXPat<(sra +- (v4i64 (add +- (v4i64 (add (v4i64 ( +- build_vector (i64 1),(i64 1),(i64 1),(i64 1) +- )), +- (v4i64 (add LASX256D:$a, LASX256D:$b)) +- )), +- (v4i64 (srl +- (v4i64 (add (v4i64 ( +- build_vector (i64 1),(i64 1),(i64 1),(i64 1) +- )), +- (v4i64 (add LASX256D:$a, LASX256D:$b)) +- )), +- (v4i64 (build_vector +- (i64 63),(i64 63),(i64 63),(i64 63))) +- ) +- ) +- ) +- ), +- (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)))), +- (XVAVGR_D (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; +- + + +-def : LASXPat<(srl +- (v32i8 (add (v32i8 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v32i8 (add LASX256B:$a, LASX256B:$b)) +- )), +- (v32i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (XVAVGR_BU (v32i8 LASX256B:$a), (v32i8 LASX256B:$b))>; +- +-def : LASXPat<(srl +- (v16i16 (add (v16i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i16 (add LASX256H:$a, LASX256H:$b)) +- )), +- (v16i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (XVAVGR_HU (v16i16 LASX256H:$a), (v16i16 LASX256H:$b))>; +- +-def : LASXPat<(srl +- (v8i32 (add (v8i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i32 (add LASX256W:$a, LASX256W:$b)) +- )), +- (v8i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (XVAVGR_WU (v8i32 LASX256W:$a), (v8i32 LASX256W:$b))>; +- +-def : LASXPat<(srl +- (v4i64 (add (v4i64 ( +- build_vector (i64 1),(i64 1),(i64 1),(i64 1) +- )), +- (v4i64 (add LASX256D:$a, LASX256D:$b)) +- )), +- (v4i64 (build_vector (i64 1),(i64 1),(i64 1),(i64 1)) +- ) +- ), +- (XVAVGR_DU (v4i64 LASX256D:$a), (v4i64 LASX256D:$b))>; +- + + def : LASXPat<(mulhs LASX256D:$a, LASX256D:$b), + (XVMUH_D LASX256D:$a, LASX256D:$b)>; +@@ -5651,6 +5446,13 @@ def : LASXPat<(and v4i64:$xj, (xor (shl xvsplat_imm_eq_1, v4i64:$xk), + (XVBITCLR_D v4i64:$xj, v4i64:$xk)>; + + ++def : LASXPat<(insert_subvector (v32i8 LASX256B:$dst), ++ (v16i8 LSX128B:$src), (i64 0)), ++ (XVPERMI_Q (v32i8 LASX256B:$dst), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), ++ (i32 48))>; ++ + def : LASXPat<(insert_subvector (v16i16 LASX256H:$dst), + (v8i16 LSX128H:$src), (i64 0)), + (XVPERMI_QH (v16i16 LASX256H:$dst), +@@ -5671,3 +5473,27 @@ def : LASXPat<(insert_subvector (v4i64 LASX256D:$dst), + (v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), + LSX128D:$src, sub_128)), + (i32 48))>; ++ ++def : LASXPat<(insert_subvector (v4i64 LASX256D:$dst), ++ (v2i64 LSX128D:$src), (i64 2)), ++ (XVPERMI_QD (v4i64 LASX256D:$dst), ++ 
(v4i64 (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), ++ LSX128D:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector (v8i32 LASX256W:$dst), ++ (v4i32 LSX128W:$src), (i64 4)), ++ (XVPERMI_QW (v8i32 LASX256W:$dst), ++ (v8i32 (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), ++ LSX128W:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector (v16i16 LASX256H:$dst), ++ (v8i16 LSX128H:$src), (i64 8)), ++ (XVPERMI_QH (v16i16 LASX256H:$dst), ++ (v16i16 (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), ++ LSX128H:$src, sub_128)), (i32 32))>; ++ ++def : LASXPat<(insert_subvector (v32i8 LASX256B:$dst), ++ (v16i8 LSX128B:$src), (i64 16)), ++ (XVPERMI_Q (v32i8 LASX256B:$dst), ++ (v32i8 (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), ++ LSX128B:$src, sub_128)), (i32 32))>; +diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +index 3d0ea3901..37a0d9066 100644 +--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td ++++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +@@ -5090,165 +5090,6 @@ def : LSXPat<(srl + + + +-def : LSXPat<(sra +- (v16i8 (add +- (v16i8 (add (v16i8 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i8 (add LSX128B:$a, LSX128B:$b)) +- )), +- (v16i8 (srl +- (v16i8 ( add (v16i8( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i8 (add LSX128B:$a, LSX128B:$b)) +- )), +- (v16i8 (build_vector (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7), +- (i32 7),(i32 7),(i32 7),(i32 7)) +- ) +- ) +- ) +- ) +- ), +- (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- ))), +- (VAVGR_B (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; +- +-def : LSXPat<(sra +- (v8i16 (add +- (v8i16 (add (v8i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i16 (add LSX128H:$a, LSX128H:$b)) +- )), +- (v8i16 (srl +- (v8i16 (add (v8i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i16 (add LSX128H:$a, LSX128H:$b)) +- )), +- (v8i16 (build_vector (i32 15),(i32 15),(i32 15),(i32 15), +- (i32 15),(i32 15),(i32 15),(i32 15)) +- ) +- ) +- ) +- ) +- ), +- (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- ))), +- (VAVGR_H (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; +- +-def : LSXPat<(sra +- (v4i32 (add +- (v4i32 (add (v4i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v4i32 (add LSX128W:$a, LSX128W:$b)) +- )), +- (v4i32 (srl +- (v4i32 (add (v4i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v4i32 (add LSX128W:$a, LSX128W:$b)) +- )), +- (v4i32 (build_vector (i32 31),(i32 31),(i32 31),(i32 31)) +- ) +- ) +- ) +- ) +- ), +- (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)))), +- (VAVGR_W (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; +- +-def : LSXPat<(sra +- (v2i64 (add +- (v2i64 (add (v2i64 ( +- build_vector (i64 1),(i64 1) +- )), +- (v2i64 (add LSX128D:$a, LSX128D:$b)) +- )), +- (v2i64 (srl +- (v2i64 (add (v2i64 ( +- build_vector (i64 1),(i64 1) +- )), +- (v2i64 (add LSX128D:$a, LSX128D:$b)) +- )), +- (v2i64 (build_vector (i64 63),(i64 63))) +- ) +- ) +- ) +- ), +- (v2i64 (build_vector (i64 1),(i64 1)))), 
+- (VAVGR_D (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; +- +- +- +- +-def : LSXPat<(srl +- (v16i8 (add (v16i8 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v16i8 (add LSX128B:$a, LSX128B:$b)) +- )), +- (v16i8 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (VAVGR_BU (v16i8 LSX128B:$a), (v16i8 LSX128B:$b))>; +- +-def : LSXPat<(srl +- (v8i16 (add (v8i16 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v8i16 (add LSX128H:$a, LSX128H:$b)) +- )), +- (v8i16 (build_vector (i32 1),(i32 1),(i32 1),(i32 1), +- (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (VAVGR_HU (v8i16 LSX128H:$a), (v8i16 LSX128H:$b))>; +- +-def : LSXPat<(srl +- (v4i32 (add (v4i32 ( +- build_vector (i32 1),(i32 1),(i32 1),(i32 1) +- )), +- (v4i32 (add LSX128W:$a, LSX128W:$b)) +- )), +- (v4i32 (build_vector (i32 1),(i32 1),(i32 1),(i32 1)) +- ) +- ), +- (VAVGR_WU (v4i32 LSX128W:$a), (v4i32 LSX128W:$b))>; +- +-def : LSXPat<(srl +- (v2i64 (add (v2i64 ( +- build_vector (i64 1),(i64 1) +- )), +- (v2i64 (add LSX128D:$a, LSX128D:$b)) +- )), +- (v2i64 (build_vector (i64 1),(i64 1)) +- ) +- ), +- (VAVGR_DU (v2i64 LSX128D:$a), (v2i64 LSX128D:$b))>; + + + def : LSXPat<(mulhs LSX128D:$a, LSX128D:$b), +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +index b9ba9e536..75b7838bf 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +@@ -77,11 +77,19 @@ void LoongArchAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixu + } + + Optional LoongArchAsmBackend::getFixupKind(StringRef Name) const { +- return StringSwitch>(Name) +- .Case("R_LARCH_NONE", (MCFixupKind)LoongArch::fixup_LARCH_NONE) +- .Case("R_LARCH_32", FK_Data_4) +- .Case("R_LARCH_64", FK_Data_8) +- .Default(MCAsmBackend::getFixupKind(Name)); ++ if (STI.getTargetTriple().isOSBinFormatELF()) { ++ unsigned Type = llvm::StringSwitch(Name) ++#define ELF_RELOC(X, Y) .Case(#X, Y) ++#include "llvm/BinaryFormat/ELFRelocs/LoongArch.def" ++#undef ELF_RELOC ++ .Case("BFD_RELOC_NONE", ELF::R_LARCH_NONE) ++ .Case("BFD_RELOC_32", ELF::R_LARCH_32) ++ .Case("BFD_RELOC_64", ELF::R_LARCH_64) ++ .Default(-1u); ++ if (Type != -1u) ++ return static_cast(FirstLiteralRelocationKind + Type); ++ } ++ return None; + } + + const MCFixupKindInfo &LoongArchAsmBackend:: +@@ -143,6 +151,11 @@ getFixupKindInfo(MCFixupKind Kind) const { + { "fixup_LARCH_SUB64", 0, 0, 0}, + }; + ++ // Fixup kinds from .reloc directive are like R_LARCH_NONE. They do not ++ // require any extra processing. ++ if (Kind >= FirstLiteralRelocationKind) ++ return MCAsmBackend::getFixupKindInfo(FK_NONE); ++ + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + +@@ -173,6 +186,10 @@ bool LoongArchAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, + bool LoongArchAsmBackend::shouldForceRelocation(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target) { ++ // .reloc directive should force relocation. 
++ if (Fixup.getKind() >= FirstLiteralRelocationKind) ++ return true; ++ + const unsigned FixupKind = Fixup.getKind(); + switch (FixupKind) { + default: +@@ -209,6 +226,6 @@ MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T, + const MCTargetOptions &Options) { + LoongArchABIInfo ABI = LoongArchABIInfo::computeTargetABI( + STI.getTargetTriple(), STI.getCPU(), Options); +- return new LoongArchAsmBackend(T, MRI, STI.getTargetTriple(), STI.getCPU(), +- ABI.IsLPX32()); ++ return new LoongArchAsmBackend(STI, T, MRI, STI.getTargetTriple(), ++ STI.getCPU(), ABI.IsLPX32()); + } +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +index 45ae6af44..d96791f7d 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +@@ -28,14 +28,15 @@ class MCSymbolELF; + class Target; + + class LoongArchAsmBackend : public MCAsmBackend { ++ const MCSubtargetInfo &STI; + Triple TheTriple; + bool IsLPX32; + + public: +- LoongArchAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TT, +- StringRef CPU, bool LPX32) +- : MCAsmBackend(support::little), +- TheTriple(TT), IsLPX32(LPX32) { ++ LoongArchAsmBackend(const MCSubtargetInfo &STI, const Target &T, ++ const MCRegisterInfo &MRI, const Triple &TT, ++ StringRef CPU, bool LPX32) ++ : MCAsmBackend(support::little), STI(STI), TheTriple(TT), IsLPX32(LPX32) { + assert(TT.isLittleEndian()); + } + +diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +index e00b9af9d..c08f3ba0c 100644 +--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp ++++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +@@ -62,6 +62,9 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, + ///XXX:Reloc + unsigned Kind = (unsigned)Fixup.getKind(); + ++ if (Kind >= FirstLiteralRelocationKind) ++ return Kind - FirstLiteralRelocationKind; ++ + switch (Kind) { + default: + return ELF::R_LARCH_NONE; +diff --git a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp +index d242083f9..1a2d89fbb 100644 +--- a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp ++++ b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp +@@ -27,6 +27,12 @@ + + using namespace llvm; + ++static cl::opt ++EnableLoongson3FixLLSC("mips-fix-loongson3-llsc", cl::Hidden, ++ cl::desc("Work around loongson3 llsc erratum"), ++ cl::init(true)); ++ ++ + #define DEBUG_TYPE "mips-pseudo" + + namespace { +@@ -188,6 +194,21 @@ bool MipsExpandPseudo::expandAtomicCmpSwapSubword( + .addImm(ShiftImm); + } + ++ if (EnableLoongson3FixLLSC) { ++ bool Has_sync = false; ++ for (MachineBasicBlock::iterator MBBb = sinkMBB->begin(), MBBe = sinkMBB->end(); ++ MBBb != MBBe; ++MBBb) { ++ Has_sync |= MBBb->getOpcode() == Mips::SYNC ? 
true : false; ++ if (MBBb->mayLoad() || MBBb->mayStore()) ++ break; ++ } ++ ++ if (!Has_sync) { ++ MachineBasicBlock::iterator Pos = sinkMBB->begin(); ++ BuildMI(*sinkMBB, Pos, DL, TII->get(Mips::SYNC)).addImm(0); ++ } ++ } ++ + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); + computeAndAddLiveIns(LiveRegs, *loop2MBB); +@@ -289,6 +310,20 @@ bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB, + BuildMI(loop2MBB, DL, TII->get(BEQ)) + .addReg(Scratch, RegState::Kill).addReg(ZERO).addMBB(loop1MBB); + ++ if (EnableLoongson3FixLLSC) { ++ bool Has_sync = false; ++ for (MachineBasicBlock::iterator MBBb = exitMBB->begin(), MBBe = exitMBB->end(); ++ MBBb != MBBe; ++MBBb) { ++ Has_sync |= MBBb->getOpcode() == Mips::SYNC ? true : false; ++ if (MBBb->mayLoad() || MBBb->mayStore()) ++ break; ++ } ++ if (!Has_sync) { ++ MachineBasicBlock::iterator Pos = exitMBB->begin(); ++ BuildMI(*exitMBB, Pos, DL, TII->get(Mips::SYNC)).addImm(0); ++ } ++ } ++ + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *loop1MBB); + computeAndAddLiveIns(LiveRegs, *loop2MBB); +diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +index 3274e36ab..fadba1472 100644 +--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +@@ -104,6 +104,7 @@ static const uint64_t kSystemZ_ShadowOffset64 = 1ULL << 52; + static const uint64_t kMIPS_ShadowOffsetN32 = 1ULL << 29; + static const uint64_t kMIPS32_ShadowOffset32 = 0x0aaa0000; + static const uint64_t kMIPS64_ShadowOffset64 = 1ULL << 37; ++static const uint64_t kLoongArch64_ShadowOffset64 = 1ULL << 37; + static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36; + static const uint64_t kRISCV64_ShadowOffset64 = 0xd55550000; + static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30; +@@ -481,6 +482,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + bool IsMIPSN32ABI = TargetTriple.getEnvironment() == Triple::GNUABIN32; + bool IsMIPS32 = TargetTriple.isMIPS32(); + bool IsMIPS64 = TargetTriple.isMIPS64(); ++ bool IsLoongArch64 = TargetTriple.isLoongArch64(); + bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb(); + bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64; + bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64; +@@ -548,7 +550,9 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize, + Mapping.Offset = kWindowsShadowOffset64; + } else if (IsMIPS64) + Mapping.Offset = kMIPS64_ShadowOffset64; +- else if (IsIOS) ++ else if (IsLoongArch64) { ++ Mapping.Offset = kLoongArch64_ShadowOffset64; ++ } else if (IsIOS) + Mapping.Offset = kDynamicShadowSentinel; + else if (IsMacOS && IsAArch64) + Mapping.Offset = kDynamicShadowSentinel; +diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +index 4606bd5de..70155a590 100644 +--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp ++++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +@@ -385,6 +385,14 @@ static const MemoryMapParams Linux_X86_64_MemoryMapParams = { + #endif + }; + ++// loongarch64 Linux ++static const MemoryMapParams Linux_LOONGARCH64_MemoryMapParams = { ++ 0, // AndMask (not used) ++ 0x008000000000, // XorMask ++ 0, // ShadowBase (not used) ++ 0x002000000000, // OriginBase ++}; ++ + // mips64 Linux + static const MemoryMapParams Linux_MIPS64_MemoryMapParams = { + 
0, // AndMask (not used) +@@ -454,6 +462,11 @@ static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = { + &Linux_X86_64_MemoryMapParams, + }; + ++static const PlatformMemoryMapParams Linux_LOONGARCH_MemoryMapParams = { ++ nullptr, ++ &Linux_LOONGARCH64_MemoryMapParams, ++}; ++ + static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = { + nullptr, + &Linux_MIPS64_MemoryMapParams, +@@ -516,6 +529,7 @@ public: + private: + friend struct MemorySanitizerVisitor; + friend struct VarArgAMD64Helper; ++ friend struct VarArgLoongArch64Helper; + friend struct VarArgMIPS64Helper; + friend struct VarArgAArch64Helper; + friend struct VarArgPowerPC64Helper; +@@ -937,6 +951,9 @@ void MemorySanitizer::initializeModule(Module &M) { + case Triple::x86: + MapParams = Linux_X86_MemoryMapParams.bits32; + break; ++ case Triple::loongarch64: ++ MapParams = Linux_LOONGARCH_MemoryMapParams.bits64; ++ break; + case Triple::mips64: + case Triple::mips64el: + MapParams = Linux_MIPS_MemoryMapParams.bits64; +@@ -4445,6 +4462,117 @@ struct VarArgAMD64Helper : public VarArgHelper { + } + }; + ++/// LoongArch64-specific implementation of VarArgHelper. ++struct VarArgLoongArch64Helper : public VarArgHelper { ++ Function &F; ++ MemorySanitizer &MS; ++ MemorySanitizerVisitor &MSV; ++ Value *VAArgTLSCopy = nullptr; ++ Value *VAArgSize = nullptr; ++ ++ SmallVector VAStartInstrumentationList; ++ ++ VarArgLoongArch64Helper(Function &F, MemorySanitizer &MS, ++ MemorySanitizerVisitor &MSV) : F(F), MS(MS), MSV(MSV) {} ++ ++ void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override { ++ unsigned VAArgOffset = 0; ++ const DataLayout &DL = F.getParent()->getDataLayout(); ++ for (auto ArgIt = CB.arg_begin() + CB.getFunctionType()->getNumParams(), ++ End = CB.arg_end(); ++ ArgIt != End; ++ArgIt) { ++ Triple TargetTriple(F.getParent()->getTargetTriple()); ++ Value *A = *ArgIt; ++ Value *Base; ++ uint64_t ArgSize = DL.getTypeAllocSize(A->getType()); ++ Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize); ++ VAArgOffset += ArgSize; ++ VAArgOffset = alignTo(VAArgOffset, 8); ++ if (!Base) ++ continue; ++ IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment); ++ } ++ ++ Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset); ++ // Here using VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creation of ++ // a new class member i.e. it is the total size of all VarArgs. ++ IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS); ++ } ++ ++ /// Compute the shadow address for a given va_arg. ++ Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, ++ unsigned ArgOffset, unsigned ArgSize) { ++ // Make sure we don't overflow __msan_va_arg_tls. 
++ if (ArgOffset + ArgSize > kParamTLSSize) ++ return nullptr; ++ Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy); ++ Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); ++ return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0), ++ "_msarg"); ++ } ++ ++ void visitVAStartInst(VAStartInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void visitVACopyInst(VACopyInst &I) override { ++ IRBuilder<> IRB(&I); ++ VAStartInstrumentationList.push_back(&I); ++ Value *VAListTag = I.getArgOperand(0); ++ Value *ShadowPtr, *OriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( ++ VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true); ++ IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()), ++ /* size */ 8, Alignment, false); ++ } ++ ++ void finalizeInstrumentation() override { ++ assert(!VAArgSize && !VAArgTLSCopy && ++ "finalizeInstrumentation called twice"); ++ IRBuilder<> IRB(MSV.FnPrologueEnd); ++ VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS); ++ Value *CopySize = IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), ++ VAArgSize); ++ ++ if (!VAStartInstrumentationList.empty()) { ++ // If there is a va_start in this function, make a backup copy of ++ // va_arg_tls somewhere in the function entry block. ++ VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); ++ IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize); ++ } ++ ++ // Instrument va_start. ++ // Copy va_list shadow from the backup copy of the TLS contents. ++ for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) { ++ CallInst *OrigInst = VAStartInstrumentationList[i]; ++ IRBuilder<> IRB(OrigInst->getNextNode()); ++ Value *VAListTag = OrigInst->getArgOperand(0); ++ Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C); ++ Value *RegSaveAreaPtrPtr = ++ IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ++ PointerType::get(Type::getInt64PtrTy(*MS.C), 0)); ++ Value *RegSaveAreaPtr = ++ IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr); ++ Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr; ++ const Align Alignment = Align(8); ++ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = ++ MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), ++ Alignment, /*isStore*/ true); ++ IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, ++ CopySize); ++ } ++ } ++}; ++ + /// MIPS64-specific implementation of VarArgHelper. 
+ struct VarArgMIPS64Helper : public VarArgHelper { + Function &F; +@@ -5344,6 +5472,8 @@ static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, + return new VarArgPowerPC64Helper(Func, Msan, Visitor); + else if (TargetTriple.getArch() == Triple::systemz) + return new VarArgSystemZHelper(Func, Msan, Visitor); ++ else if (TargetTriple.getArch() == Triple::loongarch64) ++ return new VarArgLoongArch64Helper(Func, Msan, Visitor); + else + return new VarArgNoOpHelper(Func, Msan, Visitor); + } +diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp +index c60efa465..fc3e6745c 100644 +--- a/llvm/lib/XRay/InstrumentationMap.cpp ++++ b/llvm/lib/XRay/InstrumentationMap.cpp +@@ -61,6 +61,7 @@ loadObj(StringRef Filename, object::OwningBinary &ObjFile, + if ((!ObjFile.getBinary()->isELF() && !ObjFile.getBinary()->isMachO()) || + !(ObjFile.getBinary()->getArch() == Triple::x86_64 || + ObjFile.getBinary()->getArch() == Triple::ppc64le || ++ ObjFile.getBinary()->getArch() == Triple::loongarch64 || + ObjFile.getBinary()->getArch() == Triple::arm || + ObjFile.getBinary()->getArch() == Triple::aarch64)) + return make_error( +diff --git a/llvm/test/CodeGen/LoongArch/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/atomic-cmpxchg.ll +new file mode 100644 +index 000000000..795b5c6b2 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/atomic-cmpxchg.ll +@@ -0,0 +1,902 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 ++ ++define void @cmpxchg_i8_acquire_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_acquire_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: ori $r8, $zero, 255 ++; LA64-NEXT: sll.w $r8, $r8, $r4 ++; LA64-NEXT: nor $r9, $zero, $r8 ++; LA64-NEXT: andi $r5, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: andi $r6, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r8 ++; LA64-NEXT: bne $r12, $r5, .LBB0_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r9 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB0_1 ++; LA64-NEXT: .LBB0_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r10, $r12, $r4 ++; LA64-NEXT: ext.w.b $r10, $r10 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire acquire ++ ret void ++} ++ ++define void @cmpxchg_i8_release_acquire(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_release_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: ori $r8, $zero, 255 ++; LA64-NEXT: sll.w $r8, $r8, $r4 ++; LA64-NEXT: nor $r9, $zero, $r8 ++; LA64-NEXT: andi $r5, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: andi $r6, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r8 ++; LA64-NEXT: bne $r12, $r5, .LBB1_3 ++; LA64-NEXT: 
# %bb.2: # in Loop: Header=BB1_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r9 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB1_1 ++; LA64-NEXT: .LBB1_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r10, $r12, $r4 ++; LA64-NEXT: ext.w.b $r10, $r10 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val release acquire ++ ret void ++} ++ ++;; Check that only the failure ordering is taken care. ++define void @cmpxchg_i8_acquire_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: ori $r8, $zero, 255 ++; LA64-NEXT: sll.w $r8, $r8, $r4 ++; LA64-NEXT: nor $r9, $zero, $r8 ++; LA64-NEXT: andi $r5, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: andi $r6, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r8 ++; LA64-NEXT: bne $r12, $r5, .LBB2_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r9 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB2_1 ++; LA64-NEXT: .LBB2_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r10, $r12, $r4 ++; LA64-NEXT: ext.w.b $r10, $r10 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire monotonic ++ ret void ++} ++ ++define void @cmpxchg_i16_acquire_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_acquire_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: lu12i.w $r8, 15 ++; LA64-NEXT: ori $r8, $r8, 4095 ++; LA64-NEXT: sll.w $r9, $r8, $r4 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r5, $r5, $r8 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: and $r6, $r6, $r8 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB3_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB3_1 ++; LA64-NEXT: .LBB3_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r8, $r12, $r4 ++; LA64-NEXT: ext.w.h $r8, $r8 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire acquire ++ ret void ++} ++ ++define void @cmpxchg_i16_release_acquire(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_release_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: lu12i.w $r8, 15 ++; LA64-NEXT: ori $r8, $r8, 4095 ++; LA64-NEXT: sll.w $r9, $r8, $r4 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r5, $r5, $r8 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: and $r6, $r6, $r8 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; 
++; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $r11, $r7, 0
++; LA64-NEXT: and $r12, $r11, $r9
++; LA64-NEXT: bne $r12, $r5, .LBB4_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
++; LA64-NEXT: and $r11, $r11, $r10
++; LA64-NEXT: or $r11, $r11, $r6
++; LA64-NEXT: sc.w $r11, $r7, 0
++; LA64-NEXT: beq $r11, $zero, .LBB4_1
++; LA64-NEXT: .LBB4_3:
++; LA64-NEXT: dbar 20
++; LA64-NEXT: srl.w $r8, $r12, $r4
++; LA64-NEXT: ext.w.h $r8, $r8
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: jr $ra
++ %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val release acquire
++ ret void
++}
++
++;; Check that only the failure ordering is taken care of.
++define void @cmpxchg_i16_acquire_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwind {
++; LA64-LABEL: cmpxchg_i16_acquire_monotonic:
++; LA64: # %bb.0:
++; LA64-NEXT: slli.w $r6, $r6, 0
++; LA64-NEXT: slli.w $r5, $r5, 0
++; LA64-NEXT: addi.d $r7, $zero, -4
++; LA64-NEXT: and $r7, $r4, $r7
++; LA64-NEXT: andi $r4, $r4, 3
++; LA64-NEXT: slli.w $r4, $r4, 3
++; LA64-NEXT: lu12i.w $r8, 15
++; LA64-NEXT: ori $r8, $r8, 4095
++; LA64-NEXT: sll.w $r9, $r8, $r4
++; LA64-NEXT: nor $r10, $zero, $r9
++; LA64-NEXT: and $r5, $r5, $r8
++; LA64-NEXT: sll.w $r5, $r5, $r4
++; LA64-NEXT: and $r6, $r6, $r8
++; LA64-NEXT: sll.w $r6, $r6, $r4
++; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $r11, $r7, 0
++; LA64-NEXT: and $r12, $r11, $r9
++; LA64-NEXT: bne $r12, $r5, .LBB5_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
++; LA64-NEXT: and $r11, $r11, $r10
++; LA64-NEXT: or $r11, $r11, $r6
++; LA64-NEXT: sc.w $r11, $r7, 0
++; LA64-NEXT: beq $r11, $zero, .LBB5_1
++; LA64-NEXT: .LBB5_3:
++; LA64-NEXT: dbar 1792
++; LA64-NEXT: srl.w $r8, $r12, $r4
++; LA64-NEXT: ext.w.h $r8, $r8
++; LA64-NEXT: # %bb.4:
++; LA64-NEXT: jr $ra
++ %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire monotonic
++ ret void
++}
++
++define void @cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %cmp, i32 %val) nounwind {
++; LA64-LABEL: cmpxchg_i32_acquire_acquire:
++; LA64: # %bb.0:
++; LA64-NEXT: slli.w $r6, $r6, 0
++; LA64-NEXT: slli.w $r5, $r5, 0
++; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $r7, $r4, 0
++; LA64-NEXT: bne $r7, $r5, .LBB6_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
++; LA64-NEXT: move $r8, $r6
++; LA64-NEXT: sc.w $r8, $r4, 0
++; LA64-NEXT: beq $r8, $zero, .LBB6_1
++; LA64-NEXT: .LBB6_3:
++; LA64-NEXT: dbar 0
++; LA64-NEXT: jr $ra
++ %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire acquire
++ ret void
++}
++
++define void @cmpxchg_i32_release_acquire(i32* %ptr, i32 %cmp, i32 %val) nounwind {
++; LA64-LABEL: cmpxchg_i32_release_acquire:
++; LA64: # %bb.0:
++; LA64-NEXT: slli.w $r6, $r6, 0
++; LA64-NEXT: slli.w $r5, $r5, 0
++; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $r7, $r4, 0
++; LA64-NEXT: bne $r7, $r5, .LBB7_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1
++; LA64-NEXT: move $r8, $r6
++; LA64-NEXT: sc.w $r8, $r4, 0
++; LA64-NEXT: beq $r8, $zero, .LBB7_1
++; LA64-NEXT: .LBB7_3:
++; LA64-NEXT: dbar 0
++; LA64-NEXT: jr $ra
++ %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val release acquire
++ ret void
++}
++
++;; Check that only the failure ordering is taken care of.
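++;; Illustrative sketch (an editorial reading aid, not part of the original
++;; test; the variable names are hypothetical): at the C level this case
++;; corresponds to something like
++;;   int expected = cmp;
++;;   __atomic_compare_exchange_n(ptr, &expected, val, /*weak=*/0,
++;;                               __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
++;; which lowers to "cmpxchg ... acquire monotonic". The relaxed *failure*
++;; ordering is what selects the "dbar 1792" tail barrier checked below,
++;; in contrast to the "dbar 0" emitted for the acquire failure orderings
++;; in the functions above.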
++define void @cmpxchg_i32_acquire_monotonic(i32* %ptr, i32 %cmp, i32 %val) nounwind {
++; LA64-LABEL: cmpxchg_i32_acquire_monotonic:
++; LA64: # %bb.0:
++; LA64-NEXT: slli.w $r6, $r6, 0
++; LA64-NEXT: slli.w $r5, $r5, 0
++; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.w $r7, $r4, 0
++; LA64-NEXT: bne $r7, $r5, .LBB8_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1
++; LA64-NEXT: move $r8, $r6
++; LA64-NEXT: sc.w $r8, $r4, 0
++; LA64-NEXT: beq $r8, $zero, .LBB8_1
++; LA64-NEXT: .LBB8_3:
++; LA64-NEXT: dbar 1792
++; LA64-NEXT: jr $ra
++ %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire monotonic
++ ret void
++}
++
++define void @cmpxchg_i64_acquire_acquire(i64* %ptr, i64 %cmp, i64 %val) nounwind {
++; LA64-LABEL: cmpxchg_i64_acquire_acquire:
++; LA64: # %bb.0:
++; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.d $r7, $r4, 0
++; LA64-NEXT: bne $r7, $r5, .LBB9_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1
++; LA64-NEXT: move $r8, $r6
++; LA64-NEXT: sc.d $r8, $r4, 0
++; LA64-NEXT: beq $r8, $zero, .LBB9_1
++; LA64-NEXT: .LBB9_3:
++; LA64-NEXT: dbar 0
++; LA64-NEXT: jr $ra
++ %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire acquire
++ ret void
++}
++
++define void @cmpxchg_i64_release_acquire(i64* %ptr, i64 %cmp, i64 %val) nounwind {
++; LA64-LABEL: cmpxchg_i64_release_acquire:
++; LA64: # %bb.0:
++; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
++; LA64-NEXT: ll.d $r7, $r4, 0
++; LA64-NEXT: bne $r7, $r5, .LBB10_3
++; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1
++; LA64-NEXT: move $r8, $r6
++; LA64-NEXT: sc.d $r8, $r4, 0
++; LA64-NEXT: beq $r8, $zero, .LBB10_1
++; LA64-NEXT: .LBB10_3:
++; LA64-NEXT: dbar 0
++; LA64-NEXT: jr $ra
++ %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val release acquire
++ ret void
++}
++
++;; Check that only the failure ordering is taken care of.
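++;; The same rule holds for the 64-bit case below (again an editorial reading
++;; aid, not an original comment):
++;;   %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire monotonic
++;; keeps the ll.d/sc.d loop of the acquire-failure variants above, but ends
++;; in "dbar 1792" at .LBB11_3 instead of "dbar 0", because only the
++;; (monotonic) failure ordering decides the trailing barrier.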
++define void @cmpxchg_i64_acquire_monotonic(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_acquire_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB11_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB11_1 ++; LA64-NEXT: .LBB11_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire monotonic ++ ret void ++} ++ ++define i8 @cmpxchg_i8_acquire_acquire_reti8(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: ori $r4, $zero, 255 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: andi $r4, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r4, $r8 ++; LA64-NEXT: andi $r4, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB12_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB12_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB12_1 ++; LA64-NEXT: .LBB12_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r4, $r12, $r8 ++; LA64-NEXT: ext.w.b $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire acquire ++ %res = extractvalue { i8, i1 } %tmp, 0 ++ ret i8 %res ++} ++ ++define i16 @cmpxchg_i16_acquire_acquire_reti16(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti16: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: lu12i.w $r4, 15 ++; LA64-NEXT: ori $r4, $r4, 4095 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r5, $r5, $r4 ++; LA64-NEXT: sll.w $r5, $r5, $r8 ++; LA64-NEXT: and $r4, $r6, $r4 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB13_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB13_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB13_1 ++; LA64-NEXT: .LBB13_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r4, $r12, $r8 ++; LA64-NEXT: ext.w.h $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire acquire ++ %res = extractvalue { i16, i1 } %tmp, 0 ++ ret i16 %res ++} ++ ++define i32 @cmpxchg_i32_acquire_acquire_reti32(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r7, $r5, 0 ++; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r5, $r4, 0 ++; LA64-NEXT: bne $r5, $r7, .LBB14_3 ++; LA64-NEXT: # %bb.2: # in Loop: 
Header=BB14_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB14_1 ++; LA64-NEXT: .LBB14_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: move $r4, $r5 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire acquire ++ %res = extractvalue { i32, i1 } %tmp, 0 ++ ret i32 %res ++} ++ ++define i64 @cmpxchg_i64_acquire_acquire_reti64(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB15_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB15_1 ++; LA64-NEXT: .LBB15_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: move $r4, $r7 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire acquire ++ %res = extractvalue { i64, i1 } %tmp, 0 ++ ret i64 %res ++} ++ ++define i1 @cmpxchg_i8_acquire_acquire_reti1(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: ori $r4, $zero, 255 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: andi $r4, $r5, 255 ++; LA64-NEXT: sll.w $r11, $r4, $r8 ++; LA64-NEXT: andi $r4, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r12, $r7, 0 ++; LA64-NEXT: and $r13, $r12, $r9 ++; LA64-NEXT: bne $r13, $r11, .LBB16_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB16_1 Depth=1 ++; LA64-NEXT: and $r12, $r12, $r10 ++; LA64-NEXT: or $r12, $r12, $r6 ++; LA64-NEXT: sc.w $r12, $r7, 0 ++; LA64-NEXT: beq $r12, $zero, .LBB16_1 ++; LA64-NEXT: .LBB16_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r4, $r13, $r8 ++; LA64-NEXT: ext.w.b $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: ext.w.b $r5, $r5 ++; LA64-NEXT: xor $r4, $r4, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val acquire acquire ++ %res = extractvalue { i8, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i16_acquire_acquire_reti1(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: lu12i.w $r4, 15 ++; LA64-NEXT: ori $r4, $r4, 4095 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r11, $r5, $r4 ++; LA64-NEXT: sll.w $r11, $r11, $r8 ++; LA64-NEXT: and $r4, $r6, $r4 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r12, $r7, 0 ++; LA64-NEXT: and $r13, $r12, $r9 ++; LA64-NEXT: bne $r13, $r11, .LBB17_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB17_1 Depth=1 ++; LA64-NEXT: and $r12, $r12, $r10 ++; LA64-NEXT: or $r12, $r12, $r6 ++; LA64-NEXT: sc.w $r12, $r7, 0 ++; LA64-NEXT: beq $r12, $zero, .LBB17_1 ++; LA64-NEXT: .LBB17_3: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: srl.w $r4, $r13, $r8 ++; LA64-NEXT: ext.w.h $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: 
ext.w.h $r5, $r5 ++; LA64-NEXT: xor $r4, $r4, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val acquire acquire ++ %res = extractvalue { i16, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i32_acquire_acquire_reti1(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB18_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB18_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB18_1 ++; LA64-NEXT: .LBB18_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: xor $r4, $r7, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val acquire acquire ++ %res = extractvalue { i32, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i64_acquire_acquire_reti1(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB19_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB19_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB19_1 ++; LA64-NEXT: .LBB19_3: ++; LA64-NEXT: dbar 0 ++; LA64-NEXT: xor $r4, $r7, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val acquire acquire ++ %res = extractvalue { i64, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define void @cmpxchg_i8_monotonic_monotonic(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: ori $r8, $zero, 255 ++; LA64-NEXT: sll.w $r8, $r8, $r4 ++; LA64-NEXT: nor $r9, $zero, $r8 ++; LA64-NEXT: andi $r5, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: andi $r6, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r8 ++; LA64-NEXT: bne $r12, $r5, .LBB20_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB20_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r9 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB20_1 ++; LA64-NEXT: .LBB20_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r10, $r12, $r4 ++; LA64-NEXT: ext.w.b $r10, $r10 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i16_monotonic_monotonic(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r4, $r4, 3 ++; LA64-NEXT: lu12i.w $r8, 15 ++; LA64-NEXT: ori $r8, $r8, 4095 ++; LA64-NEXT: sll.w $r9, $r8, $r4 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r5, $r5, $r8 ++; LA64-NEXT: sll.w $r5, $r5, $r4 ++; LA64-NEXT: and $r6, $r6, $r8 ++; 
LA64-NEXT: sll.w $r6, $r6, $r4 ++; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB21_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB21_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB21_1 ++; LA64-NEXT: .LBB21_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r8, $r12, $r4 ++; LA64-NEXT: ext.w.h $r8, $r8 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i32_monotonic_monotonic(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB22_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB22_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB22_1 ++; LA64-NEXT: .LBB22_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic ++ ret void ++} ++ ++define void @cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB23_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB23_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB23_1 ++; LA64-NEXT: .LBB23_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: jr $ra ++ %res = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic ++ ret void ++} ++ ++define i8 @cmpxchg_i8_monotonic_monotonic_reti8(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti8: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: ori $r4, $zero, 255 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: andi $r4, $r5, 255 ++; LA64-NEXT: sll.w $r5, $r4, $r8 ++; LA64-NEXT: andi $r4, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB24_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB24_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB24_1 ++; LA64-NEXT: .LBB24_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r4, $r12, $r8 ++; LA64-NEXT: ext.w.b $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic ++ %res = extractvalue { i8, i1 } %tmp, 0 ++ ret i8 %res ++} ++ ++define i16 @cmpxchg_i16_monotonic_monotonic_reti16(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti16: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; 
LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: lu12i.w $r4, 15 ++; LA64-NEXT: ori $r4, $r4, 4095 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r5, $r5, $r4 ++; LA64-NEXT: sll.w $r5, $r5, $r8 ++; LA64-NEXT: and $r4, $r6, $r4 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r11, $r7, 0 ++; LA64-NEXT: and $r12, $r11, $r9 ++; LA64-NEXT: bne $r12, $r5, .LBB25_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB25_1 Depth=1 ++; LA64-NEXT: and $r11, $r11, $r10 ++; LA64-NEXT: or $r11, $r11, $r6 ++; LA64-NEXT: sc.w $r11, $r7, 0 ++; LA64-NEXT: beq $r11, $zero, .LBB25_1 ++; LA64-NEXT: .LBB25_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r4, $r12, $r8 ++; LA64-NEXT: ext.w.h $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic ++ %res = extractvalue { i16, i1 } %tmp, 0 ++ ret i16 %res ++} ++ ++define i32 @cmpxchg_i32_monotonic_monotonic_reti32(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti32: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r7, $r5, 0 ++; LA64-NEXT: .LBB26_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r5, $r4, 0 ++; LA64-NEXT: bne $r5, $r7, .LBB26_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB26_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB26_1 ++; LA64-NEXT: .LBB26_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: move $r4, $r5 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic ++ %res = extractvalue { i32, i1 } %tmp, 0 ++ ret i32 %res ++} ++ ++define i64 @cmpxchg_i64_monotonic_monotonic_reti64(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti64: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB27_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB27_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB27_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB27_1 ++; LA64-NEXT: .LBB27_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: move $r4, $r7 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic ++ %res = extractvalue { i64, i1 } %tmp, 0 ++ ret i64 %res ++} ++ ++define i1 @cmpxchg_i8_monotonic_monotonic_reti1(i8* %ptr, i8 %cmp, i8 %val) nounwind { ++; LA64-LABEL: cmpxchg_i8_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: ori $r4, $zero, 255 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: andi $r4, $r5, 255 ++; LA64-NEXT: sll.w $r11, $r4, $r8 ++; LA64-NEXT: andi $r4, $r6, 255 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r12, $r7, 0 ++; LA64-NEXT: and $r13, $r12, $r9 ++; LA64-NEXT: bne $r13, $r11, .LBB28_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB28_1 Depth=1 ++; LA64-NEXT: and $r12, $r12, $r10 ++; LA64-NEXT: or $r12, $r12, $r6 ++; LA64-NEXT: sc.w $r12, $r7, 0 ++; LA64-NEXT: beq $r12, $zero, .LBB28_1 ++; LA64-NEXT: .LBB28_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r4, $r13, $r8 ++; LA64-NEXT: ext.w.b $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; 
LA64-NEXT: ext.w.b $r5, $r5 ++; LA64-NEXT: xor $r4, $r4, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i8* %ptr, i8 %cmp, i8 %val monotonic monotonic ++ %res = extractvalue { i8, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i16_monotonic_monotonic_reti1(i16* %ptr, i16 %cmp, i16 %val) nounwind { ++; LA64-LABEL: cmpxchg_i16_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: addi.d $r7, $zero, -4 ++; LA64-NEXT: and $r7, $r4, $r7 ++; LA64-NEXT: andi $r4, $r4, 3 ++; LA64-NEXT: slli.w $r8, $r4, 3 ++; LA64-NEXT: lu12i.w $r4, 15 ++; LA64-NEXT: ori $r4, $r4, 4095 ++; LA64-NEXT: sll.w $r9, $r4, $r8 ++; LA64-NEXT: nor $r10, $zero, $r9 ++; LA64-NEXT: and $r11, $r5, $r4 ++; LA64-NEXT: sll.w $r11, $r11, $r8 ++; LA64-NEXT: and $r4, $r6, $r4 ++; LA64-NEXT: sll.w $r6, $r4, $r8 ++; LA64-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r12, $r7, 0 ++; LA64-NEXT: and $r13, $r12, $r9 ++; LA64-NEXT: bne $r13, $r11, .LBB29_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB29_1 Depth=1 ++; LA64-NEXT: and $r12, $r12, $r10 ++; LA64-NEXT: or $r12, $r12, $r6 ++; LA64-NEXT: sc.w $r12, $r7, 0 ++; LA64-NEXT: beq $r12, $zero, .LBB29_1 ++; LA64-NEXT: .LBB29_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: srl.w $r4, $r13, $r8 ++; LA64-NEXT: ext.w.h $r4, $r4 ++; LA64-NEXT: # %bb.4: ++; LA64-NEXT: ext.w.h $r5, $r5 ++; LA64-NEXT: xor $r4, $r4, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i16* %ptr, i16 %cmp, i16 %val monotonic monotonic ++ %res = extractvalue { i16, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i32_monotonic_monotonic_reti1(i32* %ptr, i32 %cmp, i32 %val) nounwind { ++; LA64-LABEL: cmpxchg_i32_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: slli.w $r6, $r6, 0 ++; LA64-NEXT: slli.w $r5, $r5, 0 ++; LA64-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.w $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB30_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB30_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.w $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB30_1 ++; LA64-NEXT: .LBB30_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: xor $r4, $r7, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i32* %ptr, i32 %cmp, i32 %val monotonic monotonic ++ %res = extractvalue { i32, i1 } %tmp, 1 ++ ret i1 %res ++} ++ ++define i1 @cmpxchg_i64_monotonic_monotonic_reti1(i64* %ptr, i64 %cmp, i64 %val) nounwind { ++; LA64-LABEL: cmpxchg_i64_monotonic_monotonic_reti1: ++; LA64: # %bb.0: ++; LA64-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1 ++; LA64-NEXT: ll.d $r7, $r4, 0 ++; LA64-NEXT: bne $r7, $r5, .LBB31_3 ++; LA64-NEXT: # %bb.2: # in Loop: Header=BB31_1 Depth=1 ++; LA64-NEXT: move $r8, $r6 ++; LA64-NEXT: sc.d $r8, $r4, 0 ++; LA64-NEXT: beq $r8, $zero, .LBB31_1 ++; LA64-NEXT: .LBB31_3: ++; LA64-NEXT: dbar 1792 ++; LA64-NEXT: xor $r4, $r7, $r5 ++; LA64-NEXT: sltui $r4, $r4, 1 ++; LA64-NEXT: jr $ra ++ %tmp = cmpxchg i64* %ptr, i64 %cmp, i64 %val monotonic monotonic ++ %res = extractvalue { i64, i1 } %tmp, 1 ++ ret i1 %res ++} +diff --git a/llvm/test/CodeGen/LoongArch/atomic_16_8.ll b/llvm/test/CodeGen/LoongArch/atomic_16_8.ll +index d5c3e0dad..ba454ab40 100644 +--- a/llvm/test/CodeGen/LoongArch/atomic_16_8.ll ++++ b/llvm/test/CodeGen/LoongArch/atomic_16_8.ll +@@ -14,7 +14,6 @@ define void @umax_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: 
sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -50,7 +49,6 @@ define void @umax_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -85,7 +83,6 @@ define void @max_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -121,7 +118,6 @@ define void @max_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -157,7 +153,6 @@ define void @umin_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -193,7 +188,6 @@ define void @umin_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -228,7 +222,6 @@ define void @min_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -264,7 +257,6 @@ define void @min_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r14, $r10, $r7 +@@ -300,7 +292,6 @@ define void @or_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: or $r11, $r10, $r5 +@@ -332,7 +323,6 @@ define void @or_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: or $r11, $r10, $r5 +@@ -364,7 +354,6 @@ define void @add_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: add.w $r11, $r10, $r5 +@@ -396,7 +385,6 @@ define void @add_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB11_1: # =>This Inner Loop 
Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: add.w $r11, $r10, $r5 +@@ -428,7 +416,6 @@ define void @sub_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: sub.w $r11, $r10, $r5 +@@ -460,7 +447,6 @@ define void @sub_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: sub.w $r11, $r10, $r5 +@@ -492,7 +478,6 @@ define void @and_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r10, $r5 +@@ -524,7 +509,6 @@ define void @and_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r10, $r5 +@@ -556,7 +540,6 @@ define void @nand_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r10, $r5 +@@ -589,7 +572,6 @@ define void @nand_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r10, $r5 +@@ -622,7 +604,6 @@ define void @xor_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: xor $r11, $r10, $r5 +@@ -654,7 +635,6 @@ define void @xor_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: xor $r11, $r10, $r5 +@@ -686,7 +666,6 @@ define void @xchg_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r5, $r7 +@@ -717,7 +696,6 @@ define void @xchg_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r7, $r7, $r4 + ; CHECK-NEXT: nor $r8, $zero, $r7 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r10, $r6, 0 + ; CHECK-NEXT: and $r11, $r5, $r7 +@@ -751,7 +729,6 @@ define void @cmpxchg_8(i8* %ptr) { + ; CHECK-NEXT: sll.w $r6, $r6, $r4 + ; CHECK-NEXT: andi $r5, $r5, 255 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r11, $r7, 0 + ; CHECK-NEXT: 
and $r12, $r11, $r8 +@@ -762,7 +739,7 @@ define void @cmpxchg_8(i8* %ptr) { + ; CHECK-NEXT: sc.w $r11, $r7, 0 + ; CHECK-NEXT: beq $r11, $zero, .LBB22_1 + ; CHECK-NEXT: .LBB22_3: +-; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: dbar 20 + ; CHECK-NEXT: srl.w $r10, $r12, $r4 + ; CHECK-NEXT: ext.w.b $r10, $r10 + ; CHECK-NEXT: # %bb.4: +@@ -788,7 +765,6 @@ define void @cmpxchg_16(i16* %ptr) { + ; CHECK-NEXT: sll.w $r6, $r6, $r4 + ; CHECK-NEXT: and $r5, $r5, $r8 + ; CHECK-NEXT: sll.w $r5, $r5, $r4 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r11, $r7, 0 + ; CHECK-NEXT: and $r12, $r11, $r9 +@@ -799,7 +775,7 @@ define void @cmpxchg_16(i16* %ptr) { + ; CHECK-NEXT: sc.w $r11, $r7, 0 + ; CHECK-NEXT: beq $r11, $zero, .LBB23_1 + ; CHECK-NEXT: .LBB23_3: +-; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: dbar 20 + ; CHECK-NEXT: srl.w $r8, $r12, $r4 + ; CHECK-NEXT: ext.w.h $r8, $r8 + ; CHECK-NEXT: # %bb.4: +diff --git a/llvm/test/CodeGen/LoongArch/atomic_64_32.ll b/llvm/test/CodeGen/LoongArch/atomic_64_32.ll +index ce400fd43..61a24cd5d 100644 +--- a/llvm/test/CodeGen/LoongArch/atomic_64_32.ll ++++ b/llvm/test/CodeGen/LoongArch/atomic_64_32.ll +@@ -1,12 +1,12 @@ + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc -mtriple=loongarch64 -o - %s | FileCheck %s ++; RUN: llc -mtriple=loongarch64 --verify-machineinstrs -o - %s | FileCheck %s + + + define void @umax_32(i32* %ptr) { + ; CHECK-LABEL: umax_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammax_db.wu $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -18,7 +18,7 @@ define void @umax_64(i64* %ptr) { + ; CHECK-LABEL: umax_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -30,7 +30,7 @@ define void @max_32(i32* %ptr) { + ; CHECK-LABEL: max_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammax_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -42,7 +42,7 @@ define void @max_64(i64* %ptr) { + ; CHECK-LABEL: max_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -55,7 +55,7 @@ define void @umin_32(i32* %ptr) { + ; CHECK-LABEL: umin_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammin_db.wu $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -67,7 +67,7 @@ define void @umin_64(i64* %ptr) { + ; CHECK-LABEL: umin_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -79,7 +79,7 @@ define void @min_32(i32* %ptr) { + ; CHECK-LABEL: min_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammin_db.w $r6, 
$r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -91,7 +91,7 @@ define void @min_64(i64* %ptr) { + ; CHECK-LABEL: min_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -104,7 +104,7 @@ define void @or_32(i32* %ptr) { + ; CHECK-LABEL: or_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amor_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -116,7 +116,7 @@ define void @or_64(i64* %ptr) { + ; CHECK-LABEL: or_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -129,7 +129,7 @@ define void @add_32(i32* %ptr) { + ; CHECK-LABEL: add_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amadd_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -141,7 +141,7 @@ define void @add_64(i64* %ptr) { + ; CHECK-LABEL: add_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -154,7 +154,7 @@ define void @sub_32(i32* %ptr) { + ; CHECK-LABEL: sub_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: sub.w $r7, $zero, $r5 + ; CHECK-NEXT: amadd_db.w $r6, $r7, $r4, 0 + ; CHECK-NEXT: # %bb.2: +@@ -167,7 +167,7 @@ define void @sub_64(i64* %ptr) { + ; CHECK-LABEL: sub_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: sub.d $r7, $zero, $r5 + ; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 + ; CHECK-NEXT: # %bb.2: +@@ -181,7 +181,7 @@ define void @and_32(i32* %ptr) { + ; CHECK-LABEL: and_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amand_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -193,7 +193,7 @@ define void @and_64(i64* %ptr) { + ; CHECK-LABEL: and_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -213,7 +213,6 @@ define void @nand_32(i32* %ptr) { + ; CHECK-NEXT: sc.w $r7, $r4, 0 + ; CHECK-NEXT: beq $r7, $zero, .LBB16_1 + ; CHECK-NEXT: # %bb.2: +-; CHECK-NEXT: dbar 1792 + ; CHECK-NEXT: jr $ra + %ret = atomicrmw nand i32* %ptr, i32 100 seq_cst + ret void +@@ -230,7 +229,6 @@ define void @nand_64(i64* %ptr) { + ; CHECK-NEXT: sc.d $r7, $r4, 0 + ; CHECK-NEXT: beq $r7, $zero, .LBB17_1 + ; CHECK-NEXT: # %bb.2: +-; CHECK-NEXT: dbar 1792 + ; CHECK-NEXT: jr $ra + %ret = atomicrmw nand i64* %ptr, i64 100 seq_cst + ret void +@@ -241,7 +239,7 @@ define void @xor_32(i32* %ptr) { + ; CHECK-LABEL: xor_32: + ; CHECK: # %bb.0: + ; 
CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amxor_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -253,7 +251,7 @@ define void @xor_64(i64* %ptr) { + ; CHECK-LABEL: xor_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -266,7 +264,7 @@ define void @xchg_32(i32* %ptr) { + ; CHECK-LABEL: xchg_32: + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 100 +-; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amswap_db.w $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -278,7 +276,7 @@ define void @xchg_64(i64* %ptr) { + ; CHECK-LABEL: xchg_64: + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 100 +-; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: # %bb.1: + ; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 + ; CHECK-NEXT: # %bb.2: + ; CHECK-NEXT: jr $ra +@@ -291,7 +289,6 @@ define void @cmpxchg_32(i32* %ptr) { + ; CHECK: # %bb.0: + ; CHECK-NEXT: ori $r5, $zero, 1 + ; CHECK-NEXT: ori $r6, $zero, 100 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.w $r7, $r4, 0 + ; CHECK-NEXT: bne $r7, $r6, .LBB22_3 +@@ -300,7 +297,7 @@ define void @cmpxchg_32(i32* %ptr) { + ; CHECK-NEXT: sc.w $r8, $r4, 0 + ; CHECK-NEXT: beq $r8, $zero, .LBB22_1 + ; CHECK-NEXT: .LBB22_3: +-; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: jr $ra + %ret = cmpxchg i32* %ptr, i32 100, i32 1 seq_cst seq_cst + ret void +@@ -311,7 +308,6 @@ define void @cmpxchg_64(i64* %ptr) { + ; CHECK: # %bb.0: + ; CHECK-NEXT: addi.d $r5, $zero, 1 + ; CHECK-NEXT: addi.d $r6, $zero, 100 +-; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 + ; CHECK-NEXT: ll.d $r7, $r4, 0 + ; CHECK-NEXT: bne $r7, $r6, .LBB23_3 +@@ -320,7 +316,7 @@ define void @cmpxchg_64(i64* %ptr) { + ; CHECK-NEXT: sc.d $r8, $r4, 0 + ; CHECK-NEXT: beq $r8, $zero, .LBB23_1 + ; CHECK-NEXT: .LBB23_3: +-; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: dbar 0 + ; CHECK-NEXT: jr $ra + %ret = cmpxchg i64* %ptr, i64 100, i64 1 seq_cst seq_cst + ret void +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-fp.ll +new file mode 100644 +index 000000000..7ef963cc6 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-fp.ll +@@ -0,0 +1,1776 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++define float @float_fadd_acquire(ptr %p) nounwind { ++; CHECK-LABEL: float_fadd_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB0_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB0_2 Depth 2 ++; CHECK-NEXT: fadd.s $f2, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB0_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB0_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB0_2 
Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB0_2 ++; CHECK-NEXT: .LBB0_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB0_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_acquire(ptr %p) nounwind { ++; CHECK-LABEL: float_fsub_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: .LBB1_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB1_2 Depth 2 ++; CHECK-NEXT: lu12i.w $r5, .LCPI1_0 ++; CHECK-NEXT: ori $r5, $r5, .LCPI1_0 ++; CHECK-NEXT: lu32i.d $r5, .LCPI1_0 ++; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI1_0 ++; CHECK-NEXT: fld.s $f1, $r5, 0 ++; CHECK-NEXT: fadd.s $f1, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f1 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB1_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB1_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB1_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB1_2 ++; CHECK-NEXT: .LBB1_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB1_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_acquire(ptr %p) nounwind { ++; CHECK-LABEL: float_fmin_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB2_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB2_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmin.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB2_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB2_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB2_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB2_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB2_2 ++; CHECK-NEXT: .LBB2_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB2_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB2_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_acquire(ptr %p) nounwind { ++; CHECK-LABEL: float_fmax_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB3_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB3_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmax.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: 
movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB3_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB3_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB3_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB3_2 ++; CHECK-NEXT: .LBB3_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB3_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB3_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_acquire(ptr %p) nounwind { ++; CHECK-LABEL: double_fadd_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 2 ++; CHECK-NEXT: .LBB4_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fadd.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB4_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, double 1.0 acquire, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_acquire(ptr %p) nounwind { ++; CHECK-LABEL: double_fsub_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: 
addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 2 ++; CHECK-NEXT: .LBB5_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: lu12i.w $r4, .LCPI5_0 ++; CHECK-NEXT: ori $r4, $r4, .LCPI5_0 ++; CHECK-NEXT: lu32i.d $r4, .LCPI5_0 ++; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI5_0 ++; CHECK-NEXT: fld.d $f1, $r4, 0 ++; CHECK-NEXT: fadd.d $f0, $f0, $f1 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB5_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, double 1.0 acquire, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_acquire(ptr %p) nounwind { ++; CHECK-LABEL: double_fmin_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 2 ++; CHECK-NEXT: .LBB6_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmin.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB6_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; 
CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, double 1.0 acquire, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_acquire(ptr %p) nounwind { ++; CHECK-LABEL: double_fmax_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 2 ++; CHECK-NEXT: .LBB7_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmax.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB7_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, double 1.0 acquire, align 4 ++ ret double %v ++} ++ ++define float @float_fadd_release(ptr %p) nounwind { ++; CHECK-LABEL: float_fadd_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB8_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB8_2 Depth 2 ++; CHECK-NEXT: fadd.s $f2, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB8_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB8_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB8_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB8_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB8_2 ++; CHECK-NEXT: .LBB8_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB8_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB8_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; 
CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_release(ptr %p) nounwind { ++; CHECK-LABEL: float_fsub_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: .LBB9_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB9_2 Depth 2 ++; CHECK-NEXT: lu12i.w $r5, .LCPI9_0 ++; CHECK-NEXT: ori $r5, $r5, .LCPI9_0 ++; CHECK-NEXT: lu32i.d $r5, .LCPI9_0 ++; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI9_0 ++; CHECK-NEXT: fld.s $f1, $r5, 0 ++; CHECK-NEXT: fadd.s $f1, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f1 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB9_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB9_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB9_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB9_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB9_2 ++; CHECK-NEXT: .LBB9_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB9_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB9_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_release(ptr %p) nounwind { ++; CHECK-LABEL: float_fmin_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB10_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB10_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmin.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB10_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB10_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB10_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB10_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB10_2 ++; CHECK-NEXT: .LBB10_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB10_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB10_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_release(ptr %p) nounwind { ++; CHECK-LABEL: float_fmax_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB11_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB11_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmax.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB11_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB11_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB11_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB11_2 Depth=2 
++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB11_2 ++; CHECK-NEXT: .LBB11_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB11_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB11_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, float 1.0 release, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_release(ptr %p) nounwind { ++; CHECK-LABEL: double_fadd_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 3 ++; CHECK-NEXT: addi.d $r28, $zero, 0 ++; CHECK-NEXT: .LBB12_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fadd.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB12_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_release(ptr %p) nounwind { ++; CHECK-LABEL: double_fsub_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 3 ++; 
CHECK-NEXT: addi.d $r28, $zero, 0 ++; CHECK-NEXT: .LBB13_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: lu12i.w $r4, .LCPI13_0 ++; CHECK-NEXT: ori $r4, $r4, .LCPI13_0 ++; CHECK-NEXT: lu32i.d $r4, .LCPI13_0 ++; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI13_0 ++; CHECK-NEXT: fld.d $f1, $r4, 0 ++; CHECK-NEXT: fadd.d $f0, $f0, $f1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB13_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_release(ptr %p) nounwind { ++; CHECK-LABEL: double_fmin_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 3 ++; CHECK-NEXT: addi.d $r28, $zero, 0 ++; CHECK-NEXT: .LBB14_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmin.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB14_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d 
$r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_release(ptr %p) nounwind { ++; CHECK-LABEL: double_fmax_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 3 ++; CHECK-NEXT: addi.d $r28, $zero, 0 ++; CHECK-NEXT: .LBB15_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmax.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB15_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, double 1.0 release, align 4 ++ ret double %v ++} ++ ++define float @float_fadd_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: float_fadd_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB16_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB16_2 Depth 2 ++; CHECK-NEXT: fadd.s $f2, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB16_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB16_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB16_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB16_2 Depth=2 ++; CHECK-NEXT: move $r8, 
$r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB16_2 ++; CHECK-NEXT: .LBB16_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB16_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB16_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: float_fsub_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: .LBB17_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB17_2 Depth 2 ++; CHECK-NEXT: lu12i.w $r5, .LCPI17_0 ++; CHECK-NEXT: ori $r5, $r5, .LCPI17_0 ++; CHECK-NEXT: lu32i.d $r5, .LCPI17_0 ++; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI17_0 ++; CHECK-NEXT: fld.s $f1, $r5, 0 ++; CHECK-NEXT: fadd.s $f1, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f1 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB17_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB17_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB17_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB17_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB17_2 ++; CHECK-NEXT: .LBB17_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB17_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: float_fmin_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB18_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB18_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmin.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB18_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB18_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB18_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB18_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB18_2 ++; CHECK-NEXT: .LBB18_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB18_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB18_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: float_fmax_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB19_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB19_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmax.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: 
movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB19_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB19_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB19_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB19_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB19_2 ++; CHECK-NEXT: .LBB19_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB19_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB19_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, float 1.0 acq_rel, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: double_fadd_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 4 ++; CHECK-NEXT: addi.d $r28, $zero, 2 ++; CHECK-NEXT: .LBB20_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fadd.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB20_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: double_fsub_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: 
st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 4 ++; CHECK-NEXT: addi.d $r28, $zero, 2 ++; CHECK-NEXT: .LBB21_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: lu12i.w $r4, .LCPI21_0 ++; CHECK-NEXT: ori $r4, $r4, .LCPI21_0 ++; CHECK-NEXT: lu32i.d $r4, .LCPI21_0 ++; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI21_0 ++; CHECK-NEXT: fld.d $f1, $r4, 0 ++; CHECK-NEXT: fadd.d $f0, $f0, $f1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB21_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: double_fmin_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 4 ++; CHECK-NEXT: addi.d $r28, $zero, 2 ++; CHECK-NEXT: .LBB22_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmin.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: 
beqz $r4, .LBB22_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_acq_rel(ptr %p) nounwind { ++; CHECK-LABEL: double_fmax_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r28, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 4 ++; CHECK-NEXT: addi.d $r28, $zero, 2 ++; CHECK-NEXT: .LBB23_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmax.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r28 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB23_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r28, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, double 1.0 acq_rel, align 4 ++ ret double %v ++} ++ ++define float @float_fadd_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: float_fadd_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB24_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB24_2 Depth 2 ++; CHECK-NEXT: fadd.s $f2, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: 
.LBB24_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB24_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB24_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB24_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB24_2 ++; CHECK-NEXT: .LBB24_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB24_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB24_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: float_fsub_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: .LBB25_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB25_2 Depth 2 ++; CHECK-NEXT: lu12i.w $r5, .LCPI25_0 ++; CHECK-NEXT: ori $r5, $r5, .LCPI25_0 ++; CHECK-NEXT: lu32i.d $r5, .LCPI25_0 ++; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI25_0 ++; CHECK-NEXT: fld.s $f1, $r5, 0 ++; CHECK-NEXT: fadd.s $f1, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f1 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB25_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB25_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB25_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB25_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB25_2 ++; CHECK-NEXT: .LBB25_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB25_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB25_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: float_fmin_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB26_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB26_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmin.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB26_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB26_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB26_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB26_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB26_2 ++; CHECK-NEXT: .LBB26_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB26_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB26_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: float_fmax_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: 
movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB27_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB27_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmax.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB27_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB27_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB27_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB27_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB27_2 ++; CHECK-NEXT: .LBB27_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB27_1 Depth=1 ++; CHECK-NEXT: dbar 0 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB27_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, float 1.0 seq_cst, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: double_fadd_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 5 ++; CHECK-NEXT: .LBB28_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fadd.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB28_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: double_fsub_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d 
$r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 5 ++; CHECK-NEXT: .LBB29_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: lu12i.w $r4, .LCPI29_0 ++; CHECK-NEXT: ori $r4, $r4, .LCPI29_0 ++; CHECK-NEXT: lu32i.d $r4, .LCPI29_0 ++; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI29_0 ++; CHECK-NEXT: fld.d $f1, $r4, 0 ++; CHECK-NEXT: fadd.d $f0, $f0, $f1 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB29_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: double_fmin_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 5 ++; CHECK-NEXT: .LBB30_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmin.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB30_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d 
$r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_seq_cst(ptr %p) nounwind { ++; CHECK-LABEL: double_fmax_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 5 ++; CHECK-NEXT: .LBB31_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmax.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB31_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, double 1.0 seq_cst, align 4 ++ ret double %v ++} ++ ++define float @float_fadd_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: float_fadd_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB32_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB32_2 Depth 2 ++; CHECK-NEXT: fadd.s $f2, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB32_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB32_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB32_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB32_2 Depth=2 
++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB32_2 ++; CHECK-NEXT: .LBB32_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB32_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB32_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define float @float_fsub_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: float_fsub_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: .LBB33_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB33_2 Depth 2 ++; CHECK-NEXT: lu12i.w $r5, .LCPI33_0 ++; CHECK-NEXT: ori $r5, $r5, .LCPI33_0 ++; CHECK-NEXT: lu32i.d $r5, .LCPI33_0 ++; CHECK-NEXT: lu52i.d $r5, $r5, .LCPI33_0 ++; CHECK-NEXT: fld.s $f1, $r5, 0 ++; CHECK-NEXT: fadd.s $f1, $f0, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f1 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB33_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB33_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB33_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB33_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB33_2 ++; CHECK-NEXT: .LBB33_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB33_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB33_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define float @float_fmin_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: float_fmin_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB34_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB34_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmin.s $f2, $f2, $f1 ++; CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB34_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB34_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB34_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB34_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB34_2 ++; CHECK-NEXT: .LBB34_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB34_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB34_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define float @float_fmax_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: float_fmax_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: fld.s $f0, $r4, 0 ++; CHECK-NEXT: addi.w $r5, $zero, 1 ++; CHECK-NEXT: movgr2fr.w $f1, $r5 ++; CHECK-NEXT: ffint.s.w $f1, $f1 ++; CHECK-NEXT: .LBB35_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Loop Header: Depth=1 ++; CHECK-NEXT: # Child Loop BB35_2 Depth 2 ++; CHECK-NEXT: fmax.s $f2, $f0, $f0 ++; CHECK-NEXT: fmax.s $f2, $f2, $f1 ++; 
CHECK-NEXT: movfr2gr.s $r5, $f2 ++; CHECK-NEXT: movfr2gr.s $r6, $f0 ++; CHECK-NEXT: .LBB35_2: # %atomicrmw.start ++; CHECK-NEXT: # Parent Loop BB35_1 Depth=1 ++; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ++; CHECK-NEXT: ll.w $r7, $r4, 0 ++; CHECK-NEXT: bne $r7, $r6, .LBB35_4 ++; CHECK-NEXT: # %bb.3: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB35_2 Depth=2 ++; CHECK-NEXT: move $r8, $r5 ++; CHECK-NEXT: sc.w $r8, $r4, 0 ++; CHECK-NEXT: beq $r8, $zero, .LBB35_2 ++; CHECK-NEXT: .LBB35_4: # %atomicrmw.start ++; CHECK-NEXT: # in Loop: Header=BB35_1 Depth=1 ++; CHECK-NEXT: dbar 1792 ++; CHECK-NEXT: movgr2fr.w $f0, $r7 ++; CHECK-NEXT: bne $r7, $r6, .LBB35_1 ++; CHECK-NEXT: # %bb.5: # %atomicrmw.end ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, float 1.0 monotonic, align 4 ++ ret float %v ++} ++ ++define double @double_fadd_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: double_fadd_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 0 ++; CHECK-NEXT: .LBB36_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fadd.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB36_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fadd ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} ++ ++define double @double_fsub_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: double_fsub_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -64 ++; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 16 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: 
fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 8 ++; CHECK-NEXT: addi.d $r26, $sp, 0 ++; CHECK-NEXT: addi.d $r27, $zero, 0 ++; CHECK-NEXT: .LBB37_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: lu12i.w $r4, .LCPI37_0 ++; CHECK-NEXT: ori $r4, $r4, .LCPI37_0 ++; CHECK-NEXT: lu32i.d $r4, .LCPI37_0 ++; CHECK-NEXT: lu52i.d $r4, $r4, .LCPI37_0 ++; CHECK-NEXT: fld.d $f1, $r4, 0 ++; CHECK-NEXT: fadd.d $f0, $f0, $f1 ++; CHECK-NEXT: fst.d $f0, $sp, 0 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 8 ++; CHECK-NEXT: beqz $r4, .LBB37_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 16 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 64 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fsub ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} ++ ++define double @double_fmin_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: double_fmin_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 0 ++; CHECK-NEXT: .LBB38_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmin.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB38_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 
8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmin ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} ++ ++define double @double_fmax_monotonic(ptr %p) nounwind { ++; CHECK-LABEL: double_fmax_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.d $sp, $sp, -80 ++; CHECK-NEXT: fst.d $f24, $sp, 72 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $ra, $sp, 64 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r27, $sp, 56 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r26, $sp, 48 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r25, $sp, 40 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r24, $sp, 32 # 8-byte Folded Spill ++; CHECK-NEXT: st.d $r23, $sp, 24 # 8-byte Folded Spill ++; CHECK-NEXT: move $r23, $r4 ++; CHECK-NEXT: fld.d $f0, $r4, 0 ++; CHECK-NEXT: addi.d $r4, $zero, 1 ++; CHECK-NEXT: movgr2fr.d $f1, $r4 ++; CHECK-NEXT: ffint.d.l $f24, $f1 ++; CHECK-NEXT: addi.d $r24, $zero, 8 ++; CHECK-NEXT: addi.d $r25, $sp, 16 ++; CHECK-NEXT: addi.d $r26, $sp, 8 ++; CHECK-NEXT: addi.d $r27, $zero, 0 ++; CHECK-NEXT: .LBB39_1: # %atomicrmw.start ++; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: fst.d $f0, $sp, 16 ++; CHECK-NEXT: fmax.d $f0, $f0, $f0 ++; CHECK-NEXT: fmax.d $f0, $f0, $f24 ++; CHECK-NEXT: fst.d $f0, $sp, 8 ++; CHECK-NEXT: move $r4, $r24 ++; CHECK-NEXT: move $r5, $r23 ++; CHECK-NEXT: move $r6, $r25 ++; CHECK-NEXT: move $r7, $r26 ++; CHECK-NEXT: move $r8, $r27 ++; CHECK-NEXT: move $r9, $r27 ++; CHECK-NEXT: lu12i.w $ra, __atomic_compare_exchange ++; CHECK-NEXT: ori $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu32i.d $ra, __atomic_compare_exchange ++; CHECK-NEXT: lu52i.d $ra, $ra, __atomic_compare_exchange ++; CHECK-NEXT: jirl $ra, $ra, 0 ++; CHECK-NEXT: fld.d $f0, $sp, 16 ++; CHECK-NEXT: beqz $r4, .LBB39_1 ++; CHECK-NEXT: # %bb.2: # %atomicrmw.end ++; CHECK-NEXT: ld.d $r23, $sp, 24 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r24, $sp, 32 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r25, $sp, 40 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r26, $sp, 48 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $r27, $sp, 56 # 8-byte Folded Reload ++; CHECK-NEXT: ld.d $ra, $sp, 64 # 8-byte Folded Reload ++; CHECK-NEXT: fld.d $f24, $sp, 72 # 8-byte Folded Reload ++; CHECK-NEXT: addi.d $sp, $sp, 80 ++; CHECK-NEXT: jr $ra ++ %v = atomicrmw fmax ptr %p, double 1.0 monotonic, align 4 ++ ret double %v ++} +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/atomicrmw-minmax.ll +new file mode 100644 +index 000000000..3e04fc53c +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw-minmax.ll +@@ -0,0 +1,1882 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s ++ ++define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu 
$r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB0_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB1_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB4_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw 
umin ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB5_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB8_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor 
$r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB9_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB12_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; 
CHECK-NEXT: beq $r12, $zero, .LBB13_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB16_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB17_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; 
CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB20_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB21_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ 
%1 = atomicrmw umin ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB24_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB25_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; 
CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB28_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB29_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, 
$zero, .LBB32_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB33_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB36_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; 
CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB37_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB40_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 
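++; Branchless signed-max select: $r13 is 1 when the loaded field is less than
++; the operand, so masknez keeps the old field when it is already the maximum,
++; maskeqz picks the operand otherwise, and the following or merges the two.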
++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB41_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB44_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB45_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i16 %b acq_rel 
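++; Observation: acq_rel expects the same sequence as every other ordering in
++; this file; the expansion leans on the full-barrier LL/SC loop (and on the
++; am*_db forms for i32/i64) instead of emitting ordering-specific barriers.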
++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB48_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB49_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; 
CHECK-LABEL: atomicrmw_umax_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB52_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB53_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, 
$zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB56_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB57_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz 
$r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB60_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB61_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB64_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i8 %b monotonic ++ ret i8 %1 
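++; The i8/i16 cases cannot use the am* instructions directly, so they expand
++; to a word-sized LL/SC loop that shifts a 0xff/0xffff mask into position and
++; rewrites only the addressed byte or halfword.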
++} ++ ++define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB65_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umax_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umax ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB68_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, 
$zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: sltu $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB69_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.wu $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_umin_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.du $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw umin ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB72_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: masknez $r11, $r14, $r13 ++; CHECK-NEXT: maskeqz $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; 
CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB73_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_max_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammax_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw max ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB76_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r14, $r10, $r8 ++; CHECK-NEXT: and $r5, $r5, $r8 ++; CHECK-NEXT: slt $r13, $r14, $r5 ++; CHECK-NEXT: maskeqz $r11, $r14, $r13 ++; CHECK-NEXT: masknez $r13, $r5, $r13 ++; CHECK-NEXT: or $r11, $r11, $r13 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB77_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i32_monotonic: ++; CHECK: # %bb.0: ++; 
CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_min_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: ammin_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw min ptr %a, i64 %b monotonic ++ ret i64 %1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/atomicrmw.ll +new file mode 100644 +index 000000000..4732ec0fa +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/atomicrmw.ll +@@ -0,0 +1,3652 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB0_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB1_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 0 acquire ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 
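++; sc.w stores the merged word only if nothing wrote the location since the
++; ll.w; on failure it leaves $r12 zero and the following beq retries the loop.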
++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB2_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 -1 acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB3_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB4_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 0 acquire ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB5_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 -1 acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.w 
$r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB8_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB9_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; 
CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB12_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB13_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.w $r7, $zero, $r6 ++; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.d $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB16_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra 
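++; All of the i8/i16 cases in this file use the same masked expansion seen
++; above: align the pointer down to 4 bytes (addi.d/and), turn the byte
++; offset into a bit shift (andi/slli.w), shift a 0xff or 0xffff mask into
++; place, merge the new bits into the word inside an ll.w/sc.w loop, then
++; shift the old value down and sign-extend it with ext.w.b/ext.w.h.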
++ %1 = atomicrmw nand ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB17_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r5, $r4, 0 ++; CHECK-NEXT: and $r7, $r5, $r6 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.w $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB18_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.d $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB19_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB20_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 
15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB21_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB24_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB25_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB25_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: 
atomicrmw_or_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i8_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB28_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB28_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i8 %b acquire ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i16_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB29_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB29_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i16 %b acquire ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i32_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i32 %b acquire ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_acquire(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i64_acquire: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i64 %b acquire ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, 
$r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB32_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB33_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 0 release ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB34_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 -1 release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB35_1 ++; CHECK-NEXT: # %bb.2: ++; 
CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB36_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 0 release ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB37_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 -1 release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; 
CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB40_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB41_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB44_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, 
$r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB45_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.w $r7, $zero, $r6 ++; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.d $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB48_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB49_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw 
nand ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r5, $r4, 0 ++; CHECK-NEXT: and $r7, $r5, $r6 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.w $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB50_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.d $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB51_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB52_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB53_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 
@atomicrmw_and_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB56_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB57_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i8_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB60_1: # 
=>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB60_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i8 %b release ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i16_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB61_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i16 %b release ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_release(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i32_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i32 %b release ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_release(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i64_release: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i64 %b release ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB64_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, 
$r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB65_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 0 acq_rel ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB66_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 -1 acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB67_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB68_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; 
CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 0 acq_rel ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB69_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 -1 acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB72_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB72_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB73_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; 
CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB73_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB76_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB76_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB77_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB77_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.w $r7, $zero, $r6 ++; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 
@atomicrmw_sub_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.d $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB80_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB81_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r5, $r4, 0 ++; CHECK-NEXT: and $r7, $r5, $r6 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.w $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB82_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.d $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB83_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i64 %b acq_rel ++ ret i64 
%1 ++} ++ ++define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB84_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB85_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB88_1 ++; CHECK-NEXT: # 
%bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB89_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i8_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB92_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB92_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i8 %b acq_rel ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i16_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB93_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 
++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB93_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i16 %b acq_rel ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_acq_rel(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i32_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i32 %b acq_rel ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_acq_rel(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i64_acq_rel: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i64 %b acq_rel ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB96_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB97_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 0 seq_cst ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; 
CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB98_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 -1 seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB99_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB100_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB100_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 0 seq_cst ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB101_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB101_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 -1 
seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB104_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB104_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB105_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB105_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 
@atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB108_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB108_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB109_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB109_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.w $r7, $zero, $r6 ++; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.d $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, 
$r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB112_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB113_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r5, $r4, 0 ++; CHECK-NEXT: and $r7, $r5, $r6 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.w $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB114_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.d $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB115_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB116_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) 
nounwind { ++; CHECK-LABEL: atomicrmw_and_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB117_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB120_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB121_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and 
$r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i8_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB124_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i8 %b seq_cst ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i16_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB125_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i16 %b seq_cst ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_seq_cst(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i32_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i32 %b seq_cst ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_seq_cst(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i64_seq_cst: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: 
# %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i64 %b seq_cst ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB128_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB129_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 0 monotonic ++ ret i8 %1 ++} ++ ++define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB130_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB130_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i8 -1 monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 
++; CHECK-NEXT: .LBB131_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB131_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_0_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ori $r5, $zero, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB132_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB132_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 0 monotonic ++ ret i16 %1 ++} ++ ++define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_minus_1_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: addi.w $r5, $zero, -1 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB133_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r5, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB133_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i16 -1 monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xchg_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xchg_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xchg_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amswap_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xchg ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; 
CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB136_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB136_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB137_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: add.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB137_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_add_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_add_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_add_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amadd_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw add ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB140_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB140_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i8 %b monotonic ++ 
ret i8 %1 ++} ++ ++define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB141_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: sub.w $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB141_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_sub_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.w $r7, $zero, $r6 ++; CHECK-NEXT: amadd_db.w $r5, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_sub_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_sub_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: sub.d $r7, $zero, $r5 ++; CHECK-NEXT: amadd_db.d $r6, $r7, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw sub ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB144_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB144_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB145_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; 
CHECK-NEXT: nor $r11, $zero, $r11 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB145_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: .LBB146_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r5, $r4, 0 ++; CHECK-NEXT: and $r7, $r5, $r6 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.w $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB146_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_nand_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_nand_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: .LBB147_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.d $r6, $r4, 0 ++; CHECK-NEXT: and $r7, $r6, $r5 ++; CHECK-NEXT: nor $r7, $zero, $r7 ++; CHECK-NEXT: sc.d $r7, $r4, 0 ++; CHECK-NEXT: beq $r7, $zero, .LBB147_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw nand ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB148_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB148_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB149_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: and $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB149_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = 
atomicrmw and ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_and_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_and_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_and_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amand_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw and ptr %a, i64 %b monotonic ++ ret i64 %1 ++} ++ ++define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB152_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: or $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB153_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_or_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_or_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_or_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw or ptr %a, i64 %b monotonic ++ 
ret i64 %1 ++} ++ ++define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i8_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: ori $r4, $zero, 255 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB156_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB156_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.b $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i8 %b monotonic ++ ret i8 %1 ++} ++ ++define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i16_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r5, $r5, 0 ++; CHECK-NEXT: addi.d $r6, $zero, -4 ++; CHECK-NEXT: and $r6, $r4, $r6 ++; CHECK-NEXT: andi $r4, $r4, 3 ++; CHECK-NEXT: slli.w $r7, $r4, 3 ++; CHECK-NEXT: lu12i.w $r4, 15 ++; CHECK-NEXT: ori $r4, $r4, 4095 ++; CHECK-NEXT: sll.w $r8, $r4, $r7 ++; CHECK-NEXT: nor $r9, $zero, $r8 ++; CHECK-NEXT: sll.w $r5, $r5, $r7 ++; CHECK-NEXT: .LBB157_1: # =>This Inner Loop Header: Depth=1 ++; CHECK-NEXT: ll.w $r10, $r6, 0 ++; CHECK-NEXT: xor $r11, $r10, $r5 ++; CHECK-NEXT: and $r11, $r11, $r8 ++; CHECK-NEXT: and $r12, $r10, $r9 ++; CHECK-NEXT: or $r12, $r12, $r11 ++; CHECK-NEXT: sc.w $r12, $r6, 0 ++; CHECK-NEXT: beq $r12, $zero, .LBB157_1 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: and $r4, $r10, $r8 ++; CHECK-NEXT: srl.w $r4, $r4, $r7 ++; CHECK-NEXT: ext.w.h $r4, $r4 ++; CHECK-NEXT: # %bb.3: ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i16 %b monotonic ++ ret i16 %1 ++} ++ ++define i32 @atomicrmw_xor_i32_monotonic(ptr %a, i32 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i32_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: slli.w $r6, $r5, 0 ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.w $r5, $r6, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r5 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i32 %b monotonic ++ ret i32 %1 ++} ++ ++define i64 @atomicrmw_xor_i64_monotonic(ptr %a, i64 %b) nounwind { ++; CHECK-LABEL: atomicrmw_xor_i64_monotonic: ++; CHECK: # %bb.0: ++; CHECK-NEXT: # %bb.1: ++; CHECK-NEXT: amxor_db.d $r6, $r5, $r4, 0 ++; CHECK-NEXT: # %bb.2: ++; CHECK-NEXT: move $r4, $r6 ++; CHECK-NEXT: jr $ra ++ %1 = atomicrmw xor ptr %a, i64 %b monotonic ++ ret i64 %1 ++} +diff --git a/llvm/test/CodeGen/LoongArch/fence-singlethread.ll b/llvm/test/CodeGen/LoongArch/fence-singlethread.ll +new file mode 100644 +index 000000000..f4d1a3965 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/fence-singlethread.ll +@@ -0,0 +1,11 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++define void @fence_singlethread() { ++; CHECK-LABEL: fence_singlethread: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ fence syncscope("singlethread") seq_cst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/fence.ll b/llvm/test/CodeGen/LoongArch/fence.ll +new file mode 100644 +index 000000000..05e2639ca +--- 
/dev/null ++++ b/llvm/test/CodeGen/LoongArch/fence.ll +@@ -0,0 +1,38 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 ++ ++define void @fence_acquire() nounwind { ++; LA64-LABEL: fence_acquire: ++; LA64: # %bb.0: ++; LA64-NEXT: dbar 20 ++; LA64-NEXT: jr $ra ++ fence acquire ++ ret void ++} ++ ++define void @fence_release() nounwind { ++; LA64-LABEL: fence_release: ++; LA64: # %bb.0: ++; LA64-NEXT: dbar 18 ++; LA64-NEXT: jr $ra ++ fence release ++ ret void ++} ++ ++define void @fence_acq_rel() nounwind { ++; LA64-LABEL: fence_acq_rel: ++; LA64: # %bb.0: ++; LA64-NEXT: dbar 16 ++; LA64-NEXT: jr $ra ++ fence acq_rel ++ ret void ++} ++ ++define void @fence_seq_cst() nounwind { ++; LA64-LABEL: fence_seq_cst: ++; LA64: # %bb.0: ++; LA64-NEXT: dbar 16 ++; LA64-NEXT: jr $ra ++ fence seq_cst ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll b/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll +index 51fa34606..6dbaa49b1 100644 +--- a/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll ++++ b/llvm/test/CodeGen/LoongArch/lasx/insert-lasx.ll +@@ -1,6 +1,18 @@ + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + ; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s + ++define <32 x i8> @lasxB(<32 x i8> %d, <16 x i8> %s1) { ++; CHECK-LABEL: lasxB: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 48 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <16 x i8> %s1, <16 x i8> poison, <32 x i32> ++ %r2 = shufflevector <32 x i8> %r1, <32 x i8> %d, <32 x i32> ++ ret <32 x i8> %r2 ++} ++ + define <16 x i16> @lasxH(<16 x i16> %d, <8 x i16> %s1) { + ; CHECK-LABEL: lasxH: + ; CHECK: # %bb.0: # %entry +@@ -36,3 +48,51 @@ entry: + %r2 = shufflevector <4 x i64> %r1, <4 x i64> %d, <4 x i32> + ret <4 x i64> %r2 + } ++ ++define <32 x i8> @lasxB_Hi(<32 x i8> %d, <16 x i8> %s1) { ++; CHECK-LABEL: lasxB_Hi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <16 x i8> %s1, <16 x i8> poison, <32 x i32> ++ %r2 = shufflevector <32 x i8> %r1, <32 x i8> %d, <32 x i32> ++ ret <32 x i8> %r2 ++} ++ ++define <16 x i16> @lasxH_Hi(<16 x i16> %d, <8 x i16> %s1) { ++; CHECK-LABEL: lasxH_Hi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <8 x i16> %s1, <8 x i16> poison, <16 x i32> ++ %r2 = shufflevector <16 x i16> %r1, <16 x i16> %d, <16 x i32> ++ ret <16 x i16> %r2 ++} ++ ++define <8 x i32> @lasxW_Hi(<8 x i32> %d, <4 x i32> %s1) { ++; CHECK-LABEL: lasxW_Hi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <4 x i32> %s1, <4 x i32> poison, <8 x i32> ++ %r2 = shufflevector <8 x i32> %r1, <8 x i32> %d, <8 x i32> ++ ret <8 x i32> %r2 ++} ++ ++define <4 x i64> @lasxD_Hi(<4 x i64> %d, <2 x i64> %s1) { ++; CHECK-LABEL: lasxD_Hi: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: # kill: def $vr1 killed $vr1 def $xr1 ++; CHECK-NEXT: xvpermi.q $xr0, $xr1, 32 ++; CHECK-NEXT: jr $ra ++entry: ++ %r1 = shufflevector <2 x i64> %s1, <2 x i64> poison, <4 x i32> ++ %r2 = shufflevector <4 x i64> %r1, <4 x i64> %d, <4 x i32> ++ ret <4 x 
i64> %r2 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/lasxvavg.ll b/llvm/test/CodeGen/LoongArch/lasx/lasxvavg.ll +new file mode 100644 +index 000000000..a0f3e6ebe +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/lasxvavg.ll +@@ -0,0 +1,106 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lasx < %s | FileCheck %s ++ ++define <32 x i8> @lsxavgr_v32i8(<32 x i8> noundef %0, <32 x i8> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v32i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <32 x i8> %0, ++ %4 = add <32 x i8> %3, %1 ++ %5 = sdiv <32 x i8> %4, ++ ret <32 x i8> %5 ++} ++ ++define <16 x i16> @lsxavgr_v16i16(<16 x i16> noundef %0, <16 x i16> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v16i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <16 x i16> %0, ++ %4 = add <16 x i16> %3, %1 ++ %5 = sdiv <16 x i16> %4, ++ ret <16 x i16> %5 ++} ++ ++define <8 x i32> @lsxavgr_v8i32(<8 x i32> noundef %0, <8 x i32> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v8i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <8 x i32> %0, ++ %4 = add <8 x i32> %3, %1 ++ %5 = sdiv <8 x i32> %4, ++ ret <8 x i32> %5 ++} ++ ++define <4 x i64> @lsxavgr_v4i64(<4 x i64> noundef %0, <4 x i64> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v4i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <4 x i64> %0, ++ %4 = add <4 x i64> %3, %1 ++ %5 = sdiv <4 x i64> %4, ++ ret <4 x i64> %5 ++} ++ ++define <32 x i8> @lsxavgr_v32u8(<32 x i8> noundef %0, <32 x i8> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v32u8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <32 x i8> %0, ++ %4 = add <32 x i8> %3, %1 ++ %5 = lshr <32 x i8> %4, ++ ret <32 x i8> %5 ++} ++ ++define <16 x i16> @lsxavgr_v16u16(<16 x i16> noundef %0, <16 x i16> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v16u16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <16 x i16> %0, ++ %4 = add <16 x i16> %3, %1 ++ %5 = lshr <16 x i16> %4, ++ ret <16 x i16> %5 ++} ++ ++define <8 x i32> @lsxavgr_v8u32(<8 x i32> noundef %0, <8 x i32> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v8u32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <8 x i32> %0, ++ %4 = add <8 x i32> %3, %1 ++ %5 = lshr <8 x i32> %4, ++ ret <8 x i32> %5 ++} ++ ++define <4 x i64> @lsxavgr_v4u64(<4 x i64> noundef %0, <4 x i64> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxavgr_v4u64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 ++; CHECK-NEXT: xvldi $xr1, 1 ++; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <4 x i64> %0, ++ %4 = add <4 x i64> %3, %1 
++ %5 = lshr <4 x i64> %4, ++ ret <4 x i64> %5 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lasx/v32i8-bswap.ll b/llvm/test/CodeGen/LoongArch/lasx/v32i8-bswap.ll +new file mode 100644 +index 000000000..1453dabaa +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lasx/v32i8-bswap.ll +@@ -0,0 +1,26 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s ++ ++define void @vshf_v32i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: vshf_v32i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: xvld $xr0, $r5, 0 ++; CHECK-NEXT: xvpickve2gr.d $r5, $xr0, 3 ++; CHECK-NEXT: xvpickve2gr.d $r6, $xr0, 2 ++; CHECK-NEXT: xvpickve2gr.d $r7, $xr0, 0 ++; CHECK-NEXT: xvpickve2gr.d $r8, $xr0, 1 ++; CHECK-NEXT: revb.d $r8, $r8 ++; CHECK-NEXT: revb.d $r7, $r7 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r7, 0 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r8, 1 ++; CHECK-NEXT: revb.d $r6, $r6 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r6, 2 ++; CHECK-NEXT: revb.d $r5, $r5 ++; CHECK-NEXT: xvinsgr2vr.d $xr0, $r5, 3 ++; CHECK-NEXT: xvst $xr0, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %v1 = load <32 x i8>, ptr %a0 ++ %v2 = shufflevector <32 x i8> %v1, <32 x i8> undef, <32 x i32> ++ store <32 x i8> %v2, ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/load-store-atomic.ll +new file mode 100644 +index 000000000..414d4078b +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/load-store-atomic.ll +@@ -0,0 +1,310 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s ++ ++define i8 @load_acquire_i8(ptr %ptr) { ++; CHECK-LABEL: load_acquire_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.b $r4, $r4, 0 ++; CHECK-NEXT: dbar 20 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i8, ptr %ptr acquire, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_acquire_i16(ptr %ptr) { ++; CHECK-LABEL: load_acquire_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.h $r4, $r4, 0 ++; CHECK-NEXT: dbar 20 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i16, ptr %ptr acquire, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_acquire_i32(ptr %ptr) { ++; CHECK-LABEL: load_acquire_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $r4, $r4, 0 ++; CHECK-NEXT: dbar 20 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i32, ptr %ptr acquire, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_acquire_i64(ptr %ptr) { ++; CHECK-LABEL: load_acquire_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $r4, $r4, 0 ++; CHECK-NEXT: dbar 20 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i64, ptr %ptr acquire, align 8 ++ ret i64 %val ++} ++ ++define i8 @load_unordered_i8(ptr %ptr) { ++; CHECK-LABEL: load_unordered_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.b $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i8, ptr %ptr unordered, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_unordered_i16(ptr %ptr) { ++; CHECK-LABEL: load_unordered_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.h $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i16, ptr %ptr unordered, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_unordered_i32(ptr %ptr) { ++; CHECK-LABEL: load_unordered_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i32, ptr %ptr unordered, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_unordered_i64(ptr %ptr) { ++; CHECK-LABEL: load_unordered_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic 
i64, ptr %ptr unordered, align 8 ++ ret i64 %val ++} ++ ++define i8 @load_monotonic_i8(ptr %ptr) { ++; CHECK-LABEL: load_monotonic_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.b $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i8, ptr %ptr monotonic, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_monotonic_i16(ptr %ptr) { ++; CHECK-LABEL: load_monotonic_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.h $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i16, ptr %ptr monotonic, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_monotonic_i32(ptr %ptr) { ++; CHECK-LABEL: load_monotonic_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i32, ptr %ptr monotonic, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_monotonic_i64(ptr %ptr) { ++; CHECK-LABEL: load_monotonic_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $r4, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i64, ptr %ptr monotonic, align 8 ++ ret i64 %val ++} ++ ++define i8 @load_seq_cst_i8(ptr %ptr) { ++; CHECK-LABEL: load_seq_cst_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.b $r4, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i8, ptr %ptr seq_cst, align 1 ++ ret i8 %val ++} ++ ++define i16 @load_seq_cst_i16(ptr %ptr) { ++; CHECK-LABEL: load_seq_cst_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.h $r4, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i16, ptr %ptr seq_cst, align 2 ++ ret i16 %val ++} ++ ++define i32 @load_seq_cst_i32(ptr %ptr) { ++; CHECK-LABEL: load_seq_cst_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.w $r4, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i32, ptr %ptr seq_cst, align 4 ++ ret i32 %val ++} ++ ++define i64 @load_seq_cst_i64(ptr %ptr) { ++; CHECK-LABEL: load_seq_cst_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: ld.d $r4, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ %val = load atomic i64, ptr %ptr seq_cst, align 8 ++ ret i64 %val ++} ++ ++define void @store_release_i8(ptr %ptr, i8 signext %v) { ++; CHECK-LABEL: store_release_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 18 ++; CHECK-NEXT: st.b $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i8 %v, ptr %ptr release, align 1 ++ ret void ++} ++ ++define void @store_release_i16(ptr %ptr, i16 signext %v) { ++; CHECK-LABEL: store_release_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 18 ++; CHECK-NEXT: st.h $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i16 %v, ptr %ptr release, align 2 ++ ret void ++} ++ ++define void @store_release_i32(ptr %ptr, i32 signext %v) { ++; CHECK-LABEL: store_release_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 18 ++; CHECK-NEXT: st.w $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i32 %v, ptr %ptr release, align 4 ++ ret void ++} ++ ++define void @store_release_i64(ptr %ptr, i64 %v) { ++; CHECK-LABEL: store_release_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 18 ++; CHECK-NEXT: st.d $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i64 %v, ptr %ptr release, align 8 ++ ret void ++} ++ ++define void @store_unordered_i8(ptr %ptr, i8 signext %v) { ++; CHECK-LABEL: store_unordered_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.b $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i8 %v, ptr %ptr unordered, align 1 ++ ret void ++} ++ ++define void @store_unordered_i16(ptr %ptr, i16 signext %v) { ++; CHECK-LABEL: store_unordered_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.h $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i16 %v, ptr %ptr unordered, align 2 ++ ret void ++} ++ 
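++; A non-normative summary of the lowering this file exercises, read off the ++; CHECK lines above and below rather than asserted anew: acquire loads are ++; followed by "dbar 20" and seq_cst loads by "dbar 16"; release stores are ++; preceded by "dbar 18", while seq_cst stores are bracketed by "dbar 16" on ++; both sides; unordered and monotonic accesses lower to plain ld.*/st.* with ++; no barrier at all. ++ 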
++define void @store_unordered_i32(ptr %ptr, i32 signext %v) { ++; CHECK-LABEL: store_unordered_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.w $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i32 %v, ptr %ptr unordered, align 4 ++ ret void ++} ++ ++define void @store_unordered_i64(ptr %ptr, i64 %v) { ++; CHECK-LABEL: store_unordered_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.d $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i64 %v, ptr %ptr unordered, align 8 ++ ret void ++} ++ ++define void @store_monotonic_i8(ptr %ptr, i8 signext %v) { ++; CHECK-LABEL: store_monotonic_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.b $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i8 %v, ptr %ptr monotonic, align 1 ++ ret void ++} ++ ++define void @store_monotonic_i16(ptr %ptr, i16 signext %v) { ++; CHECK-LABEL: store_monotonic_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.h $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i16 %v, ptr %ptr monotonic, align 2 ++ ret void ++} ++ ++define void @store_monotonic_i32(ptr %ptr, i32 signext %v) { ++; CHECK-LABEL: store_monotonic_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.w $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i32 %v, ptr %ptr monotonic, align 4 ++ ret void ++} ++ ++define void @store_monotonic_i64(ptr %ptr, i64 %v) { ++; CHECK-LABEL: store_monotonic_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: st.d $r5, $r4, 0 ++; CHECK-NEXT: jr $ra ++ store atomic i64 %v, ptr %ptr monotonic, align 8 ++ ret void ++} ++ ++define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { ++; CHECK-LABEL: store_seq_cst_i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: st.b $r5, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ store atomic i8 %v, ptr %ptr seq_cst, align 1 ++ ret void ++} ++ ++define void @store_seq_cst_i16(ptr %ptr, i16 signext %v) { ++; CHECK-LABEL: store_seq_cst_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: st.h $r5, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ store atomic i16 %v, ptr %ptr seq_cst, align 2 ++ ret void ++} ++ ++define void @store_seq_cst_i32(ptr %ptr, i32 signext %v) { ++; CHECK-LABEL: store_seq_cst_i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: st.w $r5, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ store atomic i32 %v, ptr %ptr seq_cst, align 4 ++ ret void ++} ++ ++define void @store_seq_cst_i64(ptr %ptr, i64 %v) { ++; CHECK-LABEL: store_seq_cst_i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: st.d $r5, $r4, 0 ++; CHECK-NEXT: dbar 16 ++; CHECK-NEXT: jr $ra ++ store atomic i64 %v, ptr %ptr seq_cst, align 8 ++ ret void ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/lsxvavg.ll b/llvm/test/CodeGen/LoongArch/lsx/lsxvavg.ll +new file mode 100644 +index 000000000..8441ed1b0 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/lsxvavg.ll +@@ -0,0 +1,106 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -march=loongarch64 -mattr=+lsx < %s | FileCheck %s ++ ++define <16 x i8> @lsxvavg_v16i8(<16 x i8> noundef %0, <16 x i8> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v16i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <16 x i8> %0, ++ %4 = add <16 x i8> %3, %1 ++ %5 = sdiv <16 x i8> %4, ++ ret <16 x i8> %5 ++} ++ ++define <8 x i16> @lsxvavg_v8i16(<8 x i16> noundef %0, <8 x i16> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v8i16: ++; CHECK: # %bb.0: ++; 
CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <8 x i16> %0, ++ %4 = add <8 x i16> %3, %1 ++ %5 = sdiv <8 x i16> %4, ++ ret <8 x i16> %5 ++} ++ ++define <4 x i32> @lsxvavg_v4i32(<4 x i32> noundef %0, <4 x i32> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v4i32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <4 x i32> %0, ++ %4 = add <4 x i32> %3, %1 ++ %5 = sdiv <4 x i32> %4, ++ ret <4 x i32> %5 ++} ++ ++define <2 x i64> @lsxvavg_v2i64(<2 x i64> noundef %0, <2 x i64> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v2i64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <2 x i64> %0, ++ %4 = add <2 x i64> %3, %1 ++ %5 = sdiv <2 x i64> %4, ++ ret <2 x i64> %5 ++} ++ ++define <16 x i8> @lsxvavg_v16u8(<16 x i8> noundef %0, <16 x i8> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v16u8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <16 x i8> %0, ++ %4 = add <16 x i8> %3, %1 ++ %5 = lshr <16 x i8> %4, ++ ret <16 x i8> %5 ++} ++ ++define <8 x i16> @lsxvavg_v8u16(<8 x i16> noundef %0, <8 x i16> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v8u16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <8 x i16> %0, ++ %4 = add <8 x i16> %3, %1 ++ %5 = lshr <8 x i16> %4, ++ ret <8 x i16> %5 ++} ++ ++define <4 x i32> @lsxvavg_v4u32(<4 x i32> noundef %0, <4 x i32> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v4u32: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <4 x i32> %0, ++ %4 = add <4 x i32> %3, %1 ++ %5 = lshr <4 x i32> %4, ++ ret <4 x i32> %5 ++} ++ ++define <2 x i64> @lsxvavg_v2u64(<2 x i64> noundef %0, <2 x i64> noundef %1) local_unnamed_addr #0 { ++; CHECK-LABEL: lsxvavg_v2u64: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 ++; CHECK-NEXT: vldi $vr1, 1 ++; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1 ++; CHECK-NEXT: jr $ra ++ %3 = add <2 x i64> %0, ++ %4 = add <2 x i64> %3, %1 ++ %5 = lshr <2 x i64> %4, ++ ret <2 x i64> %5 ++} +diff --git a/llvm/test/CodeGen/LoongArch/lsx/v16i8-bswap.ll b/llvm/test/CodeGen/LoongArch/lsx/v16i8-bswap.ll +new file mode 100644 +index 000000000..25e4eb072 +--- /dev/null ++++ b/llvm/test/CodeGen/LoongArch/lsx/v16i8-bswap.ll +@@ -0,0 +1,20 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s ++ ++define void @vshf_v16i8(ptr %res, ptr %a0) nounwind { ++; CHECK-LABEL: vshf_v16i8: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vld $vr0, $r5, 0 ++; CHECK-NEXT: vpickve2gr.d $r5, $vr0, 0 ++; CHECK-NEXT: vpickve2gr.d $r6, $vr0, 1 ++; CHECK-NEXT: revb.d $r6, $r6 ++; CHECK-NEXT: revb.d $r5, $r5 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $r5, 0 ++; CHECK-NEXT: vinsgr2vr.d $vr0, $r6, 1 ++; CHECK-NEXT: vst $vr0, $r4, 0 ++; CHECK-NEXT: jr $ra ++ %v1 = load <16 x i8>, ptr %a0 ++ %v2 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> ++ store <16 x i8> %v2, 
ptr %res ++ ret void ++} +diff --git a/llvm/test/CodeGen/Mips/atomic-fix-loongson3-llsc.ll b/llvm/test/CodeGen/Mips/atomic-fix-loongson3-llsc.ll +new file mode 100644 +index 000000000..a360bddb7 +--- /dev/null ++++ b/llvm/test/CodeGen/Mips/atomic-fix-loongson3-llsc.ll +@@ -0,0 +1,7548 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32O0 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32R2 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32R6 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32R6O0 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS4 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64R2 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64R6 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64R6O0 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MM32 ++ ++; We want to verify that the produced code is well formed at all optimization levels; the rest of the tests ensure correctness. 
++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mipsel-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 ++ ++; Keep one big-endian check so that we don't reduce testing, but don't add more ++; since endianness doesn't affect the body of the atomic operations. ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS32EB ++ ++@x = common global i32 0, align 4 ++ ++define i32 @AtomicLoadAdd32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadAdd32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB0_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: addu $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB0_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadAdd32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB0_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: addu $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB0_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadAdd32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB0_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: addu $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB0_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadAdd32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB0_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: addu $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB0_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadAdd32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB0_1: # 
%entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: addu $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB0_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadAdd32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB0_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: addu $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB0_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAdd32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB0_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: addu $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB0_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAdd32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB0_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: addu $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB0_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAdd32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB0_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: addu $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB0_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAdd32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB0_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: addu $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadAdd32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, 
$25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB0_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: addu16 $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB0_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAdd32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB0_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: addu $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB0_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAdd32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB0_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: addu $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB0_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAdd32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB0_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: addu $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB0_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadAdd32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB0_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: addu $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB0_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i32* @x, i32 %incr monotonic ++ ret i32 %0 ++ ++} ++ ++define i32 @AtomicLoadSub32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadSub32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB1_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: subu $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB1_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadSub32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB1_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: subu $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB1_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; 
MIPS32R2-LABEL: AtomicLoadSub32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB1_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: subu $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB1_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadSub32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB1_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: subu $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB1_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadSub32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB1_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: subu $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB1_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadSub32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB1_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: subu $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB1_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadSub32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB1_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: subu $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB1_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadSub32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB1_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: subu $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB1_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadSub32: ++; MIPS64R6: # %bb.0: # %entry ++; 
MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB1_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: subu $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB1_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadSub32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB1_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: subu $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadSub32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB1_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: subu16 $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB1_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadSub32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB1_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: subu $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB1_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadSub32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB1_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: subu $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB1_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadSub32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB1_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: subu $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB1_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadSub32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB1_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: subu $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: 
beqz $3, $BB1_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw sub i32* @x, i32 %incr monotonic ++ ret i32 %0 ++ ++} ++ ++define i32 @AtomicLoadXor32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadXor32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB2_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: xor $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB2_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadXor32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB2_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: xor $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB2_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadXor32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB2_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: xor $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB2_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadXor32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB2_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: xor $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB2_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadXor32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB2_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: xor $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB2_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadXor32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB2_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: xor $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB2_1 ++; MIPS4-NEXT: 
nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadXor32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB2_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: xor $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB2_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadXor32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB2_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: xor $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB2_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadXor32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB2_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: xor $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB2_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadXor32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB2_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: xor $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadXor32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB2_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: xor $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB2_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadXor32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB2_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: xor $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB2_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: 
AtomicLoadXor32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB2_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: xor $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB2_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadXor32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB2_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: xor $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB2_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadXor32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB2_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: xor $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB2_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw xor i32* @x, i32 %incr monotonic ++ ret i32 %0 ++} ++ ++define i32 @AtomicLoadOr32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadOr32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB3_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: or $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB3_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadOr32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB3_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: or $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB3_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadOr32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB3_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: or $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB3_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadOr32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: 
$BB3_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: or $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB3_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadOr32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB3_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: or $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB3_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadOr32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB3_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: or $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB3_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadOr32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB3_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: or $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB3_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadOr32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB3_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: or $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB3_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadOr32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB3_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: or $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB3_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadOr32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; 
MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB3_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: or $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadOr32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB3_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: or $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB3_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadOr32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB3_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: or $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB3_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadOr32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB3_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: or $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB3_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadOr32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB3_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: or $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB3_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadOr32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB3_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: or $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB3_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw or i32* @x, i32 %incr monotonic ++ ret i32 %0 ++} ++ ++define i32 @AtomicLoadAnd32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadAnd32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB4_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: and $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB4_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadAnd32: ++; MIPS32O0: # %bb.0: # 
%entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB4_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: and $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB4_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadAnd32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB4_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: and $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB4_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadAnd32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB4_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: and $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB4_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadAnd32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB4_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: and $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB4_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadAnd32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB4_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: and $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB4_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAnd32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB4_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: and $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB4_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAnd32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu 
$1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB4_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: and $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB4_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAnd32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB4_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: and $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB4_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAnd32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB4_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: and $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadAnd32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB4_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: and $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB4_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAnd32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB4_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: and $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB4_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAnd32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB4_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: and $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB4_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAnd32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB4_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: and $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB4_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # 
%entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadAnd32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB4_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: and $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB4_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw and i32* @x, i32 %incr monotonic ++ ret i32 %0 ++} ++ ++define i32 @AtomicLoadNand32(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadNand32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB5_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: and $3, $2, $4 ++; MIPS32-NEXT: nor $3, $zero, $3 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB5_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadNand32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB5_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: and $1, $2, $4 ++; MIPS32O0-NEXT: nor $1, $zero, $1 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB5_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadNand32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB5_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: and $3, $2, $4 ++; MIPS32R2-NEXT: nor $3, $zero, $3 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB5_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadNand32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB5_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: and $3, $2, $4 ++; MIPS32R6-NEXT: nor $3, $zero, $3 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB5_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadNand32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB5_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: and $1, $2, $4 ++; MIPS32R6O0-NEXT: 
nor $1, $zero, $1 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB5_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadNand32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB5_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: and $3, $2, $4 ++; MIPS4-NEXT: nor $3, $zero, $3 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB5_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadNand32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB5_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: and $3, $2, $4 ++; MIPS64-NEXT: nor $3, $zero, $3 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB5_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadNand32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB5_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: and $3, $2, $4 ++; MIPS64R2-NEXT: nor $3, $zero, $3 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB5_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadNand32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB5_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: and $3, $2, $4 ++; MIPS64R6-NEXT: nor $3, $zero, $3 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB5_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadNand32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB5_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: and $1, $2, $4 ++; MIPS64R6O0-NEXT: nor $1, $zero, $1 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadNand32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, 
%hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB5_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: and $3, $2, $4 ++; MM32-NEXT: nor $3, $zero, $3 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB5_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadNand32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB5_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: and $3, $2, $4 ++; O1-NEXT: nor $3, $zero, $3 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB5_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadNand32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB5_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: and $3, $2, $4 ++; O2-NEXT: nor $3, $zero, $3 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB5_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadNand32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB5_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: and $3, $2, $4 ++; O3-NEXT: nor $3, $zero, $3 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB5_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadNand32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB5_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: and $3, $2, $4 ++; MIPS32EB-NEXT: nor $3, $zero, $3 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB5_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw nand i32* @x, i32 %incr monotonic ++ ret i32 %0 ++ ++} ++ ++define i32 @AtomicSwap32(i32 signext %newval) nounwind { ++; MIPS32-LABEL: AtomicSwap32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addiu $sp, $sp, -8 ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: sw $4, 4($sp) ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB6_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: move $3, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB6_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32O0-LABEL: AtomicSwap32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: sw $4, 4($sp) ++; 
MIPS32O0-NEXT: lw $4, 4($sp) ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: $BB6_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: move $1, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB6_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicSwap32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addiu $sp, $sp, -8 ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: sw $4, 4($sp) ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB6_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: move $3, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB6_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32R6-LABEL: AtomicSwap32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: sw $4, 4($sp) ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB6_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: move $3, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB6_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jr $ra ++; MIPS32R6-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32R6O0-LABEL: AtomicSwap32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: sw $4, 4($sp) ++; MIPS32R6O0-NEXT: lw $4, 4($sp) ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB6_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: move $1, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB6_1 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicSwap32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: daddiu $sp, $sp, -16 ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) ++; MIPS4-NEXT: sw $4, 12($sp) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB6_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: move $3, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB6_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64-LABEL: AtomicSwap32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: daddiu $sp, $sp, -16 ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64-NEXT: sw $4, 12($sp) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB6_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: move $3, $4 ++; 
MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB6_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R2-LABEL: AtomicSwap32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R2-NEXT: sw $4, 12($sp) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB6_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: move $3, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB6_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6-LABEL: AtomicSwap32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R6-NEXT: sw $4, 12($sp) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB6_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: move $3, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB6_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jr $ra ++; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6O0-LABEL: AtomicSwap32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) ++; MIPS64R6O0-NEXT: move $2, $4 ++; MIPS64R6O0-NEXT: sw $2, 12($sp) ++; MIPS64R6O0-NEXT: lw $4, 12($sp) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB6_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: move $1, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicSwap32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addiu $sp, $sp, -8 ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: sw $4, 4($sp) ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB6_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: or $3, $4, $zero ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB6_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: addiusp 8 ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicSwap32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addiu $sp, $sp, -8 ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: sw $4, 4($sp) ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB6_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: move $3, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB6_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: addiu $sp, $sp, 8 ++; ++; O2-LABEL: AtomicSwap32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; 
O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addiu $sp, $sp, -8 ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: sw $4, 4($sp) ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB6_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: move $3, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB6_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: addiu $sp, $sp, 8 ++; ++; O3-LABEL: AtomicSwap32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addiu $sp, $sp, -8 ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: sw $4, 4($sp) ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB6_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: move $3, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB6_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32EB-LABEL: AtomicSwap32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addiu $sp, $sp, -8 ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: sw $4, 4($sp) ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB6_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: move $3, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB6_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: addiu $sp, $sp, 8 ++entry: ++ %newval.addr = alloca i32, align 4 ++ store i32 %newval, i32* %newval.addr, align 4 ++ %tmp = load i32, i32* %newval.addr, align 4 ++ %0 = atomicrmw xchg i32* @x, i32 %tmp monotonic ++ ret i32 %0 ++ ++} ++ ++define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind { ++; MIPS32-LABEL: AtomicCmpSwap32: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addiu $sp, $sp, -8 ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: sw $5, 4($sp) ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: $BB7_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: bne $2, $4, $BB7_3 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32-NEXT: move $3, $5 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB7_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: $BB7_3: # %entry ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32O0-LABEL: AtomicCmpSwap32: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: sw $5, 4($sp) ++; MIPS32O0-NEXT: lw $6, 4($sp) ++; MIPS32O0-NEXT: lw $3, %got(x)($1) ++; MIPS32O0-NEXT: move $5, $4 ++; MIPS32O0-NEXT: $BB7_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: bne $2, $5, $BB7_3 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32O0-NEXT: move $1, $6 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB7_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: $BB7_3: # %entry ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: xor $1, $2, 
$4 ++; MIPS32O0-NEXT: sltiu $1, $1, 1 ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicCmpSwap32: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addiu $sp, $sp, -8 ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: sw $5, 4($sp) ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: $BB7_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: bne $2, $4, $BB7_3 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32R2-NEXT: move $3, $5 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB7_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: $BB7_3: # %entry ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32R6-LABEL: AtomicCmpSwap32: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: sw $5, 4($sp) ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: $BB7_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: bnec $2, $4, $BB7_3 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32R6-NEXT: move $3, $5 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB7_1 ++; MIPS32R6-NEXT: $BB7_3: # %entry ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: jr $ra ++; MIPS32R6-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32R6O0-LABEL: AtomicCmpSwap32: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: sw $5, 4($sp) ++; MIPS32R6O0-NEXT: lw $5, 4($sp) ++; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ++; MIPS32R6O0-NEXT: $BB7_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: bnec $2, $4, $BB7_3 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32R6O0-NEXT: move $1, $5 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB7_1 ++; MIPS32R6O0-NEXT: $BB7_3: # %entry ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicCmpSwap32: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: daddiu $sp, $sp, -16 ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS4-NEXT: sw $5, 12($sp) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB7_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: bne $2, $4, .LBB7_3 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS4-NEXT: move $3, $5 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB7_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: .LBB7_3: # %entry ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64-LABEL: AtomicCmpSwap32: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: daddiu $sp, $sp, -16 ++; MIPS64-NEXT: lui $1, 
%hi(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64-NEXT: sw $5, 12($sp) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB7_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: bne $2, $4, .LBB7_3 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64-NEXT: move $3, $5 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB7_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: .LBB7_3: # %entry ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R2-LABEL: AtomicCmpSwap32: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R2-NEXT: sw $5, 12($sp) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB7_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: bne $2, $4, .LBB7_3 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R2-NEXT: move $3, $5 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB7_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: .LBB7_3: # %entry ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6-LABEL: AtomicCmpSwap32: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R6-NEXT: sw $5, 12($sp) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB7_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: bnec $2, $4, .LBB7_3 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R6-NEXT: move $3, $5 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB7_1 ++; MIPS64R6-NEXT: .LBB7_3: # %entry ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: jr $ra ++; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6O0-LABEL: AtomicCmpSwap32: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: move $2, $5 ++; MIPS64R6O0-NEXT: sw $2, 12($sp) ++; MIPS64R6O0-NEXT: lw $5, 12($sp) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB7_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R6O0-NEXT: move $1, $5 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 ++; MIPS64R6O0-NEXT: .LBB7_3: # %entry ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicCmpSwap32: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: 
addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addiu $sp, $sp, -8 ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: sw $5, 4($sp) ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: $BB7_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: bne $2, $4, $BB7_3 ++; MM32-NEXT: nop ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MM32-NEXT: move $3, $5 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB7_1 ++; MM32-NEXT: $BB7_3: # %entry ++; MM32-NEXT: sync ++; MM32-NEXT: addiusp 8 ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicCmpSwap32: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addiu $sp, $sp, -8 ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: sw $5, 4($sp) ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: $BB7_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: bne $2, $4, $BB7_3 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O1-NEXT: move $3, $5 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB7_1 ++; O1-NEXT: nop ++; O1-NEXT: $BB7_3: # %entry ++; O1-NEXT: sync ++; O1-NEXT: jr $ra ++; O1-NEXT: addiu $sp, $sp, 8 ++; ++; O2-LABEL: AtomicCmpSwap32: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addiu $sp, $sp, -8 ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: sw $5, 4($sp) ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: $BB7_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: bne $2, $4, $BB7_3 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O2-NEXT: move $3, $5 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB7_1 ++; O2-NEXT: nop ++; O2-NEXT: $BB7_3: # %entry ++; O2-NEXT: sync ++; O2-NEXT: jr $ra ++; O2-NEXT: addiu $sp, $sp, 8 ++; ++; O3-LABEL: AtomicCmpSwap32: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addiu $sp, $sp, -8 ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: sw $5, 4($sp) ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: $BB7_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: bne $2, $4, $BB7_3 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O3-NEXT: move $3, $5 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB7_1 ++; O3-NEXT: nop ++; O3-NEXT: $BB7_3: # %entry ++; O3-NEXT: sync ++; O3-NEXT: jr $ra ++; O3-NEXT: addiu $sp, $sp, 8 ++; ++; MIPS32EB-LABEL: AtomicCmpSwap32: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addiu $sp, $sp, -8 ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: sw $5, 4($sp) ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: $BB7_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: bne $2, $4, $BB7_3 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS32EB-NEXT: move $3, $5 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB7_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: $BB7_3: # %entry ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: addiu $sp, $sp, 8 ++entry: ++ %newval.addr = alloca i32, align 4 ++ store i32 %newval, i32* %newval.addr, 
align 4 ++ %tmp = load i32, i32* %newval.addr, align 4 ++ %0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic monotonic ++ %1 = extractvalue { i32, i1 } %0, 0 ++ ret i32 %1 ++ ++} ++ ++@y = common global i8 0, align 1 ++ ++define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadAdd8: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(y)($1) ++; MIPS32-NEXT: addiu $2, $zero, -4 ++; MIPS32-NEXT: and $3, $1, $2 ++; MIPS32-NEXT: andi $1, $1, 3 ++; MIPS32-NEXT: sll $1, $1, 3 ++; MIPS32-NEXT: ori $2, $zero, 255 ++; MIPS32-NEXT: sllv $5, $2, $1 ++; MIPS32-NEXT: nor $6, $zero, $5 ++; MIPS32-NEXT: sllv $4, $4, $1 ++; MIPS32-NEXT: $BB8_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $7, 0($3) ++; MIPS32-NEXT: addu $8, $7, $4 ++; MIPS32-NEXT: and $8, $8, $5 ++; MIPS32-NEXT: and $9, $7, $6 ++; MIPS32-NEXT: or $9, $9, $8 ++; MIPS32-NEXT: sc $9, 0($3) ++; MIPS32-NEXT: beqz $9, $BB8_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: and $2, $7, $5 ++; MIPS32-NEXT: srlv $2, $2, $1 ++; MIPS32-NEXT: sll $2, $2, 24 ++; MIPS32-NEXT: sra $2, $2, 24 ++; MIPS32-NEXT: # %bb.3: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadAdd8: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $1, %got(y)($1) ++; MIPS32O0-NEXT: addiu $2, $zero, -4 ++; MIPS32O0-NEXT: and $5, $1, $2 ++; MIPS32O0-NEXT: andi $1, $1, 3 ++; MIPS32O0-NEXT: sll $9, $1, 3 ++; MIPS32O0-NEXT: ori $1, $zero, 255 ++; MIPS32O0-NEXT: sllv $7, $1, $9 ++; MIPS32O0-NEXT: nor $8, $zero, $7 ++; MIPS32O0-NEXT: sllv $6, $4, $9 ++; MIPS32O0-NEXT: $BB8_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($5) ++; MIPS32O0-NEXT: addu $3, $2, $6 ++; MIPS32O0-NEXT: and $3, $3, $7 ++; MIPS32O0-NEXT: and $4, $2, $8 ++; MIPS32O0-NEXT: or $4, $4, $3 ++; MIPS32O0-NEXT: sc $4, 0($5) ++; MIPS32O0-NEXT: beqz $4, $BB8_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: and $1, $2, $7 ++; MIPS32O0-NEXT: srlv $1, $1, $9 ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $1, $1, 24 ++; MIPS32O0-NEXT: # %bb.3: # %entry ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.4: # %entry ++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $2, $1, 24 ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadAdd8: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(y)($1) ++; MIPS32R2-NEXT: addiu $2, $zero, -4 ++; MIPS32R2-NEXT: and $3, $1, $2 ++; MIPS32R2-NEXT: andi $1, $1, 3 ++; MIPS32R2-NEXT: sll $1, $1, 3 ++; MIPS32R2-NEXT: ori $2, $zero, 255 ++; MIPS32R2-NEXT: sllv $5, $2, $1 ++; MIPS32R2-NEXT: nor $6, $zero, $5 ++; MIPS32R2-NEXT: sllv $4, $4, $1 ++; MIPS32R2-NEXT: $BB8_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $7, 0($3) ++; MIPS32R2-NEXT: addu $8, $7, $4 ++; MIPS32R2-NEXT: and $8, $8, $5 ++; MIPS32R2-NEXT: and $9, $7, $6 ++; MIPS32R2-NEXT: or $9, $9, $8 ++; 
MIPS32R2-NEXT: sc $9, 0($3) ++; MIPS32R2-NEXT: beqz $9, $BB8_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: and $2, $7, $5 ++; MIPS32R2-NEXT: srlv $2, $2, $1 ++; MIPS32R2-NEXT: seb $2, $2 ++; MIPS32R2-NEXT: # %bb.3: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadAdd8: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(y)($1) ++; MIPS32R6-NEXT: addiu $2, $zero, -4 ++; MIPS32R6-NEXT: and $3, $1, $2 ++; MIPS32R6-NEXT: andi $1, $1, 3 ++; MIPS32R6-NEXT: sll $1, $1, 3 ++; MIPS32R6-NEXT: ori $2, $zero, 255 ++; MIPS32R6-NEXT: sllv $5, $2, $1 ++; MIPS32R6-NEXT: nor $6, $zero, $5 ++; MIPS32R6-NEXT: sllv $4, $4, $1 ++; MIPS32R6-NEXT: $BB8_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $7, 0($3) ++; MIPS32R6-NEXT: addu $8, $7, $4 ++; MIPS32R6-NEXT: and $8, $8, $5 ++; MIPS32R6-NEXT: and $9, $7, $6 ++; MIPS32R6-NEXT: or $9, $9, $8 ++; MIPS32R6-NEXT: sc $9, 0($3) ++; MIPS32R6-NEXT: beqzc $9, $BB8_1 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: and $2, $7, $5 ++; MIPS32R6-NEXT: srlv $2, $2, $1 ++; MIPS32R6-NEXT: seb $2, $2 ++; MIPS32R6-NEXT: # %bb.3: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadAdd8: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ++; MIPS32R6O0-NEXT: lw $1, %got(y)($1) ++; MIPS32R6O0-NEXT: addiu $2, $zero, -4 ++; MIPS32R6O0-NEXT: and $5, $1, $2 ++; MIPS32R6O0-NEXT: andi $1, $1, 3 ++; MIPS32R6O0-NEXT: sll $9, $1, 3 ++; MIPS32R6O0-NEXT: ori $1, $zero, 255 ++; MIPS32R6O0-NEXT: sllv $7, $1, $9 ++; MIPS32R6O0-NEXT: nor $8, $zero, $7 ++; MIPS32R6O0-NEXT: sllv $6, $4, $9 ++; MIPS32R6O0-NEXT: $BB8_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($5) ++; MIPS32R6O0-NEXT: addu $3, $2, $6 ++; MIPS32R6O0-NEXT: and $3, $3, $7 ++; MIPS32R6O0-NEXT: and $4, $2, $8 ++; MIPS32R6O0-NEXT: or $4, $4, $3 ++; MIPS32R6O0-NEXT: sc $4, 0($5) ++; MIPS32R6O0-NEXT: beqzc $4, $BB8_1 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: and $1, $2, $7 ++; MIPS32R6O0-NEXT: srlv $1, $1, $9 ++; MIPS32R6O0-NEXT: seb $1, $1 ++; MIPS32R6O0-NEXT: # %bb.3: # %entry ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.4: # %entry ++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadAdd8: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS4-NEXT: ld $1, %got_disp(y)($1) ++; MIPS4-NEXT: daddiu $2, $zero, -4 ++; MIPS4-NEXT: and $3, $1, $2 ++; MIPS4-NEXT: andi $1, $1, 3 ++; MIPS4-NEXT: sll $1, $1, 3 ++; MIPS4-NEXT: ori $2, $zero, 255 ++; MIPS4-NEXT: sllv $5, $2, $1 ++; MIPS4-NEXT: nor $6, $zero, $5 ++; MIPS4-NEXT: sllv $4, $4, $1 ++; MIPS4-NEXT: .LBB8_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $7, 0($3) ++; MIPS4-NEXT: addu $8, $7, $4 ++; MIPS4-NEXT: and $8, $8, $5 ++; MIPS4-NEXT: and $9, $7, $6 ++; MIPS4-NEXT: or $9, $9, $8 ++; MIPS4-NEXT: sc $9, 0($3) ++; MIPS4-NEXT: beqz 
$9, .LBB8_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: and $2, $7, $5 ++; MIPS4-NEXT: srlv $2, $2, $1 ++; MIPS4-NEXT: sll $2, $2, 24 ++; MIPS4-NEXT: sra $2, $2, 24 ++; MIPS4-NEXT: # %bb.3: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAdd8: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64-NEXT: daddiu $2, $zero, -4 ++; MIPS64-NEXT: and $3, $1, $2 ++; MIPS64-NEXT: andi $1, $1, 3 ++; MIPS64-NEXT: sll $1, $1, 3 ++; MIPS64-NEXT: ori $2, $zero, 255 ++; MIPS64-NEXT: sllv $5, $2, $1 ++; MIPS64-NEXT: nor $6, $zero, $5 ++; MIPS64-NEXT: sllv $4, $4, $1 ++; MIPS64-NEXT: .LBB8_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $7, 0($3) ++; MIPS64-NEXT: addu $8, $7, $4 ++; MIPS64-NEXT: and $8, $8, $5 ++; MIPS64-NEXT: and $9, $7, $6 ++; MIPS64-NEXT: or $9, $9, $8 ++; MIPS64-NEXT: sc $9, 0($3) ++; MIPS64-NEXT: beqz $9, .LBB8_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: and $2, $7, $5 ++; MIPS64-NEXT: srlv $2, $2, $1 ++; MIPS64-NEXT: sll $2, $2, 24 ++; MIPS64-NEXT: sra $2, $2, 24 ++; MIPS64-NEXT: # %bb.3: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAdd8: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R2-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R2-NEXT: daddiu $2, $zero, -4 ++; MIPS64R2-NEXT: and $3, $1, $2 ++; MIPS64R2-NEXT: andi $1, $1, 3 ++; MIPS64R2-NEXT: sll $1, $1, 3 ++; MIPS64R2-NEXT: ori $2, $zero, 255 ++; MIPS64R2-NEXT: sllv $5, $2, $1 ++; MIPS64R2-NEXT: nor $6, $zero, $5 ++; MIPS64R2-NEXT: sllv $4, $4, $1 ++; MIPS64R2-NEXT: .LBB8_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $7, 0($3) ++; MIPS64R2-NEXT: addu $8, $7, $4 ++; MIPS64R2-NEXT: and $8, $8, $5 ++; MIPS64R2-NEXT: and $9, $7, $6 ++; MIPS64R2-NEXT: or $9, $9, $8 ++; MIPS64R2-NEXT: sc $9, 0($3) ++; MIPS64R2-NEXT: beqz $9, .LBB8_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: and $2, $7, $5 ++; MIPS64R2-NEXT: srlv $2, $2, $1 ++; MIPS64R2-NEXT: seb $2, $2 ++; MIPS64R2-NEXT: # %bb.3: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAdd8: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R6-NEXT: ld $1, %got_disp(y)($1) ++; MIPS64R6-NEXT: daddiu $2, $zero, -4 ++; MIPS64R6-NEXT: and $3, $1, $2 ++; MIPS64R6-NEXT: andi $1, $1, 3 ++; MIPS64R6-NEXT: sll $1, $1, 3 ++; MIPS64R6-NEXT: ori $2, $zero, 255 ++; MIPS64R6-NEXT: sllv $5, $2, $1 ++; MIPS64R6-NEXT: nor $6, $zero, $5 ++; MIPS64R6-NEXT: sllv $4, $4, $1 ++; MIPS64R6-NEXT: .LBB8_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $7, 0($3) ++; MIPS64R6-NEXT: addu $8, $7, $4 ++; MIPS64R6-NEXT: and $8, $8, $5 ++; MIPS64R6-NEXT: and $9, $7, $6 ++; MIPS64R6-NEXT: or $9, $9, $8 ++; MIPS64R6-NEXT: sc $9, 0($3) ++; MIPS64R6-NEXT: beqzc $9, .LBB8_1 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: and $2, $7, $5 ++; MIPS64R6-NEXT: srlv $2, $2, $1 ++; MIPS64R6-NEXT: seb 
$2, $2 ++; MIPS64R6-NEXT: # %bb.3: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAdd8: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) ++; MIPS64R6O0-NEXT: move $1, $4 ++; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) ++; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 ++; MIPS64R6O0-NEXT: and $5, $2, $3 ++; MIPS64R6O0-NEXT: andi $2, $2, 3 ++; MIPS64R6O0-NEXT: xori $2, $2, 3 ++; MIPS64R6O0-NEXT: sll $9, $2, 3 ++; MIPS64R6O0-NEXT: ori $2, $zero, 255 ++; MIPS64R6O0-NEXT: sllv $7, $2, $9 ++; MIPS64R6O0-NEXT: nor $8, $zero, $7 ++; MIPS64R6O0-NEXT: sllv $6, $1, $9 ++; MIPS64R6O0-NEXT: .LBB8_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($5) ++; MIPS64R6O0-NEXT: addu $3, $2, $6 ++; MIPS64R6O0-NEXT: and $3, $3, $7 ++; MIPS64R6O0-NEXT: and $4, $2, $8 ++; MIPS64R6O0-NEXT: or $4, $4, $3 ++; MIPS64R6O0-NEXT: sc $4, 0($5) ++; MIPS64R6O0-NEXT: beqzc $4, .LBB8_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: and $1, $2, $7 ++; MIPS64R6O0-NEXT: srlv $1, $1, $9 ++; MIPS64R6O0-NEXT: seb $1, $1 ++; MIPS64R6O0-NEXT: # %bb.3: # %entry ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.4: # %entry ++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadAdd8: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(y)($2) ++; MM32-NEXT: addiu $2, $zero, -4 ++; MM32-NEXT: and $3, $1, $2 ++; MM32-NEXT: andi $1, $1, 3 ++; MM32-NEXT: sll $1, $1, 3 ++; MM32-NEXT: ori $2, $zero, 255 ++; MM32-NEXT: sllv $5, $2, $1 ++; MM32-NEXT: nor $6, $zero, $5 ++; MM32-NEXT: sllv $4, $4, $1 ++; MM32-NEXT: $BB8_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $7, 0($3) ++; MM32-NEXT: addu $8, $7, $4 ++; MM32-NEXT: and $8, $8, $5 ++; MM32-NEXT: and $9, $7, $6 ++; MM32-NEXT: or $9, $9, $8 ++; MM32-NEXT: sc $9, 0($3) ++; MM32-NEXT: beqzc $9, $BB8_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: and $2, $7, $5 ++; MM32-NEXT: srlv $2, $2, $1 ++; MM32-NEXT: seb $2, $2 ++; MM32-NEXT: # %bb.3: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAdd8: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(y)($1) ++; O1-NEXT: addiu $2, $zero, -4 ++; O1-NEXT: and $3, $1, $2 ++; O1-NEXT: andi $1, $1, 3 ++; O1-NEXT: sll $1, $1, 3 ++; O1-NEXT: ori $2, $zero, 255 ++; O1-NEXT: sllv $5, $2, $1 ++; O1-NEXT: nor $6, $zero, $5 ++; O1-NEXT: sllv $4, $4, $1 ++; O1-NEXT: $BB8_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $7, 0($3) ++; O1-NEXT: addu $8, $7, $4 ++; O1-NEXT: and $8, $8, $5 ++; O1-NEXT: and $9, $7, $6 ++; O1-NEXT: or $9, $9, $8 ++; O1-NEXT: sc $9, 0($3) ++; O1-NEXT: beqz $9, $BB8_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: and $2, $7, $5 ++; O1-NEXT: srlv $2, $2, $1 ++; O1-NEXT: sll $2, $2, 24 ++; O1-NEXT: sra $2, $2, 24 ++; O1-NEXT: # %bb.3: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAdd8: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu 
$1, $2, $25 ++; O2-NEXT: lw $1, %got(y)($1) ++; O2-NEXT: addiu $2, $zero, -4 ++; O2-NEXT: and $3, $1, $2 ++; O2-NEXT: andi $1, $1, 3 ++; O2-NEXT: sll $1, $1, 3 ++; O2-NEXT: ori $2, $zero, 255 ++; O2-NEXT: sllv $5, $2, $1 ++; O2-NEXT: nor $6, $zero, $5 ++; O2-NEXT: sllv $4, $4, $1 ++; O2-NEXT: $BB8_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $7, 0($3) ++; O2-NEXT: addu $8, $7, $4 ++; O2-NEXT: and $8, $8, $5 ++; O2-NEXT: and $9, $7, $6 ++; O2-NEXT: or $9, $9, $8 ++; O2-NEXT: sc $9, 0($3) ++; O2-NEXT: beqz $9, $BB8_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: and $2, $7, $5 ++; O2-NEXT: srlv $2, $2, $1 ++; O2-NEXT: sll $2, $2, 24 ++; O2-NEXT: sra $2, $2, 24 ++; O2-NEXT: # %bb.3: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAdd8: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: addiu $2, $zero, -4 ++; O3-NEXT: lw $1, %got(y)($1) ++; O3-NEXT: and $3, $1, $2 ++; O3-NEXT: andi $1, $1, 3 ++; O3-NEXT: ori $2, $zero, 255 ++; O3-NEXT: sll $1, $1, 3 ++; O3-NEXT: sllv $5, $2, $1 ++; O3-NEXT: sllv $4, $4, $1 ++; O3-NEXT: nor $6, $zero, $5 ++; O3-NEXT: $BB8_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $7, 0($3) ++; O3-NEXT: addu $8, $7, $4 ++; O3-NEXT: and $8, $8, $5 ++; O3-NEXT: and $9, $7, $6 ++; O3-NEXT: or $9, $9, $8 ++; O3-NEXT: sc $9, 0($3) ++; O3-NEXT: beqz $9, $BB8_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: and $2, $7, $5 ++; O3-NEXT: srlv $2, $2, $1 ++; O3-NEXT: sll $2, $2, 24 ++; O3-NEXT: sra $2, $2, 24 ++; O3-NEXT: # %bb.3: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadAdd8: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(y)($1) ++; MIPS32EB-NEXT: addiu $2, $zero, -4 ++; MIPS32EB-NEXT: and $3, $1, $2 ++; MIPS32EB-NEXT: andi $1, $1, 3 ++; MIPS32EB-NEXT: xori $1, $1, 3 ++; MIPS32EB-NEXT: sll $1, $1, 3 ++; MIPS32EB-NEXT: ori $2, $zero, 255 ++; MIPS32EB-NEXT: sllv $5, $2, $1 ++; MIPS32EB-NEXT: nor $6, $zero, $5 ++; MIPS32EB-NEXT: sllv $4, $4, $1 ++; MIPS32EB-NEXT: $BB8_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $7, 0($3) ++; MIPS32EB-NEXT: addu $8, $7, $4 ++; MIPS32EB-NEXT: and $8, $8, $5 ++; MIPS32EB-NEXT: and $9, $7, $6 ++; MIPS32EB-NEXT: or $9, $9, $8 ++; MIPS32EB-NEXT: sc $9, 0($3) ++; MIPS32EB-NEXT: beqz $9, $BB8_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: and $2, $7, $5 ++; MIPS32EB-NEXT: srlv $2, $2, $1 ++; MIPS32EB-NEXT: sll $2, $2, 24 ++; MIPS32EB-NEXT: sra $2, $2, 24 ++; MIPS32EB-NEXT: # %bb.3: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i8* @y, i8 %incr monotonic ++ ret i8 %0 ++} ++ ++define signext i8 @AtomicLoadSub8(i8 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadSub8: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(y)($1) ++; MIPS32-NEXT: addiu $2, $zero, -4 ++; MIPS32-NEXT: and $3, $1, $2 ++; MIPS32-NEXT: andi $1, $1, 3 ++; MIPS32-NEXT: sll $1, $1, 3 ++; MIPS32-NEXT: ori $2, $zero, 255 ++; MIPS32-NEXT: sllv $5, $2, $1 ++; MIPS32-NEXT: nor $6, $zero, $5 ++; MIPS32-NEXT: sllv $4, $4, $1 ++; MIPS32-NEXT: $BB9_1: # %entry ++; 
MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32-NEXT: ll $7, 0($3)
++; MIPS32-NEXT: subu $8, $7, $4
++; MIPS32-NEXT: and $8, $8, $5
++; MIPS32-NEXT: and $9, $7, $6
++; MIPS32-NEXT: or $9, $9, $8
++; MIPS32-NEXT: sc $9, 0($3)
++; MIPS32-NEXT: beqz $9, $BB9_1
++; MIPS32-NEXT: nop
++; MIPS32-NEXT: # %bb.2: # %entry
++; MIPS32-NEXT: and $2, $7, $5
++; MIPS32-NEXT: srlv $2, $2, $1
++; MIPS32-NEXT: sll $2, $2, 24
++; MIPS32-NEXT: sra $2, $2, 24
++; MIPS32-NEXT: # %bb.3: # %entry
++; MIPS32-NEXT: jr $ra
++; MIPS32-NEXT: nop
++;
++; MIPS32O0-LABEL: AtomicLoadSub8:
++; MIPS32O0: # %bb.0: # %entry
++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32O0-NEXT: addiu $sp, $sp, -8
++; MIPS32O0-NEXT: addu $1, $2, $25
++; MIPS32O0-NEXT: lw $1, %got(y)($1)
++; MIPS32O0-NEXT: addiu $2, $zero, -4
++; MIPS32O0-NEXT: and $5, $1, $2
++; MIPS32O0-NEXT: andi $1, $1, 3
++; MIPS32O0-NEXT: sll $9, $1, 3
++; MIPS32O0-NEXT: ori $1, $zero, 255
++; MIPS32O0-NEXT: sllv $7, $1, $9
++; MIPS32O0-NEXT: nor $8, $zero, $7
++; MIPS32O0-NEXT: sllv $6, $4, $9
++; MIPS32O0-NEXT: $BB9_1: # %entry
++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32O0-NEXT: ll $2, 0($5)
++; MIPS32O0-NEXT: subu $3, $2, $6
++; MIPS32O0-NEXT: and $3, $3, $7
++; MIPS32O0-NEXT: and $4, $2, $8
++; MIPS32O0-NEXT: or $4, $4, $3
++; MIPS32O0-NEXT: sc $4, 0($5)
++; MIPS32O0-NEXT: beqz $4, $BB9_1
++; MIPS32O0-NEXT: nop
++; MIPS32O0-NEXT: # %bb.2: # %entry
++; MIPS32O0-NEXT: and $1, $2, $7
++; MIPS32O0-NEXT: srlv $1, $1, $9
++; MIPS32O0-NEXT: sll $1, $1, 24
++; MIPS32O0-NEXT: sra $1, $1, 24
++; MIPS32O0-NEXT: # %bb.3: # %entry
++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
++; MIPS32O0-NEXT: # %bb.4: # %entry
++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
++; MIPS32O0-NEXT: sll $1, $1, 24
++; MIPS32O0-NEXT: sra $2, $1, 24
++; MIPS32O0-NEXT: addiu $sp, $sp, 8
++; MIPS32O0-NEXT: jr $ra
++; MIPS32O0-NEXT: nop
++;
++; MIPS32R2-LABEL: AtomicLoadSub8:
++; MIPS32R2: # %bb.0: # %entry
++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R2-NEXT: addu $1, $2, $25
++; MIPS32R2-NEXT: lw $1, %got(y)($1)
++; MIPS32R2-NEXT: addiu $2, $zero, -4
++; MIPS32R2-NEXT: and $3, $1, $2
++; MIPS32R2-NEXT: andi $1, $1, 3
++; MIPS32R2-NEXT: sll $1, $1, 3
++; MIPS32R2-NEXT: ori $2, $zero, 255
++; MIPS32R2-NEXT: sllv $5, $2, $1
++; MIPS32R2-NEXT: nor $6, $zero, $5
++; MIPS32R2-NEXT: sllv $4, $4, $1
++; MIPS32R2-NEXT: $BB9_1: # %entry
++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R2-NEXT: ll $7, 0($3)
++; MIPS32R2-NEXT: subu $8, $7, $4
++; MIPS32R2-NEXT: and $8, $8, $5
++; MIPS32R2-NEXT: and $9, $7, $6
++; MIPS32R2-NEXT: or $9, $9, $8
++; MIPS32R2-NEXT: sc $9, 0($3)
++; MIPS32R2-NEXT: beqz $9, $BB9_1
++; MIPS32R2-NEXT: nop
++; MIPS32R2-NEXT: # %bb.2: # %entry
++; MIPS32R2-NEXT: and $2, $7, $5
++; MIPS32R2-NEXT: srlv $2, $2, $1
++; MIPS32R2-NEXT: seb $2, $2
++; MIPS32R2-NEXT: # %bb.3: # %entry
++; MIPS32R2-NEXT: jr $ra
++; MIPS32R2-NEXT: nop
++;
++; MIPS32R6-LABEL: AtomicLoadSub8:
++; MIPS32R6: # %bb.0: # %entry
++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R6-NEXT: addu $1, $2, $25
++; MIPS32R6-NEXT: lw $1, %got(y)($1)
++; MIPS32R6-NEXT: addiu $2, $zero, -4
++; MIPS32R6-NEXT: and $3, $1, $2
++; MIPS32R6-NEXT: andi $1, $1, 3
++; MIPS32R6-NEXT: sll $1, $1, 3
++; MIPS32R6-NEXT: ori $2, $zero, 255
++; MIPS32R6-NEXT: sllv $5, $2, $1
++; MIPS32R6-NEXT: nor $6, $zero, $5
++; MIPS32R6-NEXT: sllv $4, $4, $1
++; MIPS32R6-NEXT: $BB9_1: # %entry
++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R6-NEXT: ll $7, 0($3)
++; MIPS32R6-NEXT: subu $8, $7, $4
++; MIPS32R6-NEXT: and $8, $8, $5
++; MIPS32R6-NEXT: and $9, $7, $6
++; MIPS32R6-NEXT: or $9, $9, $8
++; MIPS32R6-NEXT: sc $9, 0($3)
++; MIPS32R6-NEXT: beqzc $9, $BB9_1
++; MIPS32R6-NEXT: # %bb.2: # %entry
++; MIPS32R6-NEXT: and $2, $7, $5
++; MIPS32R6-NEXT: srlv $2, $2, $1
++; MIPS32R6-NEXT: seb $2, $2
++; MIPS32R6-NEXT: # %bb.3: # %entry
++; MIPS32R6-NEXT: jrc $ra
++;
++; MIPS32R6O0-LABEL: AtomicLoadSub8:
++; MIPS32R6O0: # %bb.0: # %entry
++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
++; MIPS32R6O0-NEXT: addu $1, $2, $25
++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0
++; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
++; MIPS32R6O0-NEXT: addiu $2, $zero, -4
++; MIPS32R6O0-NEXT: and $5, $1, $2
++; MIPS32R6O0-NEXT: andi $1, $1, 3
++; MIPS32R6O0-NEXT: sll $9, $1, 3
++; MIPS32R6O0-NEXT: ori $1, $zero, 255
++; MIPS32R6O0-NEXT: sllv $7, $1, $9
++; MIPS32R6O0-NEXT: nor $8, $zero, $7
++; MIPS32R6O0-NEXT: sllv $6, $4, $9
++; MIPS32R6O0-NEXT: $BB9_1: # %entry
++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R6O0-NEXT: ll $2, 0($5)
++; MIPS32R6O0-NEXT: subu $3, $2, $6
++; MIPS32R6O0-NEXT: and $3, $3, $7
++; MIPS32R6O0-NEXT: and $4, $2, $8
++; MIPS32R6O0-NEXT: or $4, $4, $3
++; MIPS32R6O0-NEXT: sc $4, 0($5)
++; MIPS32R6O0-NEXT: beqzc $4, $BB9_1
++; MIPS32R6O0-NEXT: # %bb.2: # %entry
++; MIPS32R6O0-NEXT: and $1, $2, $7
++; MIPS32R6O0-NEXT: srlv $1, $1, $9
++; MIPS32R6O0-NEXT: seb $1, $1
++; MIPS32R6O0-NEXT: # %bb.3: # %entry
++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
++; MIPS32R6O0-NEXT: # %bb.4: # %entry
++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
++; MIPS32R6O0-NEXT: jrc $ra
++;
++; MIPS4-LABEL: AtomicLoadSub8:
++; MIPS4: # %bb.0: # %entry
++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8)))
++; MIPS4-NEXT: daddu $1, $1, $25
++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
++; MIPS4-NEXT: ld $1, %got_disp(y)($1)
++; MIPS4-NEXT: daddiu $2, $zero, -4
++; MIPS4-NEXT: and $3, $1, $2
++; MIPS4-NEXT: andi $1, $1, 3
++; MIPS4-NEXT: sll $1, $1, 3
++; MIPS4-NEXT: ori $2, $zero, 255
++; MIPS4-NEXT: sllv $5, $2, $1
++; MIPS4-NEXT: nor $6, $zero, $5
++; MIPS4-NEXT: sllv $4, $4, $1
++; MIPS4-NEXT: .LBB9_1: # %entry
++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS4-NEXT: ll $7, 0($3)
++; MIPS4-NEXT: subu $8, $7, $4
++; MIPS4-NEXT: and $8, $8, $5
++; MIPS4-NEXT: and $9, $7, $6
++; MIPS4-NEXT: or $9, $9, $8
++; MIPS4-NEXT: sc $9, 0($3)
++; MIPS4-NEXT: beqz $9, .LBB9_1
++; MIPS4-NEXT: nop
++; MIPS4-NEXT: # %bb.2: # %entry
++; MIPS4-NEXT: and $2, $7, $5
++; MIPS4-NEXT: srlv $2, $2, $1
++; MIPS4-NEXT: sll $2, $2, 24
++; MIPS4-NEXT: sra $2, $2, 24
++; MIPS4-NEXT: # %bb.3: # %entry
++; MIPS4-NEXT: jr $ra
++; MIPS4-NEXT: nop
++;
++; MIPS64-LABEL: AtomicLoadSub8:
++; MIPS64: # %bb.0: # %entry
++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8)))
++; MIPS64-NEXT: daddu $1, $1, $25
++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
++; MIPS64-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64-NEXT: daddiu $2, $zero, -4
++; MIPS64-NEXT: and $3, $1, $2
++; MIPS64-NEXT: andi $1, $1, 3
++; MIPS64-NEXT: sll $1, $1, 3
++; MIPS64-NEXT: ori $2, $zero, 255
++; MIPS64-NEXT: sllv $5, $2, $1
++; MIPS64-NEXT: nor $6, $zero, $5
++; MIPS64-NEXT: sllv $4, $4, $1
++; MIPS64-NEXT: .LBB9_1: # %entry
++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64-NEXT: ll $7, 0($3)
++; MIPS64-NEXT: subu $8, $7, $4
++; MIPS64-NEXT: and $8, $8, $5
++; MIPS64-NEXT: and $9, $7, $6
++; MIPS64-NEXT: or $9, $9, $8
++; MIPS64-NEXT: sc $9, 0($3)
++; MIPS64-NEXT: beqz $9, .LBB9_1
++; MIPS64-NEXT: nop
++; MIPS64-NEXT: # %bb.2: # %entry
++; MIPS64-NEXT: and $2, $7, $5
++; MIPS64-NEXT: srlv $2, $2, $1
++; MIPS64-NEXT: sll $2, $2, 24
++; MIPS64-NEXT: sra $2, $2, 24
++; MIPS64-NEXT: # %bb.3: # %entry
++; MIPS64-NEXT: jr $ra
++; MIPS64-NEXT: nop
++;
++; MIPS64R2-LABEL: AtomicLoadSub8:
++; MIPS64R2: # %bb.0: # %entry
++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8)))
++; MIPS64R2-NEXT: daddu $1, $1, $25
++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
++; MIPS64R2-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64R2-NEXT: daddiu $2, $zero, -4
++; MIPS64R2-NEXT: and $3, $1, $2
++; MIPS64R2-NEXT: andi $1, $1, 3
++; MIPS64R2-NEXT: sll $1, $1, 3
++; MIPS64R2-NEXT: ori $2, $zero, 255
++; MIPS64R2-NEXT: sllv $5, $2, $1
++; MIPS64R2-NEXT: nor $6, $zero, $5
++; MIPS64R2-NEXT: sllv $4, $4, $1
++; MIPS64R2-NEXT: .LBB9_1: # %entry
++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R2-NEXT: ll $7, 0($3)
++; MIPS64R2-NEXT: subu $8, $7, $4
++; MIPS64R2-NEXT: and $8, $8, $5
++; MIPS64R2-NEXT: and $9, $7, $6
++; MIPS64R2-NEXT: or $9, $9, $8
++; MIPS64R2-NEXT: sc $9, 0($3)
++; MIPS64R2-NEXT: beqz $9, .LBB9_1
++; MIPS64R2-NEXT: nop
++; MIPS64R2-NEXT: # %bb.2: # %entry
++; MIPS64R2-NEXT: and $2, $7, $5
++; MIPS64R2-NEXT: srlv $2, $2, $1
++; MIPS64R2-NEXT: seb $2, $2
++; MIPS64R2-NEXT: # %bb.3: # %entry
++; MIPS64R2-NEXT: jr $ra
++; MIPS64R2-NEXT: nop
++;
++; MIPS64R6-LABEL: AtomicLoadSub8:
++; MIPS64R6: # %bb.0: # %entry
++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8)))
++; MIPS64R6-NEXT: daddu $1, $1, $25
++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
++; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64R6-NEXT: daddiu $2, $zero, -4
++; MIPS64R6-NEXT: and $3, $1, $2
++; MIPS64R6-NEXT: andi $1, $1, 3
++; MIPS64R6-NEXT: sll $1, $1, 3
++; MIPS64R6-NEXT: ori $2, $zero, 255
++; MIPS64R6-NEXT: sllv $5, $2, $1
++; MIPS64R6-NEXT: nor $6, $zero, $5
++; MIPS64R6-NEXT: sllv $4, $4, $1
++; MIPS64R6-NEXT: .LBB9_1: # %entry
++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R6-NEXT: ll $7, 0($3)
++; MIPS64R6-NEXT: subu $8, $7, $4
++; MIPS64R6-NEXT: and $8, $8, $5
++; MIPS64R6-NEXT: and $9, $7, $6
++; MIPS64R6-NEXT: or $9, $9, $8
++; MIPS64R6-NEXT: sc $9, 0($3)
++; MIPS64R6-NEXT: beqzc $9, .LBB9_1
++; MIPS64R6-NEXT: # %bb.2: # %entry
++; MIPS64R6-NEXT: and $2, $7, $5
++; MIPS64R6-NEXT: srlv $2, $2, $1
++; MIPS64R6-NEXT: seb $2, $2
++; MIPS64R6-NEXT: # %bb.3: # %entry
++; MIPS64R6-NEXT: jrc $ra
++;
++; MIPS64R6O0-LABEL: AtomicLoadSub8:
++; MIPS64R6O0: # %bb.0: # %entry
++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8)))
++; MIPS64R6O0-NEXT: daddu $1, $1, $25
++; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadSub8)))
++; MIPS64R6O0-NEXT: move $1, $4
++; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2)
++; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
++; MIPS64R6O0-NEXT: and $5, $2, $3
++; MIPS64R6O0-NEXT: andi $2, $2, 3
++; MIPS64R6O0-NEXT: xori $2, $2, 3
++; MIPS64R6O0-NEXT: sll $9, $2, 3
++; MIPS64R6O0-NEXT: ori $2, $zero, 255
++; MIPS64R6O0-NEXT: sllv $7, $2, $9
++; MIPS64R6O0-NEXT: nor $8, $zero, $7
++; MIPS64R6O0-NEXT: sllv $6, $1, $9
++; MIPS64R6O0-NEXT: .LBB9_1: # %entry
++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R6O0-NEXT: ll $2, 0($5)
++; MIPS64R6O0-NEXT: subu $3, $2, $6
++; MIPS64R6O0-NEXT: and $3, $3, $7
++; MIPS64R6O0-NEXT: and $4, $2, $8
++; MIPS64R6O0-NEXT: or $4, $4, $3
++; MIPS64R6O0-NEXT: sc $4, 0($5)
++; MIPS64R6O0-NEXT: beqzc $4, .LBB9_1
++; MIPS64R6O0-NEXT: # %bb.2: # %entry
++; MIPS64R6O0-NEXT: and $1, $2, $7
++; MIPS64R6O0-NEXT: srlv $1, $1, $9
++; MIPS64R6O0-NEXT: seb $1, $1
++; MIPS64R6O0-NEXT: # %bb.3: # %entry
++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
++; MIPS64R6O0-NEXT: # %bb.4: # %entry
++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
++; MIPS64R6O0-NEXT: jrc $ra
++;
++; MM32-LABEL: AtomicLoadSub8:
++; MM32: # %bb.0: # %entry
++; MM32-NEXT: lui $2, %hi(_gp_disp)
++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MM32-NEXT: addu $2, $2, $25
++; MM32-NEXT: lw $1, %got(y)($2)
++; MM32-NEXT: addiu $2, $zero, -4
++; MM32-NEXT: and $3, $1, $2
++; MM32-NEXT: andi $1, $1, 3
++; MM32-NEXT: sll $1, $1, 3
++; MM32-NEXT: ori $2, $zero, 255
++; MM32-NEXT: sllv $5, $2, $1
++; MM32-NEXT: nor $6, $zero, $5
++; MM32-NEXT: sllv $4, $4, $1
++; MM32-NEXT: $BB9_1: # %entry
++; MM32-NEXT: # =>This Inner Loop Header: Depth=1
++; MM32-NEXT: ll $7, 0($3)
++; MM32-NEXT: subu $8, $7, $4
++; MM32-NEXT: and $8, $8, $5
++; MM32-NEXT: and $9, $7, $6
++; MM32-NEXT: or $9, $9, $8
++; MM32-NEXT: sc $9, 0($3)
++; MM32-NEXT: beqzc $9, $BB9_1
++; MM32-NEXT: # %bb.2: # %entry
++; MM32-NEXT: and $2, $7, $5
++; MM32-NEXT: srlv $2, $2, $1
++; MM32-NEXT: seb $2, $2
++; MM32-NEXT: # %bb.3: # %entry
++; MM32-NEXT: jrc $ra
++;
++; O1-LABEL: AtomicLoadSub8:
++; O1: # %bb.0: # %entry
++; O1-NEXT: lui $2, %hi(_gp_disp)
++; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O1-NEXT: addu $1, $2, $25
++; O1-NEXT: lw $1, %got(y)($1)
++; O1-NEXT: addiu $2, $zero, -4
++; O1-NEXT: and $3, $1, $2
++; O1-NEXT: andi $1, $1, 3
++; O1-NEXT: sll $1, $1, 3
++; O1-NEXT: ori $2, $zero, 255
++; O1-NEXT: sllv $5, $2, $1
++; O1-NEXT: nor $6, $zero, $5
++; O1-NEXT: sllv $4, $4, $1
++; O1-NEXT: $BB9_1: # %entry
++; O1-NEXT: # =>This Inner Loop Header: Depth=1
++; O1-NEXT: ll $7, 0($3)
++; O1-NEXT: subu $8, $7, $4
++; O1-NEXT: and $8, $8, $5
++; O1-NEXT: and $9, $7, $6
++; O1-NEXT: or $9, $9, $8
++; O1-NEXT: sc $9, 0($3)
++; O1-NEXT: beqz $9, $BB9_1
++; O1-NEXT: nop
++; O1-NEXT: # %bb.2: # %entry
++; O1-NEXT: and $2, $7, $5
++; O1-NEXT: srlv $2, $2, $1
++; O1-NEXT: sll $2, $2, 24
++; O1-NEXT: sra $2, $2, 24
++; O1-NEXT: # %bb.3: # %entry
++; O1-NEXT: jr $ra
++; O1-NEXT: nop
++;
++; O2-LABEL: AtomicLoadSub8:
++; O2: # %bb.0: # %entry
++; O2-NEXT: lui $2, %hi(_gp_disp)
++; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O2-NEXT: addu $1, $2, $25
++; O2-NEXT: lw $1, %got(y)($1)
++; O2-NEXT: addiu $2, $zero, -4
++; O2-NEXT: and $3, $1, $2
++; O2-NEXT: andi $1, $1, 3
++; O2-NEXT: sll $1, $1, 3
++; O2-NEXT: ori $2, $zero, 255
++; O2-NEXT: sllv $5, $2, $1
++; O2-NEXT: nor $6, $zero, $5
++; O2-NEXT: sllv $4, $4, $1
++; O2-NEXT: $BB9_1: # %entry
++; O2-NEXT: # =>This Inner Loop Header: Depth=1
++; O2-NEXT: ll $7, 0($3)
++; O2-NEXT: subu $8, $7, $4
++; O2-NEXT: and $8, $8, $5
++; O2-NEXT: and $9, $7, $6
++; O2-NEXT: or $9, $9, $8
++; O2-NEXT: sc $9, 0($3)
++; O2-NEXT: beqz $9, $BB9_1
++; O2-NEXT: nop
++; O2-NEXT: # %bb.2: # %entry
++; O2-NEXT: and $2, $7, $5
++; O2-NEXT: srlv $2, $2, $1
++; O2-NEXT: sll $2, $2, 24
++; O2-NEXT: sra $2, $2, 24
++; O2-NEXT: # %bb.3: # %entry
++; O2-NEXT: jr $ra
++; O2-NEXT: nop
++;
++; O3-LABEL: AtomicLoadSub8:
++; O3: # %bb.0: # %entry
++; O3-NEXT: lui $2, %hi(_gp_disp)
++; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O3-NEXT: addu $1, $2, $25
++; O3-NEXT: addiu $2, $zero, -4
++; O3-NEXT: lw $1, %got(y)($1)
++; O3-NEXT: and $3, $1, $2
++; O3-NEXT: andi $1, $1, 3
++; O3-NEXT: ori $2, $zero, 255
++; O3-NEXT: sll $1, $1, 3
++; O3-NEXT: sllv $5, $2, $1
++; O3-NEXT: sllv $4, $4, $1
++; O3-NEXT: nor $6, $zero, $5
++; O3-NEXT: $BB9_1: # %entry
++; O3-NEXT: # =>This Inner Loop Header: Depth=1
++; O3-NEXT: ll $7, 0($3)
++; O3-NEXT: subu $8, $7, $4
++; O3-NEXT: and $8, $8, $5
++; O3-NEXT: and $9, $7, $6
++; O3-NEXT: or $9, $9, $8
++; O3-NEXT: sc $9, 0($3)
++; O3-NEXT: beqz $9, $BB9_1
++; O3-NEXT: nop
++; O3-NEXT: # %bb.2: # %entry
++; O3-NEXT: and $2, $7, $5
++; O3-NEXT: srlv $2, $2, $1
++; O3-NEXT: sll $2, $2, 24
++; O3-NEXT: sra $2, $2, 24
++; O3-NEXT: # %bb.3: # %entry
++; O3-NEXT: jr $ra
++; O3-NEXT: nop
++;
++; MIPS32EB-LABEL: AtomicLoadSub8:
++; MIPS32EB: # %bb.0: # %entry
++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32EB-NEXT: addu $1, $2, $25
++; MIPS32EB-NEXT: lw $1, %got(y)($1)
++; MIPS32EB-NEXT: addiu $2, $zero, -4
++; MIPS32EB-NEXT: and $3, $1, $2
++; MIPS32EB-NEXT: andi $1, $1, 3
++; MIPS32EB-NEXT: xori $1, $1, 3
++; MIPS32EB-NEXT: sll $1, $1, 3
++; MIPS32EB-NEXT: ori $2, $zero, 255
++; MIPS32EB-NEXT: sllv $5, $2, $1
++; MIPS32EB-NEXT: nor $6, $zero, $5
++; MIPS32EB-NEXT: sllv $4, $4, $1
++; MIPS32EB-NEXT: $BB9_1: # %entry
++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32EB-NEXT: ll $7, 0($3)
++; MIPS32EB-NEXT: subu $8, $7, $4
++; MIPS32EB-NEXT: and $8, $8, $5
++; MIPS32EB-NEXT: and $9, $7, $6
++; MIPS32EB-NEXT: or $9, $9, $8
++; MIPS32EB-NEXT: sc $9, 0($3)
++; MIPS32EB-NEXT: beqz $9, $BB9_1
++; MIPS32EB-NEXT: nop
++; MIPS32EB-NEXT: # %bb.2: # %entry
++; MIPS32EB-NEXT: and $2, $7, $5
++; MIPS32EB-NEXT: srlv $2, $2, $1
++; MIPS32EB-NEXT: sll $2, $2, 24
++; MIPS32EB-NEXT: sra $2, $2, 24
++; MIPS32EB-NEXT: # %bb.3: # %entry
++; MIPS32EB-NEXT: jr $ra
++; MIPS32EB-NEXT: nop
++entry:
++ %0 = atomicrmw sub i8* @y, i8 %incr monotonic
++ ret i8 %0
++
++}
++
++define signext i8 @AtomicLoadNand8(i8 signext %incr) nounwind {
++; MIPS32-LABEL: AtomicLoadNand8:
++; MIPS32: # %bb.0: # %entry
++; MIPS32-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32-NEXT: addu $1, $2, $25
++; MIPS32-NEXT: lw $1, %got(y)($1)
++; MIPS32-NEXT: addiu $2, $zero, -4
++; MIPS32-NEXT: and $3, $1, $2
++; MIPS32-NEXT: andi $1, $1, 3
++; MIPS32-NEXT: sll $1, $1, 3
++; MIPS32-NEXT: ori $2, $zero, 255
++; MIPS32-NEXT: sllv $5, $2, $1
++; MIPS32-NEXT: nor $6, $zero, $5
++; MIPS32-NEXT: sllv $4, $4, $1
++; MIPS32-NEXT: $BB10_1: # %entry
++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32-NEXT: ll $7, 0($3)
++; MIPS32-NEXT: and $8, $7, $4
++; MIPS32-NEXT: nor $8, $zero, $8
++; MIPS32-NEXT: and $8, $8, $5
++; MIPS32-NEXT: and $9, $7, $6
++; MIPS32-NEXT: or $9, $9, $8
++; MIPS32-NEXT: sc $9, 0($3)
++; MIPS32-NEXT: beqz $9, $BB10_1
++; MIPS32-NEXT: nop
++; MIPS32-NEXT: # %bb.2: # %entry
++; MIPS32-NEXT: and $2, $7, $5
++; MIPS32-NEXT: srlv $2, $2, $1
++; MIPS32-NEXT: sll $2, $2, 24
++; MIPS32-NEXT: sra $2, $2, 24
++; MIPS32-NEXT: # %bb.3: # %entry
++; MIPS32-NEXT: jr $ra
++; MIPS32-NEXT: nop
++;
++; MIPS32O0-LABEL: AtomicLoadNand8:
++; MIPS32O0: # %bb.0: # %entry
++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32O0-NEXT: addiu $sp, $sp, -8
++; MIPS32O0-NEXT: addu $1, $2, $25
++; MIPS32O0-NEXT: lw $1, %got(y)($1)
++; MIPS32O0-NEXT: addiu $2, $zero, -4
++; MIPS32O0-NEXT: and $5, $1, $2
++; MIPS32O0-NEXT: andi $1, $1, 3
++; MIPS32O0-NEXT: sll $9, $1, 3
++; MIPS32O0-NEXT: ori $1, $zero, 255
++; MIPS32O0-NEXT: sllv $7, $1, $9
++; MIPS32O0-NEXT: nor $8, $zero, $7
++; MIPS32O0-NEXT: sllv $6, $4, $9
++; MIPS32O0-NEXT: $BB10_1: # %entry
++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32O0-NEXT: ll $2, 0($5)
++; MIPS32O0-NEXT: and $3, $2, $6
++; MIPS32O0-NEXT: nor $3, $zero, $3
++; MIPS32O0-NEXT: and $3, $3, $7
++; MIPS32O0-NEXT: and $4, $2, $8
++; MIPS32O0-NEXT: or $4, $4, $3
++; MIPS32O0-NEXT: sc $4, 0($5)
++; MIPS32O0-NEXT: beqz $4, $BB10_1
++; MIPS32O0-NEXT: nop
++; MIPS32O0-NEXT: # %bb.2: # %entry
++; MIPS32O0-NEXT: and $1, $2, $7
++; MIPS32O0-NEXT: srlv $1, $1, $9
++; MIPS32O0-NEXT: sll $1, $1, 24
++; MIPS32O0-NEXT: sra $1, $1, 24
++; MIPS32O0-NEXT: # %bb.3: # %entry
++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
++; MIPS32O0-NEXT: # %bb.4: # %entry
++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
++; MIPS32O0-NEXT: sll $1, $1, 24
++; MIPS32O0-NEXT: sra $2, $1, 24
++; MIPS32O0-NEXT: addiu $sp, $sp, 8
++; MIPS32O0-NEXT: jr $ra
++; MIPS32O0-NEXT: nop
++;
++; MIPS32R2-LABEL: AtomicLoadNand8:
++; MIPS32R2: # %bb.0: # %entry
++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R2-NEXT: addu $1, $2, $25
++; MIPS32R2-NEXT: lw $1, %got(y)($1)
++; MIPS32R2-NEXT: addiu $2, $zero, -4
++; MIPS32R2-NEXT: and $3, $1, $2
++; MIPS32R2-NEXT: andi $1, $1, 3
++; MIPS32R2-NEXT: sll $1, $1, 3
++; MIPS32R2-NEXT: ori $2, $zero, 255
++; MIPS32R2-NEXT: sllv $5, $2, $1
++; MIPS32R2-NEXT: nor $6, $zero, $5
++; MIPS32R2-NEXT: sllv $4, $4, $1
++; MIPS32R2-NEXT: $BB10_1: # %entry
++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R2-NEXT: ll $7, 0($3)
++; MIPS32R2-NEXT: and $8, $7, $4
++; MIPS32R2-NEXT: nor $8, $zero, $8
++; MIPS32R2-NEXT: and $8, $8, $5
++; MIPS32R2-NEXT: and $9, $7, $6
++; MIPS32R2-NEXT: or $9, $9, $8
++; MIPS32R2-NEXT: sc $9, 0($3)
++; MIPS32R2-NEXT: beqz $9, $BB10_1
++; MIPS32R2-NEXT: nop
++; MIPS32R2-NEXT: # %bb.2: # %entry
++; MIPS32R2-NEXT: and $2, $7, $5
++; MIPS32R2-NEXT: srlv $2, $2, $1
++; MIPS32R2-NEXT: seb $2, $2
++; MIPS32R2-NEXT: # %bb.3: # %entry
++; MIPS32R2-NEXT: jr $ra
++; MIPS32R2-NEXT: nop
++;
++; MIPS32R6-LABEL: AtomicLoadNand8:
++; MIPS32R6: # %bb.0: # %entry
++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R6-NEXT: addu $1, $2, $25
++; MIPS32R6-NEXT: lw $1, %got(y)($1)
++; MIPS32R6-NEXT: addiu $2, $zero, -4
++; MIPS32R6-NEXT: and $3, $1, $2
++; MIPS32R6-NEXT: andi $1, $1, 3
++; MIPS32R6-NEXT: sll $1, $1, 3
++; MIPS32R6-NEXT: ori $2, $zero, 255
++; MIPS32R6-NEXT: sllv $5, $2, $1
++; MIPS32R6-NEXT: nor $6, $zero, $5
++; MIPS32R6-NEXT: sllv $4, $4, $1
++; MIPS32R6-NEXT: $BB10_1: # %entry
++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R6-NEXT: ll $7, 0($3)
++; MIPS32R6-NEXT: and $8, $7, $4
++; MIPS32R6-NEXT: nor $8, $zero, $8
++; MIPS32R6-NEXT: and $8, $8, $5
++; MIPS32R6-NEXT: and $9, $7, $6
++; MIPS32R6-NEXT: or $9, $9, $8
++; MIPS32R6-NEXT: sc $9, 0($3)
++; MIPS32R6-NEXT: beqzc $9, $BB10_1
++; MIPS32R6-NEXT: # %bb.2: # %entry
++; MIPS32R6-NEXT: and $2, $7, $5
++; MIPS32R6-NEXT: srlv $2, $2, $1
++; MIPS32R6-NEXT: seb $2, $2
++; MIPS32R6-NEXT: # %bb.3: # %entry
++; MIPS32R6-NEXT: jrc $ra
++;
++; MIPS32R6O0-LABEL: AtomicLoadNand8:
++; MIPS32R6O0: # %bb.0: # %entry
++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
++; MIPS32R6O0-NEXT: addu $1, $2, $25
++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0
++; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
++; MIPS32R6O0-NEXT: addiu $2, $zero, -4
++; MIPS32R6O0-NEXT: and $5, $1, $2
++; MIPS32R6O0-NEXT: andi $1, $1, 3
++; MIPS32R6O0-NEXT: sll $9, $1, 3
++; MIPS32R6O0-NEXT: ori $1, $zero, 255
++; MIPS32R6O0-NEXT: sllv $7, $1, $9
++; MIPS32R6O0-NEXT: nor $8, $zero, $7
++; MIPS32R6O0-NEXT: sllv $6, $4, $9
++; MIPS32R6O0-NEXT: $BB10_1: # %entry
++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R6O0-NEXT: ll $2, 0($5)
++; MIPS32R6O0-NEXT: and $3, $2, $6
++; MIPS32R6O0-NEXT: nor $3, $zero, $3
++; MIPS32R6O0-NEXT: and $3, $3, $7
++; MIPS32R6O0-NEXT: and $4, $2, $8
++; MIPS32R6O0-NEXT: or $4, $4, $3
++; MIPS32R6O0-NEXT: sc $4, 0($5)
++; MIPS32R6O0-NEXT: beqzc $4, $BB10_1
++; MIPS32R6O0-NEXT: # %bb.2: # %entry
++; MIPS32R6O0-NEXT: and $1, $2, $7
++; MIPS32R6O0-NEXT: srlv $1, $1, $9
++; MIPS32R6O0-NEXT: seb $1, $1
++; MIPS32R6O0-NEXT: # %bb.3: # %entry
++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
++; MIPS32R6O0-NEXT: # %bb.4: # %entry
++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
++; MIPS32R6O0-NEXT: jrc $ra
++;
++; MIPS4-LABEL: AtomicLoadNand8:
++; MIPS4: # %bb.0: # %entry
++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
++; MIPS4-NEXT: daddu $1, $1, $25
++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
++; MIPS4-NEXT: ld $1, %got_disp(y)($1)
++; MIPS4-NEXT: daddiu $2, $zero, -4
++; MIPS4-NEXT: and $3, $1, $2
++; MIPS4-NEXT: andi $1, $1, 3
++; MIPS4-NEXT: sll $1, $1, 3
++; MIPS4-NEXT: ori $2, $zero, 255
++; MIPS4-NEXT: sllv $5, $2, $1
++; MIPS4-NEXT: nor $6, $zero, $5
++; MIPS4-NEXT: sllv $4, $4, $1
++; MIPS4-NEXT: .LBB10_1: # %entry
++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS4-NEXT: ll $7, 0($3)
++; MIPS4-NEXT: and $8, $7, $4
++; MIPS4-NEXT: nor $8, $zero, $8
++; MIPS4-NEXT: and $8, $8, $5
++; MIPS4-NEXT: and $9, $7, $6
++; MIPS4-NEXT: or $9, $9, $8
++; MIPS4-NEXT: sc $9, 0($3)
++; MIPS4-NEXT: beqz $9, .LBB10_1
++; MIPS4-NEXT: nop
++; MIPS4-NEXT: # %bb.2: # %entry
++; MIPS4-NEXT: and $2, $7, $5
++; MIPS4-NEXT: srlv $2, $2, $1
++; MIPS4-NEXT: sll $2, $2, 24
++; MIPS4-NEXT: sra $2, $2, 24
++; MIPS4-NEXT: # %bb.3: # %entry
++; MIPS4-NEXT: jr $ra
++; MIPS4-NEXT: nop
++;
++; MIPS64-LABEL: AtomicLoadNand8:
++; MIPS64: # %bb.0: # %entry
++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
++; MIPS64-NEXT: daddu $1, $1, $25
++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
++; MIPS64-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64-NEXT: daddiu $2, $zero, -4
++; MIPS64-NEXT: and $3, $1, $2
++; MIPS64-NEXT: andi $1, $1, 3
++; MIPS64-NEXT: sll $1, $1, 3
++; MIPS64-NEXT: ori $2, $zero, 255
++; MIPS64-NEXT: sllv $5, $2, $1
++; MIPS64-NEXT: nor $6, $zero, $5
++; MIPS64-NEXT: sllv $4, $4, $1
++; MIPS64-NEXT: .LBB10_1: # %entry
++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64-NEXT: ll $7, 0($3)
++; MIPS64-NEXT: and $8, $7, $4
++; MIPS64-NEXT: nor $8, $zero, $8
++; MIPS64-NEXT: and $8, $8, $5
++; MIPS64-NEXT: and $9, $7, $6
++; MIPS64-NEXT: or $9, $9, $8
++; MIPS64-NEXT: sc $9, 0($3)
++; MIPS64-NEXT: beqz $9, .LBB10_1
++; MIPS64-NEXT: nop
++; MIPS64-NEXT: # %bb.2: # %entry
++; MIPS64-NEXT: and $2, $7, $5
++; MIPS64-NEXT: srlv $2, $2, $1
++; MIPS64-NEXT: sll $2, $2, 24
++; MIPS64-NEXT: sra $2, $2, 24
++; MIPS64-NEXT: # %bb.3: # %entry
++; MIPS64-NEXT: jr $ra
++; MIPS64-NEXT: nop
++;
++; MIPS64R2-LABEL: AtomicLoadNand8:
++; MIPS64R2: # %bb.0: # %entry
++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
++; MIPS64R2-NEXT: daddu $1, $1, $25
++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
++; MIPS64R2-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64R2-NEXT: daddiu $2, $zero, -4
++; MIPS64R2-NEXT: and $3, $1, $2
++; MIPS64R2-NEXT: andi $1, $1, 3
++; MIPS64R2-NEXT: sll $1, $1, 3
++; MIPS64R2-NEXT: ori $2, $zero, 255
++; MIPS64R2-NEXT: sllv $5, $2, $1
++; MIPS64R2-NEXT: nor $6, $zero, $5
++; MIPS64R2-NEXT: sllv $4, $4, $1
++; MIPS64R2-NEXT: .LBB10_1: # %entry
++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R2-NEXT: ll $7, 0($3)
++; MIPS64R2-NEXT: and $8, $7, $4
++; MIPS64R2-NEXT: nor $8, $zero, $8
++; MIPS64R2-NEXT: and $8, $8, $5
++; MIPS64R2-NEXT: and $9, $7, $6
++; MIPS64R2-NEXT: or $9, $9, $8
++; MIPS64R2-NEXT: sc $9, 0($3)
++; MIPS64R2-NEXT: beqz $9, .LBB10_1
++; MIPS64R2-NEXT: nop
++; MIPS64R2-NEXT: # %bb.2: # %entry
++; MIPS64R2-NEXT: and $2, $7, $5
++; MIPS64R2-NEXT: srlv $2, $2, $1
++; MIPS64R2-NEXT: seb $2, $2
++; MIPS64R2-NEXT: # %bb.3: # %entry
++; MIPS64R2-NEXT: jr $ra
++; MIPS64R2-NEXT: nop
++;
++; MIPS64R6-LABEL: AtomicLoadNand8:
++; MIPS64R6: # %bb.0: # %entry
++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
++; MIPS64R6-NEXT: daddu $1, $1, $25
++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
++; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64R6-NEXT: daddiu $2, $zero, -4
++; MIPS64R6-NEXT: and $3, $1, $2
++; MIPS64R6-NEXT: andi $1, $1, 3
++; MIPS64R6-NEXT: sll $1, $1, 3
++; MIPS64R6-NEXT: ori $2, $zero, 255
++; MIPS64R6-NEXT: sllv $5, $2, $1
++; MIPS64R6-NEXT: nor $6, $zero, $5
++; MIPS64R6-NEXT: sllv $4, $4, $1
++; MIPS64R6-NEXT: .LBB10_1: # %entry
++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R6-NEXT: ll $7, 0($3)
++; MIPS64R6-NEXT: and $8, $7, $4
++; MIPS64R6-NEXT: nor $8, $zero, $8
++; MIPS64R6-NEXT: and $8, $8, $5
++; MIPS64R6-NEXT: and $9, $7, $6
++; MIPS64R6-NEXT: or $9, $9, $8
++; MIPS64R6-NEXT: sc $9, 0($3)
++; MIPS64R6-NEXT: beqzc $9, .LBB10_1
++; MIPS64R6-NEXT: # %bb.2: # %entry
++; MIPS64R6-NEXT: and $2, $7, $5
++; MIPS64R6-NEXT: srlv $2, $2, $1
++; MIPS64R6-NEXT: seb $2, $2
++; MIPS64R6-NEXT: # %bb.3: # %entry
++; MIPS64R6-NEXT: jrc $ra
++;
++; MIPS64R6O0-LABEL: AtomicLoadNand8:
++; MIPS64R6O0: # %bb.0: # %entry
++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8)))
++; MIPS64R6O0-NEXT: daddu $1, $1, $25
++; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadNand8)))
++; MIPS64R6O0-NEXT: move $1, $4
++; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2)
++; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
++; MIPS64R6O0-NEXT: and $5, $2, $3
++; MIPS64R6O0-NEXT: andi $2, $2, 3
++; MIPS64R6O0-NEXT: xori $2, $2, 3
++; MIPS64R6O0-NEXT: sll $9, $2, 3
++; MIPS64R6O0-NEXT: ori $2, $zero, 255
++; MIPS64R6O0-NEXT: sllv $7, $2, $9
++; MIPS64R6O0-NEXT: nor $8, $zero, $7
++; MIPS64R6O0-NEXT: sllv $6, $1, $9
++; MIPS64R6O0-NEXT: .LBB10_1: # %entry
++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R6O0-NEXT: ll $2, 0($5)
++; MIPS64R6O0-NEXT: and $3, $2, $6
++; MIPS64R6O0-NEXT: nor $3, $zero, $3
++; MIPS64R6O0-NEXT: and $3, $3, $7
++; MIPS64R6O0-NEXT: and $4, $2, $8
++; MIPS64R6O0-NEXT: or $4, $4, $3
++; MIPS64R6O0-NEXT: sc $4, 0($5)
++; MIPS64R6O0-NEXT: beqzc $4, .LBB10_1
++; MIPS64R6O0-NEXT: # %bb.2: # %entry
++; MIPS64R6O0-NEXT: and $1, $2, $7
++; MIPS64R6O0-NEXT: srlv $1, $1, $9
++; MIPS64R6O0-NEXT: seb $1, $1
++; MIPS64R6O0-NEXT: # %bb.3: # %entry
++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
++; MIPS64R6O0-NEXT: # %bb.4: # %entry
++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
++; MIPS64R6O0-NEXT: jrc $ra
++;
++; MM32-LABEL: AtomicLoadNand8:
++; MM32: # %bb.0: # %entry
++; MM32-NEXT: lui $2, %hi(_gp_disp)
++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MM32-NEXT: addu $2, $2, $25
++; MM32-NEXT: lw $1, %got(y)($2)
++; MM32-NEXT: addiu $2, $zero, -4
++; MM32-NEXT: and $3, $1, $2
++; MM32-NEXT: andi $1, $1, 3
++; MM32-NEXT: sll $1, $1, 3
++; MM32-NEXT: ori $2, $zero, 255
++; MM32-NEXT: sllv $5, $2, $1
++; MM32-NEXT: nor $6, $zero, $5
++; MM32-NEXT: sllv $4, $4, $1
++; MM32-NEXT: $BB10_1: # %entry
++; MM32-NEXT: # =>This Inner Loop Header: Depth=1
++; MM32-NEXT: ll $7, 0($3)
++; MM32-NEXT: and $8, $7, $4
++; MM32-NEXT: nor $8, $zero, $8
++; MM32-NEXT: and $8, $8, $5
++; MM32-NEXT: and $9, $7, $6
++; MM32-NEXT: or $9, $9, $8
++; MM32-NEXT: sc $9, 0($3)
++; MM32-NEXT: beqzc $9, $BB10_1
++; MM32-NEXT: # %bb.2: # %entry
++; MM32-NEXT: and $2, $7, $5
++; MM32-NEXT: srlv $2, $2, $1
++; MM32-NEXT: seb $2, $2
++; MM32-NEXT: # %bb.3: # %entry
++; MM32-NEXT: jrc $ra
++;
++; O1-LABEL: AtomicLoadNand8:
++; O1: # %bb.0: # %entry
++; O1-NEXT: lui $2, %hi(_gp_disp)
++; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O1-NEXT: addu $1, $2, $25
++; O1-NEXT: lw $1, %got(y)($1)
++; O1-NEXT: addiu $2, $zero, -4
++; O1-NEXT: and $3, $1, $2
++; O1-NEXT: andi $1, $1, 3
++; O1-NEXT: sll $1, $1, 3
++; O1-NEXT: ori $2, $zero, 255
++; O1-NEXT: sllv $5, $2, $1
++; O1-NEXT: nor $6, $zero, $5
++; O1-NEXT: sllv $4, $4, $1
++; O1-NEXT: $BB10_1: # %entry
++; O1-NEXT: # =>This Inner Loop Header: Depth=1
++; O1-NEXT: ll $7, 0($3)
++; O1-NEXT: and $8, $7, $4
++; O1-NEXT: nor $8, $zero, $8
++; O1-NEXT: and $8, $8, $5
++; O1-NEXT: and $9, $7, $6
++; O1-NEXT: or $9, $9, $8
++; O1-NEXT: sc $9, 0($3)
++; O1-NEXT: beqz $9, $BB10_1
++; O1-NEXT: nop
++; O1-NEXT: # %bb.2: # %entry
++; O1-NEXT: and $2, $7, $5
++; O1-NEXT: srlv $2, $2, $1
++; O1-NEXT: sll $2, $2, 24
++; O1-NEXT: sra $2, $2, 24
++; O1-NEXT: # %bb.3: # %entry
++; O1-NEXT: jr $ra
++; O1-NEXT: nop
++;
++; O2-LABEL: AtomicLoadNand8:
++; O2: # %bb.0: # %entry
++; O2-NEXT: lui $2, %hi(_gp_disp)
++; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O2-NEXT: addu $1, $2, $25
++; O2-NEXT: lw $1, %got(y)($1)
++; O2-NEXT: addiu $2, $zero, -4
++; O2-NEXT: and $3, $1, $2
++; O2-NEXT: andi $1, $1, 3
++; O2-NEXT: sll $1, $1, 3
++; O2-NEXT: ori $2, $zero, 255
++; O2-NEXT: sllv $5, $2, $1
++; O2-NEXT: nor $6, $zero, $5
++; O2-NEXT: sllv $4, $4, $1
++; O2-NEXT: $BB10_1: # %entry
++; O2-NEXT: # =>This Inner Loop Header: Depth=1
++; O2-NEXT: ll $7, 0($3)
++; O2-NEXT: and $8, $7, $4
++; O2-NEXT: nor $8, $zero, $8
++; O2-NEXT: and $8, $8, $5
++; O2-NEXT: and $9, $7, $6
++; O2-NEXT: or $9, $9, $8
++; O2-NEXT: sc $9, 0($3)
++; O2-NEXT: beqz $9, $BB10_1
++; O2-NEXT: nop
++; O2-NEXT: # %bb.2: # %entry
++; O2-NEXT: and $2, $7, $5
++; O2-NEXT: srlv $2, $2, $1
++; O2-NEXT: sll $2, $2, 24
++; O2-NEXT: sra $2, $2, 24
++; O2-NEXT: # %bb.3: # %entry
++; O2-NEXT: jr $ra
++; O2-NEXT: nop
++;
++; O3-LABEL: AtomicLoadNand8:
++; O3: # %bb.0: # %entry
++; O3-NEXT: lui $2, %hi(_gp_disp)
++; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O3-NEXT: addu $1, $2, $25
++; O3-NEXT: addiu $2, $zero, -4
++; O3-NEXT: lw $1, %got(y)($1)
++; O3-NEXT: and $3, $1, $2
++; O3-NEXT: andi $1, $1, 3
++; O3-NEXT: ori $2, $zero, 255
++; O3-NEXT: sll $1, $1, 3
++; O3-NEXT: sllv $5, $2, $1
++; O3-NEXT: sllv $4, $4, $1
++; O3-NEXT: nor $6, $zero, $5
++; O3-NEXT: $BB10_1: # %entry
++; O3-NEXT: # =>This Inner Loop Header: Depth=1
++; O3-NEXT: ll $7, 0($3)
++; O3-NEXT: and $8, $7, $4
++; O3-NEXT: nor $8, $zero, $8
++; O3-NEXT: and $8, $8, $5
++; O3-NEXT: and $9, $7, $6
++; O3-NEXT: or $9, $9, $8
++; O3-NEXT: sc $9, 0($3)
++; O3-NEXT: beqz $9, $BB10_1
++; O3-NEXT: nop
++; O3-NEXT: # %bb.2: # %entry
++; O3-NEXT: and $2, $7, $5
++; O3-NEXT: srlv $2, $2, $1
++; O3-NEXT: sll $2, $2, 24
++; O3-NEXT: sra $2, $2, 24
++; O3-NEXT: # %bb.3: # %entry
++; O3-NEXT: jr $ra
++; O3-NEXT: nop
++;
++; MIPS32EB-LABEL: AtomicLoadNand8:
++; MIPS32EB: # %bb.0: # %entry
++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32EB-NEXT: addu $1, $2, $25
++; MIPS32EB-NEXT: lw $1, %got(y)($1)
++; MIPS32EB-NEXT: addiu $2, $zero, -4
++; MIPS32EB-NEXT: and $3, $1, $2
++; MIPS32EB-NEXT: andi $1, $1, 3
++; MIPS32EB-NEXT: xori $1, $1, 3
++; MIPS32EB-NEXT: sll $1, $1, 3
++; MIPS32EB-NEXT: ori $2, $zero, 255
++; MIPS32EB-NEXT: sllv $5, $2, $1
++; MIPS32EB-NEXT: nor $6, $zero, $5
++; MIPS32EB-NEXT: sllv $4, $4, $1
++; MIPS32EB-NEXT: $BB10_1: # %entry
++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32EB-NEXT: ll $7, 0($3)
++; MIPS32EB-NEXT: and $8, $7, $4
++; MIPS32EB-NEXT: nor $8, $zero, $8
++; MIPS32EB-NEXT: and $8, $8, $5
++; MIPS32EB-NEXT: and $9, $7, $6
++; MIPS32EB-NEXT: or $9, $9, $8
++; MIPS32EB-NEXT: sc $9, 0($3)
++; MIPS32EB-NEXT: beqz $9, $BB10_1
++; MIPS32EB-NEXT: nop
++; MIPS32EB-NEXT: # %bb.2: # %entry
++; MIPS32EB-NEXT: and $2, $7, $5
++; MIPS32EB-NEXT: srlv $2, $2, $1
++; MIPS32EB-NEXT: sll $2, $2, 24
++; MIPS32EB-NEXT: sra $2, $2, 24
++; MIPS32EB-NEXT: # %bb.3: # %entry
++; MIPS32EB-NEXT: jr $ra
++; MIPS32EB-NEXT: nop
++entry:
++ %0 = atomicrmw nand i8* @y, i8 %incr monotonic
++ ret i8 %0
++
++}
++
++define signext i8 @AtomicSwap8(i8 signext %newval) nounwind {
++; MIPS32-LABEL: AtomicSwap8:
++; MIPS32: # %bb.0: # %entry
++; MIPS32-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32-NEXT: addu $1, $2, $25
++; MIPS32-NEXT: lw $1, %got(y)($1)
++; MIPS32-NEXT: addiu $2, $zero, -4
++; MIPS32-NEXT: and $3, $1, $2
++; MIPS32-NEXT: andi $1, $1, 3
++; MIPS32-NEXT: sll $1, $1, 3
++; MIPS32-NEXT: ori $2, $zero, 255
++; MIPS32-NEXT: sllv $5, $2, $1
++; MIPS32-NEXT: nor $6, $zero, $5
++; MIPS32-NEXT: sllv $4, $4, $1
++; MIPS32-NEXT: $BB11_1: # %entry
++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32-NEXT: ll $7, 0($3)
++; MIPS32-NEXT: and $8, $4, $5
++; MIPS32-NEXT: and $9, $7, $6
++; MIPS32-NEXT: or $9, $9, $8
++; MIPS32-NEXT: sc $9, 0($3)
++; MIPS32-NEXT: beqz $9, $BB11_1
++; MIPS32-NEXT: nop
++; MIPS32-NEXT: # %bb.2: # %entry
++; MIPS32-NEXT: and $2, $7, $5
++; MIPS32-NEXT: srlv $2, $2, $1
++; MIPS32-NEXT: sll $2, $2, 24
++; MIPS32-NEXT: sra $2, $2, 24
++; MIPS32-NEXT: # %bb.3: # %entry
++; MIPS32-NEXT: jr $ra
++; MIPS32-NEXT: nop
++;
++; MIPS32O0-LABEL: AtomicSwap8:
++; MIPS32O0: # %bb.0: # %entry
++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32O0-NEXT: addiu $sp, $sp, -8
++; MIPS32O0-NEXT: addu $1, $2, $25
++; MIPS32O0-NEXT: lw $1, %got(y)($1)
++; MIPS32O0-NEXT: addiu $2, $zero, -4
++; MIPS32O0-NEXT: and $5, $1, $2
++; MIPS32O0-NEXT: andi $1, $1, 3
++; MIPS32O0-NEXT: sll $9, $1, 3
++; MIPS32O0-NEXT: ori $1, $zero, 255
++; MIPS32O0-NEXT: sllv $7, $1, $9
++; MIPS32O0-NEXT: nor $8, $zero, $7
++; MIPS32O0-NEXT: sllv $6, $4, $9
++; MIPS32O0-NEXT: $BB11_1: # %entry
++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32O0-NEXT: ll $2, 0($5)
++; MIPS32O0-NEXT: and $3, $6, $7
++; MIPS32O0-NEXT: and $4, $2, $8
++; MIPS32O0-NEXT: or $4, $4, $3
++; MIPS32O0-NEXT: sc $4, 0($5)
++; MIPS32O0-NEXT: beqz $4, $BB11_1
++; MIPS32O0-NEXT: nop
++; MIPS32O0-NEXT: # %bb.2: # %entry
++; MIPS32O0-NEXT: and $1, $2, $7
++; MIPS32O0-NEXT: srlv $1, $1, $9
++; MIPS32O0-NEXT: sll $1, $1, 24
++; MIPS32O0-NEXT: sra $1, $1, 24
++; MIPS32O0-NEXT: # %bb.3: # %entry
++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
++; MIPS32O0-NEXT: # %bb.4: # %entry
++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
++; MIPS32O0-NEXT: sll $1, $1, 24
++; MIPS32O0-NEXT: sra $2, $1, 24
++; MIPS32O0-NEXT: addiu $sp, $sp, 8
++; MIPS32O0-NEXT: jr $ra
++; MIPS32O0-NEXT: nop
++;
++; MIPS32R2-LABEL: AtomicSwap8:
++; MIPS32R2: # %bb.0: # %entry
++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R2-NEXT: addu $1, $2, $25
++; MIPS32R2-NEXT: lw $1, %got(y)($1)
++; MIPS32R2-NEXT: addiu $2, $zero, -4
++; MIPS32R2-NEXT: and $3, $1, $2
++; MIPS32R2-NEXT: andi $1, $1, 3
++; MIPS32R2-NEXT: sll $1, $1, 3
++; MIPS32R2-NEXT: ori $2, $zero, 255
++; MIPS32R2-NEXT: sllv $5, $2, $1
++; MIPS32R2-NEXT: nor $6, $zero, $5
++; MIPS32R2-NEXT: sllv $4, $4, $1
++; MIPS32R2-NEXT: $BB11_1: # %entry
++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R2-NEXT: ll $7, 0($3)
++; MIPS32R2-NEXT: and $8, $4, $5
++; MIPS32R2-NEXT: and $9, $7, $6
++; MIPS32R2-NEXT: or $9, $9, $8
++; MIPS32R2-NEXT: sc $9, 0($3)
++; MIPS32R2-NEXT: beqz $9, $BB11_1
++; MIPS32R2-NEXT: nop
++; MIPS32R2-NEXT: # %bb.2: # %entry
++; MIPS32R2-NEXT: and $2, $7, $5
++; MIPS32R2-NEXT: srlv $2, $2, $1
++; MIPS32R2-NEXT: seb $2, $2
++; MIPS32R2-NEXT: # %bb.3: # %entry
++; MIPS32R2-NEXT: jr $ra
++; MIPS32R2-NEXT: nop
++;
++; MIPS32R6-LABEL: AtomicSwap8:
++; MIPS32R6: # %bb.0: # %entry
++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R6-NEXT: addu $1, $2, $25
++; MIPS32R6-NEXT: lw $1, %got(y)($1)
++; MIPS32R6-NEXT: addiu $2, $zero, -4
++; MIPS32R6-NEXT: and $3, $1, $2
++; MIPS32R6-NEXT: andi $1, $1, 3
++; MIPS32R6-NEXT: sll $1, $1, 3
++; MIPS32R6-NEXT: ori $2, $zero, 255
++; MIPS32R6-NEXT: sllv $5, $2, $1
++; MIPS32R6-NEXT: nor $6, $zero, $5
++; MIPS32R6-NEXT: sllv $4, $4, $1
++; MIPS32R6-NEXT: $BB11_1: # %entry
++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R6-NEXT: ll $7, 0($3)
++; MIPS32R6-NEXT: and $8, $4, $5
++; MIPS32R6-NEXT: and $9, $7, $6
++; MIPS32R6-NEXT: or $9, $9, $8
++; MIPS32R6-NEXT: sc $9, 0($3)
++; MIPS32R6-NEXT: beqzc $9, $BB11_1
++; MIPS32R6-NEXT: # %bb.2: # %entry
++; MIPS32R6-NEXT: and $2, $7, $5
++; MIPS32R6-NEXT: srlv $2, $2, $1
++; MIPS32R6-NEXT: seb $2, $2
++; MIPS32R6-NEXT: # %bb.3: # %entry
++; MIPS32R6-NEXT: jrc $ra
++;
++; MIPS32R6O0-LABEL: AtomicSwap8:
++; MIPS32R6O0: # %bb.0: # %entry
++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
++; MIPS32R6O0-NEXT: addu $1, $2, $25
++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0
++; MIPS32R6O0-NEXT: lw $1, %got(y)($1)
++; MIPS32R6O0-NEXT: addiu $2, $zero, -4
++; MIPS32R6O0-NEXT: and $5, $1, $2
++; MIPS32R6O0-NEXT: andi $1, $1, 3
++; MIPS32R6O0-NEXT: sll $9, $1, 3
++; MIPS32R6O0-NEXT: ori $1, $zero, 255
++; MIPS32R6O0-NEXT: sllv $7, $1, $9
++; MIPS32R6O0-NEXT: nor $8, $zero, $7
++; MIPS32R6O0-NEXT: sllv $6, $4, $9
++; MIPS32R6O0-NEXT: $BB11_1: # %entry
++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R6O0-NEXT: ll $2, 0($5)
++; MIPS32R6O0-NEXT: and $3, $6, $7
++; MIPS32R6O0-NEXT: and $4, $2, $8
++; MIPS32R6O0-NEXT: or $4, $4, $3
++; MIPS32R6O0-NEXT: sc $4, 0($5)
++; MIPS32R6O0-NEXT: beqzc $4, $BB11_1
++; MIPS32R6O0-NEXT: # %bb.2: # %entry
++; MIPS32R6O0-NEXT: and $1, $2, $7
++; MIPS32R6O0-NEXT: srlv $1, $1, $9
++; MIPS32R6O0-NEXT: seb $1, $1
++; MIPS32R6O0-NEXT: # %bb.3: # %entry
++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
++; MIPS32R6O0-NEXT: # %bb.4: # %entry
++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
++; MIPS32R6O0-NEXT: jrc $ra
++;
++; MIPS4-LABEL: AtomicSwap8:
++; MIPS4: # %bb.0: # %entry
++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
++; MIPS4-NEXT: daddu $1, $1, $25
++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
++; MIPS4-NEXT: ld $1, %got_disp(y)($1)
++; MIPS4-NEXT: daddiu $2, $zero, -4
++; MIPS4-NEXT: and $3, $1, $2
++; MIPS4-NEXT: andi $1, $1, 3
++; MIPS4-NEXT: sll $1, $1, 3
++; MIPS4-NEXT: ori $2, $zero, 255
++; MIPS4-NEXT: sllv $5, $2, $1
++; MIPS4-NEXT: nor $6, $zero, $5
++; MIPS4-NEXT: sllv $4, $4, $1
++; MIPS4-NEXT: .LBB11_1: # %entry
++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS4-NEXT: ll $7, 0($3)
++; MIPS4-NEXT: and $8, $4, $5
++; MIPS4-NEXT: and $9, $7, $6
++; MIPS4-NEXT: or $9, $9, $8
++; MIPS4-NEXT: sc $9, 0($3)
++; MIPS4-NEXT: beqz $9, .LBB11_1
++; MIPS4-NEXT: nop
++; MIPS4-NEXT: # %bb.2: # %entry
++; MIPS4-NEXT: and $2, $7, $5
++; MIPS4-NEXT: srlv $2, $2, $1
++; MIPS4-NEXT: sll $2, $2, 24
++; MIPS4-NEXT: sra $2, $2, 24
++; MIPS4-NEXT: # %bb.3: # %entry
++; MIPS4-NEXT: jr $ra
++; MIPS4-NEXT: nop
++;
++; MIPS64-LABEL: AtomicSwap8:
++; MIPS64: # %bb.0: # %entry
++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
++; MIPS64-NEXT: daddu $1, $1, $25
++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
++; MIPS64-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64-NEXT: daddiu $2, $zero, -4
++; MIPS64-NEXT: and $3, $1, $2
++; MIPS64-NEXT: andi $1, $1, 3
++; MIPS64-NEXT: sll $1, $1, 3
++; MIPS64-NEXT: ori $2, $zero, 255
++; MIPS64-NEXT: sllv $5, $2, $1
++; MIPS64-NEXT: nor $6, $zero, $5
++; MIPS64-NEXT: sllv $4, $4, $1
++; MIPS64-NEXT: .LBB11_1: # %entry
++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64-NEXT: ll $7, 0($3)
++; MIPS64-NEXT: and $8, $4, $5
++; MIPS64-NEXT: and $9, $7, $6
++; MIPS64-NEXT: or $9, $9, $8
++; MIPS64-NEXT: sc $9, 0($3)
++; MIPS64-NEXT: beqz $9, .LBB11_1
++; MIPS64-NEXT: nop
++; MIPS64-NEXT: # %bb.2: # %entry
++; MIPS64-NEXT: and $2, $7, $5
++; MIPS64-NEXT: srlv $2, $2, $1
++; MIPS64-NEXT: sll $2, $2, 24
++; MIPS64-NEXT: sra $2, $2, 24
++; MIPS64-NEXT: # %bb.3: # %entry
++; MIPS64-NEXT: jr $ra
++; MIPS64-NEXT: nop
++;
++; MIPS64R2-LABEL: AtomicSwap8:
++; MIPS64R2: # %bb.0: # %entry
++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
++; MIPS64R2-NEXT: daddu $1, $1, $25
++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
++; MIPS64R2-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64R2-NEXT: daddiu $2, $zero, -4
++; MIPS64R2-NEXT: and $3, $1, $2
++; MIPS64R2-NEXT: andi $1, $1, 3
++; MIPS64R2-NEXT: sll $1, $1, 3
++; MIPS64R2-NEXT: ori $2, $zero, 255
++; MIPS64R2-NEXT: sllv $5, $2, $1
++; MIPS64R2-NEXT: nor $6, $zero, $5
++; MIPS64R2-NEXT: sllv $4, $4, $1
++; MIPS64R2-NEXT: .LBB11_1: # %entry
++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R2-NEXT: ll $7, 0($3)
++; MIPS64R2-NEXT: and $8, $4, $5
++; MIPS64R2-NEXT: and $9, $7, $6
++; MIPS64R2-NEXT: or $9, $9, $8
++; MIPS64R2-NEXT: sc $9, 0($3)
++; MIPS64R2-NEXT: beqz $9, .LBB11_1
++; MIPS64R2-NEXT: nop
++; MIPS64R2-NEXT: # %bb.2: # %entry
++; MIPS64R2-NEXT: and $2, $7, $5
++; MIPS64R2-NEXT: srlv $2, $2, $1
++; MIPS64R2-NEXT: seb $2, $2
++; MIPS64R2-NEXT: # %bb.3: # %entry
++; MIPS64R2-NEXT: jr $ra
++; MIPS64R2-NEXT: nop
++;
++; MIPS64R6-LABEL: AtomicSwap8:
++; MIPS64R6: # %bb.0: # %entry
++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
++; MIPS64R6-NEXT: daddu $1, $1, $25
++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
++; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64R6-NEXT: daddiu $2, $zero, -4
++; MIPS64R6-NEXT: and $3, $1, $2
++; MIPS64R6-NEXT: andi $1, $1, 3
++; MIPS64R6-NEXT: sll $1, $1, 3
++; MIPS64R6-NEXT: ori $2, $zero, 255
++; MIPS64R6-NEXT: sllv $5, $2, $1
++; MIPS64R6-NEXT: nor $6, $zero, $5
++; MIPS64R6-NEXT: sllv $4, $4, $1
++; MIPS64R6-NEXT: .LBB11_1: # %entry
++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R6-NEXT: ll $7, 0($3)
++; MIPS64R6-NEXT: and $8, $4, $5
++; MIPS64R6-NEXT: and $9, $7, $6
++; MIPS64R6-NEXT: or $9, $9, $8
++; MIPS64R6-NEXT: sc $9, 0($3)
++; MIPS64R6-NEXT: beqzc $9, .LBB11_1
++; MIPS64R6-NEXT: # %bb.2: # %entry
++; MIPS64R6-NEXT: and $2, $7, $5
++; MIPS64R6-NEXT: srlv $2, $2, $1
++; MIPS64R6-NEXT: seb $2, $2
++; MIPS64R6-NEXT: # %bb.3: # %entry
++; MIPS64R6-NEXT: jrc $ra
++;
++; MIPS64R6O0-LABEL: AtomicSwap8:
++; MIPS64R6O0: # %bb.0: # %entry
++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap8)))
++; MIPS64R6O0-NEXT: daddu $1, $1, $25
++; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicSwap8)))
++; MIPS64R6O0-NEXT: move $1, $4
++; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2)
++; MIPS64R6O0-NEXT: daddiu $3, $zero, -4
++; MIPS64R6O0-NEXT: and $5, $2, $3
++; MIPS64R6O0-NEXT: andi $2, $2, 3
++; MIPS64R6O0-NEXT: xori $2, $2, 3
++; MIPS64R6O0-NEXT: sll $9, $2, 3
++; MIPS64R6O0-NEXT: ori $2, $zero, 255
++; MIPS64R6O0-NEXT: sllv $7, $2, $9
++; MIPS64R6O0-NEXT: nor $8, $zero, $7
++; MIPS64R6O0-NEXT: sllv $6, $1, $9
++; MIPS64R6O0-NEXT: .LBB11_1: # %entry
++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R6O0-NEXT: ll $2, 0($5)
++; MIPS64R6O0-NEXT: and $3, $6, $7
++; MIPS64R6O0-NEXT: and $4, $2, $8
++; MIPS64R6O0-NEXT: or $4, $4, $3
++; MIPS64R6O0-NEXT: sc $4, 0($5)
++; MIPS64R6O0-NEXT: beqzc $4, .LBB11_1
++; MIPS64R6O0-NEXT: # %bb.2: # %entry
++; MIPS64R6O0-NEXT: and $1, $2, $7
++; MIPS64R6O0-NEXT: srlv $1, $1, $9
++; MIPS64R6O0-NEXT: seb $1, $1
++; MIPS64R6O0-NEXT: # %bb.3: # %entry
++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
++; MIPS64R6O0-NEXT: # %bb.4: # %entry
++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
++; MIPS64R6O0-NEXT: jrc $ra
++;
++; MM32-LABEL: AtomicSwap8:
++; MM32: # %bb.0: # %entry
++; MM32-NEXT: lui $2, %hi(_gp_disp)
++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MM32-NEXT: addu $2, $2, $25
++; MM32-NEXT: lw $1, %got(y)($2)
++; MM32-NEXT: addiu $2, $zero, -4
++; MM32-NEXT: and $3, $1, $2
++; MM32-NEXT: andi $1, $1, 3
++; MM32-NEXT: sll $1, $1, 3
++; MM32-NEXT: ori $2, $zero, 255
++; MM32-NEXT: sllv $5, $2, $1
++; MM32-NEXT: nor $6, $zero, $5
++; MM32-NEXT: sllv $4, $4, $1
++; MM32-NEXT: $BB11_1: # %entry
++; MM32-NEXT: # =>This Inner Loop Header: Depth=1
++; MM32-NEXT: ll $7, 0($3)
++; MM32-NEXT: and $8, $4, $5
++; MM32-NEXT: and $9, $7, $6
++; MM32-NEXT: or $9, $9, $8
++; MM32-NEXT: sc $9, 0($3)
++; MM32-NEXT: beqzc $9, $BB11_1
++; MM32-NEXT: # %bb.2: # %entry
++; MM32-NEXT: and $2, $7, $5
++; MM32-NEXT: srlv $2, $2, $1
++; MM32-NEXT: seb $2, $2
++; MM32-NEXT: # %bb.3: # %entry
++; MM32-NEXT: jrc $ra
++;
++; O1-LABEL: AtomicSwap8:
++; O1: # %bb.0: # %entry
++; O1-NEXT: lui $2, %hi(_gp_disp)
++; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O1-NEXT: addu $1, $2, $25
++; O1-NEXT: lw $1, %got(y)($1)
++; O1-NEXT: addiu $2, $zero, -4
++; O1-NEXT: and $3, $1, $2
++; O1-NEXT: andi $1, $1, 3
++; O1-NEXT: sll $1, $1, 3
++; O1-NEXT: ori $2, $zero, 255
++; O1-NEXT: sllv $5, $2, $1
++; O1-NEXT: nor $6, $zero, $5
++; O1-NEXT: sllv $4, $4, $1
++; O1-NEXT: $BB11_1: # %entry
++; O1-NEXT: # =>This Inner Loop Header: Depth=1
++; O1-NEXT: ll $7, 0($3)
++; O1-NEXT: and $8, $4, $5
++; O1-NEXT: and $9, $7, $6
++; O1-NEXT: or $9, $9, $8
++; O1-NEXT: sc $9, 0($3)
++; O1-NEXT: beqz $9, $BB11_1
++; O1-NEXT: nop
++; O1-NEXT: # %bb.2: # %entry
++; O1-NEXT: and $2, $7, $5
++; O1-NEXT: srlv $2, $2, $1
++; O1-NEXT: sll $2, $2, 24
++; O1-NEXT: sra $2, $2, 24
++; O1-NEXT: # %bb.3: # %entry
++; O1-NEXT: jr $ra
++; O1-NEXT: nop
++;
++; O2-LABEL: AtomicSwap8:
++; O2: # %bb.0: # %entry
++; O2-NEXT: lui $2, %hi(_gp_disp)
++; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O2-NEXT: addu $1, $2, $25
++; O2-NEXT: lw $1, %got(y)($1)
++; O2-NEXT: addiu $2, $zero, -4
++; O2-NEXT: and $3, $1, $2
++; O2-NEXT: andi $1, $1, 3
++; O2-NEXT: sll $1, $1, 3
++; O2-NEXT: ori $2, $zero, 255
++; O2-NEXT: sllv $5, $2, $1
++; O2-NEXT: nor $6, $zero, $5
++; O2-NEXT: sllv $4, $4, $1
++; O2-NEXT: $BB11_1: # %entry
++; O2-NEXT: # =>This Inner Loop Header: Depth=1
++; O2-NEXT: ll $7, 0($3)
++; O2-NEXT: and $8, $4, $5
++; O2-NEXT: and $9, $7, $6
++; O2-NEXT: or $9, $9, $8
++; O2-NEXT: sc $9, 0($3)
++; O2-NEXT: beqz $9, $BB11_1
++; O2-NEXT: nop
++; O2-NEXT: # %bb.2: # %entry
++; O2-NEXT: and $2, $7, $5
++; O2-NEXT: srlv $2, $2, $1
++; O2-NEXT: sll $2, $2, 24
++; O2-NEXT: sra $2, $2, 24
++; O2-NEXT: # %bb.3: # %entry
++; O2-NEXT: jr $ra
++; O2-NEXT: nop
++;
++; O3-LABEL: AtomicSwap8:
++; O3: # %bb.0: # %entry
++; O3-NEXT: lui $2, %hi(_gp_disp)
++; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O3-NEXT: addu $1, $2, $25
++; O3-NEXT: addiu $2, $zero, -4
++; O3-NEXT: lw $1, %got(y)($1)
++; O3-NEXT: and $3, $1, $2
++; O3-NEXT: andi $1, $1, 3
++; O3-NEXT: ori $2, $zero, 255
++; O3-NEXT: sll $1, $1, 3
++; O3-NEXT: sllv $5, $2, $1
++; O3-NEXT: sllv $4, $4, $1
++; O3-NEXT: nor $6, $zero, $5
++; O3-NEXT: $BB11_1: # %entry
++; O3-NEXT: # =>This Inner Loop Header: Depth=1
++; O3-NEXT: ll $7, 0($3)
++; O3-NEXT: and $8, $4, $5
++; O3-NEXT: and $9, $7, $6
++; O3-NEXT: or $9, $9, $8
++; O3-NEXT: sc $9, 0($3)
++; O3-NEXT: beqz $9, $BB11_1
++; O3-NEXT: nop
++; O3-NEXT: # %bb.2: # %entry
++; O3-NEXT: and $2, $7, $5
++; O3-NEXT: srlv $2, $2, $1
++; O3-NEXT: sll $2, $2, 24
++; O3-NEXT: sra $2, $2, 24
++; O3-NEXT: # %bb.3: # %entry
++; O3-NEXT: jr $ra
++; O3-NEXT: nop
++;
++; MIPS32EB-LABEL: AtomicSwap8:
++; MIPS32EB: # %bb.0: # %entry
++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32EB-NEXT: addu $1, $2, $25
++; MIPS32EB-NEXT: lw $1, %got(y)($1)
++; MIPS32EB-NEXT: addiu $2, $zero, -4
++; MIPS32EB-NEXT: and $3, $1, $2
++; MIPS32EB-NEXT: andi $1, $1, 3
++; MIPS32EB-NEXT: xori $1, $1, 3
++; MIPS32EB-NEXT: sll $1, $1, 3
++; MIPS32EB-NEXT: ori $2, $zero, 255
++; MIPS32EB-NEXT: sllv $5, $2, $1
++; MIPS32EB-NEXT: nor $6, $zero, $5
++; MIPS32EB-NEXT: sllv $4, $4, $1
++; MIPS32EB-NEXT: $BB11_1: # %entry
++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32EB-NEXT: ll $7, 0($3)
++; MIPS32EB-NEXT: and $8, $4, $5
++; MIPS32EB-NEXT: and $9, $7, $6
++; MIPS32EB-NEXT: or $9, $9, $8
++; MIPS32EB-NEXT: sc $9, 0($3)
++; MIPS32EB-NEXT: beqz $9, $BB11_1
++; MIPS32EB-NEXT: nop
++; MIPS32EB-NEXT: # %bb.2: # %entry
++; MIPS32EB-NEXT: and $2, $7, $5
++; MIPS32EB-NEXT: srlv $2, $2, $1
++; MIPS32EB-NEXT: sll $2, $2, 24
++; MIPS32EB-NEXT: sra $2, $2, 24
++; MIPS32EB-NEXT: # %bb.3: # %entry
++; MIPS32EB-NEXT: jr $ra
++; MIPS32EB-NEXT: nop
++entry:
++ %0 = atomicrmw xchg i8* @y, i8 %newval monotonic
++ ret i8 %0
++}
++
++define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {
++; MIPS32-LABEL: AtomicCmpSwap8:
++; MIPS32: # %bb.0: # %entry
++; MIPS32-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32-NEXT: addu $1, $2, $25
++; MIPS32-NEXT: lw $1, %got(y)($1)
++; MIPS32-NEXT: addiu $2, $zero, -4
++; MIPS32-NEXT: and $3, $1, $2
++; MIPS32-NEXT: andi $1, $1, 3
++; MIPS32-NEXT: sll $1, $1, 3
++; MIPS32-NEXT: ori $2, $zero, 255
++; MIPS32-NEXT: sllv $6, $2, $1
++; MIPS32-NEXT: nor $7, $zero, $6
++; MIPS32-NEXT: andi $2, $4, 255
++; MIPS32-NEXT: sllv $4, $2, $1
++; MIPS32-NEXT: andi $2, $5, 255
++; MIPS32-NEXT: sllv $5, $2, $1
++; MIPS32-NEXT: $BB12_1: # %entry
++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32-NEXT: ll $8, 0($3)
++; MIPS32-NEXT: and $9, $8, $6
++; MIPS32-NEXT: bne $9, $4, $BB12_3
++; MIPS32-NEXT: nop
++; MIPS32-NEXT: # %bb.2: # %entry
++; MIPS32-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MIPS32-NEXT: and $8, $8, $7
++; MIPS32-NEXT: or $8, $8, $5
++; MIPS32-NEXT: sc $8, 0($3)
++; MIPS32-NEXT: beqz $8, $BB12_1
++; MIPS32-NEXT: nop
++; MIPS32-NEXT: $BB12_3: # %entry
++; MIPS32-NEXT: sync
++; MIPS32-NEXT: srlv $2, $9, $1
++; MIPS32-NEXT: sll $2, $2, 24
++; MIPS32-NEXT: sra $2, $2, 24
++; MIPS32-NEXT: # %bb.4: # %entry
++; MIPS32-NEXT: jr $ra
++; MIPS32-NEXT: nop
++;
++; MIPS32O0-LABEL: AtomicCmpSwap8:
++; MIPS32O0: # %bb.0: # %entry
++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32O0-NEXT: addiu $sp, $sp, -8
++; MIPS32O0-NEXT: addu $3, $2, $25
++; MIPS32O0-NEXT: move $1, $5
++; MIPS32O0-NEXT: move $2, $4
++; MIPS32O0-NEXT: lw $3, %got(y)($3)
++; MIPS32O0-NEXT: addiu $4, $zero, -4
++; MIPS32O0-NEXT: and $4, $3, $4
++; MIPS32O0-NEXT: andi $3, $3, 3
++; MIPS32O0-NEXT: sll $9, $3, 3
++; MIPS32O0-NEXT: ori $3, $zero, 255
++; MIPS32O0-NEXT: sllv $5, $3, $9
++; MIPS32O0-NEXT: nor $7, $zero, $5
++; MIPS32O0-NEXT: andi $2, $2, 255
++; MIPS32O0-NEXT: sllv $6, $2, $9
++; MIPS32O0-NEXT: andi $1, $1, 255
++; MIPS32O0-NEXT: sllv $8, $1, $9
++; MIPS32O0-NEXT: $BB12_1: # %entry
++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32O0-NEXT: ll $2, 0($4)
++; MIPS32O0-NEXT: and $3, $2, $5
++; MIPS32O0-NEXT: bne $3, $6, $BB12_3
++; MIPS32O0-NEXT: nop
++; MIPS32O0-NEXT: # %bb.2: # %entry
++; MIPS32O0-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MIPS32O0-NEXT: and $2, $2, $7
++; MIPS32O0-NEXT: or $2, $2, $8
++; MIPS32O0-NEXT: sc $2, 0($4)
++; MIPS32O0-NEXT: beqz $2, $BB12_1
++; MIPS32O0-NEXT: nop
++; MIPS32O0-NEXT: $BB12_3: # %entry
++; MIPS32O0-NEXT: sync
++; MIPS32O0-NEXT: srlv $1, $3, $9
++; MIPS32O0-NEXT: sll $1, $1, 24
++; MIPS32O0-NEXT: sra $1, $1, 24
++; MIPS32O0-NEXT: # %bb.4: # %entry
++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
++; MIPS32O0-NEXT: # %bb.5: # %entry
++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
++; MIPS32O0-NEXT: sll $1, $1, 24
++; MIPS32O0-NEXT: sra $2, $1, 24
++; MIPS32O0-NEXT: addiu $sp, $sp, 8
++; MIPS32O0-NEXT: jr $ra
++; MIPS32O0-NEXT: nop
++;
++; MIPS32R2-LABEL: AtomicCmpSwap8:
++; MIPS32R2: # %bb.0: # %entry
++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R2-NEXT: addu $1, $2, $25
++; MIPS32R2-NEXT: lw $1, %got(y)($1)
++; MIPS32R2-NEXT: addiu $2, $zero, -4
++; MIPS32R2-NEXT: and $3, $1, $2
++; MIPS32R2-NEXT: andi $1, $1, 3
++; MIPS32R2-NEXT: sll $1, $1, 3
++; MIPS32R2-NEXT: ori $2, $zero, 255
++; MIPS32R2-NEXT: sllv $6, $2, $1
++; MIPS32R2-NEXT: nor $7, $zero, $6
++; MIPS32R2-NEXT: andi $2, $4, 255
++; MIPS32R2-NEXT: sllv $4, $2, $1
++; MIPS32R2-NEXT: andi $2, $5, 255
++; MIPS32R2-NEXT: sllv $5, $2, $1
++; MIPS32R2-NEXT: $BB12_1: # %entry
++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R2-NEXT: ll $8, 0($3)
++; MIPS32R2-NEXT: and $9, $8, $6
++; MIPS32R2-NEXT: bne $9, $4, $BB12_3
++; MIPS32R2-NEXT: nop
++; MIPS32R2-NEXT: # %bb.2: # %entry
++; MIPS32R2-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MIPS32R2-NEXT: and $8, $8, $7
++; MIPS32R2-NEXT: or $8, $8, $5
++; MIPS32R2-NEXT: sc $8, 0($3)
++; MIPS32R2-NEXT: beqz $8, $BB12_1
++; MIPS32R2-NEXT: nop
++; MIPS32R2-NEXT: $BB12_3: # %entry
++; MIPS32R2-NEXT: sync
++; MIPS32R2-NEXT: srlv $2, $9, $1
++; MIPS32R2-NEXT: seb $2, $2
++; MIPS32R2-NEXT: # %bb.4: # %entry
++; MIPS32R2-NEXT: jr $ra
++; MIPS32R2-NEXT: nop
++;
++; MIPS32R6-LABEL: AtomicCmpSwap8:
++; MIPS32R6: # %bb.0: # %entry
++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R6-NEXT: addu $1, $2, $25
++; MIPS32R6-NEXT: lw $1, %got(y)($1)
++; MIPS32R6-NEXT: addiu $2, $zero, -4
++; MIPS32R6-NEXT: and $3, $1, $2
++; MIPS32R6-NEXT: andi $1, $1, 3
++; MIPS32R6-NEXT: sll $1, $1, 3
++; MIPS32R6-NEXT: ori $2, $zero, 255
++; MIPS32R6-NEXT: sllv $6, $2, $1
++; MIPS32R6-NEXT: nor $7, $zero, $6
++; MIPS32R6-NEXT: andi $2, $4, 255
++; MIPS32R6-NEXT: sllv $4, $2, $1
++; MIPS32R6-NEXT: andi $2, $5, 255
++; MIPS32R6-NEXT: sllv $5, $2, $1
++; MIPS32R6-NEXT: $BB12_1: # %entry
++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R6-NEXT: ll $8, 0($3)
++; MIPS32R6-NEXT: and $9, $8, $6
++; MIPS32R6-NEXT: bnec $9, $4, $BB12_3
++; MIPS32R6-NEXT: # %bb.2: # %entry
++; MIPS32R6-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MIPS32R6-NEXT: and $8, $8, $7
++; MIPS32R6-NEXT: or $8, $8, $5
++; MIPS32R6-NEXT: sc $8, 0($3)
++; MIPS32R6-NEXT: beqzc $8, $BB12_1
++; MIPS32R6-NEXT: $BB12_3: # %entry
++; MIPS32R6-NEXT: sync
++; MIPS32R6-NEXT: srlv $2, $9, $1
++; MIPS32R6-NEXT: seb $2, $2
++; MIPS32R6-NEXT: # %bb.4: # %entry
++; MIPS32R6-NEXT: jrc $ra
++;
++; MIPS32R6O0-LABEL: AtomicCmpSwap8:
++; MIPS32R6O0: # %bb.0: # %entry
++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
++; MIPS32R6O0-NEXT: addu $3, $2, $25
++; MIPS32R6O0-NEXT: move $1, $5
++; MIPS32R6O0-NEXT: move $2, $4
++; MIPS32R6O0-NEXT: # kill: def $a1 killed $at
++; MIPS32R6O0-NEXT: # kill: def $a0 killed $v0
++; MIPS32R6O0-NEXT: lw $3, %got(y)($3)
++; MIPS32R6O0-NEXT: addiu $4, $zero, -4
++; MIPS32R6O0-NEXT: and $4, $3, $4
++; MIPS32R6O0-NEXT: andi $3, $3, 3
++; MIPS32R6O0-NEXT: sll $9, $3, 3
++; MIPS32R6O0-NEXT: ori $3, $zero, 255
++; MIPS32R6O0-NEXT: sllv $5, $3, $9
++; MIPS32R6O0-NEXT: nor $7, $zero, $5
++; MIPS32R6O0-NEXT: andi $2, $2, 255
++; MIPS32R6O0-NEXT: sllv $6, $2, $9
++; MIPS32R6O0-NEXT: andi $1, $1, 255
++; MIPS32R6O0-NEXT: sllv $8, $1, $9
++; MIPS32R6O0-NEXT: $BB12_1: # %entry
++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32R6O0-NEXT: ll $2, 0($4)
++; MIPS32R6O0-NEXT: and $3, $2, $5
++; MIPS32R6O0-NEXT: bnec $3, $6, $BB12_3
++; MIPS32R6O0-NEXT: # %bb.2: # %entry
++; MIPS32R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MIPS32R6O0-NEXT: and $2, $2, $7
++; MIPS32R6O0-NEXT: or $2, $2, $8
++; MIPS32R6O0-NEXT: sc $2, 0($4)
++; MIPS32R6O0-NEXT: beqzc $2, $BB12_1
++; MIPS32R6O0-NEXT: $BB12_3: # %entry
++; MIPS32R6O0-NEXT: sync
++; MIPS32R6O0-NEXT: srlv $1, $3, $9
++; MIPS32R6O0-NEXT: seb $1, $1
++; MIPS32R6O0-NEXT: # %bb.4: # %entry
++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
++; MIPS32R6O0-NEXT: # %bb.5: # %entry
++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
++; MIPS32R6O0-NEXT: jrc $ra
++;
++; MIPS4-LABEL: AtomicCmpSwap8:
++; MIPS4: # %bb.0: # %entry
++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
++; MIPS4-NEXT: daddu $1, $1, $25
++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
++; MIPS4-NEXT: ld $1, %got_disp(y)($1)
++; MIPS4-NEXT: daddiu $2, $zero, -4
++; MIPS4-NEXT: and $3, $1, $2
++; MIPS4-NEXT: andi $1, $1, 3
++; MIPS4-NEXT: sll $1, $1, 3
++; MIPS4-NEXT: ori $2, $zero, 255
++; MIPS4-NEXT: sllv $6, $2, $1
++; MIPS4-NEXT: nor $7, $zero, $6
++; MIPS4-NEXT: andi $2, $4, 255
++; MIPS4-NEXT: sllv $4, $2, $1
++; MIPS4-NEXT: andi $2, $5, 255
++; MIPS4-NEXT: sllv $5, $2, $1
++; MIPS4-NEXT: .LBB12_1: # %entry
++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS4-NEXT: ll $8, 0($3)
++; MIPS4-NEXT: and $9, $8, $6
++; MIPS4-NEXT: bne $9, $4, .LBB12_3
++; MIPS4-NEXT: nop
++; MIPS4-NEXT: # %bb.2: # %entry
++; MIPS4-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MIPS4-NEXT: and $8, $8, $7
++; MIPS4-NEXT: or $8, $8, $5
++; MIPS4-NEXT: sc $8, 0($3)
++; MIPS4-NEXT: beqz $8, .LBB12_1
++; MIPS4-NEXT: nop
++; MIPS4-NEXT: .LBB12_3: # %entry
++; MIPS4-NEXT: sync
++; MIPS4-NEXT: srlv $2, $9, $1
++; MIPS4-NEXT: sll $2, $2, 24
++; MIPS4-NEXT: sra $2, $2, 24
++; MIPS4-NEXT: # %bb.4: # %entry
++; MIPS4-NEXT: jr $ra
++; MIPS4-NEXT: nop
++;
++; MIPS64-LABEL: AtomicCmpSwap8:
++; MIPS64: # %bb.0: # %entry
++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
++; MIPS64-NEXT: daddu $1, $1, $25
++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
++; MIPS64-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64-NEXT: daddiu $2, $zero, -4
++; MIPS64-NEXT: and $3, $1, $2
++; MIPS64-NEXT: andi $1, $1, 3
++; MIPS64-NEXT: sll $1, $1, 3
++; MIPS64-NEXT: ori $2, $zero, 255
++; MIPS64-NEXT: sllv $6, $2, $1
++; MIPS64-NEXT: nor $7, $zero, $6
++; MIPS64-NEXT: andi $2, $4, 255
++; MIPS64-NEXT: sllv $4, $2, $1
++; MIPS64-NEXT: andi $2, $5, 255
++; MIPS64-NEXT: sllv $5, $2, $1
++; MIPS64-NEXT: .LBB12_1: # %entry
++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64-NEXT: ll $8, 0($3)
++; MIPS64-NEXT: and $9, $8, $6
++; MIPS64-NEXT: bne $9, $4, .LBB12_3
++; MIPS64-NEXT: nop
++; MIPS64-NEXT: # %bb.2: # %entry
++; MIPS64-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MIPS64-NEXT: and $8, $8, $7
++; MIPS64-NEXT: or $8, $8, $5
++; MIPS64-NEXT: sc $8, 0($3)
++; MIPS64-NEXT: beqz $8, .LBB12_1
++; MIPS64-NEXT: nop
++; MIPS64-NEXT: .LBB12_3: # %entry
++; MIPS64-NEXT: sync
++; MIPS64-NEXT: srlv $2, $9, $1
++; MIPS64-NEXT: sll $2, $2, 24
++; MIPS64-NEXT: sra $2, $2, 24
++; MIPS64-NEXT: # %bb.4: # %entry
++; MIPS64-NEXT: jr $ra
++; MIPS64-NEXT: nop
++;
++; MIPS64R2-LABEL: AtomicCmpSwap8:
++; MIPS64R2: # %bb.0: # %entry
++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
++; MIPS64R2-NEXT: daddu $1, $1, $25
++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
++; MIPS64R2-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64R2-NEXT: daddiu $2, $zero, -4
++; MIPS64R2-NEXT: and $3, $1, $2
++; MIPS64R2-NEXT: andi $1, $1, 3
++; MIPS64R2-NEXT: sll $1, $1, 3
++; MIPS64R2-NEXT: ori $2, $zero, 255
++; MIPS64R2-NEXT: sllv $6, $2, $1
++; MIPS64R2-NEXT: nor $7, $zero, $6
++; MIPS64R2-NEXT: andi $2, $4, 255
++; MIPS64R2-NEXT: sllv $4, $2, $1
++; MIPS64R2-NEXT: andi $2, $5, 255
++; MIPS64R2-NEXT: sllv $5, $2, $1
++; MIPS64R2-NEXT: .LBB12_1: # %entry
++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R2-NEXT: ll $8, 0($3)
++; MIPS64R2-NEXT: and $9, $8, $6
++; MIPS64R2-NEXT: bne $9, $4, .LBB12_3
++; MIPS64R2-NEXT: nop
++; MIPS64R2-NEXT: # %bb.2: # %entry
++; MIPS64R2-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MIPS64R2-NEXT: and $8, $8, $7
++; MIPS64R2-NEXT: or $8, $8, $5
++; MIPS64R2-NEXT: sc $8, 0($3)
++; MIPS64R2-NEXT: beqz $8, .LBB12_1
++; MIPS64R2-NEXT: nop
++; MIPS64R2-NEXT: .LBB12_3: # %entry
++; MIPS64R2-NEXT: sync
++; MIPS64R2-NEXT: srlv $2, $9, $1
++; MIPS64R2-NEXT: seb $2, $2
++; MIPS64R2-NEXT: # %bb.4: # %entry
++; MIPS64R2-NEXT: jr $ra
++; MIPS64R2-NEXT: nop
++;
++; MIPS64R6-LABEL: AtomicCmpSwap8:
++; MIPS64R6: # %bb.0: # %entry
++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
++; MIPS64R6-NEXT: daddu $1, $1, $25
++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
++; MIPS64R6-NEXT: ld $1, %got_disp(y)($1)
++; MIPS64R6-NEXT: daddiu $2, $zero, -4
++; MIPS64R6-NEXT: and $3, $1, $2
++; MIPS64R6-NEXT: andi $1, $1, 3
++; MIPS64R6-NEXT: sll $1, $1, 3
++; MIPS64R6-NEXT: ori $2, $zero, 255
++; MIPS64R6-NEXT: sllv $6, $2, $1
++; MIPS64R6-NEXT: nor $7, $zero, $6
++; MIPS64R6-NEXT: andi $2, $4, 255
++; MIPS64R6-NEXT: sllv $4, $2, $1
++; MIPS64R6-NEXT: andi $2, $5, 255
++; MIPS64R6-NEXT: sllv $5, $2, $1
++; MIPS64R6-NEXT: .LBB12_1: # %entry
++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R6-NEXT: ll $8, 0($3)
++; MIPS64R6-NEXT: and $9, $8, $6
++; MIPS64R6-NEXT: bnec $9, $4, .LBB12_3
++; MIPS64R6-NEXT: # %bb.2: # %entry
++; MIPS64R6-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MIPS64R6-NEXT: and $8, $8, $7
++; MIPS64R6-NEXT: or $8, $8, $5
++; MIPS64R6-NEXT: sc $8, 0($3)
++; MIPS64R6-NEXT: beqzc $8, .LBB12_1
++; MIPS64R6-NEXT: .LBB12_3: # %entry
++; MIPS64R6-NEXT: sync
++; MIPS64R6-NEXT: srlv $2, $9, $1
++; MIPS64R6-NEXT: seb $2, $2
++; MIPS64R6-NEXT: # %bb.4: # %entry
++; MIPS64R6-NEXT: jrc $ra
++;
++; MIPS64R6O0-LABEL: AtomicCmpSwap8:
++; MIPS64R6O0: # %bb.0: # %entry
++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8)))
++; MIPS64R6O0-NEXT: daddu $1, $1, $25
++; MIPS64R6O0-NEXT: daddiu $3, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8)))
++; MIPS64R6O0-NEXT: move $1, $5
++; MIPS64R6O0-NEXT: move $2, $4
++; MIPS64R6O0-NEXT: ld $3, %got_disp(y)($3)
++; MIPS64R6O0-NEXT: daddiu $4, $zero, -4
++; MIPS64R6O0-NEXT: and $4, $3, $4
++; MIPS64R6O0-NEXT: andi $3, $3, 3
++; MIPS64R6O0-NEXT: xori $3, $3, 3
++; MIPS64R6O0-NEXT: sll $9, $3, 3
++; MIPS64R6O0-NEXT: ori $3, $zero, 255
++; MIPS64R6O0-NEXT: sllv $5, $3, $9
++; MIPS64R6O0-NEXT: nor $7, $zero, $5
++; MIPS64R6O0-NEXT: andi $2, $2, 255
++; MIPS64R6O0-NEXT: sllv $6, $2, $9
++; MIPS64R6O0-NEXT: andi $1, $1, 255
++; MIPS64R6O0-NEXT: sllv $8, $1, $9
++; MIPS64R6O0-NEXT: .LBB12_1: # %entry
++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS64R6O0-NEXT: ll $2, 0($4)
++; MIPS64R6O0-NEXT: and $3, $2, $5
++; MIPS64R6O0-NEXT: bnec $3, $6, .LBB12_3
++; MIPS64R6O0-NEXT: # %bb.2: # %entry
++; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MIPS64R6O0-NEXT: and $2, $2, $7
++; MIPS64R6O0-NEXT: or $2, $2, $8
++; MIPS64R6O0-NEXT: sc $2, 0($4)
++; MIPS64R6O0-NEXT: beqzc $2, .LBB12_1
++; MIPS64R6O0-NEXT: .LBB12_3: # %entry
++; MIPS64R6O0-NEXT: sync
++; MIPS64R6O0-NEXT: srlv $1, $3, $9
++; MIPS64R6O0-NEXT: seb $1, $1
++; MIPS64R6O0-NEXT: # %bb.4: # %entry
++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
++; MIPS64R6O0-NEXT: # %bb.5: # %entry
++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
++; MIPS64R6O0-NEXT: jrc $ra
++;
++; MM32-LABEL: AtomicCmpSwap8:
++; MM32: # %bb.0: # %entry
++; MM32-NEXT: lui $2, %hi(_gp_disp)
++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MM32-NEXT: addu $2, $2, $25
++; MM32-NEXT: lw $1, %got(y)($2)
++; MM32-NEXT: addiu $2, $zero, -4
++; MM32-NEXT: and $3, $1, $2
++; MM32-NEXT: andi $1, $1, 3
++; MM32-NEXT: sll $1, $1, 3
++; MM32-NEXT: ori $2, $zero, 255
++; MM32-NEXT: sllv $6, $2, $1
++; MM32-NEXT: nor $7, $zero, $6
++; MM32-NEXT: andi $2, $4, 255
++; MM32-NEXT: sllv $4, $2, $1
++; MM32-NEXT: andi $2, $5, 255
++; MM32-NEXT: sllv $5, $2, $1
++; MM32-NEXT: $BB12_1: # %entry
++; MM32-NEXT: # =>This Inner Loop Header: Depth=1
++; MM32-NEXT: ll $8, 0($3)
++; MM32-NEXT: and $9, $8, $6
++; MM32-NEXT: bne $9, $4, $BB12_3
++; MM32-NEXT: nop
++; MM32-NEXT: # %bb.2: # %entry
++; MM32-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MM32-NEXT: and $8, $8, $7
++; MM32-NEXT: or $8, $8, $5
++; MM32-NEXT: sc $8, 0($3)
++; MM32-NEXT: beqzc $8, $BB12_1
++; MM32-NEXT: $BB12_3: # %entry
++; MM32-NEXT: sync
++; MM32-NEXT: srlv $2, $9, $1
++; MM32-NEXT: seb $2, $2
++; MM32-NEXT: # %bb.4: # %entry
++; MM32-NEXT: jrc $ra
++;
++; O1-LABEL: AtomicCmpSwap8:
++; O1: # %bb.0: # %entry
++; O1-NEXT: lui $2, %hi(_gp_disp)
++; O1-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O1-NEXT: addu $1, $2, $25
++; O1-NEXT: lw $1, %got(y)($1)
++; O1-NEXT: addiu $2, $zero, -4
++; O1-NEXT: and $3, $1, $2
++; O1-NEXT: andi $1, $1, 3
++; O1-NEXT: sll $1, $1, 3
++; O1-NEXT: ori $2, $zero, 255
++; O1-NEXT: sllv $6, $2, $1
++; O1-NEXT: nor $7, $zero, $6
++; O1-NEXT: andi $2, $4, 255
++; O1-NEXT: sllv $4, $2, $1
++; O1-NEXT: andi $2, $5, 255
++; O1-NEXT: sllv $5, $2, $1
++; O1-NEXT: $BB12_1: # %entry
++; O1-NEXT: # =>This Inner Loop Header: Depth=1
++; O1-NEXT: ll $8, 0($3)
++; O1-NEXT: and $9, $8, $6
++; O1-NEXT: bne $9, $4, $BB12_3
++; O1-NEXT: nop
++; O1-NEXT: # %bb.2: # %entry
++; O1-NEXT: # in Loop: Header=BB12_1 Depth=1
++; O1-NEXT: and $8, $8, $7
++; O1-NEXT: or $8, $8, $5
++; O1-NEXT: sc $8, 0($3)
++; O1-NEXT: beqz $8, $BB12_1
++; O1-NEXT: nop
++; O1-NEXT: $BB12_3: # %entry
++; O1-NEXT: sync
++; O1-NEXT: srlv $2, $9, $1
++; O1-NEXT: sll $2, $2, 24
++; O1-NEXT: sra $2, $2, 24
++; O1-NEXT: # %bb.4: # %entry
++; O1-NEXT: jr $ra
++; O1-NEXT: nop
++;
++; O2-LABEL: AtomicCmpSwap8:
++; O2: # %bb.0: # %entry
++; O2-NEXT: lui $2, %hi(_gp_disp)
++; O2-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O2-NEXT: addu $1, $2, $25
++; O2-NEXT: lw $1, %got(y)($1)
++; O2-NEXT: addiu $2, $zero, -4
++; O2-NEXT: and $3, $1, $2
++; O2-NEXT: andi $1, $1, 3
++; O2-NEXT: sll $1, $1, 3
++; O2-NEXT: ori $2, $zero, 255
++; O2-NEXT: sllv $6, $2, $1
++; O2-NEXT: nor $7, $zero, $6
++; O2-NEXT: andi $2, $4, 255
++; O2-NEXT: sllv $4, $2, $1
++; O2-NEXT: andi $2, $5, 255
++; O2-NEXT: sllv $5, $2, $1
++; O2-NEXT: $BB12_1: # %entry
++; O2-NEXT: # =>This Inner Loop Header: Depth=1
++; O2-NEXT: ll $8, 0($3)
++; O2-NEXT: and $9, $8, $6
++; O2-NEXT: bne $9, $4, $BB12_3
++; O2-NEXT: nop
++; O2-NEXT: # %bb.2: # %entry
++; O2-NEXT: # in Loop: Header=BB12_1 Depth=1
++; O2-NEXT: and $8, $8, $7
++; O2-NEXT: or $8, $8, $5
++; O2-NEXT: sc $8, 0($3)
++; O2-NEXT: beqz $8, $BB12_1
++; O2-NEXT: nop
++; O2-NEXT: $BB12_3: # %entry
++; O2-NEXT: sync
++; O2-NEXT: srlv $2, $9, $1
++; O2-NEXT: sll $2, $2, 24
++; O2-NEXT: sra $2, $2, 24
++; O2-NEXT: # %bb.4: # %entry
++; O2-NEXT: jr $ra
++; O2-NEXT: nop
++;
++; O3-LABEL: AtomicCmpSwap8:
++; O3: # %bb.0: # %entry
++; O3-NEXT: lui $2, %hi(_gp_disp)
++; O3-NEXT: addiu $2, $2, %lo(_gp_disp)
++; O3-NEXT: addu $1, $2, $25
++; O3-NEXT: addiu $2, $zero, -4
++; O3-NEXT: lw $1, %got(y)($1)
++; O3-NEXT: and $3, $1, $2
++; O3-NEXT: andi $1, $1, 3
++; O3-NEXT: ori $2, $zero, 255
++; O3-NEXT: sll $1, $1, 3
++; O3-NEXT: sllv $6, $2, $1
++; O3-NEXT: andi $2, $4, 255
++; O3-NEXT: sllv $4, $2, $1
++; O3-NEXT: andi $2, $5, 255
++; O3-NEXT: nor $7, $zero, $6
++; O3-NEXT: sllv $5, $2, $1
++; O3-NEXT: $BB12_1: # %entry
++; O3-NEXT: # =>This Inner Loop Header: Depth=1
++; O3-NEXT: ll $8, 0($3)
++; O3-NEXT: and $9, $8, $6
++; O3-NEXT: bne $9, $4, $BB12_3
++; O3-NEXT: nop
++; O3-NEXT: # %bb.2: # %entry
++; O3-NEXT: # in Loop: Header=BB12_1 Depth=1
++; O3-NEXT: and $8, $8, $7
++; O3-NEXT: or $8, $8, $5
++; O3-NEXT: sc $8, 0($3)
++; O3-NEXT: beqz $8, $BB12_1
++; O3-NEXT: nop
++; O3-NEXT: $BB12_3: # %entry
++; O3-NEXT: sync
++; O3-NEXT: srlv $2, $9, $1
++; O3-NEXT: sll $2, $2, 24
++; O3-NEXT: sra $2, $2, 24
++; O3-NEXT: # %bb.4: # %entry
++; O3-NEXT: jr $ra
++; O3-NEXT: nop
++;
++; MIPS32EB-LABEL: AtomicCmpSwap8:
++; MIPS32EB: # %bb.0: # %entry
++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp)
++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp)
++; MIPS32EB-NEXT: addu $1, $2, $25
++; MIPS32EB-NEXT: lw $1, %got(y)($1)
++; MIPS32EB-NEXT: addiu $2, $zero, -4
++; MIPS32EB-NEXT: and $3, $1, $2
++; MIPS32EB-NEXT: andi $1, $1, 3
++; MIPS32EB-NEXT: xori $1, $1, 3
++; MIPS32EB-NEXT: sll $1, $1, 3
++; MIPS32EB-NEXT: ori $2, $zero, 255
++; MIPS32EB-NEXT: sllv $6, $2, $1
++; MIPS32EB-NEXT: nor $7, $zero, $6
++; MIPS32EB-NEXT: andi $2, $4, 255
++; MIPS32EB-NEXT: sllv $4, $2, $1
++; MIPS32EB-NEXT: andi $2, $5, 255
++; MIPS32EB-NEXT: sllv $5, $2, $1
++; MIPS32EB-NEXT: $BB12_1: # %entry
++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1
++; MIPS32EB-NEXT: ll $8, 0($3)
++; MIPS32EB-NEXT: and $9, $8, $6
++; MIPS32EB-NEXT: bne $9, $4, $BB12_3
++; MIPS32EB-NEXT: nop
++; MIPS32EB-NEXT: # %bb.2: # %entry
++; MIPS32EB-NEXT: # in Loop: Header=BB12_1 Depth=1
++; MIPS32EB-NEXT: and $8, $8, $7
++; MIPS32EB-NEXT: or $8, $8, $5
++; MIPS32EB-NEXT: sc $8, 0($3)
++; MIPS32EB-NEXT: beqz $8, $BB12_1
++; MIPS32EB-NEXT: nop
++; MIPS32EB-NEXT: $BB12_3: # %entry
++; MIPS32EB-NEXT: sync
++; MIPS32EB-NEXT: srlv $2, $9, $1
++; MIPS32EB-NEXT: sll $2, $2, 24
++; MIPS32EB-NEXT: sra $2, $2, 24
++; MIPS32EB-NEXT: # %bb.4: # %entry
++; MIPS32EB-NEXT: jr $ra
++; MIPS32EB-NEXT: nop
++entry:
++ %pair0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic monotonic
++ %0 = extractvalue { i8, i1 } %pair0, 0
++ ret i8 %0
++}
++
++define i1 @AtomicCmpSwapRes8(i8* %ptr, i8 signext %oldval, i8 signext %newval) nounwind {
++; MIPS32-LABEL: AtomicCmpSwapRes8:
++; MIPS32: # %bb.0: # %entry
++; MIPS32-NEXT: addiu $1, $zero, -4
++; MIPS32-NEXT: and $2, $4, $1
++; MIPS32-NEXT: andi $1, $4, 3
++; MIPS32-NEXT: sll $3, $1, 3
++; MIPS32-NEXT: ori $1, $zero, 255
++; MIPS32-NEXT: sllv $4, $1, $3
++; MIPS32-NEXT: nor $7, $zero, $4
++; MIPS32-NEXT: andi $1, $5, 255
++; MIPS32-NEXT: sllv $8, $1, $3
++; MIPS32-NEXT: andi $1,
$6, 255 ++; MIPS32-NEXT: sllv $6, $1, $3 ++; MIPS32-NEXT: $BB13_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $9, 0($2) ++; MIPS32-NEXT: and $10, $9, $4 ++; MIPS32-NEXT: bne $10, $8, $BB13_3 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32-NEXT: and $9, $9, $7 ++; MIPS32-NEXT: or $9, $9, $6 ++; MIPS32-NEXT: sc $9, 0($2) ++; MIPS32-NEXT: beqz $9, $BB13_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: $BB13_3: # %entry ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: srlv $1, $10, $3 ++; MIPS32-NEXT: sll $1, $1, 24 ++; MIPS32-NEXT: sra $1, $1, 24 ++; MIPS32-NEXT: # %bb.4: # %entry ++; MIPS32-NEXT: xor $1, $1, $5 ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: sltiu $2, $1, 1 ++; ++; MIPS32O0-LABEL: AtomicCmpSwapRes8: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: move $1, $6 ++; MIPS32O0-NEXT: move $2, $5 ++; MIPS32O0-NEXT: move $3, $4 ++; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: addiu $4, $zero, -4 ++; MIPS32O0-NEXT: and $4, $3, $4 ++; MIPS32O0-NEXT: andi $3, $3, 3 ++; MIPS32O0-NEXT: sll $9, $3, 3 ++; MIPS32O0-NEXT: ori $3, $zero, 255 ++; MIPS32O0-NEXT: sllv $5, $3, $9 ++; MIPS32O0-NEXT: nor $7, $zero, $5 ++; MIPS32O0-NEXT: andi $2, $2, 255 ++; MIPS32O0-NEXT: sllv $6, $2, $9 ++; MIPS32O0-NEXT: andi $1, $1, 255 ++; MIPS32O0-NEXT: sllv $8, $1, $9 ++; MIPS32O0-NEXT: $BB13_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($4) ++; MIPS32O0-NEXT: and $3, $2, $5 ++; MIPS32O0-NEXT: bne $3, $6, $BB13_3 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32O0-NEXT: and $2, $2, $7 ++; MIPS32O0-NEXT: or $2, $2, $8 ++; MIPS32O0-NEXT: sc $2, 0($4) ++; MIPS32O0-NEXT: beqz $2, $BB13_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: $BB13_3: # %entry ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: srlv $1, $3, $9 ++; MIPS32O0-NEXT: sll $1, $1, 24 ++; MIPS32O0-NEXT: sra $1, $1, 24 ++; MIPS32O0-NEXT: # %bb.4: # %entry ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.5: # %entry ++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $2, $2, 24 ++; MIPS32O0-NEXT: sra $2, $2, 24 ++; MIPS32O0-NEXT: xor $1, $1, $2 ++; MIPS32O0-NEXT: sltiu $2, $1, 1 ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicCmpSwapRes8: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: addiu $1, $zero, -4 ++; MIPS32R2-NEXT: and $2, $4, $1 ++; MIPS32R2-NEXT: andi $1, $4, 3 ++; MIPS32R2-NEXT: sll $3, $1, 3 ++; MIPS32R2-NEXT: ori $1, $zero, 255 ++; MIPS32R2-NEXT: sllv $4, $1, $3 ++; MIPS32R2-NEXT: nor $7, $zero, $4 ++; MIPS32R2-NEXT: andi $1, $5, 255 ++; MIPS32R2-NEXT: sllv $8, $1, $3 ++; MIPS32R2-NEXT: andi $1, $6, 255 ++; MIPS32R2-NEXT: sllv $6, $1, $3 ++; MIPS32R2-NEXT: $BB13_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $9, 0($2) ++; MIPS32R2-NEXT: and $10, $9, $4 ++; MIPS32R2-NEXT: bne $10, $8, $BB13_3 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32R2-NEXT: and $9, $9, $7 ++; MIPS32R2-NEXT: or $9, $9, $6 ++; MIPS32R2-NEXT: sc $9, 0($2) ++; MIPS32R2-NEXT: beqz $9, $BB13_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: $BB13_3: # %entry ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: srlv $1, $10, $3 ++; 
MIPS32R2-NEXT: seb $1, $1 ++; MIPS32R2-NEXT: # %bb.4: # %entry ++; MIPS32R2-NEXT: xor $1, $1, $5 ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: sltiu $2, $1, 1 ++; ++; MIPS32R6-LABEL: AtomicCmpSwapRes8: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: addiu $1, $zero, -4 ++; MIPS32R6-NEXT: and $2, $4, $1 ++; MIPS32R6-NEXT: andi $1, $4, 3 ++; MIPS32R6-NEXT: sll $3, $1, 3 ++; MIPS32R6-NEXT: ori $1, $zero, 255 ++; MIPS32R6-NEXT: sllv $4, $1, $3 ++; MIPS32R6-NEXT: nor $7, $zero, $4 ++; MIPS32R6-NEXT: andi $1, $5, 255 ++; MIPS32R6-NEXT: sllv $8, $1, $3 ++; MIPS32R6-NEXT: andi $1, $6, 255 ++; MIPS32R6-NEXT: sllv $6, $1, $3 ++; MIPS32R6-NEXT: $BB13_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $9, 0($2) ++; MIPS32R6-NEXT: and $10, $9, $4 ++; MIPS32R6-NEXT: bnec $10, $8, $BB13_3 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32R6-NEXT: and $9, $9, $7 ++; MIPS32R6-NEXT: or $9, $9, $6 ++; MIPS32R6-NEXT: sc $9, 0($2) ++; MIPS32R6-NEXT: beqzc $9, $BB13_1 ++; MIPS32R6-NEXT: $BB13_3: # %entry ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: srlv $1, $10, $3 ++; MIPS32R6-NEXT: seb $1, $1 ++; MIPS32R6-NEXT: # %bb.4: # %entry ++; MIPS32R6-NEXT: xor $1, $1, $5 ++; MIPS32R6-NEXT: jr $ra ++; MIPS32R6-NEXT: sltiu $2, $1, 1 ++; ++; MIPS32R6O0-LABEL: AtomicCmpSwapRes8: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: move $1, $6 ++; MIPS32R6O0-NEXT: move $2, $5 ++; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: move $3, $4 ++; MIPS32R6O0-NEXT: # kill: def $a2 killed $at ++; MIPS32R6O0-NEXT: # kill: def $a1 killed $v0 ++; MIPS32R6O0-NEXT: addiu $4, $zero, -4 ++; MIPS32R6O0-NEXT: and $4, $3, $4 ++; MIPS32R6O0-NEXT: andi $3, $3, 3 ++; MIPS32R6O0-NEXT: sll $9, $3, 3 ++; MIPS32R6O0-NEXT: ori $3, $zero, 255 ++; MIPS32R6O0-NEXT: sllv $5, $3, $9 ++; MIPS32R6O0-NEXT: nor $7, $zero, $5 ++; MIPS32R6O0-NEXT: andi $2, $2, 255 ++; MIPS32R6O0-NEXT: sllv $6, $2, $9 ++; MIPS32R6O0-NEXT: andi $1, $1, 255 ++; MIPS32R6O0-NEXT: sllv $8, $1, $9 ++; MIPS32R6O0-NEXT: $BB13_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($4) ++; MIPS32R6O0-NEXT: and $3, $2, $5 ++; MIPS32R6O0-NEXT: bnec $3, $6, $BB13_3 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32R6O0-NEXT: and $2, $2, $7 ++; MIPS32R6O0-NEXT: or $2, $2, $8 ++; MIPS32R6O0-NEXT: sc $2, 0($4) ++; MIPS32R6O0-NEXT: beqzc $2, $BB13_1 ++; MIPS32R6O0-NEXT: $BB13_3: # %entry ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: srlv $1, $3, $9 ++; MIPS32R6O0-NEXT: seb $1, $1 ++; MIPS32R6O0-NEXT: # %bb.4: # %entry ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.5: # %entry ++; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: xor $1, $1, $2 ++; MIPS32R6O0-NEXT: sltiu $2, $1, 1 ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicCmpSwapRes8: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: daddiu $1, $zero, -4 ++; MIPS4-NEXT: and $2, $4, $1 ++; MIPS4-NEXT: andi $1, $4, 3 ++; MIPS4-NEXT: sll $3, $1, 3 ++; MIPS4-NEXT: ori $1, $zero, 255 ++; MIPS4-NEXT: sllv $4, $1, $3 ++; MIPS4-NEXT: nor $7, $zero, $4 ++; MIPS4-NEXT: andi $1, $5, 255 ++; MIPS4-NEXT: sllv $8, $1, $3 ++; MIPS4-NEXT: andi $1, $6, 255 ++; MIPS4-NEXT: sllv $6, $1, $3 ++; MIPS4-NEXT: .LBB13_1: # %entry ++; MIPS4-NEXT: # =>This 
Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $9, 0($2) ++; MIPS4-NEXT: and $10, $9, $4 ++; MIPS4-NEXT: bne $10, $8, .LBB13_3 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS4-NEXT: and $9, $9, $7 ++; MIPS4-NEXT: or $9, $9, $6 ++; MIPS4-NEXT: sc $9, 0($2) ++; MIPS4-NEXT: beqz $9, .LBB13_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: .LBB13_3: # %entry ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: srlv $1, $10, $3 ++; MIPS4-NEXT: sll $1, $1, 24 ++; MIPS4-NEXT: sra $1, $1, 24 ++; MIPS4-NEXT: # %bb.4: # %entry ++; MIPS4-NEXT: xor $1, $1, $5 ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: sltiu $2, $1, 1 ++; ++; MIPS64-LABEL: AtomicCmpSwapRes8: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: daddiu $1, $zero, -4 ++; MIPS64-NEXT: and $2, $4, $1 ++; MIPS64-NEXT: andi $1, $4, 3 ++; MIPS64-NEXT: sll $3, $1, 3 ++; MIPS64-NEXT: ori $1, $zero, 255 ++; MIPS64-NEXT: sllv $4, $1, $3 ++; MIPS64-NEXT: nor $7, $zero, $4 ++; MIPS64-NEXT: andi $1, $5, 255 ++; MIPS64-NEXT: sllv $8, $1, $3 ++; MIPS64-NEXT: andi $1, $6, 255 ++; MIPS64-NEXT: sllv $6, $1, $3 ++; MIPS64-NEXT: .LBB13_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $9, 0($2) ++; MIPS64-NEXT: and $10, $9, $4 ++; MIPS64-NEXT: bne $10, $8, .LBB13_3 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS64-NEXT: and $9, $9, $7 ++; MIPS64-NEXT: or $9, $9, $6 ++; MIPS64-NEXT: sc $9, 0($2) ++; MIPS64-NEXT: beqz $9, .LBB13_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: .LBB13_3: # %entry ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: srlv $1, $10, $3 ++; MIPS64-NEXT: sll $1, $1, 24 ++; MIPS64-NEXT: sra $1, $1, 24 ++; MIPS64-NEXT: # %bb.4: # %entry ++; MIPS64-NEXT: xor $1, $1, $5 ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: sltiu $2, $1, 1 ++; ++; MIPS64R2-LABEL: AtomicCmpSwapRes8: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: daddiu $1, $zero, -4 ++; MIPS64R2-NEXT: and $2, $4, $1 ++; MIPS64R2-NEXT: andi $1, $4, 3 ++; MIPS64R2-NEXT: sll $3, $1, 3 ++; MIPS64R2-NEXT: ori $1, $zero, 255 ++; MIPS64R2-NEXT: sllv $4, $1, $3 ++; MIPS64R2-NEXT: nor $7, $zero, $4 ++; MIPS64R2-NEXT: andi $1, $5, 255 ++; MIPS64R2-NEXT: sllv $8, $1, $3 ++; MIPS64R2-NEXT: andi $1, $6, 255 ++; MIPS64R2-NEXT: sllv $6, $1, $3 ++; MIPS64R2-NEXT: .LBB13_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $9, 0($2) ++; MIPS64R2-NEXT: and $10, $9, $4 ++; MIPS64R2-NEXT: bne $10, $8, .LBB13_3 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS64R2-NEXT: and $9, $9, $7 ++; MIPS64R2-NEXT: or $9, $9, $6 ++; MIPS64R2-NEXT: sc $9, 0($2) ++; MIPS64R2-NEXT: beqz $9, .LBB13_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: .LBB13_3: # %entry ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: srlv $1, $10, $3 ++; MIPS64R2-NEXT: seb $1, $1 ++; MIPS64R2-NEXT: # %bb.4: # %entry ++; MIPS64R2-NEXT: xor $1, $1, $5 ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: sltiu $2, $1, 1 ++; ++; MIPS64R6-LABEL: AtomicCmpSwapRes8: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: daddiu $1, $zero, -4 ++; MIPS64R6-NEXT: and $2, $4, $1 ++; MIPS64R6-NEXT: andi $1, $4, 3 ++; MIPS64R6-NEXT: sll $3, $1, 3 ++; MIPS64R6-NEXT: ori $1, $zero, 255 ++; MIPS64R6-NEXT: sllv $4, $1, $3 ++; MIPS64R6-NEXT: nor $7, $zero, $4 ++; MIPS64R6-NEXT: andi $1, $5, 255 ++; MIPS64R6-NEXT: sllv $8, $1, $3 ++; MIPS64R6-NEXT: andi $1, $6, 255 ++; MIPS64R6-NEXT: sllv $6, $1, $3 ++; MIPS64R6-NEXT: .LBB13_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner 
Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $9, 0($2) ++; MIPS64R6-NEXT: and $10, $9, $4 ++; MIPS64R6-NEXT: bnec $10, $8, .LBB13_3 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS64R6-NEXT: and $9, $9, $7 ++; MIPS64R6-NEXT: or $9, $9, $6 ++; MIPS64R6-NEXT: sc $9, 0($2) ++; MIPS64R6-NEXT: beqzc $9, .LBB13_1 ++; MIPS64R6-NEXT: .LBB13_3: # %entry ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: srlv $1, $10, $3 ++; MIPS64R6-NEXT: seb $1, $1 ++; MIPS64R6-NEXT: # %bb.4: # %entry ++; MIPS64R6-NEXT: xor $1, $1, $5 ++; MIPS64R6-NEXT: jr $ra ++; MIPS64R6-NEXT: sltiu $2, $1, 1 ++; ++; MIPS64R6O0-LABEL: AtomicCmpSwapRes8: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: move $3, $4 ++; MIPS64R6O0-NEXT: move $1, $6 ++; MIPS64R6O0-NEXT: move $2, $5 ++; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 ++; MIPS64R6O0-NEXT: and $4, $3, $4 ++; MIPS64R6O0-NEXT: andi $3, $3, 3 ++; MIPS64R6O0-NEXT: xori $3, $3, 3 ++; MIPS64R6O0-NEXT: sll $9, $3, 3 ++; MIPS64R6O0-NEXT: ori $3, $zero, 255 ++; MIPS64R6O0-NEXT: sllv $5, $3, $9 ++; MIPS64R6O0-NEXT: nor $7, $zero, $5 ++; MIPS64R6O0-NEXT: andi $2, $2, 255 ++; MIPS64R6O0-NEXT: sllv $6, $2, $9 ++; MIPS64R6O0-NEXT: andi $1, $1, 255 ++; MIPS64R6O0-NEXT: sllv $8, $1, $9 ++; MIPS64R6O0-NEXT: .LBB13_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($4) ++; MIPS64R6O0-NEXT: and $3, $2, $5 ++; MIPS64R6O0-NEXT: bnec $3, $6, .LBB13_3 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS64R6O0-NEXT: and $2, $2, $7 ++; MIPS64R6O0-NEXT: or $2, $2, $8 ++; MIPS64R6O0-NEXT: sc $2, 0($4) ++; MIPS64R6O0-NEXT: beqzc $2, .LBB13_1 ++; MIPS64R6O0-NEXT: .LBB13_3: # %entry ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: srlv $1, $3, $9 ++; MIPS64R6O0-NEXT: seb $1, $1 ++; MIPS64R6O0-NEXT: # %bb.4: # %entry ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.5: # %entry ++; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: xor $1, $1, $2 ++; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicCmpSwapRes8: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: addiu $1, $zero, -4 ++; MM32-NEXT: and $2, $4, $1 ++; MM32-NEXT: andi $1, $4, 3 ++; MM32-NEXT: sll $3, $1, 3 ++; MM32-NEXT: ori $1, $zero, 255 ++; MM32-NEXT: sllv $4, $1, $3 ++; MM32-NEXT: nor $7, $zero, $4 ++; MM32-NEXT: andi $1, $5, 255 ++; MM32-NEXT: sllv $8, $1, $3 ++; MM32-NEXT: andi $1, $6, 255 ++; MM32-NEXT: sllv $6, $1, $3 ++; MM32-NEXT: $BB13_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $9, 0($2) ++; MM32-NEXT: and $10, $9, $4 ++; MM32-NEXT: bne $10, $8, $BB13_3 ++; MM32-NEXT: nop ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MM32-NEXT: and $9, $9, $7 ++; MM32-NEXT: or $9, $9, $6 ++; MM32-NEXT: sc $9, 0($2) ++; MM32-NEXT: beqzc $9, $BB13_1 ++; MM32-NEXT: $BB13_3: # %entry ++; MM32-NEXT: sync ++; MM32-NEXT: srlv $1, $10, $3 ++; MM32-NEXT: seb $1, $1 ++; MM32-NEXT: # %bb.4: # %entry ++; MM32-NEXT: xor $1, $1, $5 ++; MM32-NEXT: jr $ra ++; MM32-NEXT: sltiu $2, $1, 1 ++; ++; O1-LABEL: AtomicCmpSwapRes8: ++; O1: # %bb.0: # %entry ++; O1-NEXT: addiu $1, $zero, -4 ++; O1-NEXT: and $2, $4, $1 ++; O1-NEXT: andi $1, $4, 3 ++; O1-NEXT: sll $3, $1, 3 ++; 
O1-NEXT: ori $1, $zero, 255 ++; O1-NEXT: sllv $4, $1, $3 ++; O1-NEXT: nor $7, $zero, $4 ++; O1-NEXT: andi $1, $5, 255 ++; O1-NEXT: sllv $8, $1, $3 ++; O1-NEXT: andi $1, $6, 255 ++; O1-NEXT: sllv $6, $1, $3 ++; O1-NEXT: $BB13_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $9, 0($2) ++; O1-NEXT: and $10, $9, $4 ++; O1-NEXT: bne $10, $8, $BB13_3 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; O1-NEXT: and $9, $9, $7 ++; O1-NEXT: or $9, $9, $6 ++; O1-NEXT: sc $9, 0($2) ++; O1-NEXT: beqz $9, $BB13_1 ++; O1-NEXT: nop ++; O1-NEXT: $BB13_3: # %entry ++; O1-NEXT: sync ++; O1-NEXT: srlv $1, $10, $3 ++; O1-NEXT: sll $1, $1, 24 ++; O1-NEXT: sra $1, $1, 24 ++; O1-NEXT: # %bb.4: # %entry ++; O1-NEXT: xor $1, $1, $5 ++; O1-NEXT: jr $ra ++; O1-NEXT: sltiu $2, $1, 1 ++; ++; O2-LABEL: AtomicCmpSwapRes8: ++; O2: # %bb.0: # %entry ++; O2-NEXT: addiu $1, $zero, -4 ++; O2-NEXT: and $2, $4, $1 ++; O2-NEXT: andi $1, $4, 3 ++; O2-NEXT: sll $3, $1, 3 ++; O2-NEXT: ori $1, $zero, 255 ++; O2-NEXT: sllv $4, $1, $3 ++; O2-NEXT: nor $7, $zero, $4 ++; O2-NEXT: andi $1, $5, 255 ++; O2-NEXT: sllv $8, $1, $3 ++; O2-NEXT: andi $1, $6, 255 ++; O2-NEXT: sllv $6, $1, $3 ++; O2-NEXT: $BB13_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $9, 0($2) ++; O2-NEXT: and $10, $9, $4 ++; O2-NEXT: bne $10, $8, $BB13_3 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; O2-NEXT: and $9, $9, $7 ++; O2-NEXT: or $9, $9, $6 ++; O2-NEXT: sc $9, 0($2) ++; O2-NEXT: beqz $9, $BB13_1 ++; O2-NEXT: nop ++; O2-NEXT: $BB13_3: # %entry ++; O2-NEXT: sync ++; O2-NEXT: srlv $1, $10, $3 ++; O2-NEXT: sll $1, $1, 24 ++; O2-NEXT: sra $1, $1, 24 ++; O2-NEXT: # %bb.4: # %entry ++; O2-NEXT: xor $1, $1, $5 ++; O2-NEXT: jr $ra ++; O2-NEXT: sltiu $2, $1, 1 ++; ++; O3-LABEL: AtomicCmpSwapRes8: ++; O3: # %bb.0: # %entry ++; O3-NEXT: addiu $1, $zero, -4 ++; O3-NEXT: and $2, $4, $1 ++; O3-NEXT: andi $1, $4, 3 ++; O3-NEXT: sll $3, $1, 3 ++; O3-NEXT: ori $1, $zero, 255 ++; O3-NEXT: sllv $4, $1, $3 ++; O3-NEXT: andi $1, $5, 255 ++; O3-NEXT: sllv $8, $1, $3 ++; O3-NEXT: andi $1, $6, 255 ++; O3-NEXT: nor $7, $zero, $4 ++; O3-NEXT: sllv $6, $1, $3 ++; O3-NEXT: $BB13_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $9, 0($2) ++; O3-NEXT: and $10, $9, $4 ++; O3-NEXT: bne $10, $8, $BB13_3 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; O3-NEXT: and $9, $9, $7 ++; O3-NEXT: or $9, $9, $6 ++; O3-NEXT: sc $9, 0($2) ++; O3-NEXT: beqz $9, $BB13_1 ++; O3-NEXT: nop ++; O3-NEXT: $BB13_3: # %entry ++; O3-NEXT: sync ++; O3-NEXT: srlv $1, $10, $3 ++; O3-NEXT: sll $1, $1, 24 ++; O3-NEXT: sra $1, $1, 24 ++; O3-NEXT: # %bb.4: # %entry ++; O3-NEXT: xor $1, $1, $5 ++; O3-NEXT: jr $ra ++; O3-NEXT: sltiu $2, $1, 1 ++; ++; MIPS32EB-LABEL: AtomicCmpSwapRes8: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: addiu $1, $zero, -4 ++; MIPS32EB-NEXT: and $2, $4, $1 ++; MIPS32EB-NEXT: andi $1, $4, 3 ++; MIPS32EB-NEXT: xori $1, $1, 3 ++; MIPS32EB-NEXT: sll $3, $1, 3 ++; MIPS32EB-NEXT: ori $1, $zero, 255 ++; MIPS32EB-NEXT: sllv $4, $1, $3 ++; MIPS32EB-NEXT: nor $7, $zero, $4 ++; MIPS32EB-NEXT: andi $1, $5, 255 ++; MIPS32EB-NEXT: sllv $8, $1, $3 ++; MIPS32EB-NEXT: andi $1, $6, 255 ++; MIPS32EB-NEXT: sllv $6, $1, $3 ++; MIPS32EB-NEXT: $BB13_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $9, 0($2) ++; MIPS32EB-NEXT: and $10, $9, $4 ++; 
MIPS32EB-NEXT: bne $10, $8, $BB13_3 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: # in Loop: Header=BB13_1 Depth=1 ++; MIPS32EB-NEXT: and $9, $9, $7 ++; MIPS32EB-NEXT: or $9, $9, $6 ++; MIPS32EB-NEXT: sc $9, 0($2) ++; MIPS32EB-NEXT: beqz $9, $BB13_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: $BB13_3: # %entry ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: srlv $1, $10, $3 ++; MIPS32EB-NEXT: sll $1, $1, 24 ++; MIPS32EB-NEXT: sra $1, $1, 24 ++; MIPS32EB-NEXT: # %bb.4: # %entry ++; MIPS32EB-NEXT: xor $1, $1, $5 ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: sltiu $2, $1, 1 ++entry: ++ %0 = cmpxchg i8* %ptr, i8 %oldval, i8 %newval monotonic monotonic ++ %1 = extractvalue { i8, i1 } %0, 1 ++ ret i1 %1 ++; FIXME: -march=mips produces a redundant sign extension here... ++; FIXME: ...Leading to this split check. ++ ++} ++ ++; Check one i16 so that we cover the seh sign extend ++@z = common global i16 0, align 1 ++ ++define signext i16 @AtomicLoadAdd16(i16 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadAdd16: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(z)($1) ++; MIPS32-NEXT: addiu $2, $zero, -4 ++; MIPS32-NEXT: and $3, $1, $2 ++; MIPS32-NEXT: andi $1, $1, 3 ++; MIPS32-NEXT: sll $1, $1, 3 ++; MIPS32-NEXT: ori $2, $zero, 65535 ++; MIPS32-NEXT: sllv $5, $2, $1 ++; MIPS32-NEXT: nor $6, $zero, $5 ++; MIPS32-NEXT: sllv $4, $4, $1 ++; MIPS32-NEXT: $BB14_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $7, 0($3) ++; MIPS32-NEXT: addu $8, $7, $4 ++; MIPS32-NEXT: and $8, $8, $5 ++; MIPS32-NEXT: and $9, $7, $6 ++; MIPS32-NEXT: or $9, $9, $8 ++; MIPS32-NEXT: sc $9, 0($3) ++; MIPS32-NEXT: beqz $9, $BB14_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: and $2, $7, $5 ++; MIPS32-NEXT: srlv $2, $2, $1 ++; MIPS32-NEXT: sll $2, $2, 16 ++; MIPS32-NEXT: sra $2, $2, 16 ++; MIPS32-NEXT: # %bb.3: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadAdd16: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $1, %got(z)($1) ++; MIPS32O0-NEXT: addiu $2, $zero, -4 ++; MIPS32O0-NEXT: and $5, $1, $2 ++; MIPS32O0-NEXT: andi $1, $1, 3 ++; MIPS32O0-NEXT: sll $9, $1, 3 ++; MIPS32O0-NEXT: ori $1, $zero, 65535 ++; MIPS32O0-NEXT: sllv $7, $1, $9 ++; MIPS32O0-NEXT: nor $8, $zero, $7 ++; MIPS32O0-NEXT: sllv $6, $4, $9 ++; MIPS32O0-NEXT: $BB14_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($5) ++; MIPS32O0-NEXT: addu $3, $2, $6 ++; MIPS32O0-NEXT: and $3, $3, $7 ++; MIPS32O0-NEXT: and $4, $2, $8 ++; MIPS32O0-NEXT: or $4, $4, $3 ++; MIPS32O0-NEXT: sc $4, 0($5) ++; MIPS32O0-NEXT: beqz $4, $BB14_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: and $1, $2, $7 ++; MIPS32O0-NEXT: srlv $1, $1, $9 ++; MIPS32O0-NEXT: sll $1, $1, 16 ++; MIPS32O0-NEXT: sra $1, $1, 16 ++; MIPS32O0-NEXT: # %bb.3: # %entry ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.4: # %entry ++; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $1, $1, 16 ++; MIPS32O0-NEXT: sra $2, $1, 16 ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadAdd16: ++; 
MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(z)($1) ++; MIPS32R2-NEXT: addiu $2, $zero, -4 ++; MIPS32R2-NEXT: and $3, $1, $2 ++; MIPS32R2-NEXT: andi $1, $1, 3 ++; MIPS32R2-NEXT: sll $1, $1, 3 ++; MIPS32R2-NEXT: ori $2, $zero, 65535 ++; MIPS32R2-NEXT: sllv $5, $2, $1 ++; MIPS32R2-NEXT: nor $6, $zero, $5 ++; MIPS32R2-NEXT: sllv $4, $4, $1 ++; MIPS32R2-NEXT: $BB14_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $7, 0($3) ++; MIPS32R2-NEXT: addu $8, $7, $4 ++; MIPS32R2-NEXT: and $8, $8, $5 ++; MIPS32R2-NEXT: and $9, $7, $6 ++; MIPS32R2-NEXT: or $9, $9, $8 ++; MIPS32R2-NEXT: sc $9, 0($3) ++; MIPS32R2-NEXT: beqz $9, $BB14_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: and $2, $7, $5 ++; MIPS32R2-NEXT: srlv $2, $2, $1 ++; MIPS32R2-NEXT: seh $2, $2 ++; MIPS32R2-NEXT: # %bb.3: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadAdd16: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(z)($1) ++; MIPS32R6-NEXT: addiu $2, $zero, -4 ++; MIPS32R6-NEXT: and $3, $1, $2 ++; MIPS32R6-NEXT: andi $1, $1, 3 ++; MIPS32R6-NEXT: sll $1, $1, 3 ++; MIPS32R6-NEXT: ori $2, $zero, 65535 ++; MIPS32R6-NEXT: sllv $5, $2, $1 ++; MIPS32R6-NEXT: nor $6, $zero, $5 ++; MIPS32R6-NEXT: sllv $4, $4, $1 ++; MIPS32R6-NEXT: $BB14_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $7, 0($3) ++; MIPS32R6-NEXT: addu $8, $7, $4 ++; MIPS32R6-NEXT: and $8, $8, $5 ++; MIPS32R6-NEXT: and $9, $7, $6 ++; MIPS32R6-NEXT: or $9, $9, $8 ++; MIPS32R6-NEXT: sc $9, 0($3) ++; MIPS32R6-NEXT: beqzc $9, $BB14_1 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: and $2, $7, $5 ++; MIPS32R6-NEXT: srlv $2, $2, $1 ++; MIPS32R6-NEXT: seh $2, $2 ++; MIPS32R6-NEXT: # %bb.3: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadAdd16: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ++; MIPS32R6O0-NEXT: lw $1, %got(z)($1) ++; MIPS32R6O0-NEXT: addiu $2, $zero, -4 ++; MIPS32R6O0-NEXT: and $5, $1, $2 ++; MIPS32R6O0-NEXT: andi $1, $1, 3 ++; MIPS32R6O0-NEXT: sll $9, $1, 3 ++; MIPS32R6O0-NEXT: ori $1, $zero, 65535 ++; MIPS32R6O0-NEXT: sllv $7, $1, $9 ++; MIPS32R6O0-NEXT: nor $8, $zero, $7 ++; MIPS32R6O0-NEXT: sllv $6, $4, $9 ++; MIPS32R6O0-NEXT: $BB14_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($5) ++; MIPS32R6O0-NEXT: addu $3, $2, $6 ++; MIPS32R6O0-NEXT: and $3, $3, $7 ++; MIPS32R6O0-NEXT: and $4, $2, $8 ++; MIPS32R6O0-NEXT: or $4, $4, $3 ++; MIPS32R6O0-NEXT: sc $4, 0($5) ++; MIPS32R6O0-NEXT: beqzc $4, $BB14_1 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: and $1, $2, $7 ++; MIPS32R6O0-NEXT: srlv $1, $1, $9 ++; MIPS32R6O0-NEXT: seh $1, $1 ++; MIPS32R6O0-NEXT: # %bb.3: # %entry ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.4: # %entry ++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadAdd16: ++; MIPS4: # %bb.0: # 
%entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS4-NEXT: ld $1, %got_disp(z)($1) ++; MIPS4-NEXT: daddiu $2, $zero, -4 ++; MIPS4-NEXT: and $3, $1, $2 ++; MIPS4-NEXT: andi $1, $1, 3 ++; MIPS4-NEXT: sll $1, $1, 3 ++; MIPS4-NEXT: ori $2, $zero, 65535 ++; MIPS4-NEXT: sllv $5, $2, $1 ++; MIPS4-NEXT: nor $6, $zero, $5 ++; MIPS4-NEXT: sllv $4, $4, $1 ++; MIPS4-NEXT: .LBB14_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $7, 0($3) ++; MIPS4-NEXT: addu $8, $7, $4 ++; MIPS4-NEXT: and $8, $8, $5 ++; MIPS4-NEXT: and $9, $7, $6 ++; MIPS4-NEXT: or $9, $9, $8 ++; MIPS4-NEXT: sc $9, 0($3) ++; MIPS4-NEXT: beqz $9, .LBB14_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: and $2, $7, $5 ++; MIPS4-NEXT: srlv $2, $2, $1 ++; MIPS4-NEXT: sll $2, $2, 16 ++; MIPS4-NEXT: sra $2, $2, 16 ++; MIPS4-NEXT: # %bb.3: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAdd16: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64-NEXT: ld $1, %got_disp(z)($1) ++; MIPS64-NEXT: daddiu $2, $zero, -4 ++; MIPS64-NEXT: and $3, $1, $2 ++; MIPS64-NEXT: andi $1, $1, 3 ++; MIPS64-NEXT: sll $1, $1, 3 ++; MIPS64-NEXT: ori $2, $zero, 65535 ++; MIPS64-NEXT: sllv $5, $2, $1 ++; MIPS64-NEXT: nor $6, $zero, $5 ++; MIPS64-NEXT: sllv $4, $4, $1 ++; MIPS64-NEXT: .LBB14_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $7, 0($3) ++; MIPS64-NEXT: addu $8, $7, $4 ++; MIPS64-NEXT: and $8, $8, $5 ++; MIPS64-NEXT: and $9, $7, $6 ++; MIPS64-NEXT: or $9, $9, $8 ++; MIPS64-NEXT: sc $9, 0($3) ++; MIPS64-NEXT: beqz $9, .LBB14_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: and $2, $7, $5 ++; MIPS64-NEXT: srlv $2, $2, $1 ++; MIPS64-NEXT: sll $2, $2, 16 ++; MIPS64-NEXT: sra $2, $2, 16 ++; MIPS64-NEXT: # %bb.3: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAdd16: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64R2-NEXT: ld $1, %got_disp(z)($1) ++; MIPS64R2-NEXT: daddiu $2, $zero, -4 ++; MIPS64R2-NEXT: and $3, $1, $2 ++; MIPS64R2-NEXT: andi $1, $1, 3 ++; MIPS64R2-NEXT: sll $1, $1, 3 ++; MIPS64R2-NEXT: ori $2, $zero, 65535 ++; MIPS64R2-NEXT: sllv $5, $2, $1 ++; MIPS64R2-NEXT: nor $6, $zero, $5 ++; MIPS64R2-NEXT: sllv $4, $4, $1 ++; MIPS64R2-NEXT: .LBB14_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $7, 0($3) ++; MIPS64R2-NEXT: addu $8, $7, $4 ++; MIPS64R2-NEXT: and $8, $8, $5 ++; MIPS64R2-NEXT: and $9, $7, $6 ++; MIPS64R2-NEXT: or $9, $9, $8 ++; MIPS64R2-NEXT: sc $9, 0($3) ++; MIPS64R2-NEXT: beqz $9, .LBB14_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: and $2, $7, $5 ++; MIPS64R2-NEXT: srlv $2, $2, $1 ++; MIPS64R2-NEXT: seh $2, $2 ++; MIPS64R2-NEXT: # %bb.3: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAdd16: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) ++; 
MIPS64R6-NEXT: ld $1, %got_disp(z)($1) ++; MIPS64R6-NEXT: daddiu $2, $zero, -4 ++; MIPS64R6-NEXT: and $3, $1, $2 ++; MIPS64R6-NEXT: andi $1, $1, 3 ++; MIPS64R6-NEXT: sll $1, $1, 3 ++; MIPS64R6-NEXT: ori $2, $zero, 65535 ++; MIPS64R6-NEXT: sllv $5, $2, $1 ++; MIPS64R6-NEXT: nor $6, $zero, $5 ++; MIPS64R6-NEXT: sllv $4, $4, $1 ++; MIPS64R6-NEXT: .LBB14_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $7, 0($3) ++; MIPS64R6-NEXT: addu $8, $7, $4 ++; MIPS64R6-NEXT: and $8, $8, $5 ++; MIPS64R6-NEXT: and $9, $7, $6 ++; MIPS64R6-NEXT: or $9, $9, $8 ++; MIPS64R6-NEXT: sc $9, 0($3) ++; MIPS64R6-NEXT: beqzc $9, .LBB14_1 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: and $2, $7, $5 ++; MIPS64R6-NEXT: srlv $2, $2, $1 ++; MIPS64R6-NEXT: seh $2, $2 ++; MIPS64R6-NEXT: # %bb.3: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAdd16: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) ++; MIPS64R6O0-NEXT: move $1, $4 ++; MIPS64R6O0-NEXT: ld $2, %got_disp(z)($2) ++; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 ++; MIPS64R6O0-NEXT: and $5, $2, $3 ++; MIPS64R6O0-NEXT: andi $2, $2, 3 ++; MIPS64R6O0-NEXT: xori $2, $2, 2 ++; MIPS64R6O0-NEXT: sll $9, $2, 3 ++; MIPS64R6O0-NEXT: ori $2, $zero, 65535 ++; MIPS64R6O0-NEXT: sllv $7, $2, $9 ++; MIPS64R6O0-NEXT: nor $8, $zero, $7 ++; MIPS64R6O0-NEXT: sllv $6, $1, $9 ++; MIPS64R6O0-NEXT: .LBB14_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($5) ++; MIPS64R6O0-NEXT: addu $3, $2, $6 ++; MIPS64R6O0-NEXT: and $3, $3, $7 ++; MIPS64R6O0-NEXT: and $4, $2, $8 ++; MIPS64R6O0-NEXT: or $4, $4, $3 ++; MIPS64R6O0-NEXT: sc $4, 0($5) ++; MIPS64R6O0-NEXT: beqzc $4, .LBB14_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: and $1, $2, $7 ++; MIPS64R6O0-NEXT: srlv $1, $1, $9 ++; MIPS64R6O0-NEXT: seh $1, $1 ++; MIPS64R6O0-NEXT: # %bb.3: # %entry ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.4: # %entry ++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadAdd16: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(z)($2) ++; MM32-NEXT: addiu $2, $zero, -4 ++; MM32-NEXT: and $3, $1, $2 ++; MM32-NEXT: andi $1, $1, 3 ++; MM32-NEXT: sll $1, $1, 3 ++; MM32-NEXT: ori $2, $zero, 65535 ++; MM32-NEXT: sllv $5, $2, $1 ++; MM32-NEXT: nor $6, $zero, $5 ++; MM32-NEXT: sllv $4, $4, $1 ++; MM32-NEXT: $BB14_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $7, 0($3) ++; MM32-NEXT: addu $8, $7, $4 ++; MM32-NEXT: and $8, $8, $5 ++; MM32-NEXT: and $9, $7, $6 ++; MM32-NEXT: or $9, $9, $8 ++; MM32-NEXT: sc $9, 0($3) ++; MM32-NEXT: beqzc $9, $BB14_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: and $2, $7, $5 ++; MM32-NEXT: srlv $2, $2, $1 ++; MM32-NEXT: seh $2, $2 ++; MM32-NEXT: # %bb.3: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAdd16: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(z)($1) ++; O1-NEXT: addiu $2, $zero, -4 ++; O1-NEXT: and $3, $1, $2 ++; O1-NEXT: andi $1, $1, 3 
++; O1-NEXT: sll $1, $1, 3 ++; O1-NEXT: ori $2, $zero, 65535 ++; O1-NEXT: sllv $5, $2, $1 ++; O1-NEXT: nor $6, $zero, $5 ++; O1-NEXT: sllv $4, $4, $1 ++; O1-NEXT: $BB14_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $7, 0($3) ++; O1-NEXT: addu $8, $7, $4 ++; O1-NEXT: and $8, $8, $5 ++; O1-NEXT: and $9, $7, $6 ++; O1-NEXT: or $9, $9, $8 ++; O1-NEXT: sc $9, 0($3) ++; O1-NEXT: beqz $9, $BB14_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: and $2, $7, $5 ++; O1-NEXT: srlv $2, $2, $1 ++; O1-NEXT: sll $2, $2, 16 ++; O1-NEXT: sra $2, $2, 16 ++; O1-NEXT: # %bb.3: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAdd16: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(z)($1) ++; O2-NEXT: addiu $2, $zero, -4 ++; O2-NEXT: and $3, $1, $2 ++; O2-NEXT: andi $1, $1, 3 ++; O2-NEXT: sll $1, $1, 3 ++; O2-NEXT: ori $2, $zero, 65535 ++; O2-NEXT: sllv $5, $2, $1 ++; O2-NEXT: nor $6, $zero, $5 ++; O2-NEXT: sllv $4, $4, $1 ++; O2-NEXT: $BB14_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $7, 0($3) ++; O2-NEXT: addu $8, $7, $4 ++; O2-NEXT: and $8, $8, $5 ++; O2-NEXT: and $9, $7, $6 ++; O2-NEXT: or $9, $9, $8 ++; O2-NEXT: sc $9, 0($3) ++; O2-NEXT: beqz $9, $BB14_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: and $2, $7, $5 ++; O2-NEXT: srlv $2, $2, $1 ++; O2-NEXT: sll $2, $2, 16 ++; O2-NEXT: sra $2, $2, 16 ++; O2-NEXT: # %bb.3: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAdd16: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: addiu $2, $zero, -4 ++; O3-NEXT: lw $1, %got(z)($1) ++; O3-NEXT: and $3, $1, $2 ++; O3-NEXT: andi $1, $1, 3 ++; O3-NEXT: ori $2, $zero, 65535 ++; O3-NEXT: sll $1, $1, 3 ++; O3-NEXT: sllv $5, $2, $1 ++; O3-NEXT: sllv $4, $4, $1 ++; O3-NEXT: nor $6, $zero, $5 ++; O3-NEXT: $BB14_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $7, 0($3) ++; O3-NEXT: addu $8, $7, $4 ++; O3-NEXT: and $8, $8, $5 ++; O3-NEXT: and $9, $7, $6 ++; O3-NEXT: or $9, $9, $8 ++; O3-NEXT: sc $9, 0($3) ++; O3-NEXT: beqz $9, $BB14_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: and $2, $7, $5 ++; O3-NEXT: srlv $2, $2, $1 ++; O3-NEXT: sll $2, $2, 16 ++; O3-NEXT: sra $2, $2, 16 ++; O3-NEXT: # %bb.3: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadAdd16: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(z)($1) ++; MIPS32EB-NEXT: addiu $2, $zero, -4 ++; MIPS32EB-NEXT: and $3, $1, $2 ++; MIPS32EB-NEXT: andi $1, $1, 3 ++; MIPS32EB-NEXT: xori $1, $1, 2 ++; MIPS32EB-NEXT: sll $1, $1, 3 ++; MIPS32EB-NEXT: ori $2, $zero, 65535 ++; MIPS32EB-NEXT: sllv $5, $2, $1 ++; MIPS32EB-NEXT: nor $6, $zero, $5 ++; MIPS32EB-NEXT: sllv $4, $4, $1 ++; MIPS32EB-NEXT: $BB14_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $7, 0($3) ++; MIPS32EB-NEXT: addu $8, $7, $4 ++; MIPS32EB-NEXT: and $8, $8, $5 ++; MIPS32EB-NEXT: and $9, $7, $6 ++; MIPS32EB-NEXT: or $9, $9, $8 ++; MIPS32EB-NEXT: sc $9, 0($3) ++; MIPS32EB-NEXT: beqz $9, $BB14_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: and $2, $7, $5 ++; MIPS32EB-NEXT: srlv $2, $2, $1 ++; 
MIPS32EB-NEXT: sll $2, $2, 16 ++; MIPS32EB-NEXT: sra $2, $2, 16 ++; MIPS32EB-NEXT: # %bb.3: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i16* @z, i16 %incr monotonic ++ ret i16 %0 ++ ++} ++ ++; Test that the i16 return value from cmpxchg is recognised as signed, ++; so that setCC doesn't end up comparing an unsigned value to a signed ++; value. ++; The rest of the functions here are testing the atomic expansion, so ++; we just match the end of the function. ++define {i16, i1} @foo(i16* %addr, i16 %l, i16 %r, i16 %new) { ++; MIPS32-LABEL: foo: ++; MIPS32: # %bb.0: ++; MIPS32-NEXT: addu $1, $5, $6 ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: addiu $2, $zero, -4 ++; MIPS32-NEXT: and $3, $4, $2 ++; MIPS32-NEXT: andi $2, $4, 3 ++; MIPS32-NEXT: sll $4, $2, 3 ++; MIPS32-NEXT: ori $2, $zero, 65535 ++; MIPS32-NEXT: sllv $5, $2, $4 ++; MIPS32-NEXT: nor $6, $zero, $5 ++; MIPS32-NEXT: andi $2, $1, 65535 ++; MIPS32-NEXT: sllv $8, $2, $4 ++; MIPS32-NEXT: andi $2, $7, 65535 ++; MIPS32-NEXT: sllv $7, $2, $4 ++; MIPS32-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $9, 0($3) ++; MIPS32-NEXT: and $10, $9, $5 ++; MIPS32-NEXT: bne $10, $8, $BB15_3 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS32-NEXT: and $9, $9, $6 ++; MIPS32-NEXT: or $9, $9, $7 ++; MIPS32-NEXT: sc $9, 0($3) ++; MIPS32-NEXT: beqz $9, $BB15_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: $BB15_3: ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: srlv $2, $10, $4 ++; MIPS32-NEXT: sll $2, $2, 16 ++; MIPS32-NEXT: sra $2, $2, 16 ++; MIPS32-NEXT: # %bb.4: ++; MIPS32-NEXT: sll $1, $1, 16 ++; MIPS32-NEXT: sra $1, $1, 16 ++; MIPS32-NEXT: xor $1, $2, $1 ++; MIPS32-NEXT: sltiu $3, $1, 1 ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: foo: ++; MIPS32O0: # %bb.0: ++; MIPS32O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32O0-NEXT: .cfi_def_cfa_offset 8 ++; MIPS32O0-NEXT: move $1, $7 ++; MIPS32O0-NEXT: move $3, $4 ++; MIPS32O0-NEXT: addu $2, $5, $6 ++; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: addiu $4, $zero, -4 ++; MIPS32O0-NEXT: and $4, $3, $4 ++; MIPS32O0-NEXT: andi $3, $3, 3 ++; MIPS32O0-NEXT: sll $9, $3, 3 ++; MIPS32O0-NEXT: ori $3, $zero, 65535 ++; MIPS32O0-NEXT: sllv $5, $3, $9 ++; MIPS32O0-NEXT: nor $7, $zero, $5 ++; MIPS32O0-NEXT: andi $2, $2, 65535 ++; MIPS32O0-NEXT: sllv $6, $2, $9 ++; MIPS32O0-NEXT: andi $1, $1, 65535 ++; MIPS32O0-NEXT: sllv $8, $1, $9 ++; MIPS32O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($4) ++; MIPS32O0-NEXT: and $3, $2, $5 ++; MIPS32O0-NEXT: bne $3, $6, $BB15_3 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS32O0-NEXT: and $2, $2, $7 ++; MIPS32O0-NEXT: or $2, $2, $8 ++; MIPS32O0-NEXT: sc $2, 0($4) ++; MIPS32O0-NEXT: beqz $2, $BB15_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: $BB15_3: ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: srlv $1, $3, $9 ++; MIPS32O0-NEXT: sll $1, $1, 16 ++; MIPS32O0-NEXT: sra $1, $1, 16 ++; MIPS32O0-NEXT: # %bb.4: ++; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32O0-NEXT: # %bb.5: ++; MIPS32O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ++; MIPS32O0-NEXT: sll $1, $1, 16 ++; MIPS32O0-NEXT: sra $1, $1, 16 ++; MIPS32O0-NEXT: xor $1, $2, $1 ++; MIPS32O0-NEXT: sltiu $3, $1, 1 ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; 
MIPS32R2-LABEL: foo: ++; MIPS32R2: # %bb.0: ++; MIPS32R2-NEXT: addu $1, $5, $6 ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: addiu $2, $zero, -4 ++; MIPS32R2-NEXT: and $3, $4, $2 ++; MIPS32R2-NEXT: andi $2, $4, 3 ++; MIPS32R2-NEXT: sll $4, $2, 3 ++; MIPS32R2-NEXT: ori $2, $zero, 65535 ++; MIPS32R2-NEXT: sllv $5, $2, $4 ++; MIPS32R2-NEXT: nor $6, $zero, $5 ++; MIPS32R2-NEXT: andi $2, $1, 65535 ++; MIPS32R2-NEXT: sllv $8, $2, $4 ++; MIPS32R2-NEXT: andi $2, $7, 65535 ++; MIPS32R2-NEXT: sllv $7, $2, $4 ++; MIPS32R2-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $9, 0($3) ++; MIPS32R2-NEXT: and $10, $9, $5 ++; MIPS32R2-NEXT: bne $10, $8, $BB15_3 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS32R2-NEXT: and $9, $9, $6 ++; MIPS32R2-NEXT: or $9, $9, $7 ++; MIPS32R2-NEXT: sc $9, 0($3) ++; MIPS32R2-NEXT: beqz $9, $BB15_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: $BB15_3: ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: srlv $2, $10, $4 ++; MIPS32R2-NEXT: seh $2, $2 ++; MIPS32R2-NEXT: # %bb.4: ++; MIPS32R2-NEXT: seh $1, $1 ++; MIPS32R2-NEXT: xor $1, $2, $1 ++; MIPS32R2-NEXT: sltiu $3, $1, 1 ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: foo: ++; MIPS32R6: # %bb.0: ++; MIPS32R6-NEXT: addu $1, $5, $6 ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: addiu $2, $zero, -4 ++; MIPS32R6-NEXT: and $3, $4, $2 ++; MIPS32R6-NEXT: andi $2, $4, 3 ++; MIPS32R6-NEXT: sll $4, $2, 3 ++; MIPS32R6-NEXT: ori $2, $zero, 65535 ++; MIPS32R6-NEXT: sllv $5, $2, $4 ++; MIPS32R6-NEXT: nor $6, $zero, $5 ++; MIPS32R6-NEXT: andi $2, $1, 65535 ++; MIPS32R6-NEXT: sllv $8, $2, $4 ++; MIPS32R6-NEXT: andi $2, $7, 65535 ++; MIPS32R6-NEXT: sllv $7, $2, $4 ++; MIPS32R6-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $9, 0($3) ++; MIPS32R6-NEXT: and $10, $9, $5 ++; MIPS32R6-NEXT: bnec $10, $8, $BB15_3 ++; MIPS32R6-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS32R6-NEXT: and $9, $9, $6 ++; MIPS32R6-NEXT: or $9, $9, $7 ++; MIPS32R6-NEXT: sc $9, 0($3) ++; MIPS32R6-NEXT: beqzc $9, $BB15_1 ++; MIPS32R6-NEXT: $BB15_3: ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: srlv $2, $10, $4 ++; MIPS32R6-NEXT: seh $2, $2 ++; MIPS32R6-NEXT: # %bb.4: ++; MIPS32R6-NEXT: seh $1, $1 ++; MIPS32R6-NEXT: xor $1, $2, $1 ++; MIPS32R6-NEXT: sltiu $3, $1, 1 ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: foo: ++; MIPS32R6O0: # %bb.0: ++; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ++; MIPS32R6O0-NEXT: .cfi_def_cfa_offset 8 ++; MIPS32R6O0-NEXT: move $1, $7 ++; MIPS32R6O0-NEXT: move $3, $4 ++; MIPS32R6O0-NEXT: # kill: def $a3 killed $at ++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a2 ++; MIPS32R6O0-NEXT: # kill: def $v0 killed $a1 ++; MIPS32R6O0-NEXT: addu $2, $5, $6 ++; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: addiu $4, $zero, -4 ++; MIPS32R6O0-NEXT: and $4, $3, $4 ++; MIPS32R6O0-NEXT: andi $3, $3, 3 ++; MIPS32R6O0-NEXT: sll $9, $3, 3 ++; MIPS32R6O0-NEXT: ori $3, $zero, 65535 ++; MIPS32R6O0-NEXT: sllv $5, $3, $9 ++; MIPS32R6O0-NEXT: nor $7, $zero, $5 ++; MIPS32R6O0-NEXT: andi $2, $2, 65535 ++; MIPS32R6O0-NEXT: sllv $6, $2, $9 ++; MIPS32R6O0-NEXT: andi $1, $1, 65535 ++; MIPS32R6O0-NEXT: sllv $8, $1, $9 ++; MIPS32R6O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($4) ++; MIPS32R6O0-NEXT: and $3, $2, $5 ++; MIPS32R6O0-NEXT: bnec $3, $6, $BB15_3 ++; MIPS32R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; 
MIPS32R6O0-NEXT: and $2, $2, $7 ++; MIPS32R6O0-NEXT: or $2, $2, $8 ++; MIPS32R6O0-NEXT: sc $2, 0($4) ++; MIPS32R6O0-NEXT: beqzc $2, $BB15_1 ++; MIPS32R6O0-NEXT: $BB15_3: ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: srlv $1, $3, $9 ++; MIPS32R6O0-NEXT: seh $1, $1 ++; MIPS32R6O0-NEXT: # %bb.4: ++; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ++; MIPS32R6O0-NEXT: # %bb.5: ++; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ++; MIPS32R6O0-NEXT: seh $1, $1 ++; MIPS32R6O0-NEXT: xor $1, $2, $1 ++; MIPS32R6O0-NEXT: sltiu $3, $1, 1 ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: foo: ++; MIPS4: # %bb.0: ++; MIPS4-NEXT: sll $1, $6, 0 ++; MIPS4-NEXT: sll $2, $5, 0 ++; MIPS4-NEXT: addu $1, $2, $1 ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: sll $2, $7, 0 ++; MIPS4-NEXT: daddiu $3, $zero, -4 ++; MIPS4-NEXT: and $3, $4, $3 ++; MIPS4-NEXT: andi $4, $4, 3 ++; MIPS4-NEXT: sll $4, $4, 3 ++; MIPS4-NEXT: ori $5, $zero, 65535 ++; MIPS4-NEXT: sllv $5, $5, $4 ++; MIPS4-NEXT: nor $6, $zero, $5 ++; MIPS4-NEXT: andi $7, $1, 65535 ++; MIPS4-NEXT: sllv $7, $7, $4 ++; MIPS4-NEXT: andi $2, $2, 65535 ++; MIPS4-NEXT: sllv $8, $2, $4 ++; MIPS4-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $9, 0($3) ++; MIPS4-NEXT: and $10, $9, $5 ++; MIPS4-NEXT: bne $10, $7, .LBB15_3 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS4-NEXT: and $9, $9, $6 ++; MIPS4-NEXT: or $9, $9, $8 ++; MIPS4-NEXT: sc $9, 0($3) ++; MIPS4-NEXT: beqz $9, .LBB15_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: .LBB15_3: ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: srlv $2, $10, $4 ++; MIPS4-NEXT: sll $2, $2, 16 ++; MIPS4-NEXT: sra $2, $2, 16 ++; MIPS4-NEXT: # %bb.4: ++; MIPS4-NEXT: sll $1, $1, 16 ++; MIPS4-NEXT: sra $1, $1, 16 ++; MIPS4-NEXT: xor $1, $2, $1 ++; MIPS4-NEXT: sltiu $3, $1, 1 ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: foo: ++; MIPS64: # %bb.0: ++; MIPS64-NEXT: sll $1, $6, 0 ++; MIPS64-NEXT: sll $2, $5, 0 ++; MIPS64-NEXT: addu $1, $2, $1 ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: sll $2, $7, 0 ++; MIPS64-NEXT: daddiu $3, $zero, -4 ++; MIPS64-NEXT: and $3, $4, $3 ++; MIPS64-NEXT: andi $4, $4, 3 ++; MIPS64-NEXT: sll $4, $4, 3 ++; MIPS64-NEXT: ori $5, $zero, 65535 ++; MIPS64-NEXT: sllv $5, $5, $4 ++; MIPS64-NEXT: nor $6, $zero, $5 ++; MIPS64-NEXT: andi $7, $1, 65535 ++; MIPS64-NEXT: sllv $7, $7, $4 ++; MIPS64-NEXT: andi $2, $2, 65535 ++; MIPS64-NEXT: sllv $8, $2, $4 ++; MIPS64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $9, 0($3) ++; MIPS64-NEXT: and $10, $9, $5 ++; MIPS64-NEXT: bne $10, $7, .LBB15_3 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS64-NEXT: and $9, $9, $6 ++; MIPS64-NEXT: or $9, $9, $8 ++; MIPS64-NEXT: sc $9, 0($3) ++; MIPS64-NEXT: beqz $9, .LBB15_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: .LBB15_3: ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: srlv $2, $10, $4 ++; MIPS64-NEXT: sll $2, $2, 16 ++; MIPS64-NEXT: sra $2, $2, 16 ++; MIPS64-NEXT: # %bb.4: ++; MIPS64-NEXT: sll $1, $1, 16 ++; MIPS64-NEXT: sra $1, $1, 16 ++; MIPS64-NEXT: xor $1, $2, $1 ++; MIPS64-NEXT: sltiu $3, $1, 1 ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: foo: ++; MIPS64R2: # %bb.0: ++; MIPS64R2-NEXT: sll $1, $6, 0 ++; MIPS64R2-NEXT: sll $2, $5, 0 ++; MIPS64R2-NEXT: addu $1, $2, $1 ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: sll $2, $7, 0 ++; 
MIPS64R2-NEXT: daddiu $3, $zero, -4 ++; MIPS64R2-NEXT: and $3, $4, $3 ++; MIPS64R2-NEXT: andi $4, $4, 3 ++; MIPS64R2-NEXT: sll $4, $4, 3 ++; MIPS64R2-NEXT: ori $5, $zero, 65535 ++; MIPS64R2-NEXT: sllv $5, $5, $4 ++; MIPS64R2-NEXT: nor $6, $zero, $5 ++; MIPS64R2-NEXT: andi $7, $1, 65535 ++; MIPS64R2-NEXT: sllv $7, $7, $4 ++; MIPS64R2-NEXT: andi $2, $2, 65535 ++; MIPS64R2-NEXT: sllv $8, $2, $4 ++; MIPS64R2-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $9, 0($3) ++; MIPS64R2-NEXT: and $10, $9, $5 ++; MIPS64R2-NEXT: bne $10, $7, .LBB15_3 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS64R2-NEXT: and $9, $9, $6 ++; MIPS64R2-NEXT: or $9, $9, $8 ++; MIPS64R2-NEXT: sc $9, 0($3) ++; MIPS64R2-NEXT: beqz $9, .LBB15_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: .LBB15_3: ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: srlv $2, $10, $4 ++; MIPS64R2-NEXT: seh $2, $2 ++; MIPS64R2-NEXT: # %bb.4: ++; MIPS64R2-NEXT: seh $1, $1 ++; MIPS64R2-NEXT: xor $1, $2, $1 ++; MIPS64R2-NEXT: sltiu $3, $1, 1 ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: foo: ++; MIPS64R6: # %bb.0: ++; MIPS64R6-NEXT: sll $1, $6, 0 ++; MIPS64R6-NEXT: sll $2, $5, 0 ++; MIPS64R6-NEXT: addu $1, $2, $1 ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: sll $2, $7, 0 ++; MIPS64R6-NEXT: daddiu $3, $zero, -4 ++; MIPS64R6-NEXT: and $3, $4, $3 ++; MIPS64R6-NEXT: andi $4, $4, 3 ++; MIPS64R6-NEXT: sll $4, $4, 3 ++; MIPS64R6-NEXT: ori $5, $zero, 65535 ++; MIPS64R6-NEXT: sllv $5, $5, $4 ++; MIPS64R6-NEXT: nor $6, $zero, $5 ++; MIPS64R6-NEXT: andi $7, $1, 65535 ++; MIPS64R6-NEXT: sllv $7, $7, $4 ++; MIPS64R6-NEXT: andi $2, $2, 65535 ++; MIPS64R6-NEXT: sllv $8, $2, $4 ++; MIPS64R6-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $9, 0($3) ++; MIPS64R6-NEXT: and $10, $9, $5 ++; MIPS64R6-NEXT: bnec $10, $7, .LBB15_3 ++; MIPS64R6-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS64R6-NEXT: and $9, $9, $6 ++; MIPS64R6-NEXT: or $9, $9, $8 ++; MIPS64R6-NEXT: sc $9, 0($3) ++; MIPS64R6-NEXT: beqzc $9, .LBB15_1 ++; MIPS64R6-NEXT: .LBB15_3: ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: srlv $2, $10, $4 ++; MIPS64R6-NEXT: seh $2, $2 ++; MIPS64R6-NEXT: # %bb.4: ++; MIPS64R6-NEXT: seh $1, $1 ++; MIPS64R6-NEXT: xor $1, $2, $1 ++; MIPS64R6-NEXT: sltiu $3, $1, 1 ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: foo: ++; MIPS64R6O0: # %bb.0: ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: .cfi_def_cfa_offset 16 ++; MIPS64R6O0-NEXT: move $3, $4 ++; MIPS64R6O0-NEXT: move $1, $7 ++; MIPS64R6O0-NEXT: sll $1, $1, 0 ++; MIPS64R6O0-NEXT: move $2, $6 ++; MIPS64R6O0-NEXT: sll $4, $2, 0 ++; MIPS64R6O0-NEXT: move $2, $5 ++; MIPS64R6O0-NEXT: sll $2, $2, 0 ++; MIPS64R6O0-NEXT: addu $2, $2, $4 ++; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 ++; MIPS64R6O0-NEXT: and $4, $3, $4 ++; MIPS64R6O0-NEXT: andi $3, $3, 3 ++; MIPS64R6O0-NEXT: xori $3, $3, 2 ++; MIPS64R6O0-NEXT: sll $9, $3, 3 ++; MIPS64R6O0-NEXT: ori $3, $zero, 65535 ++; MIPS64R6O0-NEXT: sllv $5, $3, $9 ++; MIPS64R6O0-NEXT: nor $7, $zero, $5 ++; MIPS64R6O0-NEXT: andi $2, $2, 65535 ++; MIPS64R6O0-NEXT: sllv $6, $2, $9 ++; MIPS64R6O0-NEXT: andi $1, $1, 65535 ++; MIPS64R6O0-NEXT: sllv $8, $1, $9 ++; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($4) ++; MIPS64R6O0-NEXT: and $3, $2, $5 ++; MIPS64R6O0-NEXT: bnec $3, $6, .LBB15_3 ++; 
MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS64R6O0-NEXT: and $2, $2, $7 ++; MIPS64R6O0-NEXT: or $2, $2, $8 ++; MIPS64R6O0-NEXT: sc $2, 0($4) ++; MIPS64R6O0-NEXT: beqzc $2, .LBB15_1 ++; MIPS64R6O0-NEXT: .LBB15_3: ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: srlv $1, $3, $9 ++; MIPS64R6O0-NEXT: seh $1, $1 ++; MIPS64R6O0-NEXT: # %bb.4: ++; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ++; MIPS64R6O0-NEXT: # %bb.5: ++; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ++; MIPS64R6O0-NEXT: seh $1, $1 ++; MIPS64R6O0-NEXT: xor $1, $2, $1 ++; MIPS64R6O0-NEXT: sltiu $3, $1, 1 ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: foo: ++; MM32: # %bb.0: ++; MM32-NEXT: addu16 $3, $5, $6 ++; MM32-NEXT: sync ++; MM32-NEXT: addiu $1, $zero, -4 ++; MM32-NEXT: and $1, $4, $1 ++; MM32-NEXT: andi $2, $4, 3 ++; MM32-NEXT: sll $4, $2, 3 ++; MM32-NEXT: ori $2, $zero, 65535 ++; MM32-NEXT: sllv $5, $2, $4 ++; MM32-NEXT: nor $6, $zero, $5 ++; MM32-NEXT: andi $2, $3, 65535 ++; MM32-NEXT: sllv $8, $2, $4 ++; MM32-NEXT: andi $2, $7, 65535 ++; MM32-NEXT: sllv $7, $2, $4 ++; MM32-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $9, 0($1) ++; MM32-NEXT: and $10, $9, $5 ++; MM32-NEXT: bne $10, $8, $BB15_3 ++; MM32-NEXT: nop ++; MM32-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MM32-NEXT: and $9, $9, $6 ++; MM32-NEXT: or $9, $9, $7 ++; MM32-NEXT: sc $9, 0($1) ++; MM32-NEXT: beqzc $9, $BB15_1 ++; MM32-NEXT: $BB15_3: ++; MM32-NEXT: sync ++; MM32-NEXT: srlv $2, $10, $4 ++; MM32-NEXT: seh $2, $2 ++; MM32-NEXT: # %bb.4: ++; MM32-NEXT: seh $1, $3 ++; MM32-NEXT: xor $1, $2, $1 ++; MM32-NEXT: sltiu $3, $1, 1 ++; MM32-NEXT: sync ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: foo: ++; O1: # %bb.0: ++; O1-NEXT: addu $1, $5, $6 ++; O1-NEXT: sync ++; O1-NEXT: addiu $2, $zero, -4 ++; O1-NEXT: and $3, $4, $2 ++; O1-NEXT: andi $2, $4, 3 ++; O1-NEXT: sll $4, $2, 3 ++; O1-NEXT: ori $2, $zero, 65535 ++; O1-NEXT: sllv $5, $2, $4 ++; O1-NEXT: nor $6, $zero, $5 ++; O1-NEXT: andi $2, $1, 65535 ++; O1-NEXT: sllv $8, $2, $4 ++; O1-NEXT: andi $2, $7, 65535 ++; O1-NEXT: sllv $7, $2, $4 ++; O1-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $9, 0($3) ++; O1-NEXT: and $10, $9, $5 ++; O1-NEXT: bne $10, $8, $BB15_3 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; O1-NEXT: and $9, $9, $6 ++; O1-NEXT: or $9, $9, $7 ++; O1-NEXT: sc $9, 0($3) ++; O1-NEXT: beqz $9, $BB15_1 ++; O1-NEXT: nop ++; O1-NEXT: $BB15_3: ++; O1-NEXT: sync ++; O1-NEXT: srlv $2, $10, $4 ++; O1-NEXT: sll $2, $2, 16 ++; O1-NEXT: sra $2, $2, 16 ++; O1-NEXT: # %bb.4: ++; O1-NEXT: sll $1, $1, 16 ++; O1-NEXT: sra $1, $1, 16 ++; O1-NEXT: xor $1, $2, $1 ++; O1-NEXT: sltiu $3, $1, 1 ++; O1-NEXT: sync ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: foo: ++; O2: # %bb.0: ++; O2-NEXT: addu $1, $5, $6 ++; O2-NEXT: sync ++; O2-NEXT: addiu $2, $zero, -4 ++; O2-NEXT: and $3, $4, $2 ++; O2-NEXT: andi $2, $4, 3 ++; O2-NEXT: sll $4, $2, 3 ++; O2-NEXT: ori $2, $zero, 65535 ++; O2-NEXT: sllv $5, $2, $4 ++; O2-NEXT: nor $6, $zero, $5 ++; O2-NEXT: andi $2, $1, 65535 ++; O2-NEXT: sllv $8, $2, $4 ++; O2-NEXT: andi $2, $7, 65535 ++; O2-NEXT: sllv $7, $2, $4 ++; O2-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $9, 0($3) ++; O2-NEXT: and $10, $9, $5 ++; O2-NEXT: bne $10, $8, $BB15_3 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; 
O2-NEXT: and $9, $9, $6 ++; O2-NEXT: or $9, $9, $7 ++; O2-NEXT: sc $9, 0($3) ++; O2-NEXT: beqz $9, $BB15_1 ++; O2-NEXT: nop ++; O2-NEXT: $BB15_3: ++; O2-NEXT: sync ++; O2-NEXT: srlv $2, $10, $4 ++; O2-NEXT: sll $2, $2, 16 ++; O2-NEXT: sra $2, $2, 16 ++; O2-NEXT: # %bb.4: ++; O2-NEXT: sll $1, $1, 16 ++; O2-NEXT: sra $1, $1, 16 ++; O2-NEXT: xor $1, $2, $1 ++; O2-NEXT: sltiu $3, $1, 1 ++; O2-NEXT: sync ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: foo: ++; O3: # %bb.0: ++; O3-NEXT: addiu $2, $zero, -4 ++; O3-NEXT: addu $1, $5, $6 ++; O3-NEXT: sync ++; O3-NEXT: and $3, $4, $2 ++; O3-NEXT: andi $2, $4, 3 ++; O3-NEXT: sll $4, $2, 3 ++; O3-NEXT: ori $2, $zero, 65535 ++; O3-NEXT: sllv $5, $2, $4 ++; O3-NEXT: andi $2, $1, 65535 ++; O3-NEXT: sll $1, $1, 16 ++; O3-NEXT: sllv $8, $2, $4 ++; O3-NEXT: andi $2, $7, 65535 ++; O3-NEXT: nor $6, $zero, $5 ++; O3-NEXT: sra $1, $1, 16 ++; O3-NEXT: sllv $7, $2, $4 ++; O3-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $9, 0($3) ++; O3-NEXT: and $10, $9, $5 ++; O3-NEXT: bne $10, $8, $BB15_3 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; O3-NEXT: and $9, $9, $6 ++; O3-NEXT: or $9, $9, $7 ++; O3-NEXT: sc $9, 0($3) ++; O3-NEXT: beqz $9, $BB15_1 ++; O3-NEXT: nop ++; O3-NEXT: $BB15_3: ++; O3-NEXT: sync ++; O3-NEXT: srlv $2, $10, $4 ++; O3-NEXT: sll $2, $2, 16 ++; O3-NEXT: sra $2, $2, 16 ++; O3-NEXT: # %bb.4: ++; O3-NEXT: sync ++; O3-NEXT: xor $1, $2, $1 ++; O3-NEXT: jr $ra ++; O3-NEXT: sltiu $3, $1, 1 ++; ++; MIPS32EB-LABEL: foo: ++; MIPS32EB: # %bb.0: ++; MIPS32EB-NEXT: addu $1, $5, $6 ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: addiu $2, $zero, -4 ++; MIPS32EB-NEXT: and $3, $4, $2 ++; MIPS32EB-NEXT: andi $2, $4, 3 ++; MIPS32EB-NEXT: xori $2, $2, 2 ++; MIPS32EB-NEXT: sll $4, $2, 3 ++; MIPS32EB-NEXT: ori $2, $zero, 65535 ++; MIPS32EB-NEXT: sllv $5, $2, $4 ++; MIPS32EB-NEXT: nor $6, $zero, $5 ++; MIPS32EB-NEXT: andi $2, $1, 65535 ++; MIPS32EB-NEXT: sllv $8, $2, $4 ++; MIPS32EB-NEXT: andi $2, $7, 65535 ++; MIPS32EB-NEXT: sllv $7, $2, $4 ++; MIPS32EB-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $9, 0($3) ++; MIPS32EB-NEXT: and $10, $9, $5 ++; MIPS32EB-NEXT: bne $10, $8, $BB15_3 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 ++; MIPS32EB-NEXT: and $9, $9, $6 ++; MIPS32EB-NEXT: or $9, $9, $7 ++; MIPS32EB-NEXT: sc $9, 0($3) ++; MIPS32EB-NEXT: beqz $9, $BB15_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: $BB15_3: ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: srlv $2, $10, $4 ++; MIPS32EB-NEXT: sll $2, $2, 16 ++; MIPS32EB-NEXT: sra $2, $2, 16 ++; MIPS32EB-NEXT: # %bb.4: ++; MIPS32EB-NEXT: sll $1, $1, 16 ++; MIPS32EB-NEXT: sra $1, $1, 16 ++; MIPS32EB-NEXT: xor $1, $2, $1 ++; MIPS32EB-NEXT: sltiu $3, $1, 1 ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++ %desired = add i16 %l, %r ++ %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst seq_cst ++ ret {i16, i1} %res ++} ++ ++@countsint = common global i32 0, align 4 ++ ++define i32 @CheckSync(i32 signext %v) nounwind noinline { ++; MIPS32-LABEL: CheckSync: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: lw $1, %got(countsint)($1) ++; MIPS32-NEXT: $BB16_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: addu $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB16_1 ++; 
MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: CheckSync: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: lw $3, %got(countsint)($1) ++; MIPS32O0-NEXT: $BB16_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: addu $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB16_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: CheckSync: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: lw $1, %got(countsint)($1) ++; MIPS32R2-NEXT: $BB16_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: addu $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB16_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: CheckSync: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: lw $1, %got(countsint)($1) ++; MIPS32R6-NEXT: $BB16_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: addu $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB16_1 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: CheckSync: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: lw $3, %got(countsint)($1) ++; MIPS32R6O0-NEXT: $BB16_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: addu $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB16_1 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: CheckSync: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: ld $1, %got_disp(countsint)($1) ++; MIPS4-NEXT: .LBB16_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: addu $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB16_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: CheckSync: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: ld $1, %got_disp(countsint)($1) ++; MIPS64-NEXT: .LBB16_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 
0($1) ++; MIPS64-NEXT: addu $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB16_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: CheckSync: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: ld $1, %got_disp(countsint)($1) ++; MIPS64R2-NEXT: .LBB16_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: addu $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB16_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: CheckSync: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: ld $1, %got_disp(countsint)($1) ++; MIPS64R6-NEXT: .LBB16_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: addu $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB16_1 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: CheckSync: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(CheckSync))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: ld $3, %got_disp(countsint)($1) ++; MIPS64R6O0-NEXT: .LBB16_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: addu $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB16_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: CheckSync: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: sync ++; MM32-NEXT: lw $1, %got(countsint)($2) ++; MM32-NEXT: $BB16_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: addu16 $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB16_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: sync ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: CheckSync: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: sync ++; O1-NEXT: lw $1, %got(countsint)($1) ++; O1-NEXT: $BB16_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: addu $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB16_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: sync ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: CheckSync: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: sync ++; O2-NEXT: lw $1, %got(countsint)($1) ++; O2-NEXT: $BB16_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; 
O2-NEXT: ll $2, 0($1) ++; O2-NEXT: addu $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB16_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: sync ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: CheckSync: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: sync ++; O3-NEXT: lw $1, %got(countsint)($1) ++; O3-NEXT: $BB16_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: addu $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB16_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: sync ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: CheckSync: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: lw $1, %got(countsint)($1) ++; MIPS32EB-NEXT: $BB16_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: addu $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB16_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i32* @countsint, i32 %v seq_cst ++ ret i32 %0 ++} ++ ++; make sure that this assertion in ++; TwoAddressInstructionPass::TryInstructionTransform does not fail: ++; ++; line 1203: assert(TargetRegisterInfo::isVirtualRegister(regB) && ++; ++; it failed when MipsDAGToDAGISel::ReplaceUsesWithZeroReg replaced an ++; operand of an atomic instruction with register $zero. ++@a = external global i32 ++ ++define i32 @zeroreg() nounwind { ++; MIPS32-LABEL: zeroreg: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: addiu $2, $zero, 0 ++; MIPS32-NEXT: addiu $3, $zero, 1 ++; MIPS32-NEXT: lw $1, %got(a)($1) ++; MIPS32-NEXT: $BB17_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $4, 0($1) ++; MIPS32-NEXT: bne $4, $3, $BB17_3 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32-NEXT: move $5, $2 ++; MIPS32-NEXT: sc $5, 0($1) ++; MIPS32-NEXT: beqz $5, $BB17_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: $BB17_3: # %entry ++; MIPS32-NEXT: xor $1, $4, $3 ++; MIPS32-NEXT: sltiu $2, $1, 1 ++; MIPS32-NEXT: sync ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: zeroreg: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: sync ++; MIPS32O0-NEXT: lw $4, %got(a)($1) ++; MIPS32O0-NEXT: addiu $6, $zero, 0 ++; MIPS32O0-NEXT: addiu $2, $zero, 1 ++; MIPS32O0-NEXT: move $5, $2 ++; MIPS32O0-NEXT: $BB17_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $1, 0($4) ++; MIPS32O0-NEXT: bne $1, $5, $BB17_3 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32O0-NEXT: move $3, $6 ++; MIPS32O0-NEXT: sc $3, 0($4) ++; MIPS32O0-NEXT: beqz $3, $BB17_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: $BB17_3: # %entry ++; MIPS32O0-NEXT: xor $2, $1, $2 ++; MIPS32O0-NEXT: sltiu $2, $2, 1 ++; MIPS32O0-NEXT: sync ++; 
MIPS32O0-NEXT: addiu $2, $zero, 1 ++; MIPS32O0-NEXT: xor $1, $1, $2 ++; MIPS32O0-NEXT: sltiu $1, $1, 1 ++; MIPS32O0-NEXT: andi $2, $1, 1 ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: zeroreg: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: addiu $2, $zero, 0 ++; MIPS32R2-NEXT: addiu $3, $zero, 1 ++; MIPS32R2-NEXT: lw $1, %got(a)($1) ++; MIPS32R2-NEXT: $BB17_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $4, 0($1) ++; MIPS32R2-NEXT: bne $4, $3, $BB17_3 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32R2-NEXT: move $5, $2 ++; MIPS32R2-NEXT: sc $5, 0($1) ++; MIPS32R2-NEXT: beqz $5, $BB17_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: $BB17_3: # %entry ++; MIPS32R2-NEXT: xor $1, $4, $3 ++; MIPS32R2-NEXT: sltiu $2, $1, 1 ++; MIPS32R2-NEXT: sync ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: zeroreg: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: addiu $2, $zero, 0 ++; MIPS32R6-NEXT: addiu $3, $zero, 1 ++; MIPS32R6-NEXT: lw $1, %got(a)($1) ++; MIPS32R6-NEXT: $BB17_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $4, 0($1) ++; MIPS32R6-NEXT: bnec $4, $3, $BB17_3 ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32R6-NEXT: move $5, $2 ++; MIPS32R6-NEXT: sc $5, 0($1) ++; MIPS32R6-NEXT: beqzc $5, $BB17_1 ++; MIPS32R6-NEXT: $BB17_3: # %entry ++; MIPS32R6-NEXT: xor $1, $4, $3 ++; MIPS32R6-NEXT: sltiu $2, $1, 1 ++; MIPS32R6-NEXT: sync ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: zeroreg: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: lw $4, %got(a)($1) ++; MIPS32R6O0-NEXT: addiu $6, $zero, 0 ++; MIPS32R6O0-NEXT: addiu $2, $zero, 1 ++; MIPS32R6O0-NEXT: move $5, $2 ++; MIPS32R6O0-NEXT: $BB17_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $1, 0($4) ++; MIPS32R6O0-NEXT: bnec $1, $5, $BB17_3 ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32R6O0-NEXT: move $3, $6 ++; MIPS32R6O0-NEXT: sc $3, 0($4) ++; MIPS32R6O0-NEXT: beqzc $3, $BB17_1 ++; MIPS32R6O0-NEXT: $BB17_3: # %entry ++; MIPS32R6O0-NEXT: xor $1, $1, $2 ++; MIPS32R6O0-NEXT: sltiu $2, $1, 1 ++; MIPS32R6O0-NEXT: sync ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: zeroreg: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: addiu $2, $zero, 0 ++; MIPS4-NEXT: addiu $3, $zero, 1 ++; MIPS4-NEXT: ld $1, %got_disp(a)($1) ++; MIPS4-NEXT: .LBB17_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $4, 0($1) ++; MIPS4-NEXT: bne $4, $3, .LBB17_3 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS4-NEXT: move $5, $2 ++; MIPS4-NEXT: sc $5, 0($1) ++; MIPS4-NEXT: beqz $5, .LBB17_1 ++; MIPS4-NEXT: nop ++; 
MIPS4-NEXT: .LBB17_3: # %entry ++; MIPS4-NEXT: xor $1, $4, $3 ++; MIPS4-NEXT: sltiu $2, $1, 1 ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: zeroreg: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: addiu $2, $zero, 0 ++; MIPS64-NEXT: addiu $3, $zero, 1 ++; MIPS64-NEXT: ld $1, %got_disp(a)($1) ++; MIPS64-NEXT: .LBB17_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $4, 0($1) ++; MIPS64-NEXT: bne $4, $3, .LBB17_3 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS64-NEXT: move $5, $2 ++; MIPS64-NEXT: sc $5, 0($1) ++; MIPS64-NEXT: beqz $5, .LBB17_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: .LBB17_3: # %entry ++; MIPS64-NEXT: xor $1, $4, $3 ++; MIPS64-NEXT: sltiu $2, $1, 1 ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: zeroreg: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: addiu $2, $zero, 0 ++; MIPS64R2-NEXT: addiu $3, $zero, 1 ++; MIPS64R2-NEXT: ld $1, %got_disp(a)($1) ++; MIPS64R2-NEXT: .LBB17_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $4, 0($1) ++; MIPS64R2-NEXT: bne $4, $3, .LBB17_3 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS64R2-NEXT: move $5, $2 ++; MIPS64R2-NEXT: sc $5, 0($1) ++; MIPS64R2-NEXT: beqz $5, .LBB17_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: .LBB17_3: # %entry ++; MIPS64R2-NEXT: xor $1, $4, $3 ++; MIPS64R2-NEXT: sltiu $2, $1, 1 ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: zeroreg: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: addiu $2, $zero, 0 ++; MIPS64R6-NEXT: addiu $3, $zero, 1 ++; MIPS64R6-NEXT: ld $1, %got_disp(a)($1) ++; MIPS64R6-NEXT: .LBB17_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $4, 0($1) ++; MIPS64R6-NEXT: bnec $4, $3, .LBB17_3 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS64R6-NEXT: move $5, $2 ++; MIPS64R6-NEXT: sc $5, 0($1) ++; MIPS64R6-NEXT: beqzc $5, .LBB17_1 ++; MIPS64R6-NEXT: .LBB17_3: # %entry ++; MIPS64R6-NEXT: xor $1, $4, $3 ++; MIPS64R6-NEXT: sltiu $2, $1, 1 ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: zeroreg: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(zeroreg))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: ld $4, %got_disp(a)($1) ++; MIPS64R6O0-NEXT: addiu $6, $zero, 0 ++; MIPS64R6O0-NEXT: addiu $2, $zero, 1 ++; MIPS64R6O0-NEXT: move $5, $2 ++; MIPS64R6O0-NEXT: .LBB17_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $1, 0($4) ++; MIPS64R6O0-NEXT: bnec $1, $5, .LBB17_3 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; 
MIPS64R6O0-NEXT: move $3, $6 ++; MIPS64R6O0-NEXT: sc $3, 0($4) ++; MIPS64R6O0-NEXT: beqzc $3, .LBB17_1 ++; MIPS64R6O0-NEXT: .LBB17_3: # %entry ++; MIPS64R6O0-NEXT: xor $1, $1, $2 ++; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: zeroreg: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: sync ++; MM32-NEXT: li16 $3, 0 ++; MM32-NEXT: li16 $4, 1 ++; MM32-NEXT: lw $1, %got(a)($2) ++; MM32-NEXT: $BB17_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: bne $2, $4, $BB17_3 ++; MM32-NEXT: nop ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MM32-NEXT: move $5, $3 ++; MM32-NEXT: sc $5, 0($1) ++; MM32-NEXT: beqzc $5, $BB17_1 ++; MM32-NEXT: $BB17_3: # %entry ++; MM32-NEXT: sync ++; MM32-NEXT: xor $1, $2, $4 ++; MM32-NEXT: sltiu $2, $1, 1 ++; MM32-NEXT: sync ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: zeroreg: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: sync ++; O1-NEXT: addiu $2, $zero, 0 ++; O1-NEXT: addiu $3, $zero, 1 ++; O1-NEXT: lw $1, %got(a)($1) ++; O1-NEXT: $BB17_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $4, 0($1) ++; O1-NEXT: bne $4, $3, $BB17_3 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; O1-NEXT: move $5, $2 ++; O1-NEXT: sc $5, 0($1) ++; O1-NEXT: beqz $5, $BB17_1 ++; O1-NEXT: nop ++; O1-NEXT: $BB17_3: # %entry ++; O1-NEXT: xor $1, $4, $3 ++; O1-NEXT: sltiu $2, $1, 1 ++; O1-NEXT: sync ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: zeroreg: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: sync ++; O2-NEXT: addiu $2, $zero, 0 ++; O2-NEXT: addiu $3, $zero, 1 ++; O2-NEXT: lw $1, %got(a)($1) ++; O2-NEXT: $BB17_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $4, 0($1) ++; O2-NEXT: bne $4, $3, $BB17_3 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; O2-NEXT: move $5, $2 ++; O2-NEXT: sc $5, 0($1) ++; O2-NEXT: beqz $5, $BB17_1 ++; O2-NEXT: nop ++; O2-NEXT: $BB17_3: # %entry ++; O2-NEXT: xor $1, $4, $3 ++; O2-NEXT: sltiu $2, $1, 1 ++; O2-NEXT: sync ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: zeroreg: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: addiu $2, $zero, 0 ++; O3-NEXT: addiu $3, $zero, 1 ++; O3-NEXT: sync ++; O3-NEXT: lw $1, %got(a)($1) ++; O3-NEXT: $BB17_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $4, 0($1) ++; O3-NEXT: bne $4, $3, $BB17_3 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; O3-NEXT: move $5, $2 ++; O3-NEXT: sc $5, 0($1) ++; O3-NEXT: beqz $5, $BB17_1 ++; O3-NEXT: nop ++; O3-NEXT: $BB17_3: # %entry ++; O3-NEXT: sync ++; O3-NEXT: xor $1, $4, $3 ++; O3-NEXT: jr $ra ++; O3-NEXT: sltiu $2, $1, 1 ++; ++; MIPS32EB-LABEL: zeroreg: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: addiu $2, $zero, 0 ++; MIPS32EB-NEXT: addiu $3, $zero, 1 ++; 
MIPS32EB-NEXT: lw $1, %got(a)($1) ++; MIPS32EB-NEXT: $BB17_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $4, 0($1) ++; MIPS32EB-NEXT: bne $4, $3, $BB17_3 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: # in Loop: Header=BB17_1 Depth=1 ++; MIPS32EB-NEXT: move $5, $2 ++; MIPS32EB-NEXT: sc $5, 0($1) ++; MIPS32EB-NEXT: beqz $5, $BB17_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: $BB17_3: # %entry ++; MIPS32EB-NEXT: xor $1, $4, $3 ++; MIPS32EB-NEXT: sltiu $2, $1, 1 ++; MIPS32EB-NEXT: sync ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %pair0 = cmpxchg i32* @a, i32 1, i32 0 seq_cst seq_cst ++ %0 = extractvalue { i32, i1 } %pair0, 0 ++ %1 = icmp eq i32 %0, 1 ++ %conv = zext i1 %1 to i32 ++ ret i32 %conv ++} ++ ++; Check that MIPS32R6 has the correct offset range. ++; FIXME: At the moment, we don't seem to do addr+offset for any atomic load/store. ++define i32 @AtomicLoadAdd32_OffGt9Bit(i32 signext %incr) nounwind { ++; MIPS32-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32: # %bb.0: # %entry ++; MIPS32-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32-NEXT: addu $1, $2, $25 ++; MIPS32-NEXT: lw $1, %got(x)($1) ++; MIPS32-NEXT: addiu $1, $1, 1024 ++; MIPS32-NEXT: $BB18_1: # %entry ++; MIPS32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32-NEXT: ll $2, 0($1) ++; MIPS32-NEXT: addu $3, $2, $4 ++; MIPS32-NEXT: sc $3, 0($1) ++; MIPS32-NEXT: beqz $3, $BB18_1 ++; MIPS32-NEXT: nop ++; MIPS32-NEXT: # %bb.2: # %entry ++; MIPS32-NEXT: jr $ra ++; MIPS32-NEXT: nop ++; ++; MIPS32O0-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32O0: # %bb.0: # %entry ++; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32O0-NEXT: addu $1, $2, $25 ++; MIPS32O0-NEXT: lw $1, %got(x)($1) ++; MIPS32O0-NEXT: addiu $3, $1, 1024 ++; MIPS32O0-NEXT: $BB18_1: # %entry ++; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32O0-NEXT: ll $2, 0($3) ++; MIPS32O0-NEXT: addu $1, $2, $4 ++; MIPS32O0-NEXT: sc $1, 0($3) ++; MIPS32O0-NEXT: beqz $1, $BB18_1 ++; MIPS32O0-NEXT: nop ++; MIPS32O0-NEXT: # %bb.2: # %entry ++; MIPS32O0-NEXT: jr $ra ++; MIPS32O0-NEXT: nop ++; ++; MIPS32R2-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32R2: # %bb.0: # %entry ++; MIPS32R2-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R2-NEXT: addu $1, $2, $25 ++; MIPS32R2-NEXT: lw $1, %got(x)($1) ++; MIPS32R2-NEXT: addiu $1, $1, 1024 ++; MIPS32R2-NEXT: $BB18_1: # %entry ++; MIPS32R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R2-NEXT: ll $2, 0($1) ++; MIPS32R2-NEXT: addu $3, $2, $4 ++; MIPS32R2-NEXT: sc $3, 0($1) ++; MIPS32R2-NEXT: beqz $3, $BB18_1 ++; MIPS32R2-NEXT: nop ++; MIPS32R2-NEXT: # %bb.2: # %entry ++; MIPS32R2-NEXT: jr $ra ++; MIPS32R2-NEXT: nop ++; ++; MIPS32R6-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32R6: # %bb.0: # %entry ++; MIPS32R6-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32R6-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6-NEXT: addu $1, $2, $25 ++; MIPS32R6-NEXT: lw $1, %got(x)($1) ++; MIPS32R6-NEXT: addiu $1, $1, 1024 ++; MIPS32R6-NEXT: $BB18_1: # %entry ++; MIPS32R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6-NEXT: ll $2, 0($1) ++; MIPS32R6-NEXT: addu $3, $2, $4 ++; MIPS32R6-NEXT: sc $3, 0($1) ++; MIPS32R6-NEXT: beqzc $3, $BB18_1 ++; MIPS32R6-NEXT: nop ++; MIPS32R6-NEXT: # %bb.2: # %entry ++; MIPS32R6-NEXT: jrc $ra ++; ++; MIPS32R6O0-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32R6O0: # %bb.0: # %entry ++; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) 
++; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32R6O0-NEXT: addu $1, $2, $25 ++; MIPS32R6O0-NEXT: lw $1, %got(x)($1) ++; MIPS32R6O0-NEXT: addiu $3, $1, 1024 ++; MIPS32R6O0-NEXT: $BB18_1: # %entry ++; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32R6O0-NEXT: ll $2, 0($3) ++; MIPS32R6O0-NEXT: addu $1, $2, $4 ++; MIPS32R6O0-NEXT: sc $1, 0($3) ++; MIPS32R6O0-NEXT: beqzc $1, $BB18_1 ++; MIPS32R6O0-NEXT: nop ++; MIPS32R6O0-NEXT: # %bb.2: # %entry ++; MIPS32R6O0-NEXT: jrc $ra ++; ++; MIPS4-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: daddiu $1, $1, 1024 ++; MIPS4-NEXT: .LBB18_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: ll $2, 0($1) ++; MIPS4-NEXT: addu $3, $2, $4 ++; MIPS4-NEXT: sc $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB18_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: daddiu $1, $1, 1024 ++; MIPS64-NEXT: .LBB18_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: ll $2, 0($1) ++; MIPS64-NEXT: addu $3, $2, $4 ++; MIPS64-NEXT: sc $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB18_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: daddiu $1, $1, 1024 ++; MIPS64R2-NEXT: .LBB18_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: ll $2, 0($1) ++; MIPS64R2-NEXT: addu $3, $2, $4 ++; MIPS64R2-NEXT: sc $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB18_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: daddiu $1, $1, 1024 ++; MIPS64R6-NEXT: .LBB18_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: ll $2, 0($1) ++; MIPS64R6-NEXT: addu $3, $2, $4 ++; MIPS64R6-NEXT: sc $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB18_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ++; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ++; MIPS64R6O0-NEXT: ld $1, 
%got_disp(x)($1) ++; MIPS64R6O0-NEXT: daddiu $3, $1, 1024 ++; MIPS64R6O0-NEXT: .LBB18_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: ll $2, 0($3) ++; MIPS64R6O0-NEXT: addu $1, $2, $4 ++; MIPS64R6O0-NEXT: sc $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB18_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; MM32-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MM32: # %bb.0: # %entry ++; MM32-NEXT: lui $2, %hi(_gp_disp) ++; MM32-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MM32-NEXT: addu $2, $2, $25 ++; MM32-NEXT: lw $1, %got(x)($2) ++; MM32-NEXT: addiu $1, $1, 1024 ++; MM32-NEXT: $BB18_1: # %entry ++; MM32-NEXT: # =>This Inner Loop Header: Depth=1 ++; MM32-NEXT: ll $2, 0($1) ++; MM32-NEXT: addu16 $3, $2, $4 ++; MM32-NEXT: sc $3, 0($1) ++; MM32-NEXT: beqzc $3, $BB18_1 ++; MM32-NEXT: # %bb.2: # %entry ++; MM32-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $2, %hi(_gp_disp) ++; O1-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O1-NEXT: addu $1, $2, $25 ++; O1-NEXT: lw $1, %got(x)($1) ++; O1-NEXT: addiu $1, $1, 1024 ++; O1-NEXT: $BB18_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: ll $2, 0($1) ++; O1-NEXT: addu $3, $2, $4 ++; O1-NEXT: sc $3, 0($1) ++; O1-NEXT: beqz $3, $BB18_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $2, %hi(_gp_disp) ++; O2-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O2-NEXT: addu $1, $2, $25 ++; O2-NEXT: lw $1, %got(x)($1) ++; O2-NEXT: addiu $1, $1, 1024 ++; O2-NEXT: $BB18_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: ll $2, 0($1) ++; O2-NEXT: addu $3, $2, $4 ++; O2-NEXT: sc $3, 0($1) ++; O2-NEXT: beqz $3, $BB18_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $2, %hi(_gp_disp) ++; O3-NEXT: addiu $2, $2, %lo(_gp_disp) ++; O3-NEXT: addu $1, $2, $25 ++; O3-NEXT: lw $1, %got(x)($1) ++; O3-NEXT: addiu $1, $1, 1024 ++; O3-NEXT: $BB18_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: ll $2, 0($1) ++; O3-NEXT: addu $3, $2, $4 ++; O3-NEXT: sc $3, 0($1) ++; O3-NEXT: beqz $3, $BB18_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS32EB-LABEL: AtomicLoadAdd32_OffGt9Bit: ++; MIPS32EB: # %bb.0: # %entry ++; MIPS32EB-NEXT: lui $2, %hi(_gp_disp) ++; MIPS32EB-NEXT: addiu $2, $2, %lo(_gp_disp) ++; MIPS32EB-NEXT: addu $1, $2, $25 ++; MIPS32EB-NEXT: lw $1, %got(x)($1) ++; MIPS32EB-NEXT: addiu $1, $1, 1024 ++; MIPS32EB-NEXT: $BB18_1: # %entry ++; MIPS32EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS32EB-NEXT: ll $2, 0($1) ++; MIPS32EB-NEXT: addu $3, $2, $4 ++; MIPS32EB-NEXT: sc $3, 0($1) ++; MIPS32EB-NEXT: beqz $3, $BB18_1 ++; MIPS32EB-NEXT: nop ++; MIPS32EB-NEXT: # %bb.2: # %entry ++; MIPS32EB-NEXT: jr $ra ++; MIPS32EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i32* getelementptr(i32, i32* @x, i32 256), i32 %incr monotonic ++ ret i32 %0 ++ ++} +diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll +index c8b67eda1..4005ea17e 100644 +--- a/llvm/test/CodeGen/Mips/atomic.ll ++++ b/llvm/test/CodeGen/Mips/atomic.ll +@@ -1,35 +1,35 @@ + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc -mtriple=mipsel-unknown-linux-gnu 
--disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MIPS32
+-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MIPS32O0
+-; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MIPS32R2
+-; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MIPS32R6
+-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O0 --disable-machine-licm -mcpu=mips32r6 -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MIPS32R6O0
+-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MIPS4
+-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MIPS64
+-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MIPS64R2
+-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MIPS64R6
+-; RUN: llc -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MIPS64R6O0
+-; RUN: llc -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu --disable-machine-licm -mcpu=mips32r2 -mattr=micromips -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MM32
+
+ ; We want to verify the produced code is well formed all optimization levels, the rest of the tests which ensure correctness.
+-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1
+-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2
+-; RUN: llc -mtriple=mipsel-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mipsel-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3
+
+ ; Keep one big-endian check so that we don't reduce testing, but don't add more
+ ; since endianness doesn't affect the body of the atomic operations.
+-; RUN: llc -mtriple=mips-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips-unknown-linux-gnu --disable-machine-licm -mcpu=mips32 -relocation-model=pic -verify-machineinstrs < %s | \
+ ; RUN: FileCheck %s -check-prefix=MIPS32EB
+
+ @x = common global i32 0, align 4
+diff --git a/llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll b/llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll
+new file mode 100644
+index 000000000..48c202940
+--- /dev/null
++++ b/llvm/test/CodeGen/Mips/atomic64-fix-loongson3-llsc.ll
+@@ -0,0 +1,1377 @@
++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: FileCheck %s -check-prefix=MIPS4
++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: FileCheck %s -check-prefix=MIPS64
++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: FileCheck %s -check-prefix=MIPS64R2
++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: FileCheck %s -check-prefix=MIPS64R6
++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \
++; RUN: FileCheck %s -check-prefix=MIPS64R6O0
++
++; We want to verify the produced code is well formed at all optimization levels; the rest of the tests ensure
correctness. ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64el-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 ++ ++; Keep one big-endian check so that we don't reduce testing, but don't add more ++; since endianness doesn't affect the body of the atomic operations. ++; RUN: llc -mips-fix-loongson3-llsc=1 -mtriple=mips64-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: FileCheck %s -check-prefix=MIPS64EB ++ ++@x = common global i64 0, align 4 ++ ++define i64 @AtomicLoadAdd(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadAdd: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB0_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: daddu $3, $2, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB0_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAdd: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB0_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: daddu $3, $2, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB0_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAdd: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB0_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: daddu $3, $2, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB0_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAdd: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB0_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: daddu $3, $2, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB0_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAdd: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, 
%hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB0_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: daddu $1, $2, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAdd: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB0_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: daddu $3, $2, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB0_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAdd: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB0_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: daddu $3, $2, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB0_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAdd: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB0_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: daddu $3, $2, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB0_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadAdd: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB0_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: daddu $3, $2, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB0_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw add i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicLoadSub(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadSub: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB1_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: dsubu $3, $2, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB1_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadSub: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; 
MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB1_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: dsubu $3, $2, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB1_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadSub: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB1_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: dsubu $3, $2, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB1_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadSub: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB1_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: dsubu $3, $2, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB1_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadSub: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB1_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: dsubu $1, $2, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadSub: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB1_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: dsubu $3, $2, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB1_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadSub: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB1_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: dsubu $3, $2, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB1_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadSub: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) 
++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB1_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: dsubu $3, $2, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB1_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadSub: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB1_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: dsubu $3, $2, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB1_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw sub i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicLoadAnd(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadAnd: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB2_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: and $3, $2, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB2_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadAnd: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB2_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: and $3, $2, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB2_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadAnd: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB2_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: and $3, $2, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB2_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadAnd: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB2_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: and $3, $2, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB2_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadAnd: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, 
%hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB2_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: and $1, $2, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadAnd: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB2_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: and $3, $2, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB2_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadAnd: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB2_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: and $3, $2, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB2_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadAnd: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB2_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: and $3, $2, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB2_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadAnd: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB2_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: and $3, $2, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB2_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw and i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicLoadOr(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadOr: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB3_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: or $3, $2, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB3_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadOr: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64-NEXT: daddu $1, 
$1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB3_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: or $3, $2, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB3_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadOr: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB3_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: or $3, $2, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB3_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadOr: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB3_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: or $3, $2, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB3_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadOr: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB3_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: or $1, $2, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadOr: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB3_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: or $3, $2, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB3_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadOr: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB3_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: or $3, $2, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB3_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadOr: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB3_1: 
# %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: or $3, $2, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB3_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadOr: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB3_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: or $3, $2, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB3_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw or i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicLoadXor(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadXor: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB4_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: xor $3, $2, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB4_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadXor: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB4_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: xor $3, $2, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB4_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadXor: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB4_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: xor $3, $2, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB4_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadXor: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB4_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: xor $3, $2, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB4_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadXor: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; 
MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB4_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: xor $1, $2, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadXor: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB4_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: xor $3, $2, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB4_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadXor: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB4_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: xor $3, $2, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB4_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadXor: ++; O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB4_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: xor $3, $2, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB4_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadXor: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB4_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: xor $3, $2, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB4_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw xor i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicLoadNand(i64 signext %incr) nounwind { ++; MIPS4-LABEL: AtomicLoadNand: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB5_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: and $3, $2, $4 ++; MIPS4-NEXT: nor $3, $zero, $3 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB5_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: nop ++; ++; MIPS64-LABEL: AtomicLoadNand: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: 
daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB5_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: and $3, $2, $4 ++; MIPS64-NEXT: nor $3, $zero, $3 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB5_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: nop ++; ++; MIPS64R2-LABEL: AtomicLoadNand: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB5_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: and $3, $2, $4 ++; MIPS64R2-NEXT: nor $3, $zero, $3 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB5_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: nop ++; ++; MIPS64R6-LABEL: AtomicLoadNand: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB5_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: and $3, $2, $4 ++; MIPS64R6-NEXT: nor $3, $zero, $3 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB5_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jrc $ra ++; ++; MIPS64R6O0-LABEL: AtomicLoadNand: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB5_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: and $1, $2, $4 ++; MIPS64R6O0-NEXT: nor $1, $zero, $1 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1 ++; MIPS64R6O0-NEXT: nop ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicLoadNand: ++; O1: # %bb.0: # %entry ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB5_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: and $3, $2, $4 ++; O1-NEXT: nor $3, $zero, $3 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB5_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: nop ++; ++; O2-LABEL: AtomicLoadNand: ++; O2: # %bb.0: # %entry ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB5_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: and $3, $2, $4 ++; O2-NEXT: nor $3, $zero, $3 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB5_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: nop ++; ++; O3-LABEL: AtomicLoadNand: ++; 
O3: # %bb.0: # %entry ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB5_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: and $3, $2, $4 ++; O3-NEXT: nor $3, $zero, $3 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB5_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: nop ++; ++; MIPS64EB-LABEL: AtomicLoadNand: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB5_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: and $3, $2, $4 ++; MIPS64EB-NEXT: nor $3, $zero, $3 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB5_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: nop ++entry: ++ %0 = atomicrmw nand i64* @x, i64 %incr monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicSwap64(i64 signext %newval) nounwind { ++; MIPS4-LABEL: AtomicSwap64: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: daddiu $sp, $sp, -16 ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS4-NEXT: sd $4, 8($sp) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB6_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: move $3, $4 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB6_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64-LABEL: AtomicSwap64: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: daddiu $sp, $sp, -16 ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64-NEXT: sd $4, 8($sp) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB6_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: move $3, $4 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB6_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R2-LABEL: AtomicSwap64: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R2-NEXT: sd $4, 8($sp) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB6_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: move $3, $4 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB6_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6-LABEL: AtomicSwap64: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: 
daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R6-NEXT: sd $4, 8($sp) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB6_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: move $3, $4 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB6_1 ++; MIPS64R6-NEXT: nop ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; MIPS64R6-NEXT: jr $ra ++; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6O0-LABEL: AtomicSwap64: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64R6O0-NEXT: sd $4, 8($sp) ++; MIPS64R6O0-NEXT: ld $4, 8($sp) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB6_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: move $1, $4 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicSwap64: ++; O1: # %bb.0: # %entry ++; O1-NEXT: daddiu $sp, $sp, -16 ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; O1-NEXT: sd $4, 8($sp) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB6_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: move $3, $4 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB6_1 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: jr $ra ++; O1-NEXT: daddiu $sp, $sp, 16 ++; ++; O2-LABEL: AtomicSwap64: ++; O2: # %bb.0: # %entry ++; O2-NEXT: daddiu $sp, $sp, -16 ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; O2-NEXT: sd $4, 8($sp) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB6_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: move $3, $4 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB6_1 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: jr $ra ++; O2-NEXT: daddiu $sp, $sp, 16 ++; ++; O3-LABEL: AtomicSwap64: ++; O3: # %bb.0: # %entry ++; O3-NEXT: daddiu $sp, $sp, -16 ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; O3-NEXT: sd $4, 8($sp) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB6_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: move $3, $4 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB6_1 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: jr $ra ++; O3-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64EB-LABEL: AtomicSwap64: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: daddiu $sp, $sp, -16 ++; MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ++; MIPS64EB-NEXT: sd $4, 8($sp) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB6_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: move $3, $4 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; 
MIPS64EB-NEXT: beqz $3, .LBB6_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: daddiu $sp, $sp, 16 ++entry: ++ %newval.addr = alloca i64, align 4 ++ store i64 %newval, i64* %newval.addr, align 4 ++ %tmp = load i64, i64* %newval.addr, align 4 ++ %0 = atomicrmw xchg i64* @x, i64 %tmp monotonic ++ ret i64 %0 ++ ++} ++ ++define i64 @AtomicCmpSwap64(i64 signext %oldval, i64 signext %newval) nounwind { ++; MIPS4-LABEL: AtomicCmpSwap64: ++; MIPS4: # %bb.0: # %entry ++; MIPS4-NEXT: daddiu $sp, $sp, -16 ++; MIPS4-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS4-NEXT: daddu $1, $1, $25 ++; MIPS4-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS4-NEXT: sd $5, 8($sp) ++; MIPS4-NEXT: ld $1, %got_disp(x)($1) ++; MIPS4-NEXT: .LBB7_1: # %entry ++; MIPS4-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS4-NEXT: lld $2, 0($1) ++; MIPS4-NEXT: bne $2, $4, .LBB7_3 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: # %bb.2: # %entry ++; MIPS4-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS4-NEXT: move $3, $5 ++; MIPS4-NEXT: scd $3, 0($1) ++; MIPS4-NEXT: beqz $3, .LBB7_1 ++; MIPS4-NEXT: nop ++; MIPS4-NEXT: .LBB7_3: # %entry ++; MIPS4-NEXT: sync ++; MIPS4-NEXT: jr $ra ++; MIPS4-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64-LABEL: AtomicCmpSwap64: ++; MIPS64: # %bb.0: # %entry ++; MIPS64-NEXT: daddiu $sp, $sp, -16 ++; MIPS64-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64-NEXT: daddu $1, $1, $25 ++; MIPS64-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64-NEXT: sd $5, 8($sp) ++; MIPS64-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64-NEXT: .LBB7_1: # %entry ++; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64-NEXT: lld $2, 0($1) ++; MIPS64-NEXT: bne $2, $4, .LBB7_3 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: # %bb.2: # %entry ++; MIPS64-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64-NEXT: move $3, $5 ++; MIPS64-NEXT: scd $3, 0($1) ++; MIPS64-NEXT: beqz $3, .LBB7_1 ++; MIPS64-NEXT: nop ++; MIPS64-NEXT: .LBB7_3: # %entry ++; MIPS64-NEXT: sync ++; MIPS64-NEXT: jr $ra ++; MIPS64-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R2-LABEL: AtomicCmpSwap64: ++; MIPS64R2: # %bb.0: # %entry ++; MIPS64R2-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R2-NEXT: daddu $1, $1, $25 ++; MIPS64R2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R2-NEXT: sd $5, 8($sp) ++; MIPS64R2-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R2-NEXT: .LBB7_1: # %entry ++; MIPS64R2-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R2-NEXT: lld $2, 0($1) ++; MIPS64R2-NEXT: bne $2, $4, .LBB7_3 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: # %bb.2: # %entry ++; MIPS64R2-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R2-NEXT: move $3, $5 ++; MIPS64R2-NEXT: scd $3, 0($1) ++; MIPS64R2-NEXT: beqz $3, .LBB7_1 ++; MIPS64R2-NEXT: nop ++; MIPS64R2-NEXT: .LBB7_3: # %entry ++; MIPS64R2-NEXT: sync ++; MIPS64R2-NEXT: jr $ra ++; MIPS64R2-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6-LABEL: AtomicCmpSwap64: ++; MIPS64R6: # %bb.0: # %entry ++; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R6-NEXT: daddu $1, $1, $25 ++; MIPS64R6-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R6-NEXT: sd $5, 8($sp) ++; MIPS64R6-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64R6-NEXT: .LBB7_1: # %entry ++; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6-NEXT: lld $2, 0($1) ++; MIPS64R6-NEXT: bnec $2, $4, .LBB7_3 ++; MIPS64R6-NEXT: # %bb.2: # %entry ++; 
MIPS64R6-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R6-NEXT: move $3, $5 ++; MIPS64R6-NEXT: scd $3, 0($1) ++; MIPS64R6-NEXT: beqzc $3, .LBB7_1 ++; MIPS64R6-NEXT: .LBB7_3: # %entry ++; MIPS64R6-NEXT: sync ++; MIPS64R6-NEXT: jr $ra ++; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64R6O0-LABEL: AtomicCmpSwap64: ++; MIPS64R6O0: # %bb.0: # %entry ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ++; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R6O0-NEXT: daddu $1, $1, $25 ++; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64R6O0-NEXT: sd $5, 8($sp) ++; MIPS64R6O0-NEXT: ld $5, 8($sp) ++; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ++; MIPS64R6O0-NEXT: .LBB7_1: # %entry ++; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64R6O0-NEXT: lld $2, 0($3) ++; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 ++; MIPS64R6O0-NEXT: # %bb.2: # %entry ++; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64R6O0-NEXT: move $1, $5 ++; MIPS64R6O0-NEXT: scd $1, 0($3) ++; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 ++; MIPS64R6O0-NEXT: .LBB7_3: # %entry ++; MIPS64R6O0-NEXT: sync ++; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ++; MIPS64R6O0-NEXT: jrc $ra ++; ++; O1-LABEL: AtomicCmpSwap64: ++; O1: # %bb.0: # %entry ++; O1-NEXT: daddiu $sp, $sp, -16 ++; O1-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; O1-NEXT: daddu $1, $1, $25 ++; O1-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; O1-NEXT: sd $5, 8($sp) ++; O1-NEXT: ld $1, %got_disp(x)($1) ++; O1-NEXT: .LBB7_1: # %entry ++; O1-NEXT: # =>This Inner Loop Header: Depth=1 ++; O1-NEXT: lld $2, 0($1) ++; O1-NEXT: bne $2, $4, .LBB7_3 ++; O1-NEXT: nop ++; O1-NEXT: # %bb.2: # %entry ++; O1-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O1-NEXT: move $3, $5 ++; O1-NEXT: scd $3, 0($1) ++; O1-NEXT: beqz $3, .LBB7_1 ++; O1-NEXT: nop ++; O1-NEXT: .LBB7_3: # %entry ++; O1-NEXT: sync ++; O1-NEXT: jr $ra ++; O1-NEXT: daddiu $sp, $sp, 16 ++; ++; O2-LABEL: AtomicCmpSwap64: ++; O2: # %bb.0: # %entry ++; O2-NEXT: daddiu $sp, $sp, -16 ++; O2-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; O2-NEXT: daddu $1, $1, $25 ++; O2-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; O2-NEXT: sd $5, 8($sp) ++; O2-NEXT: ld $1, %got_disp(x)($1) ++; O2-NEXT: .LBB7_1: # %entry ++; O2-NEXT: # =>This Inner Loop Header: Depth=1 ++; O2-NEXT: lld $2, 0($1) ++; O2-NEXT: bne $2, $4, .LBB7_3 ++; O2-NEXT: nop ++; O2-NEXT: # %bb.2: # %entry ++; O2-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O2-NEXT: move $3, $5 ++; O2-NEXT: scd $3, 0($1) ++; O2-NEXT: beqz $3, .LBB7_1 ++; O2-NEXT: nop ++; O2-NEXT: .LBB7_3: # %entry ++; O2-NEXT: sync ++; O2-NEXT: jr $ra ++; O2-NEXT: daddiu $sp, $sp, 16 ++; ++; O3-LABEL: AtomicCmpSwap64: ++; O3: # %bb.0: # %entry ++; O3-NEXT: daddiu $sp, $sp, -16 ++; O3-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; O3-NEXT: sd $5, 8($sp) ++; O3-NEXT: daddu $1, $1, $25 ++; O3-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; O3-NEXT: ld $1, %got_disp(x)($1) ++; O3-NEXT: .LBB7_1: # %entry ++; O3-NEXT: # =>This Inner Loop Header: Depth=1 ++; O3-NEXT: lld $2, 0($1) ++; O3-NEXT: bne $2, $4, .LBB7_3 ++; O3-NEXT: nop ++; O3-NEXT: # %bb.2: # %entry ++; O3-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; O3-NEXT: move $3, $5 ++; O3-NEXT: scd $3, 0($1) ++; O3-NEXT: beqz $3, .LBB7_1 ++; O3-NEXT: nop ++; O3-NEXT: .LBB7_3: # %entry ++; O3-NEXT: sync ++; O3-NEXT: jr $ra ++; O3-NEXT: daddiu $sp, $sp, 16 ++; ++; MIPS64EB-LABEL: AtomicCmpSwap64: ++; MIPS64EB: # %bb.0: # %entry ++; MIPS64EB-NEXT: daddiu $sp, $sp, -16 ++; 
MIPS64EB-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64EB-NEXT: daddu $1, $1, $25 ++; MIPS64EB-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ++; MIPS64EB-NEXT: sd $5, 8($sp) ++; MIPS64EB-NEXT: ld $1, %got_disp(x)($1) ++; MIPS64EB-NEXT: .LBB7_1: # %entry ++; MIPS64EB-NEXT: # =>This Inner Loop Header: Depth=1 ++; MIPS64EB-NEXT: lld $2, 0($1) ++; MIPS64EB-NEXT: bne $2, $4, .LBB7_3 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: # %bb.2: # %entry ++; MIPS64EB-NEXT: # in Loop: Header=BB7_1 Depth=1 ++; MIPS64EB-NEXT: move $3, $5 ++; MIPS64EB-NEXT: scd $3, 0($1) ++; MIPS64EB-NEXT: beqz $3, .LBB7_1 ++; MIPS64EB-NEXT: nop ++; MIPS64EB-NEXT: .LBB7_3: # %entry ++; MIPS64EB-NEXT: sync ++; MIPS64EB-NEXT: jr $ra ++; MIPS64EB-NEXT: daddiu $sp, $sp, 16 ++entry: ++ %newval.addr = alloca i64, align 4 ++ store i64 %newval, i64* %newval.addr, align 4 ++ %tmp = load i64, i64* %newval.addr, align 4 ++ %0 = cmpxchg i64* @x, i64 %oldval, i64 %tmp monotonic monotonic ++ %1 = extractvalue { i64, i1 } %0, 0 ++ ret i64 %1 ++ ++} +diff --git a/llvm/test/CodeGen/Mips/atomic64.ll b/llvm/test/CodeGen/Mips/atomic64.ll +index d27c9ac42..6e647211d 100644 +--- a/llvm/test/CodeGen/Mips/atomic64.ll ++++ b/llvm/test/CodeGen/Mips/atomic64.ll +@@ -1,23 +1,23 @@ + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips4 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS4 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r2 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64R2 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu --disable-machine-licm -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64R6 +-; RUN: llc -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64-unknown-linux-gnu -O0 -mcpu=mips64r6 -relocation-model=pic -verify-machineinstrs -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64R6O0 + + ; We want to verify the produced code is well formed all optimization levels, the rest of the test which ensure correctness. 
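The RUN-line updates above and below pin -mips-fix-loongson3-llsc=0. That flag appears to be introduced elsewhere in this patch series; forcing it off keeps the autogenerated check bodies stable, since an active Loongson-3 LL/SC workaround would insert extra barriers into the very loops being matched. For reference, a minimal stand-alone sketch of the IR these tests exercise; it is illustrative only, not part of the patch, and the file name reduced.ll, the symbol @g and the function names are invented:

  ; llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu -mcpu=mips64 -relocation-model=pic reduced.ll -o -
  @g = common global i64 0, align 8

  define i64 @fetch_or(i64 signext %v) nounwind {
  entry:
    ; Lowers to an lld/or/scd retry loop: reload and retry until scd succeeds.
    %old = atomicrmw or i64* @g, i64 %v monotonic
    ret i64 %old
  }

  define i64 @cas(i64 signext %oldval, i64 signext %newval) nounwind {
  entry:
    ; Lowers to lld plus a bne-guarded scd, converging on a sync barrier,
    ; as in the AtomicCmpSwap64 checks above.
    %pair = cmpxchg i64* @g, i64 %oldval, i64 %newval monotonic monotonic
    %res = extractvalue { i64, i1 } %pair, 0
    ret i64 %res
  }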
+-; RUN: llc -mtriple=mips64el-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 +-; RUN: llc -mtriple=mips64el-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu -O1 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O1 ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu -O2 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O2 ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64el-unknown-linux-gnu -O3 --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s --check-prefix=O3 + + ; Keep one big-endian check so that we don't reduce testing, but don't add more + ; since endianness doesn't affect the body of the atomic operations. +-; RUN: llc -mtriple=mips64-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ ++; RUN: llc -mips-fix-loongson3-llsc=0 -mtriple=mips64-unknown-linux-gnu --disable-machine-licm -mcpu=mips64 -relocation-model=pic -verify-machineinstrs < %s | \ + ; RUN: FileCheck %s -check-prefix=MIPS64EB + + @x = common global i64 0, align 4 +diff --git a/llvm/test/MC/LoongArch/cgprofile.ll b/llvm/test/MC/LoongArch/cgprofile.ll +new file mode 100644 +index 000000000..686dd6a0a +--- /dev/null ++++ b/llvm/test/MC/LoongArch/cgprofile.ll +@@ -0,0 +1,63 @@ ++;; Copied from llvm/test/MC/ELF/cgprofile.ll but use different triple. 
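Before the test body, a note on its OBJ expectations: in the object layout checked here, .llvm.call-graph-profile stores only the 64-bit edge weights, one every 8 bytes, and the from/to symbols are attached as paired no-op relocations at each entry's offset. That is why the three surviving edges produce relocations at 0x0, 0x8 and 0x10 with two R_LARCH_NONE apiece, and why the !5 edge with a null callee yields no entry at all. A sketch of the layout implied by the checks below (offsets taken from those checks, not measured independently):

  # .llvm.call-graph-profile, sh_entsize = 8, weights only:
  #   0x00: 32  <- R_LARCH_NONE a,    R_LARCH_NONE b
  #   0x08: 11  <- R_LARCH_NONE freq, R_LARCH_NONE a
  #   0x10: 20  <- R_LARCH_NONE freq, R_LARCH_NONE b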
++ ++; RUN: llc -filetype=asm %s -o - --mtriple=loongarch64-linux-linux-gnu | FileCheck %s ++; RUN: llc -filetype=obj %s -o %t --mtriple=loongarch64-linux-linux-gnu ++; RUN: llvm-readobj -r --cg-profile %t | FileCheck %s --check-prefix=OBJ ++ ++declare void @b() ++ ++define void @a() { ++ call void @b() ++ ret void ++} ++ ++define void @freq(i1 %cond) { ++ br i1 %cond, label %A, label %B ++A: ++ call void @a(); ++ ret void ++B: ++ call void @b(); ++ ret void ++} ++ ++!llvm.module.flags = !{!0} ++ ++!0 = !{i32 5, !"CG Profile", !1} ++!1 = !{!2, !3, !4, !5} ++!2 = !{void ()* @a, void ()* @b, i64 32} ++!3 = !{void (i1)* @freq, void ()* @a, i64 11} ++!4 = !{void (i1)* @freq, void ()* @b, i64 20} ++!5 = !{void (i1)* @freq, null, i64 20} ++ ++; CHECK: .cg_profile a, b, 32 ++; CHECK: .cg_profile freq, a, 11 ++; CHECK: .cg_profile freq, b, 20 ++ ++; OBJ: Relocations [ ++; OBJ: Section ({{.*}}) .rel.llvm.call-graph-profile { ++; OBJ-NEXT: 0x0 R_LARCH_NONE a ++; OBJ-NEXT: 0x0 R_LARCH_NONE b ++; OBJ-NEXT: 0x8 R_LARCH_NONE freq ++; OBJ-NEXT: 0x8 R_LARCH_NONE a ++; OBJ-NEXT: 0x10 R_LARCH_NONE freq ++; OBJ-NEXT: 0x10 R_LARCH_NONE b ++; OBJ-NEXT: } ++ ++; OBJ: CGProfile [ ++; OBJ: CGProfileEntry { ++; OBJ: From: a ++; OBJ: To: b ++; OBJ: Weight: 32 ++; OBJ: } ++; OBJ: CGProfileEntry { ++; OBJ: From: freq ++; OBJ: To: a ++; OBJ: Weight: 11 ++; OBJ: } ++; OBJ: CGProfileEntry { ++; OBJ: From: freq ++; OBJ: To: b ++; OBJ: Weight: 20 ++; OBJ: } ++; OBJ:] +diff --git a/llvm/test/MC/LoongArch/cgprofile.s b/llvm/test/MC/LoongArch/cgprofile.s +new file mode 100644 +index 000000000..53f59e5d3 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/cgprofile.s +@@ -0,0 +1,30 @@ ++## Copied from llvm/test/MC/ELF/cgprofile.s but use different triple. ++ ++# RUN: llvm-mc --filetype=obj --triple=loongarch64-linux-gnu %s -o - | llvm-readobj -r -S --symbols --sd --cg-profile - | FileCheck %s ++ ++ .section .test,"aw",@progbits ++a: .word b ++ ++ .cg_profile a, b, 32 ++ .cg_profile freq, a, 11 ++ .cg_profile late, late2, 20 ++ .cg_profile .L.local, b, 42 ++ ++ .globl late ++late: ++late2: .word 0 ++late3: ++.L.local: ++ ++# CHECK: Relocations [ ++# CHECK: Section ({{.*}}) .rel.llvm.call-graph-profile { ++# CHECK-NEXT: 0x0 R_LARCH_NONE a ++# CHECK-NEXT: 0x0 R_LARCH_NONE b ++# CHECK-NEXT: 0x8 R_LARCH_NONE freq ++# CHECK-NEXT: 0x8 R_LARCH_NONE a ++# CHECK-NEXT: 0x10 R_LARCH_NONE late ++# CHECK-NEXT: 0x10 R_LARCH_NONE late2 ++# CHECK-NEXT: 0x18 R_LARCH_NONE .test ++# CHECK-NEXT: 0x18 R_LARCH_NONE b ++# CHECK-NEXT: } ++# CHECK-NEXT: ] +diff --git a/llvm/test/MC/LoongArch/data_half.s b/llvm/test/MC/LoongArch/data_half.s +new file mode 100644 +index 000000000..a8efeaace +--- /dev/null ++++ b/llvm/test/MC/LoongArch/data_half.s +@@ -0,0 +1,13 @@ ++# RUN: llvm-mc --triple=loongarch64 < %s | FileCheck %s ++ ++.data ++ ++# CHECK: .half 1 ++# CHECK-NEXT: .half 65535 ++.half 0x1 ++.half 0xffff ++ ++# CHECK: .half 1 ++# CHECK-NEXT: .half 65535 ++.2byte 0x1 ++.2byte 0xffff +diff --git a/llvm/test/MC/LoongArch/reloc-directive-err.s b/llvm/test/MC/LoongArch/reloc-directive-err.s +new file mode 100644 +index 000000000..60fd14556 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/reloc-directive-err.s +@@ -0,0 +1,7 @@ ++# RUN: llvm-mc --triple=loongarch64 %s | FileCheck --check-prefix=PRINT %s ++# RUN: not llvm-mc --filetype=obj --triple=loongarch64 %s -o /dev/null 2>&1 \ ++# RUN: | FileCheck %s ++ ++# PRINT: .reloc 0, R_INVALID, 0 ++# CHECK: {{.*}}.s:[[# @LINE+1]]:11: error: unknown relocation name ++.reloc 0, R_INVALID, 0 +diff --git 
a/llvm/test/MC/LoongArch/reloc-directive.s b/llvm/test/MC/LoongArch/reloc-directive.s +new file mode 100644 +index 000000000..282da7f28 +--- /dev/null ++++ b/llvm/test/MC/LoongArch/reloc-directive.s +@@ -0,0 +1,177 @@ ++# RUN: llvm-mc --triple=loongarch64 %s | FileCheck --check-prefix=PRINT %s ++# RUN: llvm-mc --filetype=obj --triple=loongarch64 %s \ ++# RUN: | llvm-readobj -r - | FileCheck %s ++ ++# PRINT: .reloc 0, R_LARCH_NONE, 0 ++# PRINT-NEXT: .reloc 1, R_LARCH_32, 1 ++# PRINT-NEXT: .reloc 2, R_LARCH_64, 2 ++# PRINT-NEXT: .reloc 3, R_LARCH_RELATIVE, 3 ++# PRINT-NEXT: .reloc 4, R_LARCH_COPY, 4 ++# PRINT-NEXT: .reloc 5, R_LARCH_JUMP_SLOT, 5 ++# PRINT-NEXT: .reloc 6, R_LARCH_TLS_DTPMOD32, 6 ++# PRINT-NEXT: .reloc 7, R_LARCH_TLS_DTPMOD64, 7 ++# PRINT-NEXT: .reloc 8, R_LARCH_TLS_DTPREL32, 8 ++# PRINT-NEXT: .reloc 9, R_LARCH_TLS_DTPREL64, 9 ++# PRINT-NEXT: .reloc 10, R_LARCH_TLS_TPREL32, 10 ++# PRINT-NEXT: .reloc 11, R_LARCH_TLS_TPREL64, 11 ++# PRINT-NEXT: .reloc 12, R_LARCH_IRELATIVE, 12 ++# PRINT-NEXT: .reloc 13, BFD_RELOC_NONE, 13 ++# PRINT-NEXT: .reloc 14, BFD_RELOC_32, 14 ++# PRINT-NEXT: .reloc 15, BFD_RELOC_64, 15 ++# PRINT-NEXT: .reloc 20, R_LARCH_MARK_LA, 20 ++# PRINT-NEXT: .reloc 21, R_LARCH_MARK_PCREL, 21 ++# PRINT-NEXT: .reloc 22, R_LARCH_SOP_PUSH_PCREL, 22 ++# PRINT-NEXT: .reloc 23, R_LARCH_SOP_PUSH_ABSOLUTE, 23 ++# PRINT-NEXT: .reloc 24, R_LARCH_SOP_PUSH_DUP, 24 ++# PRINT-NEXT: .reloc 25, R_LARCH_SOP_PUSH_GPREL, 25 ++# PRINT-NEXT: .reloc 26, R_LARCH_SOP_PUSH_TLS_TPREL, 26 ++# PRINT-NEXT: .reloc 27, R_LARCH_SOP_PUSH_TLS_GOT, 27 ++# PRINT-NEXT: .reloc 28, R_LARCH_SOP_PUSH_TLS_GD, 28 ++# PRINT-NEXT: .reloc 29, R_LARCH_SOP_PUSH_PLT_PCREL, 29 ++# PRINT-NEXT: .reloc 30, R_LARCH_SOP_ASSERT, 30 ++# PRINT-NEXT: .reloc 31, R_LARCH_SOP_NOT, 31 ++# PRINT-NEXT: .reloc 32, R_LARCH_SOP_SUB, 32 ++# PRINT-NEXT: .reloc 33, R_LARCH_SOP_SL, 33 ++# PRINT-NEXT: .reloc 34, R_LARCH_SOP_SR, 34 ++# PRINT-NEXT: .reloc 35, R_LARCH_SOP_ADD, 35 ++# PRINT-NEXT: .reloc 36, R_LARCH_SOP_AND, 36 ++# PRINT-NEXT: .reloc 37, R_LARCH_SOP_IF_ELSE, 37 ++# PRINT-NEXT: .reloc 38, R_LARCH_SOP_POP_32_S_10_5, 38 ++# PRINT-NEXT: .reloc 39, R_LARCH_SOP_POP_32_U_10_12, 39 ++# PRINT-NEXT: .reloc 40, R_LARCH_SOP_POP_32_S_10_12, 40 ++# PRINT-NEXT: .reloc 41, R_LARCH_SOP_POP_32_S_10_16, 41 ++# PRINT-NEXT: .reloc 42, R_LARCH_SOP_POP_32_S_10_16_S2, 42 ++# PRINT-NEXT: .reloc 43, R_LARCH_SOP_POP_32_S_5_20, 43 ++# PRINT-NEXT: .reloc 44, R_LARCH_SOP_POP_32_S_0_5_10_16_S2, 44 ++# PRINT-NEXT: .reloc 45, R_LARCH_SOP_POP_32_S_0_10_10_16_S2, 45 ++# PRINT-NEXT: .reloc 46, R_LARCH_SOP_POP_32_U, 46 ++# PRINT-NEXT: .reloc 47, R_LARCH_ADD8, 47 ++# PRINT-NEXT: .reloc 48, R_LARCH_ADD16, 48 ++# PRINT-NEXT: .reloc 49, R_LARCH_ADD24, 49 ++# PRINT-NEXT: .reloc 50, R_LARCH_ADD32, 50 ++# PRINT-NEXT: .reloc 51, R_LARCH_ADD64, 51 ++# PRINT-NEXT: .reloc 52, R_LARCH_SUB8, 52 ++# PRINT-NEXT: .reloc 53, R_LARCH_SUB16, 53 ++# PRINT-NEXT: .reloc 54, R_LARCH_SUB24, 54 ++# PRINT-NEXT: .reloc 55, R_LARCH_SUB32, 55 ++# PRINT-NEXT: .reloc 56, R_LARCH_SUB64, 56 ++# PRINT-NEXT: .reloc 57, R_LARCH_GNU_VTINHERIT, 57 ++# PRINT-NEXT: .reloc 58, R_LARCH_GNU_VTENTRY, 58 ++ ++.text ++ .fill 59, 1, 0x0 ++ .reloc 0, R_LARCH_NONE, 0 ++ .reloc 1, R_LARCH_32, 1 ++ .reloc 2, R_LARCH_64, 2 ++ .reloc 3, R_LARCH_RELATIVE, 3 ++ .reloc 4, R_LARCH_COPY, 4 ++ .reloc 5, R_LARCH_JUMP_SLOT, 5 ++ .reloc 6, R_LARCH_TLS_DTPMOD32, 6 ++ .reloc 7, R_LARCH_TLS_DTPMOD64, 7 ++ .reloc 8, R_LARCH_TLS_DTPREL32, 8 ++ .reloc 9, R_LARCH_TLS_DTPREL64, 9 ++ .reloc 10, R_LARCH_TLS_TPREL32, 10 ++ .reloc 11, 
R_LARCH_TLS_TPREL64, 11 ++ .reloc 12, R_LARCH_IRELATIVE, 12 ++ .reloc 13, BFD_RELOC_NONE, 13 ++ .reloc 14, BFD_RELOC_32, 14 ++ .reloc 15, BFD_RELOC_64, 15 ++ .reloc 20, R_LARCH_MARK_LA, 20 ++ .reloc 21, R_LARCH_MARK_PCREL, 21 ++ .reloc 22, R_LARCH_SOP_PUSH_PCREL, 22 ++ .reloc 23, R_LARCH_SOP_PUSH_ABSOLUTE, 23 ++ .reloc 24, R_LARCH_SOP_PUSH_DUP, 24 ++ .reloc 25, R_LARCH_SOP_PUSH_GPREL, 25 ++ .reloc 26, R_LARCH_SOP_PUSH_TLS_TPREL, 26 ++ .reloc 27, R_LARCH_SOP_PUSH_TLS_GOT, 27 ++ .reloc 28, R_LARCH_SOP_PUSH_TLS_GD, 28 ++ .reloc 29, R_LARCH_SOP_PUSH_PLT_PCREL, 29 ++ .reloc 30, R_LARCH_SOP_ASSERT, 30 ++ .reloc 31, R_LARCH_SOP_NOT, 31 ++ .reloc 32, R_LARCH_SOP_SUB, 32 ++ .reloc 33, R_LARCH_SOP_SL, 33 ++ .reloc 34, R_LARCH_SOP_SR, 34 ++ .reloc 35, R_LARCH_SOP_ADD, 35 ++ .reloc 36, R_LARCH_SOP_AND, 36 ++ .reloc 37, R_LARCH_SOP_IF_ELSE, 37 ++ .reloc 38, R_LARCH_SOP_POP_32_S_10_5, 38 ++ .reloc 39, R_LARCH_SOP_POP_32_U_10_12, 39 ++ .reloc 40, R_LARCH_SOP_POP_32_S_10_12, 40 ++ .reloc 41, R_LARCH_SOP_POP_32_S_10_16, 41 ++ .reloc 42, R_LARCH_SOP_POP_32_S_10_16_S2, 42 ++ .reloc 43, R_LARCH_SOP_POP_32_S_5_20, 43 ++ .reloc 44, R_LARCH_SOP_POP_32_S_0_5_10_16_S2, 44 ++ .reloc 45, R_LARCH_SOP_POP_32_S_0_10_10_16_S2, 45 ++ .reloc 46, R_LARCH_SOP_POP_32_U, 46 ++ .reloc 47, R_LARCH_ADD8, 47 ++ .reloc 48, R_LARCH_ADD16, 48 ++ .reloc 49, R_LARCH_ADD24, 49 ++ .reloc 50, R_LARCH_ADD32, 50 ++ .reloc 51, R_LARCH_ADD64, 51 ++ .reloc 52, R_LARCH_SUB8, 52 ++ .reloc 53, R_LARCH_SUB16, 53 ++ .reloc 54, R_LARCH_SUB24, 54 ++ .reloc 55, R_LARCH_SUB32, 55 ++ .reloc 56, R_LARCH_SUB64, 56 ++ .reloc 57, R_LARCH_GNU_VTINHERIT, 57 ++ .reloc 58, R_LARCH_GNU_VTENTRY, 58 ++ ++# CHECK: Relocations [ ++# CHECK-NEXT: Section ({{.*}}) .rela.text { ++# CHECK-NEXT: 0x0 R_LARCH_NONE - 0x0 ++# CHECK-NEXT: 0x1 R_LARCH_32 - 0x1 ++# CHECK-NEXT: 0x2 R_LARCH_64 - 0x2 ++# CHECK-NEXT: 0x3 R_LARCH_RELATIVE - 0x3 ++# CHECK-NEXT: 0x4 R_LARCH_COPY - 0x4 ++# CHECK-NEXT: 0x5 R_LARCH_JUMP_SLOT - 0x5 ++# CHECK-NEXT: 0x6 R_LARCH_TLS_DTPMOD32 - 0x6 ++# CHECK-NEXT: 0x7 R_LARCH_TLS_DTPMOD64 - 0x7 ++# CHECK-NEXT: 0x8 R_LARCH_TLS_DTPREL32 - 0x8 ++# CHECK-NEXT: 0x9 R_LARCH_TLS_DTPREL64 - 0x9 ++# CHECK-NEXT: 0xA R_LARCH_TLS_TPREL32 - 0xA ++# CHECK-NEXT: 0xB R_LARCH_TLS_TPREL64 - 0xB ++# CHECK-NEXT: 0xC R_LARCH_IRELATIVE - 0xC ++# CHECK-NEXT: 0xD R_LARCH_NONE - 0xD ++# CHECK-NEXT: 0xE R_LARCH_32 - 0xE ++# CHECK-NEXT: 0xF R_LARCH_64 - 0xF ++# CHECK-NEXT: 0x14 R_LARCH_MARK_LA - 0x14 ++# CHECK-NEXT: 0x15 R_LARCH_MARK_PCREL - 0x15 ++# CHECK-NEXT: 0x16 R_LARCH_SOP_PUSH_PCREL - 0x16 ++# CHECK-NEXT: 0x17 R_LARCH_SOP_PUSH_ABSOLUTE - 0x17 ++# CHECK-NEXT: 0x18 R_LARCH_SOP_PUSH_DUP - 0x18 ++# CHECK-NEXT: 0x19 R_LARCH_SOP_PUSH_GPREL - 0x19 ++# CHECK-NEXT: 0x1A R_LARCH_SOP_PUSH_TLS_TPREL - 0x1A ++# CHECK-NEXT: 0x1B R_LARCH_SOP_PUSH_TLS_GOT - 0x1B ++# CHECK-NEXT: 0x1C R_LARCH_SOP_PUSH_TLS_GD - 0x1C ++# CHECK-NEXT: 0x1D R_LARCH_SOP_PUSH_PLT_PCREL - 0x1D ++# CHECK-NEXT: 0x1E R_LARCH_SOP_ASSERT - 0x1E ++# CHECK-NEXT: 0x1F R_LARCH_SOP_NOT - 0x1F ++# CHECK-NEXT: 0x20 R_LARCH_SOP_SUB - 0x20 ++# CHECK-NEXT: 0x21 R_LARCH_SOP_SL - 0x21 ++# CHECK-NEXT: 0x22 R_LARCH_SOP_SR - 0x22 ++# CHECK-NEXT: 0x23 R_LARCH_SOP_ADD - 0x23 ++# CHECK-NEXT: 0x24 R_LARCH_SOP_AND - 0x24 ++# CHECK-NEXT: 0x25 R_LARCH_SOP_IF_ELSE - 0x25 ++# CHECK-NEXT: 0x26 R_LARCH_SOP_POP_32_S_10_5 - 0x26 ++# CHECK-NEXT: 0x27 R_LARCH_SOP_POP_32_U_10_12 - 0x27 ++# CHECK-NEXT: 0x28 R_LARCH_SOP_POP_32_S_10_12 - 0x28 ++# CHECK-NEXT: 0x29 R_LARCH_SOP_POP_32_S_10_16 - 0x29 ++# CHECK-NEXT: 0x2A R_LARCH_SOP_POP_32_S_10_16_S2 - 0x2A ++# 
CHECK-NEXT: 0x2B R_LARCH_SOP_POP_32_S_5_20 - 0x2B ++# CHECK-NEXT: 0x2C R_LARCH_SOP_POP_32_S_0_5_10_16_S2 - 0x2C ++# CHECK-NEXT: 0x2D R_LARCH_SOP_POP_32_S_0_10_10_16_S2 - 0x2D ++# CHECK-NEXT: 0x2E R_LARCH_SOP_POP_32_U - 0x2E ++# CHECK-NEXT: 0x2F R_LARCH_ADD8 - 0x2F ++# CHECK-NEXT: 0x30 R_LARCH_ADD16 - 0x30 ++# CHECK-NEXT: 0x31 R_LARCH_ADD24 - 0x31 ++# CHECK-NEXT: 0x32 R_LARCH_ADD32 - 0x32 ++# CHECK-NEXT: 0x33 R_LARCH_ADD64 - 0x33 ++# CHECK-NEXT: 0x34 R_LARCH_SUB8 - 0x34 ++# CHECK-NEXT: 0x35 R_LARCH_SUB16 - 0x35 ++# CHECK-NEXT: 0x36 R_LARCH_SUB24 - 0x36 ++# CHECK-NEXT: 0x37 R_LARCH_SUB32 - 0x37 ++# CHECK-NEXT: 0x38 R_LARCH_SUB64 - 0x38 ++# CHECK-NEXT: 0x39 R_LARCH_GNU_VTINHERIT - 0x39 ++# CHECK-NEXT: 0x3A R_LARCH_GNU_VTENTRY - 0x3A ++# CHECK-NEXT: } ++# CHECK-NEXT: ] +diff --git a/llvm/test/tools/llvm-profgen/lit.local.cfg b/llvm/test/tools/llvm-profgen/lit.local.cfg +index 197150e22..0ca12783a 100644 +--- a/llvm/test/tools/llvm-profgen/lit.local.cfg ++++ b/llvm/test/tools/llvm-profgen/lit.local.cfg +@@ -3,5 +3,5 @@ import lit.util + + config.suffixes = ['.test', '.ll', '.s', '.yaml'] + +-if not 'X86' in config.root.targets: ++if not ('X86' in config.root.targets and 'LoongArch' in config.root.targets): + config.unsupported = True +diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp +index eef5b8eb8..ceac76307 100644 +--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp ++++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp +@@ -196,9 +196,6 @@ void ProfiledBinary::load() { + exitWithError("not a valid Elf image", Path); + + TheTriple = Obj->makeTriple(); +- // Current only support X86 +- if (!TheTriple.isX86()) +- exitWithError("unsupported target", TheTriple.getTriple()); + LLVM_DEBUG(dbgs() << "Loading " << Path << "\n"); + + // Find the preferred load address for text sections. diff --git a/llvm.spec b/llvm.spec index 6aff472..9296f79 100644 --- a/llvm.spec +++ b/llvm.spec @@ -1,4 +1,4 @@ -%define anolis_release .0.3 +%define anolis_release .0.4 # We are building with clang for faster/lower memory LTO builds. # See https://docs.fedoraproject.org/en-US/packaging-guidelines/#_compiler_macros %global toolchain clang @@ -111,6 +111,7 @@ Source6: lit.fedora.cfg.py Patch2: 0001-XFAIL-missing-abstract-variable.ll-test-on-ppc64le.patch Patch3: 0001-Support-LoongArch.patch Patch4: 0002-Add-LoongArch-Support-for-ObjectYAML.patch +Patch5: 0003-LoongArch-support-compiler-rt-and-fix-some-issues.patch # RHEL-specific patches. Patch101: 0001-Deactivate-markdown-doc.patch @@ -602,6 +603,10 @@ fi %endif %changelog +* Thu Dec 07 2023 Chen Li - 15.0.7-1.0.4 +- LoongArch: Release 15.0.7-1.0.4 including compiler-rt support and + other changes + * Thu Aug 17 2023 Chen Li - 15.0.7-1.0.3 - backport: [SelectionDAG] Do not salvage with vector node - [LoongArch] Add support for ObjectYAML -- Gitee
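A closing note on the llvm-profgen hunk above: with the isX86() guard removed from ProfiledBinary::load(), the tool accepts any valid ELF image, so sample profile generation can at least be attempted on LoongArch binaries. A hypothetical invocation, where a.out and perf.script are placeholder inputs and the flag spellings are the upstream tool's documented ones:

  llvm-profgen --binary=a.out --perfscript=perf.script --output=a.prof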